diff --git a/console/web/src/lib/backend/real.ts b/console/web/src/lib/backend/real.ts index 02164d7a..2dc768ec 100644 --- a/console/web/src/lib/backend/real.ts +++ b/console/web/src/lib/backend/real.ts @@ -274,14 +274,10 @@ async function realCompactSession( } if (resp?.status === 'busy') return { status: 'busy' } if (resp?.status === 'overflow') { - // Accepts both `message` and (legacy) `reason` during rollout. - const wire = resp as { message?: unknown; reason?: unknown } const message = - typeof wire.message === 'string' - ? wire.message - : typeof wire.reason === 'string' - ? wire.reason - : 'unknown summariser error' + typeof resp.message === 'string' + ? resp.message + : 'unknown summariser error' return { status: 'overflow', message } } if (resp?.status === 'empty') return surfaceEmpty() diff --git a/harness/README.md b/harness/README.md index 3a8bfcd6..ee050fa5 100644 --- a/harness/README.md +++ b/harness/README.md @@ -13,7 +13,7 @@ alongside `harness` over the iii bus. | Folder | Bus surface | Role | |---|---|---| | `src/harness/` | `ui::subscribe`/`unsubscribe`, `harness::fs::read_inline`, `policy::check_permissions` | Meta-worker; loads `iii-permissions.yaml`; spins up `ui::*` fanout pumps. | -| `src/approval-gate/` | `approval::resolve` | Routes operator decisions to per-call `turn::approval_resume` fns (registered by turn-orchestrator). | +| `src/approval-gate/` | `approval::resolve` | Persists operator decisions to scope `approvals` (turn-orchestrator reacts via `turn::on_approval`). | | `src/turn-orchestrator/` | `run::start`, `turn::{state}`, `turn::get_state` | Durable FSM driving each agent turn; `dispatchWithHook` approval chokepoint. | | `src/session/` | `session-tree::*` (11 fns), `session-inbox::*` (3 fns) | Branching session storage + per-session inbox queues. | | `src/llm-budget/` | `budget::*` (14 fns) | Workspace + agent LLM spend caps. 
| diff --git a/harness/docs/architecture.md b/harness/docs/architecture.md index 16db4fc1..d390ec13 100644 --- a/harness/docs/architecture.md +++ b/harness/docs/architecture.md @@ -1,7 +1,7 @@ # harness architecture `harness` is the Node/TypeScript port of the iii harness stack. It ships -as one pnpm package containing 11 workers (one folder per worker, one feature +as one pnpm package containing 15 workers (one folder per worker, one feature per file) plus a shared `runtime/` SDK helper layer and a `types/` wire-type mirror of `harness/crates/harness-types`. Each worker is independently runnable as `pnpm dev:` (development) or `iii-` (production binary); @@ -20,17 +20,19 @@ workers. |---|---|---|---| | harness | [src/harness/](harness/src/harness/) | Meta-worker; loads `iii-permissions.yaml`, exposes `harness::trigger` (WS ingestion bridge — see [Telemetry & trace correlation](#telemetry--trace-correlation)) / `policy::check_permissions` / `ui::*`, spins up `agent::events` fan-out. | [workers/harness.md](harness/docs/workers/harness.md) | | turn-orchestrator | [src/turn-orchestrator/](harness/src/turn-orchestrator/) | Durable FSM driving each agent turn; `dispatchWithHook` approval chokepoint. | [workers/turn-orchestrator.md](harness/docs/workers/turn-orchestrator.md) | -| approval-gate | [src/approval-gate/](harness/src/approval-gate/) | Registers `approval::resolve` and shared approval wire schemas; routes decisions to per-call `turn::approval_resume` fns owned by the turn-orchestrator. | [workers/approval-gate.md](harness/docs/workers/approval-gate.md) | +| approval-gate | [src/approval-gate/](harness/src/approval-gate/) | Registers `approval::resolve`; persists decisions to scope `approvals`. Wake via `turn::on_approval` state trigger. | [workers/approval-gate.md](harness/docs/workers/approval-gate.md) | | session | [src/session/](harness/src/session/) | Branching session storage (`session-tree::*`) plus per-session inbox queues (`session-inbox::*`). 
| [workers/session.md](harness/docs/workers/session.md) | | llm-budget | [src/llm-budget/](harness/src/llm-budget/) | Workspace + agent LLM spend caps with alerts, forecast, period rollover. | [workers/llm-budget.md](harness/docs/workers/llm-budget.md) | | hook-fanout | [src/hook-fanout/](harness/src/hook-fanout/) | Generic publish-and-collect primitive over a stream topic. | [workers/hook-fanout.md](harness/docs/workers/hook-fanout.md) | | auth-credentials | [src/auth-credentials/](harness/src/auth-credentials/) | File-backed multi-provider credential store. | [workers/auth-credentials.md](harness/docs/workers/auth-credentials.md) | | models-catalog | [src/models-catalog/](harness/src/models-catalog/) | Static model-capability catalogue (state-first, embedded fallback). | [workers/models-catalog.md](harness/docs/workers/models-catalog.md) | +| provider-config | [src/provider-config/](harness/src/provider-config/) | Runtime provider settings store on the iii bus (`provider_config::*` — base URL / max tokens overrides). | [workers/provider-config.md](harness/docs/workers/provider-config.md) | | provider-anthropic | [src/provider-anthropic/](harness/src/provider-anthropic/) | Anthropic Messages API SSE → channel writer. | [workers/provider-anthropic.md](harness/docs/workers/provider-anthropic.md) | | provider-openai | [src/provider-openai/](harness/src/provider-openai/) | OpenAI Chat Completions SSE → channel writer. | [workers/provider-openai.md](harness/docs/workers/provider-openai.md) | | provider-kimi | [src/provider-kimi/](harness/src/provider-kimi/) | Kimi Chat Completions SSE → channel writer. | [workers/provider-kimi.md](harness/docs/workers/provider-kimi.md) | | provider-lmstudio | [src/provider-lmstudio/](harness/src/provider-lmstudio/) | LM Studio (localhost) Chat Completions SSE → channel writer. 
| [workers/provider-lmstudio.md](harness/docs/workers/provider-lmstudio.md) | -| context-compaction | [src/context-compaction/](harness/src/context-compaction/) | Optional `agent::events` side-car that compacts session history when running token count crosses a threshold. | [workers/context-compaction.md](harness/docs/workers/context-compaction.md) | +| provider-llamacpp | [src/provider-llamacpp/](harness/src/provider-llamacpp/) | llama.cpp `llama-server` (localhost) Chat Completions SSE → channel writer. | [workers/provider-llamacpp.md](harness/docs/workers/provider-llamacpp.md) | +| context-compaction | [src/context-compaction/](harness/src/context-compaction/) | Optional `agent::turn_end` side-car that compacts session history when running token count crosses a threshold. | [workers/context-compaction.md](harness/docs/workers/context-compaction.md) | ## System diagram @@ -69,14 +71,13 @@ flowchart LR turnOrch -- "provider::*::stream" --> provKimi turnOrch -- "provider::*::stream" --> provLms turnOrch -- "consultBefore: policy::check_permissions" --> harness - turnOrch -- "publishAfter → hook-fanout::publish_collect (after-hook)" --> hook turnOrch -- "session-tree::* mirror" --> session turnOrch -- "state::* persistence" --> state client -- "approval::resolve" --> approval - approval -- "trigger turn::approval_resume::/" --> turnOrch - turnOrch -- "state::set approvals//" --> state - turnOrch -- "iii.trigger turn::step" --> turnOrch + approval -- "state::set approvals//" --> state + state -- "state trigger (scope=approvals)" --> turnOrch + turnOrch -- "enqueue turn::{state} on turn-step queue" --> turnOrch provAnth -- "auth::get_token" --> auth provOAI -- "auth::get_token" --> auth @@ -85,8 +86,7 @@ flowchart LR state -- "agent::events stream" --> harness state -- "agent::events stream" --> compact - state -- "state trigger (scope=agent, abort_signal)" --> turnOrch - state -- "state trigger (scope=agent, turn_state created)" --> harness + state -- "state trigger 
(scope=turn_state)" --> harness harness -- "ui::session::event::" --> client compact -- "session-tree::compact" --> session ``` @@ -94,41 +94,84 @@ flowchart LR ## Turn FSM [src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts) -defines an 11-state durable FSM. Every transition is driven by the -`turn::step` durable subscriber, which is woken by a publish to the -`turn::step_requested` topic — either by the orchestrator itself -(re-publish at the end of a step), by a per-call -`turn::approval_resume` handler (when a human decision or abort lands), or by -the orchestrator's own `abort_signal` state trigger. +defines a 7-state durable FSM. Each state is a registered `turn::{state}` +function executed via `runTransition` and enqueued onto the `turn-step` FIFO +queue from `saveRecord` ([store.ts](harness/src/turn-orchestrator/state-runtime/store.ts)). +`saveRecord` calls `shouldWakeStep` then enqueues `turn::{newState}` when the persisted state +transitions to a stepable state. Paused sessions are woken when `approval::resolve` writes +scope `approvals`, which fires `turn::on_approval` to enqueue `turn::function_awaiting_approval`. 
```mermaid stateDiagram-v2 [*] --> provisioning - provisioning --> awaiting_assistant - awaiting_assistant --> assistant_streaming - assistant_streaming --> assistant_finished - assistant_finished --> function_prepare: has function calls - assistant_finished --> steering_check: no function calls - function_prepare --> function_execute - function_execute --> function_finalize: all calls resolved (allow/deny) - function_execute --> function_awaiting_approval: any call needs_approval - function_awaiting_approval --> function_awaiting_approval: decision(s) still missing - function_awaiting_approval --> function_execute: all decisions written - function_finalize --> steering_check - steering_check --> awaiting_assistant: continue - steering_check --> tearing_down: stop or max turns - tearing_down --> stopped + provisioning --> assistant_streaming + assistant_streaming --> function_execute: has function calls + assistant_streaming --> steering_check: no function calls + assistant_streaming --> stopped: error or aborted via finishSession + function_execute --> function_awaiting_approval: any call needs approval + function_execute --> steering_check: batch complete + function_execute --> stopped: all calls terminate session via finishSession + function_awaiting_approval --> function_execute: awaiting empty, batch incomplete + function_awaiting_approval --> steering_check: awaiting empty, batch complete + steering_check --> assistant_streaming: continue turn + steering_check --> stopped: stop or max turns via finishSession stopped --> [*] + failed --> [*] ``` +`failed` is a terminal state set by `runTransition` when a handler throws +unexpectedly (unless it opts into queue retry via `TransientError`). + ## Approval flow The orchestrator consults `policy::check_permissions` directly inside `consultBefore` — `allow`, `deny`, or `pending`. There is no hook fanout on -the before path. 
The orchestrator parks the turn in `function_awaiting_approval`, -registers a `turn::approval_resume` function per pending call, and waits until -`approval::resolve` (or abort) triggers that function, which persists the -decision and invokes `turn::step`. +the before path. The orchestrator parks the turn in `function_awaiting_approval` +when any call in the batch needs approval, then resumes as each parked call +receives `approval::resolve` (decisions may arrive independently and out of +batch order). Each `approval::resolve` persists the decision; the `turn::on_approval` +state trigger enqueues `turn::function_awaiting_approval`. + +### Parallel batch during `function_execute` + +When the assistant message contains multiple tool calls, `runBatch` does not +stop at the first `pending`. For each call in assistant tool order: + +- already in `work.executed` or listed in `awaiting_approval[]` → skip +- policy `allow` (or immediate policy `deny`) → dispatch, checkpoint, emit + `function_execution_end` +- policy `needs_approval` → emit `function_execution_start`, append the call + to `awaiting_approval[]`, **continue** remaining siblings + +After the loop: if any call is still awaiting approval, transition to +`function_awaiting_approval`; otherwise finalize the batch or re-enter +`function_execute` when the batch is incomplete but nothing is parked. + +Example batch A, B, C: A → pending, B → allow (executes immediately), C → +pending → `awaiting_approval = [A, C]`, B recorded in `work.executed`, turn +parked until A and C are resolved. 
+ +### Durability and reload + +| Surface | Location | Role | +|---|---|---| +| Open approvals | `turn_state/<session_id>` → `awaiting_approval[]` | Which calls are parked and their args | +| Decisions | `approvals/<session_id>/<function_call_id>` | Written by `approval::resolve`; read on each wake | +| UI mirror | `turn_state_changed` on `agent::events` | Console shows pending modals from `TurnStateView.awaiting_approval` | +| Reload | `turn::get_state` | One-shot lean view after refresh (no direct iii state reads) | + +A page refresh does not lose pending approvals as long as iii state persists. +Operators can still approve from the console after reload; each decision write +fires `turn::on_approval` to enqueue the parked turn step while the worker is running. + +### Resume semantics + +- Decisions may arrive in any order (e.g. resolve call C before call A). +- On `allow`, the parked call executes with `skipStart: true` — the + `function_execution_start` event was already emitted when the call first + returned `pending`. +- A duplicate `approval::resolve` for the same call re-wakes the handler; + resolved entries are pruned idempotently so execution is not doubled. ```mermaid sequenceDiagram @@ -138,30 +181,33 @@ sequenceDiagram participant Gate as approval-gate participant User + Note over Turn: function_execute: runBatch walks all tool calls.<br/>pending calls append to awaiting_approval[];<br/>allowed siblings execute in the same pass. + Turn->>Harness: policy::check_permissions(function_id, args) [5s timeout] alt rule.action == allow Harness-->>Turn: allow → dispatch the call else rule.action == deny - Harness-->>Turn: deny + DenialEnvelope → DenialResult + Harness-->>Turn: deny + DenialEnvelope → error FunctionResult else no rule (needs_approval) - Harness-->>Turn: needs_approval → park in function_awaiting_approval - Note over Turn,Bus: Orchestrator stops re-publishing turn::step_requested.
The TurnStateRecord.awaiting_approval list pins the open calls. + Harness-->>Turn: needs_approval → append to awaiting_approval[], continue batch + Note over Turn,Bus: When the batch pass finishes with any awaiting calls,<br/>saveRecord parks in function_awaiting_approval (no wake on park). User->>Gate: approval::resolve(decision, reason) - Gate->>Turn: trigger turn::approval_resume::/ - Turn->>Bus: state::set approvals// = {decision, reason} - Turn->>Turn: turn::step → function_awaiting_approval reads
approvals// for each pending entry - Turn->>Turn: fold decisions into prepared snapshot,
transition back to function_execute + Gate->>Bus: state::set approvals/<session_id>/<function_call_id> = {decision, reason} + Bus->>Turn: state trigger turn::on_approval → enqueue turn::function_awaiting_approval + Turn->>Turn: function_awaiting_approval executes
that call immediately (skipStart), removes it from awaiting_approval[] + alt more calls still awaiting + Turn->>Turn: stay in function_awaiting_approval + else awaiting empty and batch incomplete + Turn->>Turn: transition to function_execute + else awaiting empty and batch complete + Turn->>Turn: finalizeBatch → steering_check / stopped + end end ``` Fail-closed: policy unreachable (transport error or 5 s timeout) → `consultBefore` denies the call with a `gate_unavailable` envelope. -Abort: `router::abort` writes `session//abort_signal = true` (waking -the orchestrator through its own `agent`-scope state trigger) and, if the -turn is paused on approvals, triggers each registered -`turn::approval_resume` function with `{decision: 'aborted'}`. - ## Kernel deny list [iii-permissions.yaml](iii-permissions.yaml) at the workspace root is the @@ -175,7 +221,7 @@ Deny shorthands (`!function_id` in the YAML): `approval::resolve`, `state::update`, `state::delete`, `stream::set`, `iii::durable::publish`, `auth::set_token`, `auth::delete_token`, `oauth::anthropic::login`, `oauth::openai-codex::login`, `run::start`, -`router::stream_assistant`, `router::abort`. +`router::stream_assistant`. Bare-string allow rules: `state::get`, `state::list`, `models::list`, `models::get`, `models::supports`, `auth::get_token`, @@ -230,7 +276,6 @@ flowchart TD provOAI --> turnOrch provKimi --> turnOrch provLms --> turnOrch - hook[hook-fanout] --> approval session --> compact[context-compaction] provAnth --> compact provOAI --> compact diff --git a/harness/docs/workers/approval-gate.md b/harness/docs/workers/approval-gate.md index 17579004..6f4e6a54 100644 --- a/harness/docs/workers/approval-gate.md +++ b/harness/docs/workers/approval-gate.md @@ -1,8 +1,7 @@ # approval-gate Registers `approval::resolve` and shared wire schemas for the approval path. -Per-call resume functions (`turn::approval_resume::/`) live in -the turn-orchestrator. 
+The turn-orchestrator reacts via the reactive `turn::on_approval` state trigger. ## Purpose @@ -10,34 +9,29 @@ The approval gate is the bus entry point for human decisions on parked tool calls. It does **not** intercept function calls on the bus — the turn-orchestrator consults `policy::check_permissions` directly inside `consultBefore`. The gate's job is to accept operator input from the console -and route it to the correct per-call resume function. +and persist the decision where the orchestrator can read it. | Policy outcome (in orchestrator) | Orchestrator effect | |---|---| | `allow` | dispatch proceeds immediately | | `deny` | dispatch short-circuits with a `DenialEnvelope` | -| `needs_approval` | orchestrator parks the call in `function_awaiting_approval` and registers a resume fn | +| `needs_approval` | orchestrator parks the call in `function_awaiting_approval` | ## Resolution flow -1. While parked, the orchestrator calls `registerApprovalResume` for each - pending call (see [approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts)). +1. While parked, the orchestrator keeps pending calls in `awaiting_approval[]` on the turn record. 2. The console calls `approval::resolve` with `{ session_id, function_call_id, decision, reason? }`. -3. `approval::resolve` triggers `turn::approval_resume::/` with the decision payload. -4. The resume handler writes `approvals//` (if not already set), invokes `turn::step`, and unregisters the resume fn. -5. `handleAwaitingApproval` reads all decisions, folds them into the prepared snapshot, and returns to `function_execute`. - -Abort uses the same resume path: `performAbortSideEffects` triggers each -registered resume fn with `{ decision: 'aborted', reason: 'session_aborted' }` -instead of calling `approval::resolve`. +3. `approval::resolve` writes `approvals//` via `state::set`. +4. The `turn::on_approval` state trigger (scope `approvals`) enqueues `turn::function_awaiting_approval`. +5. 
`function_awaiting_approval` executes each resolved call immediately, removes it from `awaiting_approval[]`, and stays parked until none remain; then finalizes the batch or returns to `function_execute`. ## Registered functions -- `approval::resolve` — Validates the payload and triggers the per-call resume function. Returns `{ ok: true }` or `{ ok: false, error: 'invalid_payload' | 'resume_failed' }`. +- `approval::resolve` — Validates the payload and persists the decision to scope `approvals`. Returns `{ ok: true }` or `{ ok: false, error: 'invalid_payload' | 'resume_failed' }`. -Per-call resume functions are registered by the turn-orchestrator, not this worker: +Reactive wake is owned by the turn-orchestrator: -- `turn::approval_resume::/` — Persists the decision to scope `approvals` and wakes `turn::step`. +- `turn::on_approval` — State trigger on scope `approvals`; enqueues `turn::{state}` for the parked session. ## State keys @@ -46,7 +40,7 @@ All decision records use scope `approvals` (constant `STATE_SCOPE` in | Key shape | Value | Purpose | |---|---|---| -| `/` | `{ decision: 'allow' \| 'deny' \| 'aborted', reason: string \| null }` | Written by the resume handler when an operator resolves or abort fires. `handleAwaitingApproval` reads these keys while the turn is in `function_awaiting_approval`. | +| `/` | `{ decision: 'allow' \| 'deny' \| 'aborted', reason: string \| null }` | Written by `approval::resolve`. `function_awaiting_approval` reads these keys while the turn is in `function_awaiting_approval`. | Pending calls are tracked on the turn record (`awaiting_approval[]`), not as separate rows under `approvals` until a decision lands. @@ -86,12 +80,12 @@ no explicit dependency block. | File | Purpose | |---|---| | [src/approval-gate/main.ts](harness/src/approval-gate/main.ts) | Binary entry point (`iii-approval-gate`). | -| [src/approval-gate/resolve.ts](harness/src/approval-gate/resolve.ts) | Registers `approval::resolve`; triggers per-call resume fns. 
| -| [src/approval-gate/schemas.ts](harness/src/approval-gate/schemas.ts) | `STATE_SCOPE`, wire schemas, `parsePolicyReply`, `pendingKey`, `approvalResumeFnId`, `ResolvePayloadSchema`. | +| [src/approval-gate/resolve.ts](harness/src/approval-gate/resolve.ts) | Registers `approval::resolve`; persists decisions to scope `approvals`. | +| [src/approval-gate/schemas.ts](harness/src/approval-gate/schemas.ts) | `STATE_SCOPE`, wire schemas, `parsePolicyReply`, `pendingKey`, `ApprovalDecisionSchema`, `ResolvePayloadSchema`. | | [src/approval-gate/denial.ts](harness/src/approval-gate/denial.ts) | `permissionsDenyEnvelope` and related helpers. | | [src/approval-gate/redact.ts](harness/src/approval-gate/redact.ts) | `redact` / `clip` for safe `args_excerpt` on denials. | | [src/approval-gate/iii.worker.yaml](harness/src/approval-gate/iii.worker.yaml) | Worker manifest. | Related orchestrator code: -[approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts), +[function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) (registers `turn::on_approval`), [hook.ts](harness/src/turn-orchestrator/hook.ts). diff --git a/harness/docs/workers/context-compaction.md b/harness/docs/workers/context-compaction.md index 5cda2227..997ca07b 100644 --- a/harness/docs/workers/context-compaction.md +++ b/harness/docs/workers/context-compaction.md @@ -1,9 +1,11 @@ # context-compaction -Out-of-band session-history compactor (v2). Watches `agent::events` for -`TurnEnd` frames and summarises older turns when the session approaches the -model's usable context limit. Also exposes a sync pre-turn path that the -turn-orchestrator calls to compact before a turn that would overflow. +Out-of-band session-history compactor (v2). 
Subscribes to the dedicated +`agent::turn_end` stream (mirrored by the event producer) and summarises older +turns when the session approaches the model's usable context limit — one wake +per turn instead of one per `agent::events` frame. Also exposes a sync +pre-turn path that the turn-orchestrator calls to compact before a turn that +would overflow. ## Purpose @@ -27,7 +29,8 @@ This worker is optional. Without it, sessions keep their full transcript. ### `context-compaction::on_agent_event` -Internal stream subscriber. Fires on every `agent::events` message. +Internal stream subscriber on `agent::turn_end` — fires once per turn (kept +under the historical `on_agent_event` name). **Payload** (camelCase or snake_case envelope): ``` @@ -138,8 +141,7 @@ usable = max(0, model.input_limit − COMPACT_RESERVED_TOKENS) ``` If `model.input_limit` is zero, it falls back to -`model.context_window − model.output_tokens`. `COMPACT_TRIGGER_TOKENS` (deprecated) -acts as a hard cap on the result if set, preserving old behaviour. +`model.context_window − model.output_tokens`. A session with a 200 k-token model reserves 20 k by default and triggers at 180 k. A 32 k model triggers at 12 k with the same defaults. @@ -175,7 +177,7 @@ scratch, so the summary converges rather than growing without bound. `COMPACT_PRUNE_PROTECT` goes into the prune queue. 4. If the queue would free fewer than `COMPACT_PRUNE_MIN_FREE` tokens, it skips entirely (no-op). -5. Calls `session-tree::update_part` to null out each pruned output. +5. Calls `session-tree::update_parts` to null out each pruned output (batched, one load). Tools listed in `COMPACT_PRUNE_PROTECTED_TOOLS` are never pruned. @@ -218,21 +220,22 @@ All knobs are env-driven; no `config.yaml` fields are read. | `COMPACT_TOOL_OUTPUT_MAX_CHARS` | `2000` | Per-output character cap applied before sending to the summariser. 
| | `COMPACT_BUSY_TIMEOUT_MS` | `30000` | Max ms `compact_now` / `compact_session` waits for the compaction lease before returning `{ status: 'busy' }`. Sized to cover a typical summariser stream (10–30s) so user-initiated `/compact` doesn't race the async TurnEnd path. | | `COMPACT_PRUNE_PROTECTED_TOOLS` | _(empty)_ | Comma-separated function IDs whose outputs are never pruned. | -| `COMPACT_TRIGGER_TOKENS` | _(deprecated)_ | If set, caps `usable()` to this value. Preserves pre-v2 behaviour. Prefer `COMPACT_RESERVED_TOKENS` instead. | The summariser provider and model are always inherited from the session's own selection. Routing goes through `turn-orchestrator/provider-router`, so adding a provider there automatically covers `/compact`. -## State keys +## State scopes -All keys live under iii state scope `agent`: +Compaction-related keys use dedicated scopes (key = `session_id`): -| Key shape | Purpose | +| Scope | Purpose | |---|---| -| `session//compaction_lease` | `{ nonce, ts }` — held for up to `LEASE_TTL_SECS = 300 s`. Acquired by writing a unique nonce and reading it back; the first writer whose nonce survives wins. | -| `session//prune_lease` | Same nonce-and-readback pattern, separate key so the prune path does not block async compaction. | -| `session//last_compaction_at` | Wall-clock ms of the most recent successful compaction. Stamped by `stampLastCompaction`. | +| `compaction_lease` | `{ nonce, ts }` — held for up to `LEASE_TTL_SECS = 300 s`. | +| `prune_lease` | Same nonce-and-readback pattern, separate scope so the prune path does not block async compaction. | +| `last_compaction_at` | Wall-clock ms of the most recent successful compaction. Stamped by `stampLastCompaction`. | + +Flat transcript rewrites use scope `messages`, key `session_id` (see [flat-state.ts](harness/src/context-compaction/flat-state.ts)). ## Observability @@ -259,7 +262,7 @@ outer `instrumentHandler` wrapper. 
| `session-tree::compact` | Append a Compaction entry (summary + `tail_start_id` + `tokens_before`). | | `session-tree::compactions` | Load existing Compaction entries for prior-summary anchor. | | `session-tree::append_synthetic` | Append the "Continue…" prompt after sync compaction. | -| `session-tree::update_part` | Null out pruned tool outputs in-place. | +| `session-tree::update_parts` | Null out pruned tool outputs in-place (batched). | | `models::get` | Resolve `context_window` / `max_output_tokens` for model-adaptive threshold. | Worker manifest deps (`iii.worker.yaml`): @@ -274,6 +277,8 @@ Worker manifest deps (`iii.worker.yaml`): | `src/context-compaction/config.ts` | Reads all `COMPACT_*` env vars. | | `src/context-compaction/handler-async.ts` | Async TurnEnd path: envelope decode, overflow check, lease, prune, summarise. | | `src/context-compaction/handler-sync.ts` | Sync pre-turn path: lease-with-wait, extract replay, prune, summarise, reinject. | +| `src/context-compaction/handler-pipeline.ts` | Shared prune → summarise → flat-state rewrite pipeline used by both handlers. | +| `src/context-compaction/flat-state.ts` | Rewrites scope `messages` after compaction so the next turn reads the new flat transcript. | | `src/context-compaction/model-resolver.ts` | Shared model-resolution helpers: `fetchModelLimit` (catalog lookup) and `resolveModelFromSession` (session-scan + catalog lookup). | | `src/context-compaction/prune.ts` | Tool-output pruning (`prune`). | | `src/context-compaction/summarize.ts` | `summarizeAndAppend`: load → select tail → summarise → append Compaction entry. | diff --git a/harness/docs/workers/harness.md b/harness/docs/workers/harness.md index 9245181a..a3384754 100644 --- a/harness/docs/workers/harness.md +++ b/harness/docs/workers/harness.md @@ -24,13 +24,12 @@ that drive transitions; its fan-out trigger is a passive stream subscriber. 
- `harness::fs::read_inline` — Read a host file via shell::fs::read, drain its channel, and return a `{content:[{text}], details:{size, truncated, bytes_read}}` envelope (max 256 KiB inline by default). - `policy::check_permissions` — Evaluate a function call against the current `iii-permissions.yaml`. Returns `{ decision: "allow" | "deny" | "needs_approval", rule_id?, matched_constraint? }`. - `harness::fanout::agent_event_handler` — Internal: `agent::events` fanout handler. -- `harness::session::is_create_event` — Internal condition function bound to the sessions state trigger; matches `state:created` writes to `session//turn_state`. -- `harness::fanout::session_created` — Internal handler invoked by the sessions state trigger; fans the new session id out to every all-sessions subscriber via `ui::sessions::changed::`. +- `harness::fanout::session_created` — Internal handler invoked by the sessions state trigger; fans the new session id out to every all-sessions subscriber via `ui::sessions::changed::`. Gates in-handler on the `state:created` marker. ## Triggers - **Stream subscriber** on `agent::events` → `harness::fanout::agent_event_handler`. Registered by [src/harness/fanout/agent-events.ts](harness/src/harness/fanout/agent-events.ts). -- **State trigger** on `scope: agent` gated by `condition_function_id: harness::session::is_create_event` → `harness::fanout::session_created`. Lives in [src/harness/fanout/sessions-poll.ts](harness/src/harness/fanout/sessions-poll.ts). This replaced the previous 1 Hz `state::list` diff loop: new sessions now reach all-sessions subscribers reactively, on the same `turn_state` write that creates them. +- **State trigger** on `scope: turn_state` (no `condition_function_id`) → `harness::fanout::session_created`. Lives in [src/harness/fanout/sessions-poll.ts](harness/src/harness/fanout/sessions-poll.ts). The handler gates on `state:created` events where key = session id — the first persist of a turn record signals session creation. 
(This replaced the earlier `session_index` marker scope.) The fanout handler forwards every `agent::events` frame to the per-browser endpoint `ui::session::event::` for each browser whose @@ -41,7 +40,7 @@ evicted from the in-process subscription set. ## State keys The harness reads state but doesn't own any keys. The sessions state -trigger observes `session//turn_state` writes — those entries are +trigger observes `turn_state` scope `state:created` events — those entries are owned by the orchestrator (see [workers/turn-orchestrator.md](harness/docs/workers/turn-orchestrator.md)). @@ -84,5 +83,5 @@ From [src/harness/iii.worker.yaml](harness/src/harness/iii.worker.yaml): | [src/harness/policy/types.ts](harness/src/harness/policy/types.ts) | `RuleSpec`, `ConstraintSpec`, `Decision`, `MatchedConstraint` types for `iii-permissions.yaml` rules and evaluation results. | | [src/harness/fanout/index.ts](harness/src/harness/fanout/index.ts) | Spawns the two fan-out pumps. | | [src/harness/fanout/agent-events.ts](harness/src/harness/fanout/agent-events.ts) | `agent::events` stream subscriber → per-browser fan-out. | -| [src/harness/fanout/sessions-poll.ts](harness/src/harness/fanout/sessions-poll.ts) | State-trigger handler that detects `session//turn_state` creates and fans the new session id out to every all-sessions subscriber via `ui::sessions::changed::`. (Filename kept for history; the implementation is no longer a poll loop.) | +| [src/harness/fanout/sessions-poll.ts](harness/src/harness/fanout/sessions-poll.ts) | State-trigger handler on scope `turn_state` that fans new session ids to every all-sessions subscriber via `ui::sessions::changed::`. | | [src/harness/iii.worker.yaml](harness/src/harness/iii.worker.yaml) | iii worker manifest (dependencies, install/start scripts). 
| diff --git a/harness/docs/workers/session.md b/harness/docs/workers/session.md index 60809967..7ac2feef 100644 --- a/harness/docs/workers/session.md +++ b/harness/docs/workers/session.md @@ -36,6 +36,10 @@ backend. - `session-tree::messages` — Load every AgentMessage on the active path of a session, paired with its entry_id, oldest first. - `session-tree::reconcile` — Mirror missing messages from a state-snapshot into session-tree. - `session-tree::list` — List sessions with optional pagination and ordering. +- `session-tree::compactions` — Return all Compaction entries for a session, sorted by timestamp ascending. +- `session-tree::append_synthetic` — Append a synthetic user-role message entry to a session (used by the context-compaction replay path). +- `session-tree::update_part` — Replace the content of a `function_result` message entry with compacted output. +- `session-tree::update_parts` — Batch variant of `update_part`; loads target entries once and rewrites all of them. ### `session-inbox::*` @@ -62,11 +66,11 @@ resumed approval replies in the correct transcript position when their ids are non-monotonic relative to wall-clock order. `session-inbox::*` (under the configured `session.state_scope`, default -`agent`): +`inbox`): -| Key shape | Value | -|---|---| -| `session//` | An append-only JSON array of opaque items. | +| Scope | Key | Value | +|---|---|---| +| `inbox` | `/` | An append-only JSON array of opaque items. | ## Configuration @@ -74,7 +78,7 @@ From the `session` section of [config.yaml](harness/config.yaml): - `store_backend` (default `iii_state`; alternative `memory`) — which `SessionStore` implementation `register()` instantiates. -- `state_scope` (default `agent`) — iii state scope used by +- `state_scope` (default `inbox`) — iii state scope used by `session-inbox::*`. Note: the tree backend uses its own hard-coded scopes (`session_tree:*`, `session_tree_meta`); only the inbox honours this setting. 
@@ -91,10 +95,10 @@ From [src/session/iii.worker.yaml](harness/src/session/iii.worker.yaml): | [src/session/main.ts](harness/src/session/main.ts) | Binary entry point (`iii-session`). | | [src/session/register.ts](harness/src/session/register.ts) | Picks the backend and wires both sub-surfaces. | | [src/session/config.ts](harness/src/session/config.ts) | Loads the `session` config section. | -| [src/session/tree/register.ts](harness/src/session/tree/register.ts) | Registers all 11 `session-tree::*` functions; exports `FUNCTION_IDS`. | +| [src/session/tree/register.ts](harness/src/session/tree/register.ts) | Registers all 15 `session-tree::*` functions; exports `FUNCTION_IDS`. | | [src/session/tree/operations.ts](harness/src/session/tree/operations.ts) | Pure tree algorithms: create, fork, clone, compact, active path, messages, reconcile, tree, export_html, list. | | [src/session/tree/store.ts](harness/src/session/tree/store.ts) | `SessionStore` interface + `InMemoryStore` + `IiiStateSessionStore`. | | [src/session/tree/types.ts](harness/src/session/tree/types.ts) | `SessionEntry` (`message` / `custom_message` / `branch_summary` / `compaction`, each with an explicit `timestamp`), `SessionMeta`, `TreeNode`, `ReconcileResult`, `SessionError`, plus the `entryTimestamp` helper used by the `(timestamp, id)` sort. | | [src/session/inbox/handlers.ts](harness/src/session/inbox/handlers.ts) | Registers the three `session-inbox::*` functions. | -| [src/session/inbox/key.ts](harness/src/session/inbox/key.ts) | `inboxKey(name, session_id) → "session//"`. | +| [src/session/inbox/key.ts](harness/src/session/inbox/key.ts) | `inboxKey(name, session_id) → "/"` under scope `inbox`. | | [src/session/iii.worker.yaml](harness/src/session/iii.worker.yaml) | Worker manifest. 
| diff --git a/harness/docs/workers/turn-orchestrator.md b/harness/docs/workers/turn-orchestrator.md index 4619cbd3..5a164596 100644 --- a/harness/docs/workers/turn-orchestrator.md +++ b/harness/docs/workers/turn-orchestrator.md @@ -1,96 +1,121 @@ # turn-orchestrator Durable `run::start` state machine that drives each agent turn through -provisioning, assistant, function-execute, steering, and tearing-down. +provisioning, assistant, function-execute, steering, and session finish. ## Purpose This is the heart of the bundle. `run::start` opens a session and returns -immediately; the rest of the work happens inside the durable `turn::step` -state machine, woken once per state transition by a publish to the -`turn::step_requested` topic. The FSM provisions the sandbox, streams the -assistant turn from a provider, executes any returned function calls -through `dispatchWithHook`, emits `agent::events` for the -harness fanout, and persists everything to iii state so the run survives -restarts. - -`dispatchWithHook` in [agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) is the single -dispatcher every agent-issued tool call passes through. It runs `consultBefore` before forwarding to the target function -id. `consultBefore` triggers `policy::check_permissions` directly (5 s -timeout) and maps the reply to allow / deny / pending. Fail-closed: policy +immediately; the rest of the work happens inside per-state durable functions +(`turn::provisioning`, `turn::assistant_streaming`, …), each enqueued onto +the `turn-step` FIFO queue inline from `saveRecord`. +Saving the record with a new non-terminal, non-parking state automatically +enqueues the next handler (`saveRecord` in +[state-runtime/store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) calls `shouldWakeStep` then enqueues on the `turn-step` FIFO). 
+ +Every per-state handler is wrapped by `runTransition` +([run-transition.ts](harness/src/turn-orchestrator/run-transition.ts)): +load record → null-check → stale-skip → handle → save. This owns the +crash-isolation contract: + +- An unexpected handler throw routes the session to the `failed` terminal + (acked so the durable queue stops retrying) and surfaces `message_complete{stop_reason:'error'}` + `agent_end` to the UI. +- A handler may throw `TransientError` + ([errors.ts](harness/src/turn-orchestrator/errors.ts)) to opt into the + queue's retry/backoff/DLQ instead of the terminal path. + +`dispatchWithHook` in [agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) +is the single chokepoint every agent-issued function call passes through. +It runs `consultBefore` before forwarding to the target function id. +`consultBefore` triggers `policy::check_permissions` directly (5 s timeout) +and maps the reply to `allow` / `deny` / `pending`. Fail-closed: policy unreachable → deny with a `gate_unavailable` `DenialEnvelope`. ## Registered functions -- `run::start` — Start a durable agent session and return immediately. -- `turn::step` — Run one durable state machine transition for a session. -- `turn::get_state` — Read the current `TurnStateRecord` for a session (or null for unknown sessions). UI clients use this on reload to recover any in-progress modals (e.g. `function_awaiting_approval`) without reading iii state directly. -- `turn::is_abort_signal_set` — Condition function bound to the agent-scope state trigger; matches `state:created`/`state:updated` writes that set `session//abort_signal` to `true`. -- `turn::on_abort_signal` — State trigger adapter: publishes `turn::step_requested` when the abort signal is set so the FSM advances on the next safe boundary. -- `turn::is_stepable_record_write` — Condition function bound to the record-written state trigger; matches `turn_state` writes whose `new_value.state` is non-terminal and non-parking (i.e. 
excludes `stopped` and `function_awaiting_approval`). -- `turn::on_record_written` — State trigger adapter: directly triggers `turn::step` for the affected session, so saving the record is itself the wake-up event. -- `turn::is_turn_state_write` — Condition function bound to the turn-state-changed trigger; matches every `state:created` / `state:updated` write to `session//turn_state` regardless of FSM state. -- `turn::on_turn_state_changed` — State trigger adapter: emits a `turn_state_changed` agent event carrying the full new (and prior) `TurnStateRecord` so the UI can derive pending approvals from state. +- `run::start` — Persist run config and messages, seed `turn_state` to + `provisioning`, and wake the FSM via `saveRecord`. +- `turn::provisioning` — FSM step: build system prompt + single `agent_trigger` schema, write enriched `run_request`, advance to `assistant_streaming`. +- `turn::assistant_streaming` — FSM step: stream the turn over a provider channel; on completion emit `message_complete`, persist the assistant message (dup-guarded), route to `function_execute` / `steering_check` / `stopped` (via `finishSession`). +- `turn::function_execute` — FSM step: own the full function lifecycle via `rec.work`; build batch from `rec.last_assistant`, run each call (skip already-executed and awaiting-approval ids), checkpoint per-call via `writeRecord`; if `pending` → append to `awaiting_approval` and keep dispatching the remaining calls (pending does not block siblings); park to `function_awaiting_approval` when any call awaits approval; finalize results into messages + emit `turn_end` when the batch completes → `steering_check` / `stopped` (via `finishSession`). 
+- `turn::function_awaiting_approval` — FSM step: on each wake, read decisions for individual `awaiting_approval[]` entries; execute each resolved call immediately (`allow` → dispatch pre-approved; `deny`/`aborted` → synthetic denial); remove resolved entries; stay parked while any remain; when none remain → `finalizeBatch` if complete else `function_execute`. +- `turn::steering_check` — FSM step: drain `steering`/`followup` inboxes, enforce `max_turns` cap (emits synthetic `max_turns` message + `turn_end` → `stopped` via `finishSession`), route to `assistant_streaming` / `stopped`. +- `turn::get_state` — One-shot reader returning a lean `TurnStateView` (from `schemas.ts:toView`) for a session. UI clients call this on reload to recover in-progress modals (e.g. `function_awaiting_approval`) without reading iii state directly. Returns `null` for unknown sessions. ## Triggers -- **Durable subscriber** on `turn::step_requested` → `turn::step`. Registered in [src/turn-orchestrator/subscriber.ts](harness/src/turn-orchestrator/subscriber.ts). Each `step` loads the `TurnStateRecord`, runs one transition, saves it back, and re-publishes `turn::step_requested` unless the run is terminal **or** paused on approvals (`function_awaiting_approval`). Paused turns are woken when `approval::resolve` or abort triggers a per-call `turn::approval_resume` function (see [workers/approval-gate.md](workers/approval-gate.md)). -- **State trigger** on `scope: agent` gated by `condition_function_id: turn::is_abort_signal_set` → `turn::on_abort_signal`. Registered in [src/turn-orchestrator/on-abort-signal.ts](harness/src/turn-orchestrator/on-abort-signal.ts). Publishes `turn::step_requested` the moment `session//abort_signal` is set to `true`, so the FSM advances to `steering_check` (and observes the abort) on the next safe boundary without waiting for the current step to time out. 
-- **State trigger** on `scope: agent` gated by `condition_function_id: turn::is_stepable_record_write` → `turn::on_record_written`. Registered in [src/turn-orchestrator/on-record-written.ts](harness/src/turn-orchestrator/on-record-written.ts). Directly triggers `turn::step` for the affected session on every non-terminal, non-parking `session//turn_state` write. Replaces the imperative `publishStep` self-publish — saving the record is now the wake. -- **State trigger** on `scope: agent` gated by `condition_function_id: turn::is_turn_state_write` → `turn::on_turn_state_changed`. Registered in [src/turn-orchestrator/on-turn-state-changed.ts](harness/src/turn-orchestrator/on-turn-state-changed.ts). Fires on every `session//turn_state` write (created or updated) and emits a `turn_state_changed` event to `agent::events` carrying the full new (and prior) record so the UI can derive pending approvals from state rather than from a signal event. +The record-written wake is inline in `saveRecord` (no separate `on-record-written` adapter): every `saveRecord` call that transitions to a non-terminal, non-parking state enqueues `turn::{newState}` on the `turn-step` FIFO. Similarly, `turn_state_changed` events are emitted inline from `persistRecord` inside `TurnStore` — there is no separate `on-turn-state-changed` state trigger. + +Paused turns (`function_awaiting_approval`) are woken when `approval::resolve` writes scope `approvals`, which fires `turn::on_approval` (registered in [function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts); see [workers/approval-gate.md](workers/approval-gate.md)). ## Turn FSM -The full FSM, transitions, and dispatch table lives in -[src/turn-orchestrator/transitions.ts](harness/src/turn-orchestrator/transitions.ts). 
-The 11 states from -[src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts): +Each state is a registered `turn::{state}` function executed via +`runTransition` and enqueued onto the `turn-step` FIFO queue from `saveRecord` when `shouldWakeStep` allows. +The 7 states from [state.ts](harness/src/turn-orchestrator/state.ts): -| State | Handler | Role | +| State | Handler file | Role | |---|---|---| -| `provisioning` | [states/provisioning.ts](harness/src/turn-orchestrator/states/provisioning.ts) | Boot the sandbox, prime the system prompt, fetch function schemas. | -| `awaiting_assistant` | [states/assistant.ts](harness/src/turn-orchestrator/states/assistant.ts) | Request an assistant turn via `provider::::stream`. | -| `assistant_streaming` | same | Drain the provider channel; relay `message_update` (token/thinking deltas) on `agent::events`. Tool args appear at `function_execution_start` when execute runs — no `turn_start` or streaming `function_execution_update` events. | -| `assistant_finished` | same | Persist the final `AssistantMessage`; pick next state. | -| `function_prepare` | [states/functions.ts](harness/src/turn-orchestrator/states/functions.ts) | Snapshot the pending function calls. | -| `function_execute` | same | Run each call via `dispatchWithHook` (pre-approved resume calls use `triggerFunctionCall` and skip the gate). If the gate returns `pending`, append the call to `awaiting_approval` and transition to `function_awaiting_approval` (the rest of the batch is left for the resumed step). Each call is bracketed by a `function_execution_start` / `function_execution_end` pair; the `end` event carries `duration_ms` (wall-clock between the matching start and end), persisted on `ExecutedEntry` so resumed runs replay the original timing instead of the ~0ms it takes to re-emit. Approval wait time is naturally excluded — pending calls return without an end emit, and the resumed step re-emits a fresh start that resets the timer. 
| -| `function_awaiting_approval` | same (`handleAwaitingApproval`) | Read `approvals//` for every entry in `awaiting_approval`. While any decision is still missing, return without stepping (the next `turn::approval_resume` invoke will wake `turn::step`). When all decisions are present, fold them into the prepared snapshot — `allow` → `pre_approved: true`, `deny`/`aborted` → `blocked` with a denial result — clear `awaiting_approval`, and transition back to `function_execute`. | -| `function_finalize` | same | Persist results; emit `function_call_end` + `turn_end` events. | -| `steering_check` | [states/steering.ts](harness/src/turn-orchestrator/states/steering.ts) | Decide whether to continue, stop, or hit `max_turns`. | -| `tearing_down` | [states/tearing-down.ts](harness/src/turn-orchestrator/states/tearing-down.ts) | Emit `agent_end` once, free the sandbox if any. | -| `stopped` | (no-op) | Terminal. Idempotent. | +| `provisioning` | [provisioning/process.ts](harness/src/turn-orchestrator/provisioning/process.ts) | Fetch skills index + default-skill bodies, build system prompt, write enriched `run_request` (with `function_schemas: [agentTriggerTool()]`), → `assistant_streaming`. | +| `assistant_streaming` | [assistant-streaming/process.ts](harness/src/turn-orchestrator/assistant-streaming/process.ts) | Increment `turn_count`; create channel; trigger provider stream; relay `message_update` deltas; on completion call `finalizeAssistantTurn` which emits `message_complete`, persists the assistant message (dup-guarded), then routes → `function_execute` (has calls) / `steering_check` (no calls) / `stopped` via `finishSession` (error/aborted). 
| +| `function_execute` | [function-execute/process.ts](harness/src/turn-orchestrator/function-execute/process.ts) | Build batch from `rec.last_assistant` (or reuse existing `rec.work`); for each call: emit `function_execution_start`, skip if already executed or awaiting approval, dispatch via `dispatchWithHook`; if `pending` → append to `awaiting_approval` and continue other calls; park to `function_awaiting_approval` when any call awaits; otherwise commit result (silent `writeRecord` checkpoint) + emit `function_execution_end`; after batch: fold results into messages + emit `turn_end` → `steering_check` / `stopped` via `finishSession`. | +| `function_awaiting_approval` | [function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) | On each wake: for each `awaiting_approval[]` entry with a decision, execute immediately (`allow` → pre-approved dispatch; `deny`/`aborted` → synthetic denial); remove resolved entries; stay parked while any remain; when none remain → `finalizeBatch` if complete else `function_execute`. | +| `steering_check` | [steering-check/process.ts](harness/src/turn-orchestrator/steering-check/process.ts) | Priority route: steering msg → `assistant_streaming` (unless `max_turns` reached); followup msg → `assistant_streaming` (unless `max_turns` reached); function results present → `assistant_streaming` (unless `max_turns` reached); else emit `turn_end` once → `stopped` via `finishSession`. `max_turns` path emits a synthetic `message_complete` + `turn_end`. | +| `stopped` | (no handler) | Terminal. Idempotent. Session teardown (`agent_end`) happens inline via `TurnStatePorts.finishSession` before entering this state. | +| `failed` | (set by `runTransition` on unexpected throw) | Terminal. Carries `error: {kind, message}` on the record. Emits `message_complete{stop_reason:'error'}` + `agent_end` so the UI sees the reason. A handler may throw `TransientError` to use the queue's retry/DLQ instead. 
| + +`NON_STEPABLE_STATES` in [store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) are +`stopped`, `failed`, and `function_awaiting_approval` — `saveRecord` does not +enqueue a handler for these. + +`dispatchWithHook` returns `{ kind: 'result', result }` or `{ kind: 'pending' }`. +Policy denies are returned as `{ kind: 'result' }` with a denied `FunctionResult`. +`pending` triggers the `function_awaiting_approval` park. Multiple calls may +await approval concurrently; each is executed individually as its decision +arrives. + +## State scopes + +Session-scoped iii state uses semantic scopes from +[state.ts](harness/src/turn-orchestrator/state.ts) with +`session_id` as the key. I/O goes through +[state-runtime/store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) (`TurnStore`). + +| Scope | Key | Purpose | +|---|---|---| +| `turn_state` | `<session_id>` | Serialised `TurnStateRecord` (incl. `work?: TurnWork` and `error?: {kind, message}`). | +| `messages` | `<session_id>` | Active path `AgentMessage[]`; mirrored into `session-tree::*` on every save (inline in `TurnStore.saveMessages` / `appendMessages`). | +| `run_request` | `<session_id>` | The `run::start` payload enriched by `provisioning` to include `function_schemas: [agentTriggerTool()]` and the assembled `system_prompt`. Typed as `RunRequest` ([run-request.ts](harness/src/turn-orchestrator/run-request.ts)). | +| `session_tree_mirror_len` | `<session_id>` | High-water mark so the session-tree messages mirror is incremental. | +| `event_counter` | `<session_id>` | Monotonic counter for `agent::events` sequence numbers. | -`dispatchWithHook` in [agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) -now returns one of three shapes: `{ kind: 'result' }`, `{ kind: 'deny' }`, -or `{ kind: 'pending' }`. Pending is what triggers the -`function_awaiting_approval` park. 
+Keys that no longer exist: `function_prepared`, `function_executed`, +`function_schemas` (standalone), `tool_prepared`, `tool_executed`, +`tool_schemas`, `sandbox_id`, `last_compaction_at`, +`last_compaction_consumed_at` — these were removed in the rewrite. -## State keys +The `TurnStateRecord` carries `work?: TurnWork` (inline `{ prepared: PreparedCall[]; executed: Record<string, ExecutedCall> }`) in place of the former separate state keys. `PreparedCall`, `ExecutedCall`, and `TurnWork` are defined in [function-execute/types.ts](harness/src/turn-orchestrator/function-execute/types.ts). -All keys live under iii state scope `agent`. From -[src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts): +## UI events -| Key shape | Purpose | -|---|---| -| `session//turn_state` | Serialised `TurnStateRecord`. | -| `session//messages` | Active path `AgentMessage[]`; mirrored into `session-tree::*` on every save. | -| `session//run_request` | The original `run::start` payload (provider, model, system_prompt, mode, image, idle_timeout_secs). | -| `session//sandbox_id` | Active sandbox handle. | -| `session//function_schemas` | Cached tool schemas exposed to the model. | -| `session//tool_schemas` | Legacy alias of `function_schemas`. | -| `session//session_tree_mirror_len` | High-water mark so the messages mirror is incremental. | -| `session//last_compaction_at` | Last entry id the compactor wrote. | -| `session//last_compaction_consumed_at` | Last compaction the loader applied. | -| `session//event_counter` | Monotonic counter for `agent::events` sequence numbers. | -| `session//abort_signal` | Set by `router::abort` to interrupt a streaming turn. | -| `session//function_prepared` | Snapshot of pending function calls for the current turn. Each entry carries `pre_approved` / `blocked` flags so resumed approvals can short-circuit re-dispatch. | -| `session//function_executed` | Results of the current turn's function calls. 
| -| `session//tool_prepared`, `session//tool_executed` | Legacy aliases of the two above. | - -The `TurnStateRecord` also carries an optional `awaiting_approval: -AwaitingApprovalEntry[]` field — populated when `function_execute` is -parked, drained when `function_awaiting_approval` folds the resolved -decisions back into the prepared snapshot. +`turn_state_changed` is emitted inline by `TurnStore.saveRecord` on every +persist that goes through the full save path. It carries a lean +`TurnStateView` (not the full `TurnStateRecord`) as `new_value` (and +`old_value` when updating). `TurnStateView` is defined in +[schemas.ts](harness/src/turn-orchestrator/schemas.ts) and contains: +`session_id`, `state`, `turn_count`, `max_turns`, `awaiting_approval`, `error`. + +`turn::get_state` also returns a `TurnStateView` (via `toView`), not the full +record, so heavy internal fields (`work`, `last_assistant`) are never sent to +consumers. + +## Approval chokepoint + +Unchanged from prior design: `dispatchWithHook` → `consultBefore` → +`policy::check_permissions` (5 s timeout, fail-closed). A `needs_approval` +reply returns `{ kind: 'pending' }` from `dispatchWithHook`, which parks the +session to `function_awaiting_approval`. `approval::resolve` writes the +decision to scope `approvals`, which fires `turn::on_approval` to enqueue `turn::function_awaiting_approval` on the `turn-step` queue. ## Configuration @@ -98,14 +123,14 @@ From the top-level `turn-orchestrator` section of [config.yaml](harness/config.yaml): - `system_default_skills` (default `["iii://iii-directory/index"]`) — - skills the bootstrap step downloads into the session's system prompt + skill URIs the bootstrap step downloads into the session's system prompt context. 
## Dependencies From [src/turn-orchestrator/iii.worker.yaml](harness/src/turn-orchestrator/iii.worker.yaml): -`session ^0.2.0`, `hook-fanout ^0.2.0`, `provider-anthropic ^0.2.0`, +`session ^0.2.0`, `provider-anthropic ^0.2.0`, `provider-openai ^0.2.0`. ## Source layout @@ -113,24 +138,28 @@ From | File | Purpose | |---|---| | [src/turn-orchestrator/main.ts](harness/src/turn-orchestrator/main.ts) | Binary entry point. | -| [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes `run::start`, per-state `turn::{state}` handlers, abort-signal trigger, and kicks off the bootstrap. | -| [src/turn-orchestrator/run-start.ts](harness/src/turn-orchestrator/run-start.ts) | `run::start` handler — persists run config and messages, seeds `turn_state`, and wakes the FSM via the record-written state trigger. | -| [src/turn-orchestrator/get-state.ts](harness/src/turn-orchestrator/get-state.ts) | `turn::get_state` — one-shot reader that returns the current `TurnStateRecord` for a session. UI clients call this on reload to recover in-progress modals; the orchestrator owns the state schema/key layout so clients never read iii state directly. | -| [src/turn-orchestrator/agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) | The dispatcher chokepoint; `dispatchWithHook` runs `consultBefore` before triggering the function and returns `result` / `deny` / `pending`. | -| [src/turn-orchestrator/hook.ts](harness/src/turn-orchestrator/hook.ts) | `consultBefore` — calls `policy::check_permissions` directly (5 s timeout) and maps the reply via `parsePolicyReply` (`approval-gate/schemas.ts`) to `allow` / `pending` / `deny`; fails closed with a `gate_unavailable` envelope. `publishAfter` still routes through `hook-fanout::publish_collect` for the after-hook fanout path. 
| -| [src/turn-orchestrator/approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts) | Per-call `turn::approval_resume` registration, handler (persist + `turn::step`), and startup recovery for parked sessions. | -| [src/turn-orchestrator/abort.ts](harness/src/turn-orchestrator/abort.ts) | `performAbortSideEffects` — writes `session//abort_signal = true` and, for turns paused on approvals, triggers each `turn::approval_resume` fn with `{decision: 'aborted'}`. | -| [src/turn-orchestrator/on-abort-signal.ts](harness/src/turn-orchestrator/on-abort-signal.ts) | State trigger adapter — `turn::is_abort_signal_set` (condition) + `turn::on_abort_signal` (handler) — publishes `turn::step_requested` whenever `session//abort_signal` is set to `true`. | -| [src/turn-orchestrator/subscriber.ts](harness/src/turn-orchestrator/subscriber.ts) | `turn::step` durable subscriber. Skips the auto re-publish of `turn::step_requested` while the record is in `function_awaiting_approval` (per-call resume fns own that wake). | -| [src/turn-orchestrator/transitions.ts](harness/src/turn-orchestrator/transitions.ts) | State → handler dispatch table. | -| [src/turn-orchestrator/states/*.ts](harness/src/turn-orchestrator/states/) | One file per FSM state; `states/functions.ts` owns `function_prepare`, `function_execute`, `function_awaiting_approval`, and `function_finalize`. | -| [src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts) | `TurnState`, `TurnStateRecord` (now with `awaiting_approval?: AwaitingApprovalEntry[]`), state-key helpers. | -| [src/turn-orchestrator/persistence.ts](harness/src/turn-orchestrator/persistence.ts) | Load/save helpers + the `session-tree::*` messages mirror. `PreparedEntry` now carries `pre_approved` so resumed turns can dispatch the call without re-asking the gate. 
| +| [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes all registered functions: `run::start`, per-state `turn::{state}` handlers, `turn::on_approval`, `turn::get_state`. | +| [src/turn-orchestrator/run-start.ts](harness/src/turn-orchestrator/run-start.ts) | `run::start` handler — persists run config and messages, seeds `turn_state` to `provisioning` via `saveRecord` (which wakes the FSM). | +| [src/turn-orchestrator/run-transition.ts](harness/src/turn-orchestrator/run-transition.ts) | Shared FSM transition runner: load → null-check → stale-skip → handle → save. Routes to `failed` on unexpected throw; re-throws `TransientError` for queue retry. | +| [src/turn-orchestrator/state-runtime/store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) | `TurnStore` / `createTurnStore` — session-scoped load/save, `shouldWakeStep`, inline FIFO enqueue from `saveRecord`. | +| [src/turn-orchestrator/run-request.ts](harness/src/turn-orchestrator/run-request.ts) | `RunRequest` type and `parseRunRequest` — the typed, parsed form of scope `run_request` (includes `function_schemas`). | +| [src/turn-orchestrator/get-state.ts](harness/src/turn-orchestrator/get-state.ts) | `turn::get_state` — one-shot reader returning `TurnStateView \| null`. | +| [src/turn-orchestrator/agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) | Dispatcher chokepoint: `dispatchWithHook` (consult + trigger), `triggerFunctionCall` (trigger/decode/error), `agentTriggerTool` (schema), `unwrapAgentTrigger`. | +| [src/turn-orchestrator/hook.ts](harness/src/turn-orchestrator/hook.ts) | `consultBefore` — `policy::check_permissions` (5 s, fail-closed) → `allow` / `pending` / `deny`. | +| [src/turn-orchestrator/function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) | `turn::function_awaiting_approval` FSM step + `turn::on_approval` state trigger on scope `approvals`. 
| +| [src/turn-orchestrator/schemas.ts](harness/src/turn-orchestrator/schemas.ts) | All registered-function I/O schemas and types: `RunStartPayloadSchema`, `TurnStepPayloadSchema`, `TurnStateView`, `toView`, `ApprovalDecisionEventSchema`. | +| [src/turn-orchestrator/state-runtime/ports.ts](harness/src/turn-orchestrator/state-runtime/ports.ts) | `TurnStatePorts` / `createTurnStatePorts` — shared dependency ports for per-state handlers (incl. `finishSession`). | +| [src/turn-orchestrator/provisioning/process.ts](harness/src/turn-orchestrator/provisioning/process.ts) | `turn::provisioning` handler and provisioning pipeline. | +| [src/turn-orchestrator/assistant-streaming/process.ts](harness/src/turn-orchestrator/assistant-streaming/process.ts) | `turn::assistant_streaming` handler and stream orchestration. | +| [src/turn-orchestrator/function-execute/process.ts](harness/src/turn-orchestrator/function-execute/process.ts) | `turn::function_execute` handler. | +| [src/turn-orchestrator/function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) | `turn::function_awaiting_approval` handler. | +| [src/turn-orchestrator/steering-check/process.ts](harness/src/turn-orchestrator/steering-check/process.ts) | `turn::steering_check` handler. | +| [src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts) | `TurnState`, `TurnStateRecord`, `TurnWork`, `AwaitingApprovalEntry`, state-key helpers, `newRecord`, `transitionTo`. | +| [src/turn-orchestrator/errors.ts](harness/src/turn-orchestrator/errors.ts) | `TransientError` (opt into queue retry), `ContextOverflowError`, `CompactionBusyError`. | | [src/turn-orchestrator/events.ts](harness/src/turn-orchestrator/events.ts) | `emit(iii, sid, event)` — appends a sequenced `AgentEvent` to the `agent::events` stream. 
| -| [src/turn-orchestrator/on-record-written.ts](harness/src/turn-orchestrator/on-record-written.ts) | State-trigger adapter — `turn::is_stepable_record_write` (condition) + `turn::on_record_written` (handler) — directly triggers `turn::step` on every non-terminal, non-parking `turn_state` write. Replaces the imperative `publishStep` self-publish so saving the record is itself the wake. | -| [src/turn-orchestrator/on-turn-state-changed.ts](harness/src/turn-orchestrator/on-turn-state-changed.ts) | State-trigger adapter — `turn::is_turn_state_write` (condition) + `turn::on_turn_state_changed` (handler) — emits `turn_state_changed` to `agent::events` on every `turn_state` write (created or updated). Carries the full new (and prior) `TurnStateRecord` so the console can derive pending approvals from state rather than from a signal event. | -| [src/turn-orchestrator/provider-router.ts](harness/src/turn-orchestrator/provider-router.ts) | Picks `provider::::stream` for the run's `provider` field. | -| [src/turn-orchestrator/system-prompt.ts](harness/src/turn-orchestrator/system-prompt.ts) | Builds the system prompt from `run_request.system_prompt` + bootstrap skills. | -| [src/turn-orchestrator/bootstrap.ts](harness/src/turn-orchestrator/bootstrap.ts) | Best-effort skill download via `directory::skills::download`. | +| [src/turn-orchestrator/preflight.ts](harness/src/turn-orchestrator/preflight.ts) | `runPreflight` — context-compaction check before each provider call. | +| [src/turn-orchestrator/provider-router.ts](harness/src/turn-orchestrator/provider-router.ts) | `decide` + `targetFunctionId` — pick `provider::::stream` for the run's `provider` field. | +| [src/turn-orchestrator/system-prompt.ts](harness/src/turn-orchestrator/system-prompt.ts) | `buildSystemPrompt` — assembles system prompt from request, bootstrap skills, skills index. 
| +| [src/turn-orchestrator/bootstrap.ts](harness/src/turn-orchestrator/bootstrap.ts) | Best-effort skill download via `directory::skills::download` at startup. | | [src/turn-orchestrator/config.ts](harness/src/turn-orchestrator/config.ts) | Loads the worker's config slice. | | [src/turn-orchestrator/iii.worker.yaml](harness/src/turn-orchestrator/iii.worker.yaml) | Worker manifest. | diff --git a/harness/src/approval-gate/iii.worker.yaml b/harness/src/approval-gate/iii.worker.yaml index 384ebc64..0e54c698 100644 --- a/harness/src/approval-gate/iii.worker.yaml +++ b/harness/src/approval-gate/iii.worker.yaml @@ -4,7 +4,7 @@ language: node deploy: binary manifest: package.json bin: iii-approval-gate -description: Registers approval::resolve; routes decisions to per-call turn::approval_resume functions owned by the turn-orchestrator. +description: Registers approval::resolve; persists human decisions to the approvals scope (turn-orchestrator reacts via turn::on_approval). runtime: kind: node diff --git a/harness/src/approval-gate/resolve.ts b/harness/src/approval-gate/resolve.ts index 1827d42a..fb4f3a8b 100644 --- a/harness/src/approval-gate/resolve.ts +++ b/harness/src/approval-gate/resolve.ts @@ -1,14 +1,16 @@ /** - * Approval resolution handler. `approval::resolve` routes the decision to - * the per-call resume function owned by the turn-orchestrator. + * Approval resolution handler. `approval::resolve` persists the decision to the + * shared `approvals` scope; the turn-orchestrator's reactive trigger + * (turn::on_approval) wakes the parked session. 
*/ import type { ISdk } from 'iii-sdk'; import { logger } from '../runtime/otel.js'; import { + STATE_SCOPE, type ResolvePayloadInput, ResolvePayloadSchema, - approvalResumeFnId, + pendingKey, resolveFunctionOptions, } from './schemas.js'; @@ -24,15 +26,18 @@ export async function handleResolveRequest( if (!parsed.success) return { ok: false, error: 'invalid_payload' }; const { session_id, function_call_id, decision, reason } = parsed.data; - const resumeFnId = approvalResumeFnId(session_id, function_call_id); try { await iii.trigger({ - function_id: resumeFnId, - payload: { decision, reason }, + function_id: 'state::set', + payload: { + scope: STATE_SCOPE, + key: pendingKey(session_id, function_call_id), + value: { decision, reason }, + }, }); } catch (err) { - logger.error('approval-gate: resume fn invoke failed', { err: String(err), resumeFnId }); + logger.error('approval-gate: decision write failed', { err: String(err), session_id }); return { ok: false, error: 'resume_failed' }; } return { ok: true }; diff --git a/harness/src/approval-gate/schemas.ts b/harness/src/approval-gate/schemas.ts index d3025e6a..13baebe1 100644 --- a/harness/src/approval-gate/schemas.ts +++ b/harness/src/approval-gate/schemas.ts @@ -36,52 +36,38 @@ const denialEnvelopeSchema = z.object({ export type DenialEnvelope = z.infer; /** - * Wire payload for `approval::resolve`. Accepts `function_call_id` or the - * legacy `tool_call_id` alias; output always has `function_call_id` set. - * Rejects "/" in either id at the boundary — it is the reserved separator in - * the state key, so a slashed id is refused here rather than thrown on later. + * Wire payload for `approval::resolve`. Rejects "/" in ids at the boundary — + * it is the reserved separator in the state key. 
*/ export const ResolvePayloadSchema = z .object({ session_id: z.string().min(1), - function_call_id: z.string().min(1).optional(), - tool_call_id: z.string().min(1).optional(), + function_call_id: z.string().min(1), decision: wireDecisionSchema, reason: z.string().nullable().optional(), }) - .transform((v, ctx) => { - const fnId = v.function_call_id ?? v.tool_call_id; - if (!fnId) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['function_call_id'], - message: 'function_call_id or tool_call_id is required', - }); - return z.NEVER; - } + .superRefine((v, ctx) => { if (v.session_id.includes('/')) { ctx.addIssue({ code: z.ZodIssueCode.custom, path: ['session_id'], message: 'session_id must not contain "/"', }); - return z.NEVER; } - if (fnId.includes('/')) { + if (v.function_call_id.includes('/')) { ctx.addIssue({ code: z.ZodIssueCode.custom, path: ['function_call_id'], message: 'function_call_id must not contain "/"', }); - return z.NEVER; } - return { - session_id: v.session_id, - function_call_id: fnId, - decision: v.decision, - reason: v.reason ?? null, - }; - }); + }) + .transform((v) => ({ + session_id: v.session_id, + function_call_id: v.function_call_id, + decision: v.decision, + reason: v.reason ?? 
null, + })); export type ResolvePayloadInput = z.input; const policyReplySchema = z.discriminatedUnion('decision', [ @@ -116,17 +102,16 @@ export function pendingKey(session_id: string, function_call_id: string): string const approvalDecisionSchema = z.enum(['allow', 'deny', 'aborted']); -export const ApprovalResumePayloadSchema = z.object({ +export const ApprovalDecisionSchema = z.object({ decision: approvalDecisionSchema, reason: z.string().nullable(), }); -export function approvalResumeFnId(session_id: string, function_call_id: string): string { - return `turn::approval_resume::${pendingKey(session_id, function_call_id)}`; -} +/** @deprecated Use ApprovalDecisionSchema */ +export const ApprovalResumePayloadSchema = ApprovalDecisionSchema; export const resolveFunctionOptions = { description: - 'Flip an approval to allow or deny. Invokes the per-call resume function to persist and wake the turn.', + 'Flip an approval to allow or deny. Persists the decision to the approvals scope to wake the parked turn.', request_format: zodToJsonSchema(ResolvePayloadSchema, { name: 'ResolvePayload' }), } as RegisterFunctionOptions; diff --git a/harness/src/context-compaction/config.ts b/harness/src/context-compaction/config.ts index 19989cf4..b6241dfa 100644 --- a/harness/src/context-compaction/config.ts +++ b/harness/src/context-compaction/config.ts @@ -9,6 +9,20 @@ const DEFAULT_TOOL_OUTPUT_MAX_CHARS = 2_000; // `busy` to users when async compaction is mid-flight. 
const DEFAULT_BUSY_TIMEOUT_MS = 30_000; +export const MIN_PRESERVE_RECENT_TOKENS = DEFAULT_MIN_PRESERVE_RECENT_TOKENS; +export const MAX_PRESERVE_RECENT_TOKENS = DEFAULT_MAX_PRESERVE_RECENT_TOKENS; + +export type CompactionConfig = Readonly<{ + reservedTokens: number; + tailTurns: number; + preserveRecentTokensOverride: number | undefined; + pruneProtect: number; + pruneMinFree: number; + toolOutputMaxChars: number; + busyTimeoutMs: number; + pruneProtectedTools: string[]; +}>; + function intEnv(name: string, def: number): number { const v = process.env[name]; if (!v) return def; @@ -16,41 +30,14 @@ function intEnv(name: string, def: number): number { return Number.isFinite(n) && n > 0 ? n : def; } -export function reservedTokens(): number { - return intEnv('COMPACT_RESERVED_TOKENS', DEFAULT_RESERVED_TOKENS); -} - -export function tailTurns(): number { - return intEnv('COMPACT_TAIL_TURNS', DEFAULT_TAIL_TURNS); -} - -export function preserveRecentTokensOverride(): number | undefined { +function readPreserveRecentTokensOverride(): number | undefined { const v = process.env.COMPACT_PRESERVE_RECENT_TOKENS; if (!v) return undefined; const n = Number.parseInt(v, 10); return Number.isFinite(n) && n > 0 ? 
n : undefined; } -export const MIN_PRESERVE_RECENT_TOKENS = DEFAULT_MIN_PRESERVE_RECENT_TOKENS; -export const MAX_PRESERVE_RECENT_TOKENS = DEFAULT_MAX_PRESERVE_RECENT_TOKENS; - -export function pruneProtect(): number { - return intEnv('COMPACT_PRUNE_PROTECT', DEFAULT_PRUNE_PROTECT); -} - -export function pruneMinFree(): number { - return intEnv('COMPACT_PRUNE_MIN_FREE', DEFAULT_PRUNE_MIN_FREE); -} - -export function toolOutputMaxChars(): number { - return intEnv('COMPACT_TOOL_OUTPUT_MAX_CHARS', DEFAULT_TOOL_OUTPUT_MAX_CHARS); -} - -export function busyTimeoutMs(): number { - return intEnv('COMPACT_BUSY_TIMEOUT_MS', DEFAULT_BUSY_TIMEOUT_MS); -} - -export function pruneProtectedTools(): string[] { +function readPruneProtectedTools(): string[] { const v = process.env.COMPACT_PRUNE_PROTECTED_TOOLS; if (!v) return []; return v @@ -59,19 +46,15 @@ export function pruneProtectedTools(): string[] { .filter(Boolean); } -// Deprecated. Hard upper bound on usable() to keep existing deployments -// from regressing. One-shot warning on first read. -let deprecatedTriggerTokensWarned = false; -export function deprecatedTriggerTokensCap(): number | undefined { - const v = process.env.COMPACT_TRIGGER_TOKENS; - if (!v) return undefined; - if (!deprecatedTriggerTokensWarned) { - deprecatedTriggerTokensWarned = true; - // eslint-disable-next-line no-console - console.warn( - '[context-compaction] COMPACT_TRIGGER_TOKENS is deprecated; use COMPACT_RESERVED_TOKENS. Treating as hard cap on usable().', - ); - } - const n = Number.parseInt(v, 10); - return Number.isFinite(n) && n > 0 ? 
n : undefined; +export function compactionConfig(): CompactionConfig { + return { + reservedTokens: intEnv('COMPACT_RESERVED_TOKENS', DEFAULT_RESERVED_TOKENS), + tailTurns: intEnv('COMPACT_TAIL_TURNS', DEFAULT_TAIL_TURNS), + preserveRecentTokensOverride: readPreserveRecentTokensOverride(), + pruneProtect: intEnv('COMPACT_PRUNE_PROTECT', DEFAULT_PRUNE_PROTECT), + pruneMinFree: intEnv('COMPACT_PRUNE_MIN_FREE', DEFAULT_PRUNE_MIN_FREE), + toolOutputMaxChars: intEnv('COMPACT_TOOL_OUTPUT_MAX_CHARS', DEFAULT_TOOL_OUTPUT_MAX_CHARS), + busyTimeoutMs: intEnv('COMPACT_BUSY_TIMEOUT_MS', DEFAULT_BUSY_TIMEOUT_MS), + pruneProtectedTools: readPruneProtectedTools(), + }; } diff --git a/harness/src/context-compaction/emit.ts b/harness/src/context-compaction/emit.ts deleted file mode 100644 index 88d1ddaf..00000000 --- a/harness/src/context-compaction/emit.ts +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Shared helper for emitting `compaction_done` after sync/async - * handlers finish rewriting flat-state. Pre-extraction this exact - * try/catch + payload block lived byte-for-byte in both handlers; the - * helper keeps the two handlers in sync and gives the failure log a - * stable code for monitoring. - */ -import { logger } from '../runtime/otel.js'; -import type { ISdk } from '../runtime/iii.js'; -import { emit } from '../turn-orchestrator/events.js'; - -export type CompactionMode = 'sync' | 'async'; - -export interface CompactionDonePayload { - summary_text: string; - tokens_before: number; - compaction_entry_id: string; - /** First entry_id of the preserved tail; null when nothing was kept. */ - tail_start_id: string | null; -} - -/** - * Best-effort: a publish failure is logged but never thrown — the - * caller has already done the load-bearing work (rewriting flat - * state) and the UI marker is a nice-to-have. 
- */ -export async function emitCompactionDone( - iii: ISdk, - session_id: string, - mode: CompactionMode, - payload: CompactionDonePayload, -): Promise { - try { - await emit(iii, session_id, { - type: 'compaction_done', - mode, - summary_text: payload.summary_text, - tokens_before: payload.tokens_before, - compaction_entry_id: payload.compaction_entry_id, - tail_start_id: payload.tail_start_id, - }); - } catch (err) { - logger.warn(`handler-${mode}: compaction_done emit failed`, { - code: 'compaction_done_emit_failed', - session_id, - err: String(err), - }); - } -} diff --git a/harness/src/context-compaction/flat-state.ts b/harness/src/context-compaction/flat-state.ts index d6f5b5da..a0295dfe 100644 --- a/harness/src/context-compaction/flat-state.ts +++ b/harness/src/context-compaction/flat-state.ts @@ -1,20 +1,12 @@ /** - * Keep flatMessagesKey in sync with turn-orchestrator/state.ts::messagesKey. - * Importing it directly would create a package-layer cycle (orchestrator - * depends on context-compaction via preflight). A drift-guard test asserts - * the two stay identical. + * Rewrite flat transcript messages in scope `messages`. 
*/ import type { ISdk } from '../runtime/iii.js'; import { stateSet } from '../runtime/state.js'; +import { MESSAGES_SCOPE } from '../turn-orchestrator/state.js'; import type { AgentMessage, AssistantMessage } from '../types/agent-message.js'; -const FLAT_STATE_SCOPE = 'agent'; - -export function flatMessagesKey(session_id: string): string { - return `session/${session_id}/messages`; -} - export function buildSummaryMessage(summary_text: string): AssistantMessage { return { role: 'assistant', @@ -39,5 +31,5 @@ export async function rewriteFlatMessages( session_id: string, messages: AgentMessage[], ): Promise { - await stateSet(iii, FLAT_STATE_SCOPE, flatMessagesKey(session_id), messages); + await stateSet(iii, MESSAGES_SCOPE, session_id, messages); } diff --git a/harness/src/context-compaction/handler-async.ts b/harness/src/context-compaction/handler-async.ts index 2504ff52..b025aff2 100644 --- a/harness/src/context-compaction/handler-async.ts +++ b/harness/src/context-compaction/handler-async.ts @@ -7,14 +7,16 @@ import { setCurrentSpanAttribute, withSpan } from 'iii-sdk/telemetry'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; -import { emitCompactionDone } from './emit.js'; -import { pruneMinFree, pruneProtect, pruneProtectedTools, reservedTokens } from './config.js'; -import { buildSummaryMessage, rewriteFlatMessages } from './flat-state.js'; +import { compactionConfig } from './config.js'; +import { + isSummarizeOk, + persistCompactionFlatState, + publishCompactionDone, + runSummarizeCompaction, +} from './handler-pipeline.js'; import { acquireLease, releaseLease } from './lease.js'; import { fetchModelLimit } from './model-resolver.js'; -import { type ModelLimit, isOverflow } from './overflow.js'; -import { prune } from './prune.js'; -import { summarizeAndAppend } from './summarize.js'; +import { isOverflow } from './overflow.js'; export function extractEventPayload( payload: unknown, @@ -53,10 +55,9 @@ export 
function turnEndUsage(event: unknown): Record | null { type ResolvedModel = { providerID: string; modelID: string; - modelLimit: ModelLimit; + modelLimit: { context: number; input: number; output: number }; } | null; -// Priority: event.message → last assistant in session-tree → models::get. async function resolveModelFromEvent( iii: ISdk, session_id: string, @@ -144,7 +145,7 @@ export async function handleAsync(iii: ISdk, frame: unknown): Promise { !isOverflow({ tokens: usageObj, model: { id: model.modelID, limit: model.modelLimit }, - reserved: reservedTokens(), + reserved: compactionConfig().reservedTokens, }) ) { return; @@ -159,38 +160,22 @@ export async function handleAsync(iii: ISdk, frame: unknown): Promise { } try { - await prune(iii, payload.session_id, { - protectTokens: pruneProtect(), - minFree: pruneMinFree(), - protectedTools: pruneProtectedTools(), - }); - const result = await summarizeAndAppend( + const result = await runSummarizeCompaction( iii, payload.session_id, { mode: 'async' }, - { - providerID: model.providerID, - modelID: model.modelID, - modelLimit: model.modelLimit, - }, + model, ); - const succeeded = result !== 'empty' && result.kind === 'ok'; - setCurrentSpanAttribute('used_prior_summary', succeeded); - if (succeeded) { - await rewriteFlatMessages(iii, payload.session_id, [ - buildSummaryMessage(result.summary_text), - ...result.tail_messages, - ]); - // Tell the UI we just compacted so it can insert a marker and - // re-estimate context usage. Best-effort: a publish failure must - // not leak out of the background handler. 
- await emitCompactionDone(iii, payload.session_id, 'async', { - summary_text: result.summary_text, - tokens_before: result.tokens_before, - compaction_entry_id: result.compaction_entry_id, - tail_start_id: result.tail_start_id, - }); + setCurrentSpanAttribute('used_prior_summary', isSummarizeOk(result)); + if (isSummarizeOk(result)) { + await persistCompactionFlatState( + iii, + payload.session_id, + result.summary_text, + result.tail_messages, + ); + await publishCompactionDone(iii, payload.session_id, 'async', result); } } catch (err) { logger.warn('handler-async: compaction failed', { diff --git a/harness/src/context-compaction/handler-pipeline.ts b/harness/src/context-compaction/handler-pipeline.ts new file mode 100644 index 00000000..7613d7e3 --- /dev/null +++ b/harness/src/context-compaction/handler-pipeline.ts @@ -0,0 +1,116 @@ +/** + * Shared prune → summarize → flat-state rewrite path for sync and async handlers. + */ + +import { logger } from '../runtime/otel.js'; +import type { ISdk } from '../runtime/iii.js'; +import { emit } from '../turn-orchestrator/events.js'; +import type { AgentMessage } from '../types/agent-message.js'; +import { compactionConfig } from './config.js'; +import { buildSummaryMessage, rewriteFlatMessages } from './flat-state.js'; +import type { ModelLimit } from './overflow.js'; +import { prune } from './prune.js'; +import { + type SummarizeOk, + type SummarizeOptions, + type SummarizeOutcome, + summarizeAndAppend, +} from './summarize.js'; + +export type CompactionMode = 'sync' | 'async'; + +export interface CompactionDonePayload { + summary_text: string; + tokens_before: number; + compaction_entry_id: string; + /** First entry_id of the preserved tail; null when nothing was kept. */ + tail_start_id: string | null; +} + +/** + * Best-effort: a publish failure is logged but never thrown — the + * caller has already done the load-bearing work (rewriting flat + * state) and the UI marker is a nice-to-have. 
+ */ +async function emitCompactionDone( + iii: ISdk, + session_id: string, + mode: CompactionMode, + payload: CompactionDonePayload, +): Promise { + try { + await emit(iii, session_id, { + type: 'compaction_done', + mode, + summary_text: payload.summary_text, + tokens_before: payload.tokens_before, + compaction_entry_id: payload.compaction_entry_id, + tail_start_id: payload.tail_start_id, + }); + } catch (err) { + logger.warn(`handler-${mode}: compaction_done emit failed`, { + code: 'compaction_done_emit_failed', + session_id, + err: String(err), + }); + } +} + +export type CompactionModel = { + providerID: string; + modelID: string; + modelLimit: ModelLimit; +}; + +export async function pruneSessionToolOutputs(iii: ISdk, session_id: string): Promise { + const cfg = compactionConfig(); + await prune(iii, session_id, { + protectTokens: cfg.pruneProtect, + minFree: cfg.pruneMinFree, + protectedTools: cfg.pruneProtectedTools, + }); +} + +export async function runSummarizeCompaction( + iii: ISdk, + session_id: string, + options: SummarizeOptions, + model: CompactionModel, +): Promise { + await pruneSessionToolOutputs(iii, session_id); + return summarizeAndAppend(iii, session_id, options, { + providerID: model.providerID, + modelID: model.modelID, + modelLimit: model.modelLimit, + }); +} + +export async function persistCompactionFlatState( + iii: ISdk, + session_id: string, + summary_text: string, + tail_messages: AgentMessage[], + extra?: AgentMessage[], +): Promise { + const messages: AgentMessage[] = [buildSummaryMessage(summary_text), ...tail_messages]; + if (extra) messages.push(...extra); + await rewriteFlatMessages(iii, session_id, messages); +} + +export async function publishCompactionDone( + iii: ISdk, + session_id: string, + mode: CompactionMode, + result: SummarizeOk, +): Promise { + await emitCompactionDone(iii, session_id, mode, { + summary_text: result.summary_text, + tokens_before: result.tokens_before, + compaction_entry_id: result.compaction_entry_id, 
+ tail_start_id: result.tail_start_id, + }); +} + +export function isSummarizeOk(result: SummarizeOutcome): result is SummarizeOk { + return result !== 'empty' && result.kind === 'ok'; +} diff --git a/harness/src/context-compaction/handler-sync.ts b/harness/src/context-compaction/handler-sync.ts index fe23abdd..fce870f3 100644 --- a/harness/src/context-compaction/handler-sync.ts +++ b/harness/src/context-compaction/handler-sync.ts @@ -8,15 +8,16 @@ import { setCurrentSpanAttribute, withSpan } from 'iii-sdk/telemetry'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; -import { emitCompactionDone } from './emit.js'; import type { AgentMessage } from '../types/agent-message.js'; -import { busyTimeoutMs, pruneMinFree, pruneProtect, pruneProtectedTools } from './config.js'; -import { buildSummaryMessage, rewriteFlatMessages } from './flat-state.js'; +import { compactionConfig } from './config.js'; +import { + persistCompactionFlatState, + publishCompactionDone, + runSummarizeCompaction, +} from './handler-pipeline.js'; import { acquireLeaseWithWait, releaseLease } from './lease.js'; import type { ModelLimit } from './overflow.js'; -import { prune } from './prune.js'; import { type MessageWithEntryId, extractReplayTarget, reinjectReplay } from './replay.js'; -import { summarizeAndAppend } from './summarize.js'; export type CompactNowInput = { session_id: string; @@ -43,14 +44,18 @@ export async function handleSync(iii: ISdk, input: CompactNowInput): Promise } @@ -73,14 +78,7 @@ export async function handleSync(iii: ISdk, input: CompactNowInput): Promise replay user msg -> synthetic continue. 
let lastEntryId = result.compaction_entry_id || null; if (replay) { lastEntryId = await reinjectReplay(iii, input.session_id, replay, lastEntryId); @@ -119,26 +114,15 @@ export async function handleSync(iii: ISdk, input: CompactNowInput): Promise).ts; - if (typeof ts === 'number') return Math.floor(ts / 1000); - } + if (!v || typeof v !== 'object') return 0; + const ts = (v as Record).ts; + if (typeof ts === 'number') return Math.floor(ts / 1000); return 0; } @@ -44,19 +44,20 @@ export async function acquireLease( session_id: string, kind: LeaseKind = 'compaction', ): Promise { - const key = leaseKey(session_id, kind); + const scope = leaseScope(kind); + const key = session_id; const now_ms = Date.now(); const now_secs = Math.floor(now_ms / 1000); // Fast path: skip the atomic set when a valid lease is clearly held. - const existing = await stateGet(iii, STATE_SCOPE, key); + const existing = await stateGet(iii, scope, key); if (existing && isLeaseActive(existing, now_secs)) return null; const nonce = mintLeaseNonce(); // path: '' targets FieldPath::root in the engine — set the whole value // atomically. Without `path`, the engine fails to deserialize the op and // stateUpdate falls into its catch + returns null. - const result = await stateUpdate(iii, STATE_SCOPE, key, [ + const result = await stateUpdate(iii, scope, key, [ { type: 'set', path: '', value: { nonce, ts: now_ms } }, ]); // stateUpdate swallows backend errors and returns null. Treat a null @@ -70,7 +71,7 @@ export async function acquireLease( // and bow out. stateUpdate is atomic, so only one caller can see // old_value == null (or expired) — exactly one winner. 
if (oldValue && isLeaseActive(oldValue, now_secs)) { - await stateSet(iii, STATE_SCOPE, key, oldValue); + await stateSet(iii, scope, key, oldValue); return null; } return nonce; @@ -87,8 +88,9 @@ export async function releaseLease( ourNonce: string, kind: LeaseKind = 'compaction', ): Promise { - const key = leaseKey(session_id, kind); - const stored = await stateGet(iii, STATE_SCOPE, key); + const scope = leaseScope(kind); + const key = session_id; + const stored = await stateGet(iii, scope, key); const storedNonce = stored && typeof stored === 'object' && @@ -96,12 +98,12 @@ export async function releaseLease( ? ((stored as Record).nonce as string) : null; if (storedNonce === ourNonce) { - await stateSet(iii, STATE_SCOPE, key, null); + await stateSet(iii, scope, key, null); } } export async function stampLastCompaction(iii: ISdk, session_id: string): Promise { - await stateSet(iii, STATE_SCOPE, `session/${session_id}/last_compaction_at`, Date.now()); + await stateSet(iii, LAST_COMPACTION_AT_SCOPE, session_id, Date.now()); } export async function acquireLeaseWithWait( diff --git a/harness/src/context-compaction/model-resolver.ts b/harness/src/context-compaction/model-resolver.ts index ea012bfc..996f1e2b 100644 --- a/harness/src/context-compaction/model-resolver.ts +++ b/harness/src/context-compaction/model-resolver.ts @@ -1,3 +1,4 @@ +import { RUN_REQUEST_SCOPE } from '../turn-orchestrator/state.js'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { ModelLimit } from './overflow.js'; @@ -88,7 +89,7 @@ export async function resolveModelFromSession( // Fallback when no assistant message carries provider/model yet (first-turn // sessions, error-only sessions). The orchestrator writes run_request at -// agent::session//run_request during run::start. +// `run_request` scope during run::start. 
export async function resolveModelFromRunRequest( iii: ISdk, session_id: string, @@ -96,7 +97,7 @@ export async function resolveModelFromRunRequest( try { const req = await iii.trigger({ function_id: 'state::get', - payload: { scope: 'agent', key: `session/${session_id}/run_request` }, + payload: { scope: RUN_REQUEST_SCOPE, key: session_id }, timeoutMs: 5_000, }); const providerID = typeof req?.provider === 'string' && req.provider ? req.provider : null; diff --git a/harness/src/context-compaction/overflow.ts b/harness/src/context-compaction/overflow.ts index 0d3e88ab..dfc9cca0 100644 --- a/harness/src/context-compaction/overflow.ts +++ b/harness/src/context-compaction/overflow.ts @@ -1,9 +1,7 @@ import { MAX_PRESERVE_RECENT_TOKENS, MIN_PRESERVE_RECENT_TOKENS, - deprecatedTriggerTokensCap, - preserveRecentTokensOverride, - reservedTokens, + compactionConfig, } from './config.js'; export type ModelLimit = { @@ -28,13 +26,12 @@ export type TokensLike = { export function usable(input: { model: ModelLike; reserved?: number }): number { const { model } = input; if (model.limit.context === 0) return 0; - const reserved = input.reserved ?? reservedTokens(); + const reserved = input.reserved ?? compactionConfig().reservedTokens; const base = model.limit.input > 0 ? Math.max(0, model.limit.input - reserved) : Math.max(0, model.limit.context - model.limit.output); - const cap = deprecatedTriggerTokensCap(); - return cap !== undefined ? Math.min(base, cap) : base; + return base; } export function isOverflow(input: { @@ -60,7 +57,7 @@ export function preserveRecentBudget(input: { reserved?: number; override?: number; }): number { - const ovr = input.override ?? preserveRecentTokensOverride(); + const ovr = input.override ?? 
compactionConfig().preserveRecentTokensOverride; if (ovr !== undefined) return ovr; const u = usable({ model: input.model, reserved: input.reserved }); return Math.min( diff --git a/harness/src/context-compaction/register.ts b/harness/src/context-compaction/register.ts index b103a00e..9d407cf6 100644 --- a/harness/src/context-compaction/register.ts +++ b/harness/src/context-compaction/register.ts @@ -1,6 +1,6 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; -import { pruneMinFree, pruneProtect, pruneProtectedTools } from './config.js'; +import { compactionConfig } from './config.js'; import { handleAsync } from './handler-async.js'; import { type CompactNowInput, handleSync } from './handler-sync.js'; import { acquireLease, releaseLease } from './lease.js'; @@ -11,7 +11,10 @@ import { } from './model-resolver.js'; import { prune } from './prune.js'; -const AGENT_EVENTS_STREAM = 'agent::events'; +// Compaction only acts on turn_end, so it subscribes to the dedicated +// turn_end stream (mirrored by the producer) rather than the full +// agent::events firehose — one wake per turn instead of per event. +const TURN_END_STREAM = 'agent::turn_end'; // Sized so preserveRecentBudget clamps to its 2k minimum when the real // model is unknown — compaction is best-effort, not fatal. 
@@ -61,7 +64,7 @@ export async function register(iii: ISdk): Promise { }, { description: - 'Internal: subscribes to agent::events; triggers async compaction on TurnEnd when running tokens exceed usable(model).', + 'Internal: subscribes to agent::turn_end; triggers async compaction on TurnEnd when running tokens exceed usable(model).', }, ); @@ -115,10 +118,11 @@ export async function register(iii: ISdk): Promise { const nonce = await acquireLease(iii, session_id, 'prune'); if (!nonce) return { pruned_tokens: 0, pruned_parts: 0, scanned_parts: 0, busy: true }; try { + const cfg = compactionConfig(); return await prune(iii, session_id, { - protectTokens: pruneProtect(), - minFree: pruneMinFree(), - protectedTools: pruneProtectedTools(), + protectTokens: cfg.pruneProtect, + minFree: cfg.pruneMinFree, + protectedTools: cfg.pruneProtectedTools, }); } finally { await releaseLease(iii, session_id, nonce, 'prune'); @@ -183,6 +187,6 @@ export async function register(iii: ISdk): Promise { iii.registerTrigger({ type: 'stream', function_id: 'context-compaction::on_agent_event', - config: { stream_name: AGENT_EVENTS_STREAM }, + config: { stream_name: TURN_END_STREAM }, }); } diff --git a/harness/src/context-compaction/summarize.ts b/harness/src/context-compaction/summarize.ts index 2b634405..9e61a563 100644 --- a/harness/src/context-compaction/summarize.ts +++ b/harness/src/context-compaction/summarize.ts @@ -2,7 +2,7 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import { decide, targetFunctionId } from '../turn-orchestrator/provider-router.js'; import type { AgentMessage, AssistantMessage } from '../types/agent-message.js'; -import { preserveRecentTokensOverride, tailTurns, toolOutputMaxChars } from './config.js'; +import { compactionConfig } from './config.js'; import { stampLastCompaction } from './lease.js'; import { type ModelLimit, preserveRecentBudget } from './overflow.js'; import { @@ -139,15 +139,16 @@ export async 
function summarizeAndAppend( const prior = completedCompactions(await loadCompactionEntries(iii, session_id)); const previousSummary = prior.at(-1)?.summary; + const cfg = compactionConfig(); const budget = preserveRecentBudget({ model: { id: model.modelID, limit: model.modelLimit }, - override: preserveRecentTokensOverride(), + override: cfg.preserveRecentTokensOverride, }); const sel = selectWithEntryIds({ entries, budget, - tailTurns: tailTurns(), + tailTurns: cfg.tailTurns, estimate: estimateTokenCount, }); if (sel.head.length === 0) { @@ -157,7 +158,7 @@ export async function summarizeAndAppend( const head_messages = sel.head.map((e) => e.message); const tail_messages: AgentMessage[] = entries.slice(sel.head.length).map((e) => e.message); const tokens_before = estimateTokenCount(head_messages); - const stripped = stripMedia(head_messages, { toolOutputMaxChars: toolOutputMaxChars() }); + const stripped = stripMedia(head_messages, { toolOutputMaxChars: cfg.toolOutputMaxChars }); const systemPrompt = buildPrompt({ previousSummary, context: [] }); const userPrompt = renderUserPrompt(stripped); diff --git a/harness/src/harness/fanout/sessions-poll.ts b/harness/src/harness/fanout/sessions-poll.ts index 94218d10..2355c68c 100644 --- a/harness/src/harness/fanout/sessions-poll.ts +++ b/harness/src/harness/fanout/sessions-poll.ts @@ -1,38 +1,28 @@ import type { ISdk, Trigger } from '../../runtime/iii.js'; import { logger } from '../../runtime/otel.js'; +import { TURN_STATE_SCOPE } from '../../turn-orchestrator/state.js'; import type { FanoutState } from '../ui-subscribe.js'; export const SESSION_CREATED_HANDLER_FN_ID = 'harness::fanout::session_created'; -export const SESSION_CREATE_CONDITION_FN_ID = 'harness::session::is_create_event'; -const SESSION_RECORD_KEY_RE = /^session\/[^/]+\/turn_state$/; -function extractSessionId(key: string): string | null { - const m = SESSION_RECORD_KEY_RE.exec(key); - if (!m) return null; - return key.slice('session/'.length, key.length 
- '/turn_state'.length); +/** + * A new session is signalled by the first `state:created` write on scope + * `turn_state` (key = session id). The state trigger matches that scope in + * engine — no `condition_function_id` RPC per turn_state update — so this + * handler is the sole gate: it acts only on `state:created`. + */ +function sessionCreatedId(event: unknown): string | null { + const obj = (event ?? {}) as Record; + if (obj.event_type !== 'state:created') return null; + const key = typeof obj.key === 'string' ? obj.key : ''; + return key.length > 0 ? key : null; } export function spawnSessionsPoll(iii: ISdk, state: FanoutState): () => void { - const conditionRef = iii.registerFunction( - SESSION_CREATE_CONDITION_FN_ID, - async (event: unknown) => { - const obj = (event ?? {}) as Record; - const event_type = typeof obj.event_type === 'string' ? obj.event_type : null; - const key = typeof obj.key === 'string' ? obj.key : null; - return event_type === 'state:created' && !!key && SESSION_RECORD_KEY_RE.test(key); - }, - { - description: - 'Condition: state event is a new session record (event_type=state:created, scope=agent, key=session//turn_state).', - }, - ); - const handlerRef = iii.registerFunction( SESSION_CREATED_HANDLER_FN_ID, async (event: unknown) => { - const obj = (event ?? {}) as Record; - const key = typeof obj.key === 'string' ? 
obj.key : ''; - const session_id = extractSessionId(key); + const session_id = sessionCreatedId(event); if (!session_id) return null; const payload = { added: [session_id], removed: [] as string[] }; for (const browser_id of state.allSubscribers()) { @@ -48,7 +38,7 @@ export function spawnSessionsPoll(iii: ISdk, state: FanoutState): () => void { }, { description: - 'Internal: fans out a single newly-created session id to ui::sessions::changed::.', + 'Internal: fans out a newly-created session id to ui::sessions::changed::.', }, ); @@ -57,10 +47,7 @@ export function spawnSessionsPoll(iii: ISdk, state: FanoutState): () => void { trigger = iii.registerTrigger({ type: 'state', function_id: SESSION_CREATED_HANDLER_FN_ID, - config: { - scope: 'agent', - condition_function_id: SESSION_CREATE_CONDITION_FN_ID, - }, + config: { scope: TURN_STATE_SCOPE }, }); } catch (err) { logger.warn('sessions state trigger registration failed', { err: String(err) }); @@ -73,8 +60,5 @@ export function spawnSessionsPoll(iii: ISdk, state: FanoutState): () => void { try { handlerRef.unregister(); } catch {} - try { - conditionRef.unregister(); - } catch {} }; } diff --git a/harness/src/index.ts b/harness/src/index.ts index 78e7f15b..fed2b395 100644 --- a/harness/src/index.ts +++ b/harness/src/index.ts @@ -43,13 +43,13 @@ const WORKERS: readonly WorkerDefinition[] = [ { name: 'turn-orchestrator', description: - 'Durable run::start state machine driving each agent turn through provisioning, assistant, function-execute, steering, tearing-down.', + 'Durable run::start state machine driving each agent turn through provisioning, assistant, function-execute, and steering.', register: (iii, ctx) => registerTurnOrchestrator(iii, ctx), }, { name: 'approval-gate', description: - 'Registers approval::resolve; routes human decisions to per-call turn::approval_resume functions owned by the turn-orchestrator.', + 'Registers approval::resolve; persists human decisions to the approvals scope and enqueues 
turn::function_awaiting_approval.', register: (iii) => registerApprovalGate(iii), }, { diff --git a/harness/src/llm-budget/store.ts b/harness/src/llm-budget/store.ts index 4206f4d6..52b442d5 100644 --- a/harness/src/llm-budget/store.ts +++ b/harness/src/llm-budget/store.ts @@ -3,6 +3,7 @@ */ import type { ISdk } from '../runtime/iii.js'; +import { createState } from '../runtime/state.js'; import { type Budget, SCOPE, @@ -12,71 +13,45 @@ import { spendLogKey, } from './types.js'; -async function stateSet(iii: ISdk, key: string, value: unknown): Promise { - await iii.trigger({ - function_id: 'state::set', - payload: { scope: SCOPE, key, value }, - }); +function strictState(iii: ISdk) { + return createState(iii, { tolerant: false }); } -async function stateGetValue(iii: ISdk, key: string): Promise { - const resp = await iii.trigger({ - function_id: 'state::get', - payload: { scope: SCOPE, key }, - }); - if (resp === null || resp === undefined) return null; - if (resp && typeof resp === 'object' && 'value' in (resp as Record)) { - const v = (resp as Record).value; - return v === null || v === undefined ? 
null : v; - } - return resp; +function isBudget(v: unknown): v is Budget { + return ( + v !== null && + typeof v === 'object' && + typeof (v as Budget).id === 'string' && + typeof (v as Budget).ceiling_usd === 'number' + ); } -async function stateList(iii: ISdk, prefix: string): Promise { - const resp = await iii.trigger({ - function_id: 'state::list', - payload: { scope: SCOPE, prefix }, - }); - if (Array.isArray(resp)) return resp; - if (resp && typeof resp === 'object') { - const items = (resp as Record).items; - if (Array.isArray(items)) return items; - } - return []; -} - -async function stateDelete(iii: ISdk, key: string): Promise { - await iii.trigger({ - function_id: 'state::delete', - payload: { scope: SCOPE, key }, - }); +function isSpendLogEntry(v: unknown, budget_id: string): v is SpendLogEntry { + return ( + v !== null && + typeof v === 'object' && + (v as SpendLogEntry).budget_id === budget_id && + !('ceiling_usd' in (v as Record)) + ); } export async function loadBudget(iii: ISdk, id: string): Promise { - const v = await stateGetValue(iii, budgetKey(id)); + const v = await strictState(iii).get({ scope: SCOPE, key: budgetKey(id) }); if (v === null) return null; - return v as Budget; + return isBudget(v) ? v : null; } export async function saveBudget(iii: ISdk, b: Budget): Promise { - await stateSet(iii, budgetKey(b.id), b); + await strictState(iii).set({ scope: SCOPE, key: budgetKey(b.id), value: b }); } export async function deleteBudgetRecord(iii: ISdk, id: string): Promise { - await stateDelete(iii, budgetKey(id)); + await strictState(iii).delete({ scope: SCOPE, key: budgetKey(id) }); } export async function listAllBudgets(iii: ISdk): Promise { - const items = await stateList(iii, 'budget:'); - const out: Budget[] = []; - for (const v of items) { - const inner = - v && typeof v === 'object' && 'value' in (v as Record) - ? 
(v as Record).value - : v; - if (inner && typeof inner === 'object' && (inner as Budget).id) out.push(inner as Budget); - } - return out; + const items = await strictState(iii).list({ scope: SCOPE }); + return items.filter(isBudget); } export async function saveSpendLog( @@ -85,7 +60,7 @@ export async function saveSpendLog( period_start: number, e: SpendLogEntry, ): Promise { - await stateSet(iii, spendLogKey(id, period_start), e); + await strictState(iii).set({ scope: SCOPE, key: spendLogKey(id, period_start), value: e }); } export async function saveResetLog( @@ -96,20 +71,14 @@ export async function saveResetLog( suffix: string, e: SpendLogEntry, ): Promise { - await stateSet(iii, resetLogKey(id, period_start, ts, suffix), e); + await strictState(iii).set({ + scope: SCOPE, + key: resetLogKey(id, period_start, ts, suffix), + value: e, + }); } export async function listSpendLogs(iii: ISdk, budget_id: string): Promise { - const items = await stateList(iii, `spend_log:${budget_id}:`); - const out: SpendLogEntry[] = []; - for (const v of items) { - const inner = - v && typeof v === 'object' && 'value' in (v as Record) - ? 
(v as Record).value - : v; - if (inner && typeof inner === 'object' && (inner as SpendLogEntry).budget_id === budget_id) { - out.push(inner as SpendLogEntry); - } - } - return out; + const items = await strictState(iii).list({ scope: SCOPE }); + return items.filter((v): v is SpendLogEntry => isSpendLogEntry(v, budget_id)); } diff --git a/harness/src/models-catalog/state.ts b/harness/src/models-catalog/state.ts index dee5e543..4e5644ab 100644 --- a/harness/src/models-catalog/state.ts +++ b/harness/src/models-catalog/state.ts @@ -6,7 +6,7 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; -import { stateGet, stateList, stateSet } from '../runtime/state.js'; +import { stateGet, stateListValues, stateSet } from '../runtime/state.js'; import { type ListFilter, loadEmbeddedCatalog } from './catalog.js'; import { MODELS_KEY_PREFIX, MODELS_SCOPE, type Model, supportsModel } from './types.js'; @@ -22,7 +22,7 @@ export function modelKey(provider: string, id: string): string { export async function seedStateIfEmpty(iii: ISdk, _cfg: StateConfig): Promise { try { - const items = await stateList(iii, MODELS_SCOPE, MODELS_KEY_PREFIX); + const items = await stateListValues(iii, { scope: MODELS_SCOPE }); if (items.length > 0) return; const catalog = await loadEmbeddedCatalog(); for (const m of catalog) { @@ -37,10 +37,9 @@ export async function seedStateIfEmpty(iii: ISdk, _cfg: StateConfig): Promise { - const items = await stateList(iii, MODELS_SCOPE, MODELS_KEY_PREFIX); - const fromState = items - .map((v) => v as Model | null) - .filter((m): m is Model => Boolean(m && typeof m === 'object' && m.id)); + const fromState = (await stateListValues(iii, { scope: MODELS_SCOPE })).filter( + (m): m is Model => Boolean(m && typeof m === 'object' && m.id), + ); const source = fromState.length > 0 ? 
fromState : await loadEmbeddedCatalog(); return source .filter((m) => filter.provider === undefined || m.provider === filter.provider) diff --git a/harness/src/runtime/state.ts b/harness/src/runtime/state.ts index 6a47279d..01762249 100644 --- a/harness/src/runtime/state.ts +++ b/harness/src/runtime/state.ts @@ -1,43 +1,55 @@ /** - * Tiny `state::*` wrappers. Mirrors - * `turn-orchestrator/src/persistence.rs::state_get` / `state_set`. + * `state::*` client aligned with `iii-sdk/state` (`IState`). * - * All helpers are tolerant: trigger errors degrade to `null` / `[]` and - * are logged at warn level so a single failed read never aborts a turn. + * Tolerant helpers (default) mirror turn-orchestrator persistence: trigger + * errors degrade to `null` / `[]` and are logged at warn level so a single + * failed read never aborts a turn. Use `createState(iii, { tolerant: false })` + * when storage errors should propagate (session store, llm-budget). */ import type { ISdk } from 'iii-sdk'; -import type { StateListInput } from 'iii-sdk/state'; +import type { UpdateOp } from 'iii-sdk/stream'; +import type { + DeleteResult, + IState, + StateDeleteInput, + StateGetInput, + StateListInput, + StateSetInput, + StateSetResult, + StateUpdateInput, + StateUpdateResult, +} from 'iii-sdk/state'; import { logger } from './otel.js'; -export type { StateListInput } from 'iii-sdk/state'; - -// Mirrors engine `UpdateOp` (sdk/packages/rust/iii/src/types.rs). Variants -// that target a JSON field (`set`, `increment`, `decrement`, `remove`) take -// a required `path` string — `""` (FieldPath::root) means "the whole value". -// `merge`/`append` accept an optional MergePath (string or array of strings). 
-export type StateUpdateOp = - | { type: 'set'; path: string; value: unknown } - | { type: 'merge'; path?: string | string[]; value: Record } - | { type: 'append'; path?: string | string[]; value: unknown } - | { type: 'increment'; path: string; by: number } - | { type: 'decrement'; path: string; by: number } - | { type: 'remove'; path: string } - | { type: string; [k: string]: unknown }; - -/** One row from a keyed `state::list` envelope (`{ items: [...] }`), when present. */ -export type StateListKeyedEntry = { - key?: string; - value?: unknown; +export type { UpdateOp } from 'iii-sdk/stream'; +export type { + DeleteResult, + IState, + StateDeleteInput, + StateGetInput, + StateListInput, + StateSetInput, + StateSetResult, + StateUpdateInput, + StateUpdateResult, +} from 'iii-sdk/state'; + +export type CreateStateOptions = { + /** When true (default), log and return null/[] on trigger failure. */ + tolerant?: boolean; }; +type StateListGroupsResult = { groups: string[] }; + +function normalizeGetResult(v: unknown): T | null { + if (v === null || v === undefined) return null; + return v as T; +} + /** Raw list rows before value unwrap; `null` when the response is not a list. */ export function stateListResponseRows(response: unknown): unknown[] | null { if (Array.isArray(response)) return response; - if (response && typeof response === 'object') { - const items = (response as Record).items; - if (Array.isArray(items)) return items; - } return null; } @@ -51,9 +63,8 @@ function unwrapStateListEntry(entry: unknown): T { /** * Normalizes a `state::list` trigger result to stored values. * - * Official iii returns a flat `T[]` ({@link StateListInput} only). Some - * deployments also wrap rows as `{ value }` or `{ items: [{ key, value }] }`; - * we accept those shapes so harness workers stay compatible. + * Official iii returns a flat `T[]`. Some bridge deployments wrap rows as + * `{ value }`; we accept that shape for compatibility. 
*/ export function parseStateListValues(response: unknown): T[] { const arr = stateListResponseRows(response); @@ -61,106 +72,148 @@ export function parseStateListValues(response: unknown): T[] { return arr.map((entry) => unwrapStateListEntry(entry)); } -/** Keyed rows when the list response includes `key` (not returned by stock iii). */ -export function parseStateListKeyedEntries(response: unknown): StateListKeyedEntry[] { - const arr = stateListResponseRows(response); - if (!arr) return []; - return arr.map((entry) => { - if (entry && typeof entry === 'object') { - const row = entry as Record; - return { - key: typeof row.key === 'string' ? row.key : undefined, - value: row.value !== undefined ? row.value : entry, - }; +export function createState(iii: ISdk, opts: CreateStateOptions = {}): IState { + const tolerant = opts.tolerant !== false; + + async function run( + op: string, + context: Record, + fn: () => Promise, + fallback: T, + ): Promise { + try { + return await fn(); + } catch (err) { + if (tolerant) { + logger.warn(`${op} failed`, { ...context, err: String(err) }); + return fallback; + } + throw err; } - return { value: entry }; - }); + } + + return { + get: (input: StateGetInput): Promise => + run( + 'state::get', + { scope: input.scope, key: input.key }, + async () => { + const v = await iii.trigger({ + function_id: 'state::get', + payload: input, + }); + return normalizeGetResult(v); + }, + null, + ), + + set: (input: StateSetInput): Promise | null> => + run( + 'state::set', + { scope: input.scope, key: input.key }, + async () => { + const result = await iii.trigger>({ + function_id: 'state::set', + payload: input, + }); + return result ?? null; + }, + null, + ), + + delete: (input: StateDeleteInput): Promise => + run( + 'state::delete', + { scope: input.scope, key: input.key }, + async () => { + const result = await iii.trigger({ + function_id: 'state::delete', + payload: input, + }); + return result ?? 
{}; + }, + {}, + ), + + list: (input: StateListInput): Promise => + run( + 'state::list', + { scope: input.scope }, + async () => { + const resp = await iii.trigger({ + function_id: 'state::list', + payload: input, + }); + return parseStateListValues(resp); + }, + [], + ), + + update: (input: StateUpdateInput): Promise | null> => + run( + 'state::update', + { scope: input.scope, key: input.key }, + async () => { + const result = await iii.trigger>({ + function_id: 'state::update', + payload: input, + }); + return result ?? null; + }, + null, + ), + }; } -export async function stateGet(iii: ISdk, scope: string, key: string): Promise { +/** Lists all scope names that contain state data. */ +export async function stateListGroups(iii: ISdk, opts: CreateStateOptions = {}): Promise { + const tolerant = opts.tolerant !== false; try { - const v = await iii.trigger({ - function_id: 'state::get', - payload: { scope, key }, + const result = await iii.trigger, StateListGroupsResult | string[]>({ + function_id: 'state::list_groups', + payload: {}, }); - if (v === null || v === undefined) return null; - return v; + if (Array.isArray(result)) return result; + return result?.groups ?? 
[]; } catch (err) { - logger.warn('state::get failed', { scope, key, err: String(err) }); - return null; + if (tolerant) { + logger.warn('state::list_groups failed', { err: String(err) }); + return []; + } + throw err; } } +// --- Tolerant (scope, key) ergonomics for turn-orchestrator --- + +const tolerantState = (iii: ISdk) => createState(iii, { tolerant: true }); + +export async function stateGet(iii: ISdk, scope: string, key: string): Promise { + return tolerantState(iii).get({ scope, key }); +} + export async function stateSet( iii: ISdk, scope: string, key: string, value: unknown, -): Promise { - try { - await iii.trigger({ - function_id: 'state::set', - payload: { scope, key, value }, - }); - } catch (err) { - logger.warn('state::set failed', { scope, key, err: String(err) }); - } +): Promise | null> { + return tolerantState(iii).set({ scope, key, value }); } export async function stateDelete(iii: ISdk, scope: string, key: string): Promise { - try { - await iii.trigger({ - function_id: 'state::delete', - payload: { scope, key }, - }); - } catch (err) { - logger.warn('state::delete failed', { scope, key, err: String(err) }); - } + await tolerantState(iii).delete({ scope, key }); } -/** - * Lists all values in a scope using the iii SDK contract (`StateListInput`). - */ export async function stateListValues(iii: ISdk, input: StateListInput): Promise { - try { - const resp = await iii.trigger({ - function_id: 'state::list', - payload: input, - }); - return parseStateListValues(resp); - } catch (err) { - logger.warn('state::list failed', { scope: input.scope, err: String(err) }); - return []; - } -} - -/** - * @deprecated Third argument `prefix` is not sent to iii (engine lists the - * whole scope). Kept for call-site stability; filter returned values locally - * if you need key-prefix semantics. 
- */ -export async function stateList(iii: ISdk, scope: string, _prefix?: string): Promise { - return stateListValues(iii, { scope }); + return tolerantState(iii).list(input); } -/** - * `state::update` applies one or more atomic ops and returns the - * `{ old_value, new_value }` envelope. - */ export async function stateUpdate( iii: ISdk, scope: string, key: string, - ops: StateUpdateOp[], -): Promise<{ old_value?: unknown; new_value?: unknown } | null> { - try { - const v = await iii.trigger({ - function_id: 'state::update', - payload: { scope, key, ops }, - }); - return v ?? null; - } catch (err) { - logger.warn('state::update failed', { scope, key, err: String(err) }); - return null; - } + ops: UpdateOp[], +): Promise | null> { + return tolerantState(iii).update({ scope, key, ops }); } diff --git a/harness/src/session/config.ts b/harness/src/session/config.ts index 21506dde..10592c7a 100644 --- a/harness/src/session/config.ts +++ b/harness/src/session/config.ts @@ -10,6 +10,6 @@ export function loadSessionConfig(cfg: Record): SessionConfig { const backend = getString(section, 'store_backend', 'iii_state'); return { store_backend: backend === 'memory' ? 
'memory' : 'iii_state', - state_scope: getString(section, 'state_scope', 'agent'), + state_scope: getString(section, 'state_scope', 'inbox'), }; } diff --git a/harness/src/session/inbox/key.ts b/harness/src/session/inbox/key.ts index 1ba41883..cd1ea580 100644 --- a/harness/src/session/inbox/key.ts +++ b/harness/src/session/inbox/key.ts @@ -1,3 +1,3 @@ export function inboxKey(name: string, session_id: string): string { - return `session/${session_id}/${name}`; + return `${session_id}/${name}`; } diff --git a/harness/src/session/tree/store.ts b/harness/src/session/tree/store.ts index 72e3b7e2..d6c2cf44 100644 --- a/harness/src/session/tree/store.ts +++ b/harness/src/session/tree/store.ts @@ -14,7 +14,7 @@ import type { ISdk } from '../../runtime/iii.js'; import { logger } from '../../runtime/otel.js'; -import { parseStateListValues, stateListResponseRows } from '../../runtime/state.js'; +import { createState } from '../../runtime/state.js'; import { type SessionEntry, SessionError, type SessionMeta, entryTimestamp } from './types.js'; export interface SessionStore { @@ -76,14 +76,15 @@ function entriesScope(session_id: string): string { } export class IiiStateSessionStore implements SessionStore { - constructor(private readonly iii: ISdk) {} + private readonly state; + + constructor(iii: ISdk) { + this.state = createState(iii, { tolerant: false }); + } async create(meta: SessionMeta): Promise { try { - await this.iii.trigger({ - function_id: 'state::set', - payload: { scope: META_SCOPE, key: meta.session_id, value: meta }, - }); + await this.state.set({ scope: META_SCOPE, key: meta.session_id, value: meta }); } catch (e) { throw new SessionError('storage', `state::set meta: ${String(e)}`); } @@ -91,13 +92,10 @@ export class IiiStateSessionStore implements SessionStore { async append(session_id: string, entry: SessionEntry): Promise { try { - await this.iii.trigger({ - function_id: 'state::set', - payload: { - scope: entriesScope(session_id), - key: entry.id, - 
value: entry, - }, + await this.state.set({ + scope: entriesScope(session_id), + key: entry.id, + value: entry, }); } catch (e) { throw new SessionError('storage', `state::set entry: ${String(e)}`); @@ -114,19 +112,12 @@ export class IiiStateSessionStore implements SessionStore { } async loadEntries(session_id: string): Promise { - let resp: unknown; + let entries: SessionEntry[]; try { - resp = await this.iii.trigger({ - function_id: 'state::list', - payload: { scope: entriesScope(session_id) }, - }); + entries = await this.state.list({ scope: entriesScope(session_id) }); } catch (e) { throw new SessionError('storage', `state::list entries: ${String(e)}`); } - if (!stateListResponseRows(resp)) { - throw new SessionError('storage', 'state::list returned non-array'); - } - const entries = parseStateListValues(resp); // PR #150: sort by (timestamp, id) so resumed approval replies that // arrive after the session paused appear in correct transcript order // even when their entry ids are non-monotonic. 
@@ -139,46 +130,32 @@ export class IiiStateSessionStore implements SessionStore { } async loadMeta(session_id: string): Promise { - let resp: unknown; + let resp: SessionMeta | null; try { - resp = await this.iii.trigger({ - function_id: 'state::get', - payload: { scope: META_SCOPE, key: session_id }, - }); + resp = await this.state.get({ scope: META_SCOPE, key: session_id }); } catch (e) { throw new SessionError('storage', `state::get meta: ${String(e)}`); } - if (resp === null || resp === undefined) { + if (resp === null) { throw new SessionError('not_found', session_id); } - return resp as SessionMeta; + return resp; } async list(): Promise { - let resp: unknown; try { - resp = await this.iii.trigger({ - function_id: 'state::list', - payload: { scope: META_SCOPE }, - }); + return await this.state.list({ scope: META_SCOPE }); } catch (e) { throw new SessionError('storage', `state::list meta: ${String(e)}`); } - if (!stateListResponseRows(resp)) { - throw new SessionError('storage', 'state::list returned non-array'); - } - return parseStateListValues(resp); } async updateEntry(session_id: string, entry_id: string, updated: SessionEntry): Promise { try { - await this.iii.trigger({ - function_id: 'state::set', - payload: { - scope: entriesScope(session_id), - key: entry_id, - value: updated, - }, + await this.state.set({ + scope: entriesScope(session_id), + key: entry_id, + value: updated, }); } catch (e) { throw new SessionError('storage', `state::set updateEntry: ${String(e)}`); @@ -195,16 +172,9 @@ export class IiiStateSessionStore implements SessionStore { } private async refreshMetaUpdatedAt(session_id: string): Promise { - const value = await this.iii.trigger({ - function_id: 'state::get', - payload: { scope: META_SCOPE, key: session_id }, - }); - if (value === null || value === undefined) return; - const meta = value as SessionMeta; - meta.updated_at = Date.now(); - await this.iii.trigger({ - function_id: 'state::set', - payload: { scope: META_SCOPE, key: 
session_id, value: meta }, - }); + const value = await this.state.get({ scope: META_SCOPE, key: session_id }); + if (value === null) return; + const meta = { ...value, updated_at: Date.now() }; + await this.state.set({ scope: META_SCOPE, key: session_id, value: meta }); } } diff --git a/harness/src/turn-orchestrator/abort.ts b/harness/src/turn-orchestrator/abort.ts deleted file mode 100644 index 45b56281..00000000 --- a/harness/src/turn-orchestrator/abort.ts +++ /dev/null @@ -1,40 +0,0 @@ -/** - * `router::abort` side-effects. The abort path writes the per-session abort - * signal and, when a turn is paused on approvals, invokes each per-call resume - * function with an aborted decision (which persists and wakes turn::step). - */ - -import { approvalResumeFnId } from '../approval-gate/schemas.js'; -import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import * as persistence from './persistence.js'; - -const STATE_SCOPE_AGENT = 'agent'; - -export async function performAbortSideEffects(iii: ISdk, session_id: string): Promise { - await trigger(iii, 'state::set', { - scope: STATE_SCOPE_AGENT, - key: `session/${session_id}/abort_signal`, - value: true, - }); - - const rec = await persistence.loadRecord(iii, session_id); - if (!rec || rec.state !== 'function_awaiting_approval' || !rec.awaiting_approval?.length) { - return; - } - - for (const entry of rec.awaiting_approval) { - await trigger(iii, approvalResumeFnId(session_id, entry.function_call_id), { - decision: 'aborted', - reason: 'session_aborted', - }); - } -} - -async function trigger(iii: ISdk, function_id: string, payload: unknown): Promise { - try { - await iii.trigger({ function_id, payload }); - } catch (err) { - logger.warn(`abort side-effect failed: ${function_id}`, { err: String(err) }); - } -} diff --git a/harness/src/turn-orchestrator/agent-trigger.ts b/harness/src/turn-orchestrator/agent-trigger.ts index 6912b545..294ffd56 100644 --- 
a/harness/src/turn-orchestrator/agent-trigger.ts +++ b/harness/src/turn-orchestrator/agent-trigger.ts @@ -1,8 +1,8 @@ /** - * Agent tool-call dispatcher + approval chokepoint. + * Agent function-call dispatcher + approval chokepoint. * * `dispatchWithHook` is the single chokepoint for FSM-issued calls: every - * agent tool call goes through `consultBefore` before reaching the inner + * agent function call goes through `consultBefore` before reaching the inner * trigger. `triggerFunctionCall` is the shared trigger/decode/error path * used by both the hook gate and pre-approved resume execution. */ @@ -15,10 +15,7 @@ import { type DenialEnvelope, consultBefore, gateUnavailableEnvelope } from './h export const TOOL_NAME = 'agent_trigger'; -export type DispatchResult = - | { kind: 'result'; result: FunctionResult } - | { kind: 'deny'; result: FunctionResult } - | { kind: 'pending' }; +export type DispatchResult = { kind: 'result'; result: FunctionResult } | { kind: 'pending' }; export function missingFunctionResult(): FunctionResult { return errorResult({ @@ -28,7 +25,6 @@ export function missingFunctionResult(): FunctionResult { } export function unwrapAgentTrigger(fc: FunctionCall): FunctionCall { - if (fc.function_id !== TOOL_NAME) return fc; const args = (fc.arguments ?? {}) as Record; const fn = typeof args.function === 'string' ? args.function : ''; const payload = args.payload ?? 
{}; @@ -73,7 +69,7 @@ function denialResult(denial: DenialEnvelope): FunctionResult { }; } -export function decodeOrPassthrough(value: unknown): FunctionResult { +function decodeOrPassthrough(value: unknown): FunctionResult { if ( value && typeof value === 'object' && @@ -198,7 +194,7 @@ export async function dispatchWithHook( ): Promise { const outcome = await consultBefore(iii, function_call); if (outcome.kind === 'deny') { - return { kind: 'deny', result: denialResult(outcome.denial) }; + return { kind: 'result', result: denialResult(outcome.denial) }; } if (outcome.kind === 'pending') { return { kind: 'pending' }; diff --git a/harness/src/turn-orchestrator/approval-resume.ts b/harness/src/turn-orchestrator/approval-resume.ts deleted file mode 100644 index 93acf7b7..00000000 --- a/harness/src/turn-orchestrator/approval-resume.ts +++ /dev/null @@ -1,160 +0,0 @@ -/** - * Per-call resume functions for parked approvals. Registered when a call - * enters `function_awaiting_approval`; invoked by `approval::resolve` or - * abort. Persists to scope `approvals` and enqueues `turn::{state}` via wakeFromRecord. - */ - -import { - ApprovalResumePayloadSchema, - STATE_SCOPE, - approvalResumeFnId, - pendingKey, -} from '../approval-gate/schemas.js'; -import type { FunctionRef, ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import { - parseStateListKeyedEntries, - parseStateListValues, - stateGet, - stateSet, -} from '../runtime/state.js'; -import type { TurnStateRecord } from './state.js'; -import { wakeFromRecord } from './wake.js'; - -const resumeRefs = new Map(); -const TURN_STATE_KEY_RE = /^session\/[^/]+\/turn_state$/; - -function isTurnStateRecord(value: unknown): value is TurnStateRecord { - if (!value || typeof value !== 'object') return false; - const rec = value as Record; - return typeof rec.session_id === 'string' && typeof rec.state === 'string'; -} - -/** Agent-scope turn_state still parked on human approval. 
*/ -function pausedApprovalCalls( - rec: TurnStateRecord, -): { session_id: string; function_call_ids: string[] } | null { - if (rec.state !== 'function_awaiting_approval') return null; - - const session_id = rec.session_id; - if (!session_id) return null; - - const function_call_ids = (rec.awaiting_approval ?? []) - .map((entry) => entry.function_call_id) - .filter((id) => id.length > 0); - - return function_call_ids.length > 0 ? { session_id, function_call_ids } : null; -} - -function hasStoredDecision(value: unknown): boolean { - if (!value || typeof value !== 'object') return false; - const decision = (value as Record).decision; - return decision === 'allow' || decision === 'deny' || decision === 'aborted'; -} - -function unregisterApprovalResume(fnId: string): void { - const ref = resumeRefs.get(fnId); - if (!ref) return; - try { - ref.unregister(); - } catch {} - resumeRefs.delete(fnId); -} - -async function handleApprovalResume( - iii: ISdk, - session_id: string, - function_call_id: string, - payload: unknown, -): Promise { - const fnId = approvalResumeFnId(session_id, function_call_id); - if (!resumeRefs.has(fnId)) { - return; - } - const parsed = ApprovalResumePayloadSchema.safeParse(payload); - if (!parsed.success) { - logger.warn('approval resume: malformed payload', { - fnId, - err: String(parsed.error.issues[0]?.message ?? 
'unknown'), - }); - return; - } - - const key = pendingKey(session_id, function_call_id); - const existing = await stateGet(iii, STATE_SCOPE, key); - if (!hasStoredDecision(existing)) { - await stateSet(iii, STATE_SCOPE, key, { - decision: parsed.data.decision, - reason: parsed.data.reason, - }); - } - - try { - await wakeFromRecord(iii, session_id); - } catch (err) { - logger.warn('approval resume: turn step wake failed', { session_id, err: String(err) }); - } - - unregisterApprovalResume(fnId); -} - -export function registerApprovalResume( - iii: ISdk, - session_id: string, - function_call_id: string, -): FunctionRef { - const fnId = approvalResumeFnId(session_id, function_call_id); - const existing = resumeRefs.get(fnId); - if (existing) return existing; - - const ref = iii.registerFunction( - fnId, - async (payload: unknown) => handleApprovalResume(iii, session_id, function_call_id, payload), - { - description: - 'Resume a parked approval: persist decision to approvals scope and enqueue turn::{state}.', - }, - ); - resumeRefs.set(fnId, ref); - return ref; -} - -/** Clears in-memory resume refs (unit tests only). */ -export function clearApprovalResumeRegistry(): void { - resumeRefs.clear(); -} - -/** Turn_state rows from `state::list` on scope agent (not every value in the scope). 
*/ -async function listTurnStateRecords(iii: ISdk): Promise { - try { - const resp = await iii.trigger({ - function_id: 'state::list', - payload: { scope: 'agent' }, - }); - const keyed = parseStateListKeyedEntries(resp); - if (keyed.some((entry) => typeof entry.key === 'string')) { - return keyed - .filter((entry) => entry.key && TURN_STATE_KEY_RE.test(entry.key)) - .map((entry) => entry.value) - .filter(isTurnStateRecord); - } - return parseStateListValues(resp).filter(isTurnStateRecord); - } catch (err) { - logger.warn('approval resume: state::list failed during recovery', { err: String(err) }); - return []; - } -} - -/** Re-register resume fns for sessions still paused on approval after worker restart. */ -export async function recoverPendingApprovals(iii: ISdk): Promise { - const records = await listTurnStateRecords(iii); - - for (const rec of records) { - const paused = pausedApprovalCalls(rec); - if (!paused) continue; - - for (const function_call_id of paused.function_call_ids) { - registerApprovalResume(iii, paused.session_id, function_call_id); - } - } -} diff --git a/harness/src/turn-orchestrator/assistant-streaming/ports.ts b/harness/src/turn-orchestrator/assistant-streaming/ports.ts new file mode 100644 index 00000000..75688f29 --- /dev/null +++ b/harness/src/turn-orchestrator/assistant-streaming/ports.ts @@ -0,0 +1,130 @@ +/** + * Typed dependency ports and domain types for assistant_streaming. 
+ */ + +import { z } from 'zod'; +import { logger } from '../../runtime/otel.js'; +import type { ISdk } from '../../runtime/iii.js'; +import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; +import type { AgentFunction } from '../../types/function.js'; +import type { AssistantMessageEvent } from '../../types/stream-event.js'; +import { AgentFunctionSchema } from '../../types/provider.js'; +import { emit } from '../events.js'; +import { runPreflight } from '../preflight.js'; +import { buildInput, targetFunctionId, type RouteDecision } from '../provider-router.js'; +import { streamProviderTurn } from '../provider-stream.js'; +import type { RunRequest } from '../run-request.js'; +import { createTurnStatePorts, type TurnStatePorts } from '../state-runtime/ports.js'; +import { isDuplicateAssistant } from '../state-runtime/transcript.js'; + +export type StreamContext = { + session_id: string; + decision: RouteDecision; + system_prompt: string; + tools: AgentFunction[]; + messages: AgentMessage[]; +}; + +export type StreamTurnOutcome = { + final: AssistantMessage | null; + error: string | null; + body_streamed: boolean; +}; + +export type AssistantRoute = + | { kind: 'stopped'; reason: 'error' | 'aborted' } + | { kind: 'function_execute' } + | { kind: 'steering_check' }; + +export function parseFunctionSchemas(raw: unknown[]): AgentFunction[] { + return z.array(AgentFunctionSchema).parse(raw) as AgentFunction[]; +} + +export function hasFunctionCalls(asst: AssistantMessage): boolean { + return asst.content.some((b) => b.type === 'function_call'); +} + +export function isErrorOrAborted(asst: AssistantMessage): boolean { + return asst.stop_reason === 'error' || asst.stop_reason === 'aborted'; +} + +export type DeltaHandler = ( + partial: AssistantMessage, + event: AssistantMessageEvent, +) => Promise; + +export type AssistantStreamingPorts = TurnStatePorts & { + loadRunRequest(session_id: string): Promise; + runPreflight( + session_id: string, + 
messages: AgentMessage[], + provider: string, + model: string, + ): Promise<'ok' | 'compacted'>; + streamTurn( + ctx: StreamContext, + onDelta: DeltaHandler, + ): Promise<{ final: AssistantMessage | null; error: string | null }>; + emitMessageUpdate( + session_id: string, + message: AssistantMessage, + event: AssistantMessageEvent, + ): Promise; + emitMessageComplete( + session_id: string, + message: AssistantMessage, + body_streamed: boolean, + ): Promise; + persistAssistantIfNew(session_id: string, asst: AssistantMessage): Promise; +}; + +export function createStreamingPorts(iii: ISdk): AssistantStreamingPorts { + const base = createTurnStatePorts(iii); + + return { + ...base, + + async runPreflight(session_id, messages, provider, model) { + return runPreflight(iii, session_id, messages, provider, model); + }, + + async streamTurn(ctx, onDelta) { + const { final, error } = await streamProviderTurn(iii, { + session_id: ctx.session_id, + targetFn: targetFunctionId(ctx.decision), + buildInput: (writerRef) => + buildInput(ctx.decision, writerRef, ctx.system_prompt, ctx.messages, ctx.tools), + onDelta, + }); + return { final, error }; + }, + + async emitMessageUpdate(session_id, message, event) { + await emit(iii, session_id, { + type: 'message_update', + message, + llm_event: event, + }); + }, + + async emitMessageComplete(session_id, message, body_streamed) { + await emit(iii, session_id, { + type: 'message_complete', + message, + body_streamed, + }); + }, + + async persistAssistantIfNew(session_id, asst) { + const messages = await base.loadMessages(session_id); + if (isDuplicateAssistant(messages, asst)) { + logger.warn('finalizeAssistant: skipping duplicate assistant push (re-entry detected)', { + session_id, + timestamp: asst.timestamp, + }); + return; + } + await base.appendMessages(session_id, [asst]); + }, + }; +} diff --git a/harness/src/turn-orchestrator/assistant-streaming/process.ts b/harness/src/turn-orchestrator/assistant-streaming/process.ts new file 
mode 100644 index 00000000..74d29880 --- /dev/null +++ b/harness/src/turn-orchestrator/assistant-streaming/process.ts @@ -0,0 +1,34 @@ +/** + * Register the assistant_streaming FSM step and run one durable transition. + */ + +import type { ISdk } from '../../runtime/iii.js'; +import { runTransition } from '../run-transition.js'; +import { + TurnStepPayloadSchema, + parseAssistantStreamingRecord, + type TurnStepPayload, +} from '../schemas.js'; +import type { TurnStateRecord } from '../state.js'; +import { createStreamingPorts } from './ports.js'; +import { runAssistantStreaming } from './run.js'; + +export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise { + const streaming = parseAssistantStreamingRecord(rec); + const ports = createStreamingPorts(iii); + await runAssistantStreaming(ports, streaming); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::assistant_streaming', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'assistant_streaming', handleStreaming, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state assistant_streaming: start turn, stream provider response, finalize, and route onward.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/assistant-streaming/run.ts b/harness/src/turn-orchestrator/assistant-streaming/run.ts new file mode 100644 index 00000000..6532263b --- /dev/null +++ b/harness/src/turn-orchestrator/assistant-streaming/run.ts @@ -0,0 +1,151 @@ +/** + * Stream one provider turn, persist the assistant message, and route onward. 
+ */ + +import type { AssistantMessage } from '../../types/agent-message.js'; +import { decide } from '../provider-router.js'; +import { syntheticAssistant } from '../synthetic-assistant.js'; +import { emitTurnEndOnce } from '../state-runtime/turn-end.js'; +import { enterFunctionExecute } from '../function-execute/run.js'; +import { transitionTo, type AssistantStreamingTurnRecord } from '../state.js'; +import { + hasFunctionCalls, + isErrorOrAborted, + parseFunctionSchemas, + type AssistantRoute, + type AssistantStreamingPorts, + type StreamContext, + type StreamTurnOutcome, +} from './ports.js'; + +export function beginTurn(rec: AssistantStreamingTurnRecord): void { + rec.turn_count++; + rec.turn_end_emitted = false; + rec.assistant_body_streamed = false; +} + +export async function prepareStreamContext( + ports: AssistantStreamingPorts, + rec: AssistantStreamingTurnRecord, +): Promise { + const request = await ports.loadRunRequest(rec.session_id); + let messages = await ports.loadMessages(rec.session_id); + const { provider, model, system_prompt, function_schemas } = request; + const decision = decide({ provider, model }); + const tools = parseFunctionSchemas(function_schemas); + + if ( + (await ports.runPreflight(rec.session_id, messages, decision.provider, model)) === 'compacted' + ) { + messages = await ports.loadMessages(rec.session_id); + } + + return { + session_id: rec.session_id, + decision, + system_prompt, + tools, + messages, + }; +} + +export async function runStreamTurn( + ports: AssistantStreamingPorts, + session_id: string, + ctx: StreamContext, +): Promise { + let body_streamed = false; + + const { final, error } = await ports.streamTurn(ctx, async (partial, event) => { + await ports.emitMessageUpdate(session_id, partial, event); + if (event.type === 'text_delta' || event.type === 'thinking_delta') { + body_streamed = true; + } + }); + + return { final, error, body_streamed }; +} + +export function resolveAssistantMessage( + outcome: 
StreamTurnOutcome, + decision: StreamContext['decision'], +): AssistantMessage { + if (outcome.final) return outcome.final; + + const reason = outcome.error ?? 'provider channel closed without final'; + return syntheticAssistant({ + stop_reason: 'error', + text: reason, + provider: decision.provider, + model: decision.model, + }); +} + +/** Reason text for a synthetic error update when the provider did not return a final message. */ +export function syntheticStreamReason(outcome: StreamTurnOutcome): string | null { + if (outcome.final) return null; + return outcome.error ?? 'provider channel closed without final'; +} + +export function routeAssistantTurn(asst: AssistantMessage): AssistantRoute { + if (isErrorOrAborted(asst)) { + return { + kind: 'stopped', + reason: asst.stop_reason === 'aborted' ? 'aborted' : 'error', + }; + } + if (hasFunctionCalls(asst)) { + return { kind: 'function_execute' }; + } + return { kind: 'steering_check' }; +} + +export async function finalizeAssistantTurn( + ports: AssistantStreamingPorts, + rec: AssistantStreamingTurnRecord, + asst: AssistantMessage, +): Promise { + await ports.emitMessageComplete(rec.session_id, asst, rec.assistant_body_streamed === true); + + const route = routeAssistantTurn(asst); + + if (route.kind === 'stopped') { + await emitTurnEndOnce(ports, rec, asst); + await ports.finishSession(rec); + return; + } + + await ports.persistAssistantIfNew(rec.session_id, asst); + + if (route.kind === 'function_execute') { + rec.function_results = []; + enterFunctionExecute(rec, asst); + transitionTo(rec, 'function_execute'); + return; + } + + transitionTo(rec, 'steering_check'); +} + +export async function runAssistantStreaming( + ports: AssistantStreamingPorts, + rec: AssistantStreamingTurnRecord, +): Promise { + beginTurn(rec); + const ctx = await prepareStreamContext(ports, rec); + const outcome = await runStreamTurn(ports, rec.session_id, ctx); + const asst = resolveAssistantMessage(outcome, ctx.decision); + 
rec.last_assistant = asst; + rec.assistant_body_streamed = outcome.body_streamed; + + const syntheticReason = syntheticStreamReason(outcome); + if (syntheticReason) { + await ports.emitMessageUpdate(rec.session_id, asst, { + type: 'text_delta', + partial: asst, + delta: syntheticReason, + }); + } + + await finalizeAssistantTurn(ports, rec, asst); +} diff --git a/harness/src/turn-orchestrator/bootstrap.ts b/harness/src/turn-orchestrator/bootstrap.ts index 26ba0291..948194da 100644 --- a/harness/src/turn-orchestrator/bootstrap.ts +++ b/harness/src/turn-orchestrator/bootstrap.ts @@ -1,18 +1,17 @@ /** - * Best-effort fetch of default skills at boot. Mirrors - * `turn-orchestrator/src/bootstrap.rs`. Failures are logged and never - * abort startup. + * Best-effort download of default-skill namespaces at boot. Failures are logged + * and never abort startup. */ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { TurnOrchestratorConfig } from './config.js'; +import { skillIdFromUri } from './system-prompt.js'; export async function run(iii: ISdk, cfg: TurnOrchestratorConfig): Promise { const namespaces = new Set(); for (const uri of cfg.system_default_skills) { - const id = uri.startsWith('iii://') ? uri.slice('iii://'.length) : uri; - const ns = id.split('/')[0]; + const ns = skillIdFromUri(uri).split('/')[0]; if (ns) namespaces.add(ns); } for (const ns of namespaces) { diff --git a/harness/src/turn-orchestrator/errors.ts b/harness/src/turn-orchestrator/errors.ts index 90be19e0..3a3a494e 100644 --- a/harness/src/turn-orchestrator/errors.ts +++ b/harness/src/turn-orchestrator/errors.ts @@ -16,3 +16,21 @@ export class CompactionBusyError extends Error { this.name = 'CompactionBusyError'; } } + +/** Thrown by a handler for a genuinely retryable failure. runTransition + * re-throws it so the turn-step queue applies backoff/retry/DLQ. Any other + * throw is treated as terminal and routes the session to `failed`. 
*/ +export class TransientError extends Error { + constructor(message: string) { + super(message); + this.name = 'TransientError'; + } +} + +/** Persisted turn_state is missing fields required for the current FSM step. */ +export class TurnStateInvariantError extends Error { + constructor(message: string) { + super(message); + this.name = 'TurnStateInvariantError'; + } +} diff --git a/harness/src/turn-orchestrator/estimate.ts b/harness/src/turn-orchestrator/estimate.ts deleted file mode 100644 index 13233242..00000000 --- a/harness/src/turn-orchestrator/estimate.ts +++ /dev/null @@ -1,12 +0,0 @@ -/** - * Cheap chars/4 token estimate used for pre-flight overflow detection. - * Same heuristic as context-compaction's estimateTokenCount. - */ - -import type { AgentMessage } from '../types/agent-message.js'; - -export function estimateMessages(messages: AgentMessage[]): number { - let chars = 0; - for (const m of messages) chars += JSON.stringify(m).length; - return Math.floor(chars / 4); -} diff --git a/harness/src/turn-orchestrator/events.ts b/harness/src/turn-orchestrator/events.ts index 12bfb7db..75e0ab1c 100644 --- a/harness/src/turn-orchestrator/events.ts +++ b/harness/src/turn-orchestrator/events.ts @@ -1,27 +1,60 @@ /** - * Emit AgentEvent frames on `agent::events`. Mirrors - * `turn-orchestrator/src/events.rs`. + * Emit AgentEvent frames on `agent::events`, one per call with a per-session + * monotonic sequence number. `turn_end` frames are additionally mirrored onto + * the dedicated `agent::turn_end` stream (see TURN_END_STREAM). */ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { AgentEvent } from '../types/agent-event.js'; -import { eventCounterKey } from './state.js'; + +const EVENT_COUNTER_SCOPE = 'event_counter'; export const EVENTS_STREAM = 'agent::events'; -const STATE_SCOPE = 'agent'; +/** + * Dedicated stream carrying only `turn_end` frames. 
Compaction subscribes here + * instead of the full `agent::events` firehose so it wakes once per turn rather + * than on every event (token updates, function lifecycle, …). + */ +export const TURN_END_STREAM = 'agent::turn_end'; -export function formatItemId(session_id: string, seq: number): string { +function formatItemId(session_id: string, seq: number): string { return `${session_id}-${seq.toString().padStart(8, '0')}`; } +function isTurnEnd(event: AgentEvent): boolean { + return (event as { type?: string }).type === 'turn_end'; +} + +async function setStream( + iii: ISdk, + stream_name: string, + session_id: string, + item_id: string, + event: AgentEvent, +): Promise { + try { + await iii.trigger({ + function_id: 'stream::set', + payload: { stream_name, group_id: session_id, item_id, data: event }, + }); + } catch (err) { + logger.warn('stream::set failed', { + stream_name, + session_id, + item_id, + err: String(err), + }); + } +} + async function nextSeq(iii: ISdk, session_id: string): Promise { try { const resp = await iii.trigger({ function_id: 'state::update', payload: { - scope: STATE_SCOPE, - key: eventCounterKey(session_id), + scope: EVENT_COUNTER_SCOPE, + key: session_id, ops: [{ type: 'increment', path: '', by: 1 }], }, }); @@ -38,21 +71,8 @@ async function nextSeq(iii: ISdk, session_id: string): Promise { export async function emit(iii: ISdk, session_id: string, event: AgentEvent): Promise { const seq = await nextSeq(iii, session_id); const item_id = formatItemId(session_id, seq); - try { - await iii.trigger({ - function_id: 'stream::set', - payload: { - stream_name: EVENTS_STREAM, - group_id: session_id, - item_id, - data: event, - }, - }); - } catch (err) { - logger.warn('stream::set agent::events failed', { - session_id, - item_id, - err: String(err), - }); + await setStream(iii, EVENTS_STREAM, session_id, item_id, event); + if (isTurnEnd(event)) { + await setStream(iii, TURN_END_STREAM, session_id, item_id, event); } } diff --git 
a/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts b/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts new file mode 100644 index 00000000..350d3877 --- /dev/null +++ b/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts @@ -0,0 +1,31 @@ +/** + * Typed dependency ports and domain types for function_awaiting_approval. + */ + +import { ApprovalDecisionSchema, STATE_SCOPE } from '../../approval-gate/schemas.js'; +import type { ISdk } from '../../runtime/iii.js'; +import type { z } from 'zod'; +export type ApprovalDecision = z.infer; + +/** Decode stored approval decision from `state::get` (scope `approvals`). */ +export function parseApprovalDecision(value: unknown): ApprovalDecision | null { + const parsed = ApprovalDecisionSchema.safeParse(value); + return parsed.success ? parsed.data : null; +} + +export type AwaitingApprovalPorts = { + readDecision(session_id: string, function_call_id: string): Promise; +}; + +export function createAwaitingApprovalPorts(iii: ISdk): AwaitingApprovalPorts { + return { + async readDecision(session_id, function_call_id) { + const key = `${session_id}/${function_call_id}`; + const raw = await iii.trigger({ + function_id: 'state::get', + payload: { scope: STATE_SCOPE, key }, + }); + return parseApprovalDecision(raw); + }, + }; +} diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/process.ts b/harness/src/turn-orchestrator/function-awaiting-approval/process.ts new file mode 100644 index 00000000..0081769b --- /dev/null +++ b/harness/src/turn-orchestrator/function-awaiting-approval/process.ts @@ -0,0 +1,72 @@ +/** + * Read approval decisions, execute resolved calls individually, and register the FSM step. 
+ */ + +import { TriggerAction, type ISdk } from '../../runtime/iii.js'; +import { logger } from '../../runtime/otel.js'; +import { createPorts } from '../function-execute/ports.js'; +import { runTransition } from '../run-transition.js'; +import { + ApprovalDecisionEventSchema, + TurnStepPayloadSchema, + parseFunctionBatchRecord, + type TurnStepPayload, +} from '../schemas.js'; +import { TURN_STEP_QUEUE } from '../state-runtime/store.js'; +import type { TurnStateRecord } from '../state.js'; +import { createAwaitingApprovalPorts } from './ports.js'; +import { processResolvedApprovals, routeAfterApprovalProcessing } from './run.js'; + +export async function handleApprovalStateWrite(iii: ISdk, event: unknown): Promise { + const parsed = ApprovalDecisionEventSchema.safeParse(event); + if (!parsed.success) return; + try { + await iii.trigger({ + function_id: 'turn::function_awaiting_approval', + payload: { session_id: parsed.data.session_id }, + action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), + }); + } catch (err) { + logger.warn('turn::on_approval: wake failed', { + session_id: parsed.data.session_id, + err: String(err), + }); + } +} + +export async function handleAwaitingApproval(iii: ISdk, rec: TurnStateRecord): Promise { + const batch = parseFunctionBatchRecord(rec); + const executePorts = createPorts(iii); + const readPorts = createAwaitingApprovalPorts(iii); + await processResolvedApprovals(readPorts, executePorts, batch); + await routeAfterApprovalProcessing(executePorts, batch); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::function_awaiting_approval', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'function_awaiting_approval', handleAwaitingApproval, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state function_awaiting_approval: execute each call as its approval decision arrives.', + }, + ); + + 
iii.registerFunction( + 'turn::on_approval', + async (event: unknown) => handleApprovalStateWrite(iii, event), + { + description: + 'State trigger on scope=approvals; enqueues turn::function_awaiting_approval when a decision is written.', + }, + ); + + iii.registerTrigger({ + type: 'state', + function_id: 'turn::on_approval', + config: { scope: 'approvals' }, + }); +} diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/run.ts b/harness/src/turn-orchestrator/function-awaiting-approval/run.ts new file mode 100644 index 00000000..ebd5aabc --- /dev/null +++ b/harness/src/turn-orchestrator/function-awaiting-approval/run.ts @@ -0,0 +1,91 @@ +/** + * Resolve approval decisions and route the batch after each decision. + */ + +import { text } from '../../types/content.js'; +import type { FunctionResult } from '../../types/function.js'; +import { finalizeBatch, runOneCall } from '../function-execute/run.js'; +import type { FunctionExecutePorts } from '../function-execute/ports.js'; +import type { PreparedCall } from '../function-execute/types.js'; +import { isBatchComplete } from '../function-execute/types.js'; +import { transitionTo, type FunctionBatchTurnRecord } from '../state.js'; +import type { ApprovalDecision, AwaitingApprovalPorts } from './ports.js'; + +export function denialResultFromDecision(decision: ApprovalDecision): FunctionResult { + const reason = + decision.reason ?? (decision.decision === 'aborted' ? 'session_aborted' : 'denied'); + const message = + decision.decision === 'aborted' + ? 
`Function call aborted: ${reason}` + : `Permission denied by user: ${reason}`; + return { + content: [text(message)], + details: { + approval_denied: true, + decision: decision.decision, + reason, + }, + terminate: false, + }; +} + +export function applyDecisionToPrepared( + current: PreparedCall, + decision: ApprovalDecision, +): PreparedCall { + if (decision.decision === 'allow') { + return { route: 'pre_approved', call: current.call }; + } + return { + route: 'synthetic', + call: current.call, + result: denialResultFromDecision(decision), + }; +} + +export async function processResolvedApprovals( + readPorts: AwaitingApprovalPorts, + executePorts: FunctionExecutePorts, + rec: FunctionBatchTurnRecord, +): Promise { + const work = rec.work; + let awaiting = [...rec.awaiting_approval]; + const executed = { ...work.executed }; + + for (const entry of [...awaiting]) { + const callId = entry.function_call_id; + + if (executed[callId]) { + awaiting = awaiting.filter((e) => e.function_call_id !== callId); + continue; + } + + const decision = await readPorts.readDecision(rec.session_id, callId); + if (!decision) continue; + + const current = work.prepared.find((p) => p.call.id === callId)!; + const resolved = applyDecisionToPrepared(current, decision); + await runOneCall(executePorts, rec.session_id, resolved, executed, { skipStart: true }); + + awaiting = awaiting.filter((e) => e.function_call_id !== callId); + rec.work = { prepared: work.prepared, executed }; + await executePorts.checkpoint(rec); + } + + rec.awaiting_approval = awaiting; +} + +export async function routeAfterApprovalProcessing( + executePorts: FunctionExecutePorts, + rec: FunctionBatchTurnRecord, +): Promise { + if (rec.awaiting_approval.length > 0) { + return; + } + + if (isBatchComplete(rec.work)) { + await finalizeBatch(executePorts, rec); + } else { + transitionTo(rec, 'function_execute'); + } +} diff --git a/harness/src/turn-orchestrator/function-execute/ports.ts 
b/harness/src/turn-orchestrator/function-execute/ports.ts new file mode 100644 index 00000000..28dde52f --- /dev/null +++ b/harness/src/turn-orchestrator/function-execute/ports.ts @@ -0,0 +1,94 @@ +/** + * Typed dependency ports for function_execute — production wiring and test doubles. + */ + +import { z } from 'zod'; +import type { DispatchResult } from '../agent-trigger.js'; +import { dispatchWithHook, triggerFunctionCall } from '../agent-trigger.js'; +import { emit } from '../events.js'; +import type { ISdk } from '../../runtime/iii.js'; +import type { FunctionCall, FunctionResult } from '../../types/function.js'; +import { createTurnStatePorts, type TurnStatePorts } from '../state-runtime/ports.js'; +import type { ExecutedCall } from './types.js'; + +const RoutingEnvelopeSchema = z + .object({ + session_id: z.string(), + function_call_id: z.string(), + function_id: z.string(), + function_call: z.object({ + id: z.string(), + function_id: z.string(), + arguments: z.unknown(), + }), + }) + .catchall(z.unknown()); + +type RoutingEnvelope = z.infer; + +function baseArgs(arguments_: FunctionCall['arguments']): Record { + if (arguments_ && typeof arguments_ === 'object' && !Array.isArray(arguments_)) { + return { ...(arguments_ as Record) }; + } + return { arguments: arguments_ }; +} + +/** Attach session + call identity to arguments for policy and target functions. 
*/ +export function withRoutingEnvelope(call: FunctionCall, session_id: string): FunctionCall { + const envelope: RoutingEnvelope = { + ...baseArgs(call.arguments), + session_id, + function_call_id: call.id, + function_id: call.function_id, + function_call: { id: call.id, function_id: call.function_id, arguments: call.arguments }, + }; + RoutingEnvelopeSchema.parse(envelope); + return { id: call.id, function_id: call.function_id, arguments: envelope }; +} + +export type FunctionExecutePorts = TurnStatePorts & { + emitStart(session_id: string, call: FunctionCall): Promise; + emitEnd(session_id: string, executed: ExecutedCall): Promise; + dispatch(call: FunctionCall, session_id: string): Promise; + triggerPreApproved(call: FunctionCall): Promise; +}; + +function buildFunctionExecutionEnd(executed: ExecutedCall) { + return { + type: 'function_execution_end' as const, + function_call_id: executed.call.id, + function_id: executed.call.function_id, + result: executed.result, + is_error: executed.is_error, + duration_ms: executed.duration_ms, + }; +} + +export function createPorts(iii: ISdk): FunctionExecutePorts { + const base = createTurnStatePorts(iii); + + return { + ...base, + + async emitStart(session_id, call) { + await emit(iii, session_id, { + type: 'function_execution_start', + function_call_id: call.id, + function_id: call.function_id, + args: call.arguments, + }); + }, + + async emitEnd(session_id, executed) { + await emit(iii, session_id, buildFunctionExecutionEnd(executed)); + }, + + async dispatch(call, session_id) { + return dispatchWithHook(iii, withRoutingEnvelope(call, session_id)); + }, + + async triggerPreApproved(call) { + return triggerFunctionCall(iii, call); + }, + }; +} diff --git a/harness/src/turn-orchestrator/function-execute/process.ts b/harness/src/turn-orchestrator/function-execute/process.ts new file mode 100644 index 00000000..dbec00d2 --- /dev/null +++ b/harness/src/turn-orchestrator/function-execute/process.ts @@ -0,0 +1,51 @@ +/** + * 
Run prepared function calls, finalize results, route onward, and register the FSM step. + */ + +import type { ISdk } from '../../runtime/iii.js'; +import { runTransition } from '../run-transition.js'; +import { + TurnStepPayloadSchema, + parseFunctionBatchRecord, + type TurnStepPayload, +} from '../schemas.js'; +import { transitionTo, type TurnStateRecord } from '../state.js'; +import { finalizeBatch, runBatch } from './run.js'; +import { createPorts } from './ports.js'; + +export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise { + const batch = parseFunctionBatchRecord(rec); + const ports = createPorts(iii); + + const outcome = await runBatch(ports, batch); + batch.work = outcome.work; + + if (outcome.kind === 'incomplete') { + const ids = new Set(batch.awaiting_approval.map((entry) => entry.function_call_id)); + const merged = [...batch.awaiting_approval]; + for (const pending of outcome.newPending) { + if (ids.has(pending.function_call_id)) continue; + ids.add(pending.function_call_id); + merged.push(pending); + } + batch.awaiting_approval = merged; + transitionTo(batch, 'function_awaiting_approval'); + return; + } + + await finalizeBatch(ports, batch); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::function_execute', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'function_execute', handleExecute, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state function_execute: dispatch prepared calls and finalize results.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/function-execute/run.ts b/harness/src/turn-orchestrator/function-execute/run.ts new file mode 100644 index 00000000..9ab75b54 --- /dev/null +++ b/harness/src/turn-orchestrator/function-execute/run.ts @@ -0,0 +1,233 @@ +/** + * Plan, execute, and finalize function call batches. 
+ */ + +import { logger } from '../../runtime/otel.js'; +import type { AssistantMessage, FunctionResultMessage } from '../../types/agent-message.js'; +import type { FunctionCallContent } from '../../types/content.js'; +import type { FunctionCall, FunctionResult } from '../../types/function.js'; +import { + TOOL_NAME, + isErrorResult, + missingFunctionResult, + unwrapAgentTrigger, +} from '../agent-trigger.js'; +import { emitTurnEndOnce } from '../state-runtime/turn-end.js'; +import { persistedTrailingResultIds } from '../state-runtime/transcript.js'; +import { + transitionTo, + type AwaitingApprovalEntry, + type FunctionBatchTurnRecord, + type TurnStateRecord, +} from '../state.js'; +import type { FunctionExecutePorts } from './ports.js'; +import { + preparedCallId, + type BatchOutcome, + type ExecutedCall, + type FunctionBatchWork, + type PendingApproval, + type PreparedCall, + type ResolveCallResult, + type RunOneCallResult, +} from './types.js'; + +function isFunctionCallBlock( + block: AssistantMessage['content'][number], +): block is FunctionCallContent { + return block.type === 'function_call'; +} + +function toPreparedCall(block: FunctionCallContent): PreparedCall { + const call: FunctionCall = { + id: block.id, + function_id: block.function_id, + arguments: block.arguments, + }; + if (block.function_id !== TOOL_NAME) { + return { route: 'synthetic', call, result: missingFunctionResult() }; + } + const unwrapped = unwrapAgentTrigger(call); + if (!unwrapped.function_id) { + return { route: 'synthetic', call: unwrapped, result: missingFunctionResult() }; + } + return { route: 'dispatch', call: unwrapped }; +} + +/** Set fields expected when entering `function_execute` (mirrors assistant_streaming finalize). 
*/ +export function enterFunctionExecute(rec: TurnStateRecord, asst: AssistantMessage): void { + const batch = rec as TurnStateRecord & { + awaiting_approval: AwaitingApprovalEntry[]; + last_assistant: AssistantMessage; + work: FunctionBatchWork; + }; + batch.awaiting_approval = []; + batch.last_assistant = asst; + batch.work = { + prepared: asst.content.filter(isFunctionCallBlock).map(toPreparedCall), + executed: {}, + }; +} + +async function resolvePreparedCall( + ports: FunctionExecutePorts, + prepared: PreparedCall, + session_id: string, +): Promise { + switch (prepared.route) { + case 'synthetic': + return { kind: 'resolved', result: prepared.result, is_error: true }; + case 'pre_approved': { + const result = await ports.triggerPreApproved(prepared.call); + return { kind: 'resolved', result, is_error: isErrorResult(result) }; + } + case 'dispatch': { + const out = await ports.dispatch(prepared.call, session_id); + if (out.kind === 'pending') { + return { kind: 'pending' }; + } + return { kind: 'resolved', result: out.result, is_error: isErrorResult(out.result) }; + } + } +} + +export type RunOneCallOptions = { + /** Skip `function_execution_start` — used when resuming after approval (start already emitted). 
*/ + skipStart?: boolean; +}; + +export async function runOneCall( + ports: FunctionExecutePorts, + session_id: string, + prepared: PreparedCall, + executed: Record, + opts?: RunOneCallOptions, +): Promise { + const call: FunctionCall = prepared.call; + + const prior = executed[call.id]; + if (prior) { + await ports.emitEnd(session_id, prior); + return { kind: 'skipped' }; + } + + if (!opts?.skipStart) { + await ports.emitStart(session_id, call); + } + const startedAt = Date.now(); + + const resolved = await resolvePreparedCall(ports, prepared, session_id); + if (resolved.kind === 'pending') { + return { kind: 'pending', call }; + } + + const entry: ExecutedCall = { + call, + result: resolved.result, + is_error: resolved.is_error, + duration_ms: Date.now() - startedAt, + }; + executed[call.id] = entry; + await ports.emitEnd(session_id, entry); + return { kind: 'executed', entry }; +} + +export async function runBatch( + ports: FunctionExecutePorts, + rec: FunctionBatchTurnRecord, +): Promise { + const { prepared } = rec.work; + const executed = { ...rec.work.executed }; + const awaitingIds = new Set(rec.awaiting_approval.map((entry) => entry.function_call_id)); + const newPending: PendingApproval[] = []; + + for (const item of prepared) { + const callId = preparedCallId(item); + if (executed[callId]) continue; + if (awaitingIds.has(callId)) continue; + + const outcome = await runOneCall(ports, rec.session_id, item, executed); + + if (outcome.kind === 'pending') { + newPending.push({ + function_call_id: outcome.call.id, + function_id: outcome.call.function_id, + args: outcome.call.arguments, + }); + continue; + } + + if (outcome.kind === 'executed') { + rec.work = { prepared, executed }; + await ports.checkpoint(rec); + } + } + + const batchWork = { prepared, executed }; + if (newPending.length > 0 || awaitingIds.size > 0) { + return { kind: 'incomplete', work: batchWork, newPending }; + } + return { kind: 'completed', work: batchWork }; +} + +function 
toFunctionResultMessage( + entry: ExecutedCall, + result: FunctionResult, +): FunctionResultMessage { + return { + role: 'function_result', + function_call_id: entry.call.id, + function_id: entry.call.function_id, + content: result.content, + details: result.details, + is_error: entry.is_error, + timestamp: Date.now(), + }; +} + +/** Collect executed entries in batch order (caller must only invoke when batch is complete). */ +function executedInBatchOrder(work: FunctionBatchWork): ExecutedCall[] { + return work.prepared.map((item) => work.executed[preparedCallId(item)]!); +} + +export async function finalizeBatch( + ports: FunctionExecutePorts, + rec: FunctionBatchTurnRecord, +): Promise { + const executed = executedInBatchOrder(rec.work); + const function_results: FunctionResultMessage[] = []; + let allTerminate = true; + const lastAssistant = rec.last_assistant; + + for (const entry of executed) { + const result = entry.result; + if (!result.terminate) allTerminate = false; + function_results.push(toFunctionResultMessage(entry, result)); + } + + const messages = await ports.loadMessages(rec.session_id); + const alreadyPersisted = persistedTrailingResultIds(messages); + const fresh = function_results.filter((r) => !alreadyPersisted.has(r.function_call_id)); + if (fresh.length < function_results.length) { + logger.warn('finalizeBatch: skipped duplicate function_results (re-entry detected)', { + session_id: rec.session_id, + total: function_results.length, + skipped: function_results.length - fresh.length, + }); + } + if (fresh.length > 0) { + await ports.appendMessages(rec.session_id, fresh); + } + + rec.function_results = function_results; + + await emitTurnEndOnce(ports, rec, lastAssistant, function_results); + + if (allTerminate) { + await ports.finishSession(rec); + } else { + transitionTo(rec, 'steering_check'); + } + + (rec as TurnStateRecord).work = undefined; +} diff --git a/harness/src/turn-orchestrator/function-execute/types.ts 
b/harness/src/turn-orchestrator/function-execute/types.ts new file mode 100644 index 00000000..e6b67143 --- /dev/null +++ b/harness/src/turn-orchestrator/function-execute/types.ts @@ -0,0 +1,59 @@ +/** + * Domain types for the function_execute pipeline. + */ + +import type { FunctionCall, FunctionResult } from '../../types/function.js'; + +/** Exactly one execution route per prepared call: dispatch, pre_approved, or synthetic. */ +export type PreparedCall = + | { route: 'dispatch'; call: FunctionCall } + | { route: 'pre_approved'; call: FunctionCall } + | { route: 'synthetic'; call: FunctionCall; result: FunctionResult }; + +export type ExecutedCall = { + call: FunctionCall; + result: FunctionResult; + is_error: boolean; + duration_ms: number; +}; + +/** Durable mid-batch state persisted on TurnStateRecord.work. */ +export type FunctionBatchWork = { + prepared: readonly PreparedCall[]; + executed: Record; +}; + +export type PendingApproval = { + function_call_id: string; + function_id: string; + args: FunctionCall['arguments']; +}; + +/** Batch loop outcome — explicit control flow instead of early return + void. */ +export type BatchOutcome = + | { kind: 'completed'; work: FunctionBatchWork } + | { kind: 'incomplete'; work: FunctionBatchWork; newPending: PendingApproval[] }; + +export type RunOneCallResult = + | { kind: 'skipped' } + | { kind: 'executed'; entry: ExecutedCall } + | { kind: 'pending'; call: FunctionCall }; + +export type ResolveCallResult = + | { kind: 'pending' } + | { kind: 'resolved'; result: FunctionResult; is_error: boolean }; + +/** Extract the FunctionCall from any PreparedCall variant. */ +export function preparedCallId(prepared: PreparedCall): string { + return prepared.call.id; +} + +/** Empty durable work for a fresh batch. */ +export function emptyBatchWork(prepared: readonly PreparedCall[]): FunctionBatchWork { + return { prepared, executed: {} }; +} + +/** True when every prepared call has a committed entry in `executed`. 
*/ +export function isBatchComplete(work: FunctionBatchWork): boolean { + return work.prepared.every((prepared) => work.executed[preparedCallId(prepared)] !== undefined); +} diff --git a/harness/src/turn-orchestrator/get-state.ts b/harness/src/turn-orchestrator/get-state.ts index 927c6c80..c59c9f14 100644 --- a/harness/src/turn-orchestrator/get-state.ts +++ b/harness/src/turn-orchestrator/get-state.ts @@ -6,11 +6,17 @@ */ import type { ISdk } from '../runtime/iii.js'; -import * as persistence from './persistence.js'; -import { GetStatePayloadSchema, type GetStatePayload, type GetStateResult } from './schemas.js'; +import { + GetStatePayloadSchema, + type GetStatePayload, + type GetStateResult, + toView, +} from './schemas.js'; +import { createTurnStore } from './state-runtime/store.js'; export async function execute(iii: ISdk, payload: GetStatePayload): Promise { - return persistence.loadRecord(iii, payload.session_id); + const rec = await createTurnStore(iii).loadRecord(payload.session_id); + return rec ? toView(rec) : null; } export function register(iii: ISdk): void { diff --git a/harness/src/turn-orchestrator/hook.ts b/harness/src/turn-orchestrator/hook.ts index b76fcc10..416d14fb 100644 --- a/harness/src/turn-orchestrator/hook.ts +++ b/harness/src/turn-orchestrator/hook.ts @@ -2,9 +2,6 @@ * Approval consultation. Calls `policy::check_permissions` directly and maps * the reply to allow / deny / pending. Fail-closed on transport errors: * unreachable policy → deny with `gate_unavailable`. - * - * `publishAfter` still goes through hook-fanout because the after-hook is a - * pluggable merge point with multiple potential consumers. 
*/ import { permissionsDenyEnvelope } from '../approval-gate/denial.js'; @@ -18,9 +15,8 @@ export type { DenialEnvelope } from '../approval-gate/schemas.js'; import { logger } from '../runtime/otel.js'; import type { FunctionCall } from '../types/function.js'; -export const TOPIC_AFTER = 'agent::after_function_call'; - -export const HOOK_TIMEOUT_MS = 500; +/** Fail-closed budget for the synchronous policy consult before a call. */ +export const POLICY_TIMEOUT_MS = 5_000; export type HookOutcome = | { kind: 'allow' } @@ -45,7 +41,7 @@ export async function consultBefore(iii: ISdk, function_call: FunctionCall): Pro function_id: function_call.function_id, args: function_call.arguments as CheckPermissionsPayload['args'], }, - timeoutMs: 5_000, + timeoutMs: POLICY_TIMEOUT_MS, }); switch (reply.decision) { case 'allow': @@ -77,24 +73,3 @@ export async function consultBefore(iii: ISdk, function_call: FunctionCall): Pro }; } } - -export async function publishAfter( - iii: ISdk, - function_call: FunctionCall, - result: unknown, -): Promise { - try { - const resp = await iii.trigger({ - function_id: 'hook-fanout::publish_collect', - payload: { - topic: TOPIC_AFTER, - payload: { function_call, result }, - merge_rule: 'field_merge', - timeout_ms: HOOK_TIMEOUT_MS, - }, - }); - return resp.merged; - } catch { - return null; - } -} diff --git a/harness/src/turn-orchestrator/iii.worker.yaml b/harness/src/turn-orchestrator/iii.worker.yaml index 5341f13f..4d9be949 100644 --- a/harness/src/turn-orchestrator/iii.worker.yaml +++ b/harness/src/turn-orchestrator/iii.worker.yaml @@ -4,7 +4,7 @@ language: node deploy: binary manifest: package.json bin: iii-turn-orchestrator -description: Durable run::start state machine that drives each agent turn through provisioning, assistant, function-execute, steering, and tearing-down. +description: Durable run::start state machine that drives each agent turn through provisioning, assistant, function-execute, and steering. 
runtime: kind: node @@ -15,6 +15,5 @@ scripts: dependencies: session: "^0.2.0" - hook-fanout: "^0.2.0" provider-anthropic: "^0.2.0" provider-openai: "^0.2.0" diff --git a/harness/src/turn-orchestrator/main.ts b/harness/src/turn-orchestrator/main.ts index 983cf93c..9880794f 100644 --- a/harness/src/turn-orchestrator/main.ts +++ b/harness/src/turn-orchestrator/main.ts @@ -5,6 +5,6 @@ import { register } from './register.js'; await bootstrapWorker({ name: 'turn-orchestrator', description: - 'Durable run::start state machine driving each agent turn through provisioning, assistant, function-execute, steering, tearing-down.', + 'Durable run::start state machine driving each agent turn through provisioning, assistant, function-execute, and steering.', register: (iii, ctx) => register(iii, ctx), }); diff --git a/harness/src/turn-orchestrator/on-abort-signal.ts b/harness/src/turn-orchestrator/on-abort-signal.ts deleted file mode 100644 index a5d2788b..00000000 --- a/harness/src/turn-orchestrator/on-abort-signal.ts +++ /dev/null @@ -1,81 +0,0 @@ -/** - * Reactive abort wake. A `state` trigger on `scope: 'agent'` filtered by - * the abort_signal key shape (`session//abort_signal`) and a - * `new_value === true` write fires this adapter, which publishes - * `turn::{state}` on the durable FIFO queue so the orchestrator's FSM advances to - * `steering_check` and observes the abort flag promptly. - * - * Without this wake, a session mid-streaming would only check - * `abort_signal` after the current step completes naturally. The reactive - * trigger doesn't preempt the running step (durable subscriber publishes - * queue), but it guarantees the orchestrator runs another FSM step as - * soon as the current one finishes — which is the earliest moment we - * can safely react. - * - * **Incoming**: agent-scope `state:created` / `state:updated` on - * `session//abort_signal` with `new_value === true` (from `state::set` via - * `performAbortSideEffects` / `router::abort`). 
Same envelope the engine passes - * to state trigger adapters. - * - * **Outgoing**: `wakeFromRecord` enqueues `{ session_id }` on the `turn-step` queue. - */ - -import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import { AbortSignalWriteEventSchema, type ParsedAbortSignalWrite } from './schemas.js'; -import { wakeFromRecord } from './wake.js'; - -export function parseAbortSignalWrite(event: unknown): ParsedAbortSignalWrite | null { - const result = AbortSignalWriteEventSchema.safeParse(event); - return result.success ? result.data : null; -} - -export function isAbortSignalWrite(event: unknown): boolean { - return parseAbortSignalWrite(event) !== null; -} - -export async function execute(iii: ISdk, write: ParsedAbortSignalWrite): Promise { - try { - await wakeFromRecord(iii, write.session_id); - } catch (err) { - logger.warn('turn::on_abort_signal: wake failed', { - session_id: write.session_id, - err: String(err), - }); - } -} - -export async function handleAbortSignalWrite(iii: ISdk, event: unknown): Promise { - const write = parseAbortSignalWrite(event); - if (!write) return; - await execute(iii, write); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::is_abort_signal_set', - async (event: unknown) => isAbortSignalWrite(event), - { - description: - 'Condition: state event sets session//abort_signal = true (state:created or state:updated).', - }, - ); - - iii.registerFunction( - 'turn::on_abort_signal', - async (event: unknown) => handleAbortSignalWrite(iii, event), - { - description: - 'State trigger adapter on scope=agent for abort_signal writes; enqueues turn::{state} so the orchestrator picks up the abort promptly.', - }, - ); - - iii.registerTrigger({ - type: 'state', - function_id: 'turn::on_abort_signal', - config: { - scope: 'agent', - condition_function_id: 'turn::is_abort_signal_set', - }, - }); -} diff --git a/harness/src/turn-orchestrator/persistence.ts 
b/harness/src/turn-orchestrator/persistence.ts deleted file mode 100644 index 9a1c785e..00000000 --- a/harness/src/turn-orchestrator/persistence.ts +++ /dev/null @@ -1,281 +0,0 @@ -/** - * State load/save helpers. Mirrors `turn-orchestrator/src/persistence.rs`. - */ - -import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import type { AgentMessage } from '../types/agent-message.js'; -import type { FunctionCall, FunctionResult } from '../types/function.js'; -import { type RunRequest, parseRunRequest } from './run-request.js'; -import { - type TurnStateRecord, - functionSchemasKey, - lastSessionTreeLenKey, - messagesKey, - runRequestKey, - turnStateKey, -} from './state.js'; -import { emitTurnStateChanged } from './turn-state-write.js'; -import { shouldWakeStep, wakeState } from './wake.js'; - -const SCOPE = 'agent'; - -async function stateGet(iii: ISdk, key: string): Promise { - try { - const v = await iii.trigger({ - function_id: 'state::get', - payload: { scope: SCOPE, key }, - }); - return v === null || v === undefined ? null : v; - } catch (err) { - logger.warn('persistence state::get failed', { key, err: String(err) }); - return null; - } -} - -async function stateSet(iii: ISdk, key: string, value: unknown): Promise { - try { - await iii.trigger({ - function_id: 'state::set', - payload: { scope: SCOPE, key, value }, - }); - } catch (err) { - logger.warn('persistence state::set failed', { key, err: String(err) }); - } -} - -export async function loadRecord(iii: ISdk, session_id: string): Promise { - const v = await stateGet(iii, turnStateKey(session_id)); - if (!v || typeof v !== 'object') return null; - return v as TurnStateRecord; -} - -/** - * Persist turn_state and emit UI event — no FSM wake (mid-handler saves). - * Pass `previous` (the pre-write record) to skip the `state::get` that would - * otherwise re-read it; omit it and the prior value is loaded here. 
- */ -export async function persistRecord( - iii: ISdk, - rec: TurnStateRecord, - previous?: TurnStateRecord | null, -): Promise { - const prev = previous !== undefined ? previous : await loadRecord(iii, rec.session_id); - const eventType = prev === null ? 'state:created' : 'state:updated'; - - await stateSet(iii, turnStateKey(rec.session_id), rec); - - await emitTurnStateChanged( - iii, - rec.session_id, - eventType, - rec as unknown as Record, - prev !== null ? (prev as unknown as Record) : undefined, - ); -} - -export async function saveRecord( - iii: ISdk, - rec: TurnStateRecord, - previous?: TurnStateRecord | null, -): Promise { - const prev = previous !== undefined ? previous : await loadRecord(iii, rec.session_id); - await persistRecord(iii, rec, prev); - - if (shouldWakeStep(prev?.state ?? null, rec.state)) { - await wakeState(iii, rec.session_id, rec.state); - } -} - -export async function loadMessages(iii: ISdk, session_id: string): Promise { - const v = await stateGet(iii, messagesKey(session_id)); - return Array.isArray(v) ? (v as AgentMessage[]) : []; -} - -export async function saveMessages( - iii: ISdk, - session_id: string, - messages: AgentMessage[], -): Promise { - await stateSet(iii, messagesKey(session_id), messages); - await mirrorMessagesToSessionTree(iii, session_id, messages); -} - -async function mirrorMessagesToSessionTree( - iii: ISdk, - session_id: string, - messages: AgentMessage[], -): Promise { - const lastKey = lastSessionTreeLenKey(session_id); - const last = await stateGet(iii, lastKey); - const alreadyMirrored = typeof last === 'number' ? 
last : 0; - if (messages.length <= alreadyMirrored) return; - if (alreadyMirrored === 0) { - try { - await iii.trigger({ - function_id: 'session-tree::ensure', - payload: { session_id }, - }); - } catch (err) { - logger.warn('session-tree::ensure failed; mirror skipped', { - session_id, - err: String(err), - }); - return; - } - } - let lastAppended: string | null = null; - if (alreadyMirrored > 0) { - try { - const resp = await iii.trigger }>({ - function_id: 'session-tree::messages', - payload: { session_id }, - }); - const arr = resp?.messages; - if (Array.isArray(arr) && arr.length > 0) { - const tail = arr[arr.length - 1]; - lastAppended = tail?.entry_id ?? null; - } - } catch (err) { - logger.warn('session-tree::messages read failed mid-mirror; skipping', { - session_id, - err: String(err), - }); - return; - } - } - for (const msg of messages.slice(alreadyMirrored)) { - try { - const resp = await iii.trigger({ - function_id: 'session-tree::append', - payload: { session_id, parent_id: lastAppended, message: msg }, - }); - lastAppended = resp?.entry_id ?? lastAppended; - } catch (err) { - logger.warn('session-tree::append mirror failed', { session_id, err: String(err) }); - return; - } - } - await stateSet(iii, lastKey, messages.length); -} - -export async function saveRunRequest( - iii: ISdk, - session_id: string, - request: unknown, -): Promise { - await stateSet(iii, runRequestKey(session_id), request); -} - -export async function loadRunRequest(iii: ISdk, session_id: string): Promise { - const v = await stateGet(iii, runRequestKey(session_id)); - return parseRunRequest(v && typeof v === 'object' ? 
(v as Record) : {}); -} - -export async function saveFunctionSchemas( - iii: ISdk, - session_id: string, - schemas: unknown, -): Promise { - await stateSet(iii, functionSchemasKey(session_id), schemas); -} - -export async function loadFunctionSchemas(iii: ISdk, session_id: string): Promise { - const v = await stateGet(iii, functionSchemasKey(session_id)); - return Array.isArray(v) ? v : []; -} - -const PREPARED_KEY = 'function_prepared'; -const EXECUTED_KEY = 'function_executed'; - -const stagingKey = (sid: string, suffix: string) => `session/${sid}/${suffix}`; - -async function stagingGet(iii: ISdk, session_id: string, suffix: string): Promise { - const v = await stateGet(iii, stagingKey(session_id, suffix)); - return Array.isArray(v) ? v : []; -} - -export type PreparedEntry = { - function_call: FunctionCall; - blocked: FunctionResult | null; - pre_approved?: boolean; -}; -export type ExecutedEntry = { - function_call: FunctionCall; - result: FunctionResult; - is_error: boolean; - /** Wall-clock ms between the matching function_execution_start and end. - * Persisted so resumed runs replay the original timing instead of the - * ~0ms it takes to re-emit the end event. Defaults to 0 in - * loadExecutedCalls so records persisted by an older binary survive - * the upgrade. */ - duration_ms: number; -}; - -export async function savePreparedCalls( - iii: ISdk, - session_id: string, - prepared: PreparedEntry[], -): Promise { - const payload = prepared.map((e) => ({ - function_call: e.function_call, - blocked: e.blocked, - pre_approved: e.pre_approved ?? 
false, - })); - await stateSet(iii, stagingKey(session_id, PREPARED_KEY), payload); -} - -export async function loadPreparedCalls(iii: ISdk, session_id: string): Promise { - const items = await stagingGet(iii, session_id, PREPARED_KEY); - const out: PreparedEntry[] = []; - for (const it of items) { - if (!it || typeof it !== 'object') continue; - const obj = it as Record; - const fc = obj.function_call as FunctionCall | undefined; - if (!fc) continue; - const blocked = (obj.blocked as FunctionResult | null) ?? null; - const pre_approved = obj.pre_approved === true; - out.push({ function_call: fc, blocked, pre_approved }); - } - return out; -} - -export async function saveExecutedCalls( - iii: ISdk, - session_id: string, - executed: ExecutedEntry[], -): Promise { - await stateSet(iii, stagingKey(session_id, EXECUTED_KEY), executed); -} - -export async function loadExecutedCalls(iii: ISdk, session_id: string): Promise { - const items = await stagingGet(iii, session_id, EXECUTED_KEY); - const out: ExecutedEntry[] = []; - for (const it of items) { - if (!it || typeof it !== 'object') continue; - const obj = it as Record; - const fc = obj.function_call as FunctionCall | undefined; - const result = obj.result as FunctionResult | undefined; - if (!fc || !result) continue; - out.push({ - function_call: fc, - result, - is_error: typeof obj.is_error === 'boolean' ? obj.is_error : false, - duration_ms: typeof obj.duration_ms === 'number' ? 
obj.duration_ms : 0, - }); - } - return out; -} - -export function findExecutedCall( - executed: ExecutedEntry[], - function_call_id: string, -): ExecutedEntry | undefined { - return executed.find((e) => e.function_call.id === function_call_id); -} - -export function upsertExecutedCall(executed: ExecutedEntry[], entry: ExecutedEntry): void { - const idx = executed.findIndex((e) => e.function_call.id === entry.function_call.id); - if (idx >= 0) executed[idx] = entry; - else executed.push(entry); -} diff --git a/harness/src/turn-orchestrator/preflight.ts b/harness/src/turn-orchestrator/preflight.ts index 17b0032c..5b5124f8 100644 --- a/harness/src/turn-orchestrator/preflight.ts +++ b/harness/src/turn-orchestrator/preflight.ts @@ -12,7 +12,13 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { AgentMessage } from '../types/agent-message.js'; import { CompactionBusyError, ContextOverflowError } from './errors.js'; -import { estimateMessages } from './estimate.js'; + +/** Cheap chars/4 token estimate — same heuristic as context-compaction's estimateTokenCount. */ +function estimateMessages(messages: AgentMessage[]): number { + let chars = 0; + for (const m of messages) chars += JSON.stringify(m).length; + return Math.floor(chars / 4); +} function findLastUserEntryId( entries: Array<{ entry_id?: string; message?: { role?: string } }>, diff --git a/harness/src/turn-orchestrator/provider-stream.ts b/harness/src/turn-orchestrator/provider-stream.ts new file mode 100644 index 00000000..95677c3a --- /dev/null +++ b/harness/src/turn-orchestrator/provider-stream.ts @@ -0,0 +1,152 @@ +/** + * Provider streaming. Turns an iii stream channel plus the provider trigger into + * a single final `AssistantMessage`, hiding the pull-based message pump behind an + * async iterator. + * + * `streamProviderTurn` owns channel creation, the concurrent provider trigger, + * and the read loop. 
The caller supplies how to build the provider input (it + * needs the channel's writer ref) and a per-delta callback used to emit UI + * `message_update` events. + */ + +import type { ISdk, StreamChannelRef } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; +import type { AssistantMessage } from '../types/agent-message.js'; +import type { ProviderStreamInput } from '../types/provider.js'; +import type { AssistantMessageEvent } from '../types/stream-event.js'; + +const PROVIDER_STREAM_TIMEOUT_MS = 300_000; + +type Channel = Awaited>; + +/** + * Bridges a push callback (`channel.reader.onMessage`) to async iteration. + * `push` buffers a message and wakes a pending `drain`; `end` terminates the + * iterator once the buffer is empty. + */ +class MessagePump { + private readonly items: string[] = []; + private wake: (() => void) | null = null; + private ended = false; + + push(item: string): void { + this.items.push(item); + this.signal(); + } + + end(): void { + this.ended = true; + this.signal(); + } + + private signal(): void { + if (this.wake) { + const wake = this.wake; + this.wake = null; + wake(); + } + } + + async *drain(): AsyncGenerator { + while (true) { + while (this.items.length > 0) { + const item = this.items.shift(); + if (item !== undefined) yield item; + } + if (this.ended) return; + await new Promise((resolve) => { + this.wake = resolve; + }); + } + } +} + +/** Outcome of a provider turn: the final message, or the reason none arrived. */ +export type ProviderTurnResult = { + final: AssistantMessage | null; + /** Set when the provider trigger threw; null when the channel just closed. */ + error: string | null; +}; + +/** Strip iii invocation-error prefixes so the surfaced message reads cleanly. */ +export function formatProviderError(err: unknown): string { + const raw = err instanceof Error ? 
err.message : String(err); + return raw + .replace(/^IIIInvocationError:\s*/i, '') + .replace(/^invocation_failed:\s*/i, '') + .trim(); +} + +/** The assistant message a stream event carries (final for done/error, else the partial). */ +function eventMessage(ev: AssistantMessageEvent): AssistantMessage | null { + if (ev.type === 'done') return ev.message; + if (ev.type === 'error') return ev.error; + if ('partial' in ev) return ev.partial; + return null; +} + +function parseEvent(text: string, session_id: string): AssistantMessageEvent | null { + try { + return JSON.parse(text) as AssistantMessageEvent; + } catch (err) { + logger.warn('decode AssistantMessageEvent failed', { session_id, err: String(err) }); + return null; + } +} + +export async function streamProviderTurn( + iii: ISdk, + params: { + session_id: string; + targetFn: string; + buildInput: (writerRef: StreamChannelRef) => ProviderStreamInput; + onDelta: (partial: AssistantMessage, event: AssistantMessageEvent) => Promise; + }, +): Promise { + let channel: Channel; + try { + channel = await iii.createChannel(); + } catch (err) { + logger.warn('createChannel failed; falling back to synthetic error', { err: String(err) }); + return { final: null, error: `create_channel failed: ${String(err)}` }; + } + + const pump = new MessagePump(); + channel.reader.onMessage((msg: string) => pump.push(msg)); + channel.reader.stream.resume(); + + let error: string | null = null; + const triggerPromise = iii + .trigger({ + function_id: params.targetFn, + payload: params.buildInput(channel.writerRef as StreamChannelRef), + timeoutMs: PROVIDER_STREAM_TIMEOUT_MS, + }) + .catch((err) => { + logger.warn('provider stream trigger failed', { + targetFn: params.targetFn, + err: String(err), + }); + error = formatProviderError(err); + pump.end(); + return null; + }); + + let final: AssistantMessage | null = null; + for await (const text of pump.drain()) { + const event = parseEvent(text, params.session_id); + if (!event) continue; 
+ if (event.type === 'done' || event.type === 'error') { + final = eventMessage(event); + break; + } + const partial = eventMessage(event); + if (partial) { + final = partial; + await params.onDelta(partial, event); + } + } + pump.end(); + await triggerPromise; + return { final, error }; +} diff --git a/harness/src/turn-orchestrator/provisioning/load-skills.ts b/harness/src/turn-orchestrator/provisioning/load-skills.ts new file mode 100644 index 00000000..1b0fe51b --- /dev/null +++ b/harness/src/turn-orchestrator/provisioning/load-skills.ts @@ -0,0 +1,18 @@ +/** + * Load default skill bodies via provisioning ports. + */ + +import { defaultSkillBody, skillIdFromUri, type DefaultSkillBody } from '../system-prompt.js'; +import type { ProvisioningPorts } from './ports.js'; + +export async function loadDefaultSkillBodies( + ports: Pick, + uris: readonly string[], +): Promise { + const bodies: DefaultSkillBody[] = []; + for (const uri of uris) { + const body = await ports.fetchSkillBody(skillIdFromUri(uri)); + bodies.push(defaultSkillBody(uri, body)); + } + return bodies; +} diff --git a/harness/src/turn-orchestrator/provisioning/ports.ts b/harness/src/turn-orchestrator/provisioning/ports.ts new file mode 100644 index 00000000..e9edd7d5 --- /dev/null +++ b/harness/src/turn-orchestrator/provisioning/ports.ts @@ -0,0 +1,74 @@ +/** + * Typed dependency ports for provisioning. + */ + +import { logger } from '../../runtime/otel.js'; +import type { ISdk } from '../../runtime/iii.js'; +import type { TurnOrchestratorConfig } from '../config.js'; +import type { RunRequest } from '../run-request.js'; +import { createTurnStore } from '../state-runtime/store.js'; + +const FETCH_TIMEOUT_MS = 10_000; + +/** Decode directory skill responses from iii trigger payloads. 
*/ +export function parseDirectoryBody(resp: unknown): string | null { + if (typeof resp === 'string') return resp; + if (resp && typeof resp === 'object') { + const body = (resp as { body?: unknown }).body; + if (typeof body === 'string') return body; + } + return null; +} + +export type ProvisioningPorts = { + defaultSkillUris: readonly string[]; + loadRunRequest(session_id: string): Promise; + saveRunRequest(session_id: string, request: RunRequest): Promise; + fetchSkillsIndex(): Promise; + fetchSkillBody(id: string): Promise; +}; + +export function createProvisioningPorts(iii: ISdk, cfg: TurnOrchestratorConfig): ProvisioningPorts { + const store = createTurnStore(iii); + + return { + defaultSkillUris: cfg.system_default_skills, + + loadRunRequest(session_id) { + return store.loadRunRequest(session_id); + }, + + saveRunRequest(session_id, request) { + return store.saveRunRequest(session_id, request); + }, + + async fetchSkillsIndex() { + try { + const resp = await iii.trigger({ + function_id: 'directory::skills::index', + payload: {}, + timeoutMs: FETCH_TIMEOUT_MS, + }); + const body = parseDirectoryBody(resp); + return body && body.length > 0 ? body : null; + } catch (err) { + logger.warn('directory::skills::index failed', { err: String(err) }); + return null; + } + }, + + async fetchSkillBody(id) { + try { + const resp = await iii.trigger({ + function_id: 'directory::skills::get', + payload: { id }, + timeoutMs: FETCH_TIMEOUT_MS, + }); + return parseDirectoryBody(resp); + } catch (err) { + logger.warn('directory::skills::get failed', { id, err: String(err) }); + return null; + } + }, + }; +} diff --git a/harness/src/turn-orchestrator/provisioning/process.ts b/harness/src/turn-orchestrator/provisioning/process.ts new file mode 100644 index 00000000..566e6f70 --- /dev/null +++ b/harness/src/turn-orchestrator/provisioning/process.ts @@ -0,0 +1,88 @@ +/** + * Load run request, fetch skills, build the provisioned RunRequest, and register the FSM step. 
+ */ + +import type { ISdk } from '../../runtime/iii.js'; +import { agentTriggerTool } from '../agent-trigger.js'; +import type { TurnOrchestratorConfig } from '../config.js'; +import { runTransition } from '../run-transition.js'; +import type { RunRequest } from '../run-request.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; +import { buildSystemPrompt } from '../system-prompt.js'; +import { transitionTo, type TurnStateRecord } from '../state.js'; +import { loadDefaultSkillBodies } from './load-skills.js'; +import { createProvisioningPorts, type ProvisioningPorts } from './ports.js'; + +export type ProvisioningOutcome = { + kind: 'ready'; + runRequest: RunRequest; +}; + +export async function processProvisioning( + ports: ProvisioningPorts, + rec: TurnStateRecord, +): Promise { + const request = await ports.loadRunRequest(rec.session_id); + + const override = request.system_prompt.length > 0 ? request.system_prompt : null; + + const [skillsIndex, bodies] = await Promise.all([ + ports.fetchSkillsIndex(), + loadDefaultSkillBodies(ports, ports.defaultSkillUris), + ]); + const prompt = buildSystemPrompt(bodies, { override, mode: request.mode, skillsIndex }); + + return { + kind: 'ready', + runRequest: { + ...request, + system_prompt: prompt, + function_schemas: [agentTriggerTool()], + }, + }; +} + +export async function applyProvisioningOutcome( + ports: ProvisioningPorts, + rec: TurnStateRecord, + outcome: ProvisioningOutcome, +): Promise { + await ports.saveRunRequest(rec.session_id, outcome.runRequest); + transitionTo(rec, 'assistant_streaming'); +} + +export async function runProvisioning( + ports: ProvisioningPorts, + rec: TurnStateRecord, +): Promise { + const outcome = await processProvisioning(ports, rec); + await applyProvisioningOutcome(ports, rec, outcome); +} + +export async function handleProvisioning( + iii: ISdk, + cfg: TurnOrchestratorConfig, + rec: TurnStateRecord, +): Promise { + const ports = 
createProvisioningPorts(iii, cfg); + await runProvisioning(ports, rec); +} + +export function register(iii: ISdk, cfg: TurnOrchestratorConfig): void { + iii.registerFunction( + 'turn::provisioning', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition( + iii, + 'provisioning', + (i, rec) => handleProvisioning(i, cfg, rec), + parsed, + ); + }, + { + description: + 'Run one durable FSM transition for session in state provisioning: build the system prompt, attach the agent_trigger function schema, advance to assistant_streaming.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index de49c286..1d6a4f48 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -2,19 +2,13 @@ import { loadConfig } from '../runtime/config.js'; import type { ISdk } from '../runtime/iii.js'; import * as bootstrap from './bootstrap.js'; import { loadOrchestratorConfig } from './config.js'; +import { register as registerAssistantStreaming } from './assistant-streaming/process.js'; +import { register as registerFunctionAwaitingApproval } from './function-awaiting-approval/process.js'; +import { register as registerFunctionExecute } from './function-execute/process.js'; import { register as registerGetState } from './get-state.js'; -import { register as registerOnAbortSignal } from './on-abort-signal.js'; import { register as registerRunStart } from './run-start.js'; -import { recoverPendingApprovals } from './approval-resume.js'; -import { - registerAssistantFinished, - registerAssistantStreaming, - registerFunctionAwaitingApproval, - registerFunctionExecute, - registerProvisioning, - registerSteeringCheck, - registerTearingDown, -} from './states/index.js'; +import { register as registerProvisioning } from './provisioning/process.js'; +import { register as registerSteeringCheck } from './steering-check/process.js'; 
export async function register(iii: ISdk, ctx: { configPath: string }): Promise { const cfg = await loadConfig(ctx.configPath); @@ -22,14 +16,10 @@ export async function register(iii: ISdk, ctx: { configPath: string }): Promise< registerRunStart(iii); registerProvisioning(iii, orchestratorCfg); registerAssistantStreaming(iii); - registerAssistantFinished(iii); registerFunctionExecute(iii); registerFunctionAwaitingApproval(iii); registerSteeringCheck(iii); - registerTearingDown(iii); - await recoverPendingApprovals(iii); registerGetState(iii); - registerOnAbortSignal(iii); void bootstrap.run(iii, orchestratorCfg); } diff --git a/harness/src/turn-orchestrator/run-request.ts b/harness/src/turn-orchestrator/run-request.ts index 23d8ca5a..bcc820c2 100644 --- a/harness/src/turn-orchestrator/run-request.ts +++ b/harness/src/turn-orchestrator/run-request.ts @@ -1,28 +1,25 @@ /** * The persisted run request and its single typed parser. `loadRunRequest` - * (persistence) parses the raw `session//run_request` value through + * (persistence) parses the raw scope `run_request` value through * `parseRunRequest` once, so every consumer reads a fully-typed `RunRequest` * instead of re-guarding `unknown` fields. */ +import { z } from 'zod'; import type { Mode } from './system-prompt.js'; -export type RunRequest = { - provider: string; - model: string; - mode: Mode | null; - system_prompt: string; -}; +const RunRequestSchema = z.object({ + provider: z.string().catch(''), + model: z.string().catch(''), + mode: z + .unknown() + .transform((v): Mode | null => (v === 'plan' || v === 'ask' || v === 'agent' ? v : null)), + system_prompt: z.string().catch(''), + function_schemas: z.array(z.unknown()).catch([]), +}); -function parseMode(value: unknown): Mode | null { - return value === 'plan' || value === 'ask' || value === 'agent' ? 
value : null; -} +export type RunRequest = z.infer; -export function parseRunRequest(raw: Record): RunRequest { - return { - provider: typeof raw.provider === 'string' ? raw.provider : '', - model: typeof raw.model === 'string' ? raw.model : '', - mode: parseMode(raw.mode), - system_prompt: typeof raw.system_prompt === 'string' ? raw.system_prompt : '', - }; +export function parseRunRequest(raw: unknown): RunRequest { + return RunRequestSchema.parse(raw ?? {}); } diff --git a/harness/src/turn-orchestrator/run-start.ts b/harness/src/turn-orchestrator/run-start.ts index d6112a95..59d9a621 100644 --- a/harness/src/turn-orchestrator/run-start.ts +++ b/harness/src/turn-orchestrator/run-start.ts @@ -1,5 +1,5 @@ /** - * `run::start`. Mirrors `turn-orchestrator/src/run_start.rs`. + * `run::start`. Persist run config + messages and seed the FSM at `provisioning`. * * **Incoming**: flat run request from `harness::trigger` (`body.payload` after * `HarnessTriggerInputSchema` parse); console/web sends @@ -10,21 +10,23 @@ */ import type { ISdk } from '../runtime/iii.js'; -import * as persistence from './persistence.js'; import { RunStartPayloadSchema, type RunStartPayload, type RunStartResult } from './schemas.js'; +import { createTurnStore } from './state-runtime/store.js'; import { newRecord } from './state.js'; export async function execute(iii: ISdk, payload: RunStartPayload): Promise { + const store = createTurnStore(iii); const { session_id, messages, max_turns, message_id: _message_id, ...run } = payload; - await persistence.saveRunRequest(iii, session_id, { + await store.saveRunRequest(session_id, { ...run, mode: run.mode ?? 
null, + function_schemas: [], }); - await persistence.saveMessages(iii, session_id, messages); + await store.saveMessages(session_id, messages); const record = newRecord(session_id, max_turns); - await persistence.saveRecord(iii, record); + await store.saveRecord(record); return { session_id }; } diff --git a/harness/src/turn-orchestrator/run-transition.ts b/harness/src/turn-orchestrator/run-transition.ts index 0f142761..c9827a02 100644 --- a/harness/src/turn-orchestrator/run-transition.ts +++ b/harness/src/turn-orchestrator/run-transition.ts @@ -3,17 +3,20 @@ * same load → null-check → stale-skip → handle → save sequence; this owns it so * each per-state file only contributes its handler. * - * The record loaded here is snapshotted before the handler mutates it and - * threaded into `saveRecord`, so the save path needs no extra `state::get` to - * compute the wake decision or the UI event's `old_value` — one read per - * transition instead of three. + * On an unexpected handler throw the session is routed to the `failed` + * terminal (acked, so the durable queue stops retrying) and the failure is + * surfaced to the UI. A handler may throw `TransientError` to opt into the + * queue's retry/backoff/DLQ instead. 
*/ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; -import * as persistence from './persistence.js'; +import { TransientError } from './errors.js'; +import { emit } from './events.js'; import { type TurnStepPayload, type TurnStepResult } from './schemas.js'; -import { type TurnState, type TurnStateRecord, cloneRecord } from './state.js'; +import { createTurnStore } from './state-runtime/store.js'; +import { type TurnState, type TurnStateRecord, transitionTo } from './state.js'; +import { syntheticAssistant } from './synthetic-assistant.js'; export type TransitionHandler = (iii: ISdk, rec: TurnStateRecord) => Promise; @@ -28,26 +31,63 @@ function staleSkipResult(expectedState: TurnState, rec: TurnStateRecord): TurnSt return { ok: true, skipped: true, reason: 'stale' }; } +async function failTransition( + iii: ISdk, + rec: TurnStateRecord, + previous: TurnStateRecord, + from_state: TurnState, + err: unknown, +): Promise { + const store = createTurnStore(iii); + const message = err instanceof Error ? err.message : String(err); + rec.error = { kind: 'transition_error', message: `from ${from_state}: ${message}` }; + transitionTo(rec, 'failed'); + await store.saveRecord(rec, previous); + + // Surface the failure to the live UI (mirrors the graceful error path): + // message_complete{stop_reason:'error'} → the translator emits a `stop-reason` + // event so the user sees WHY; a bare agent_end renders as a silent end. + // (The UI translator reads stop_reason, not error_kind.) 
+ const failed = syntheticAssistant({ stop_reason: 'error', text: rec.error.message }); + await emit(iii, rec.session_id, { + type: 'message_complete', + message: failed, + body_streamed: false, + }); + + const messages = await store.loadMessages(rec.session_id); + await emit(iii, rec.session_id, { type: 'agent_end', messages }); + logger.error('transition failed; session marked failed', { + session_id: rec.session_id, + from_state, + err: message, + }); + return { ok: true, from_state, to_state: 'failed' }; +} + export async function runTransition( iii: ISdk, state: TurnState, handle: TransitionHandler, payload: TurnStepPayload, ): Promise { - const rec = await persistence.loadRecord(iii, payload.session_id); + const store = createTurnStore(iii); + const rec = await store.loadRecord(payload.session_id); if (!rec) { throw new Error(`turn::${state} invariant: missing session ${payload.session_id}`); } const skipped = staleSkipResult(state, rec); if (skipped) return skipped; - const previous = cloneRecord(rec); + // JSON round-trip matches a persisted reload — snapshot before handler mutates. 
+ const previous = JSON.parse(JSON.stringify(rec)) as TurnStateRecord; const from_state = rec.state; try { await handle(iii, rec); } catch (err) { - throw new Error(`transition from ${from_state} failed: ${String(err)}`); + if (err instanceof TransientError) throw err; + return failTransition(iii, rec, previous, from_state, err); } - await persistence.saveRecord(iii, rec, previous); + await store.saveRecord(rec, previous); return { ok: true, from_state, to_state: rec.state }; } diff --git a/harness/src/turn-orchestrator/schemas.ts b/harness/src/turn-orchestrator/schemas.ts index 4ece3fdc..2f187647 100644 --- a/harness/src/turn-orchestrator/schemas.ts +++ b/harness/src/turn-orchestrator/schemas.ts @@ -6,12 +6,22 @@ */ import { z } from 'zod'; -import type { AgentMessage } from '../types/agent-message.js'; -import type { TurnState, TurnStateRecord } from './state.js'; +import type { AssistantMessage, AgentMessage } from '../types/agent-message.js'; +import { TurnStateInvariantError } from './errors.js'; +import type { FunctionBatchWork } from './function-execute/types.js'; +import { + FUNCTION_BATCH_STATES, + type AssistantStreamingTurnRecord, + type AwaitingApprovalEntry, + type FunctionBatchTurnRecord, + type SteeringCheckTurnRecord, + type TurnState, + type TurnStateRecord, +} from './state.js'; import type { Mode } from './system-prompt.js'; /** Shared `{ session_id }` payload — `turn::{state}` steps and `turn::get_state`. 
*/ -export const SessionIdPayloadSchema = z.object({ +const SessionIdPayloadSchema = z.object({ session_id: z.string().min(1), }); @@ -35,23 +45,147 @@ export type TurnStepResult = | { ok: true; from_state: TurnState; to_state: TurnState } | { ok: true; skipped: true; reason: 'stale' }; +// --- function_execute / function_awaiting_approval persisted record --- +const AwaitingApprovalEntrySchema = z.object({ + function_call_id: z.string().min(1), + function_id: z.string().min(1), + args: z.unknown(), +}); + +const FunctionBatchWorkSchema = z.custom( + (v) => + v != null && + typeof v === 'object' && + Array.isArray((v as FunctionBatchWork).prepared) && + typeof (v as FunctionBatchWork).executed === 'object' && + (v as FunctionBatchWork).executed !== null, + { message: 'work must include prepared and executed' }, +); + +const AssistantMessageSchema = z.custom( + (v) => v != null && typeof v === 'object' && (v as AssistantMessage).role === 'assistant', + { message: 'last_assistant is required' }, +); + +/** Fields required before function_execute / function_awaiting_approval handlers run. */ +export const FunctionBatchTurnRecordSchema = z + .object({ + session_id: z.string().min(1), + state: z.enum(FUNCTION_BATCH_STATES), + turn_count: z.number(), + function_results: z.array(z.unknown()), + turn_end_emitted: z.boolean(), + started_at_ms: z.number(), + updated_at_ms: z.number(), + last_assistant: AssistantMessageSchema, + work: FunctionBatchWorkSchema, + awaiting_approval: z.array(AwaitingApprovalEntrySchema), + }) + .passthrough(); + +function formatZodIssues(error: z.ZodError): string { + return error.issues.map((issue) => `${issue.path.join('.')}: ${issue.message}`).join('; '); +} + +/** Validate persisted turn_state for function-batch handlers; throws {@link TurnStateInvariantError}. 
*/ +export function parseFunctionBatchRecord(rec: TurnStateRecord): FunctionBatchTurnRecord { + const result = FunctionBatchTurnRecordSchema.safeParse(rec); + if (!result.success) { + throw new TurnStateInvariantError( + `invalid function batch turn record: ${formatZodIssues(result.error)}`, + ); + } + // Return the same object — handlers mutate turn_state in place before saveRecord. + return rec as FunctionBatchTurnRecord; +} + +/** Fields required before assistant_streaming handlers run. */ +export const AssistantStreamingTurnRecordSchema = z + .object({ + session_id: z.string().min(1), + state: z.literal('assistant_streaming'), + turn_count: z.number(), + function_results: z.array(z.unknown()), + turn_end_emitted: z.boolean(), + started_at_ms: z.number(), + updated_at_ms: z.number(), + }) + .passthrough(); + +/** Validate persisted turn_state for assistant_streaming; throws {@link TurnStateInvariantError}. */ +export function parseAssistantStreamingRecord(rec: TurnStateRecord): AssistantStreamingTurnRecord { + const result = AssistantStreamingTurnRecordSchema.safeParse(rec); + if (!result.success) { + throw new TurnStateInvariantError( + `invalid assistant_streaming turn record: ${formatZodIssues(result.error)}`, + ); + } + return rec as AssistantStreamingTurnRecord; +} + +/** Fields required before steering_check handlers run. */ +export const SteeringCheckTurnRecordSchema = z + .object({ + session_id: z.string().min(1), + state: z.literal('steering_check'), + turn_count: z.number(), + function_results: z.array(z.unknown()), + turn_end_emitted: z.boolean(), + started_at_ms: z.number(), + updated_at_ms: z.number(), + }) + .passthrough(); + +/** Validate persisted turn_state for steering_check; throws {@link TurnStateInvariantError}. 
*/ +export function parseSteeringCheckRecord(rec: TurnStateRecord): SteeringCheckTurnRecord { + const result = SteeringCheckTurnRecordSchema.safeParse(rec); + if (!result.success) { + throw new TurnStateInvariantError( + `invalid steering_check turn record: ${formatZodIssues(result.error)}`, + ); + } + return rec as SteeringCheckTurnRecord; +} + // --- turn::get_state --- export const GetStatePayloadSchema = SessionIdPayloadSchema; export type GetStatePayload = z.infer; -export type GetStateResult = TurnStateRecord | null; -// --- turn::is_abort_signal_set / turn::on_abort_signal (agent-scope state event) --- -const AgentAbortSignalWriteEventSchema = z.object({ +/** Lean projection of TurnStateRecord sent to the UI and returned by turn::get_state. + * Excludes heavy internal fields (work, last_assistant) not needed by consumers. */ +export type TurnStateView = { + session_id: string; + state: TurnState; + turn_count: number; + max_turns?: number; + awaiting_approval?: AwaitingApprovalEntry[]; + error?: { kind: string; message: string }; +}; + +export function toView(rec: TurnStateRecord): TurnStateView { + return { + session_id: rec.session_id, + state: rec.state, + turn_count: rec.turn_count, + max_turns: rec.max_turns, + awaiting_approval: rec.awaiting_approval, + error: rec.error, + }; +} + +export type GetStateResult = TurnStateView | null; + +// --- turn::on_approval (approvals-scope state event) --- +const ApprovalDecisionWriteEventSchema = z.object({ type: z.literal('state').optional(), - scope: z.literal('agent').optional(), + scope: z.literal('approvals').optional(), event_type: z.enum(['state:created', 'state:updated']), - key: z.string().regex(/^session\/[^/]+\/abort_signal$/), - new_value: z.literal(true), - old_value: z.union([z.literal(true), z.literal(false), z.null()]).optional(), + key: z.string().regex(/^[^/]+\/[^/]+$/), + new_value: z.object({ decision: z.enum(['allow', 'deny', 'aborted']) }).passthrough(), + old_value: z.unknown().optional(), 
}); -export const AbortSignalWriteEventSchema = AgentAbortSignalWriteEventSchema.transform((data) => { - const session_id = data.key.slice('session/'.length, -'/abort_signal'.length); +export const ApprovalDecisionEventSchema = ApprovalDecisionWriteEventSchema.transform((data) => { + const session_id = data.key.slice(0, data.key.indexOf('/')); return { session_id }; }); -export type ParsedAbortSignalWrite = z.infer; diff --git a/harness/src/turn-orchestrator/session-tree-mirror.ts b/harness/src/turn-orchestrator/session-tree-mirror.ts new file mode 100644 index 00000000..26c34ee3 --- /dev/null +++ b/harness/src/turn-orchestrator/session-tree-mirror.ts @@ -0,0 +1,73 @@ +/** + * Incrementally mirrors flat agent messages into the session-tree store. + */ + +import { z } from 'zod'; +import { stateGet, stateSet } from '../runtime/state.js'; +import type { ISdk } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; +import type { AgentMessage } from '../types/agent-message.js'; + +const SESSION_TREE_MIRROR_LEN_SCOPE = 'session_tree_mirror_len'; + +const MirrorLenSchema = z.coerce.number().int().nonnegative().catch(0); + +export function parseMirrorLen(raw: unknown): number { + return MirrorLenSchema.parse(raw ?? 
0); +} + +export async function mirrorMessagesToSessionTree( + iii: ISdk, + session_id: string, + messages: AgentMessage[], +): Promise { + const alreadyMirrored = parseMirrorLen( + await stateGet(iii, SESSION_TREE_MIRROR_LEN_SCOPE, session_id), + ); + if (messages.length <= alreadyMirrored) return; + + if (alreadyMirrored === 0) { + const ensured = await triggerSessionTree(iii, 'session-tree::ensure', { session_id }); + if (!ensured) return; + } + + let lastAppended: string | null = null; + if (alreadyMirrored > 0) { + const resp = await triggerSessionTree<{ messages?: Array<{ entry_id?: string }> }>( + iii, + 'session-tree::messages', + { session_id }, + ); + if (!resp) return; + const tail = resp.messages?.at(-1); + lastAppended = tail?.entry_id ?? null; + } + + for (const msg of messages.slice(alreadyMirrored)) { + const resp = await triggerSessionTree<{ entry_id?: string }>(iii, 'session-tree::append', { + session_id, + parent_id: lastAppended, + message: msg, + }); + if (!resp) return; + lastAppended = resp.entry_id ?? lastAppended; + } + + await stateSet(iii, SESSION_TREE_MIRROR_LEN_SCOPE, session_id, messages.length); +} + +async function triggerSessionTree( + iii: ISdk, + function_id: string, + payload: Record, +): Promise { + try { + return await iii.trigger({ function_id, payload }); + } catch (err) { + logger.warn(`${function_id} failed; session-tree mirror skipped`, { + session_id: payload.session_id, + err: String(err), + }); + return null; + } +} diff --git a/harness/src/turn-orchestrator/state-runtime/ports.ts b/harness/src/turn-orchestrator/state-runtime/ports.ts new file mode 100644 index 00000000..3b47bab2 --- /dev/null +++ b/harness/src/turn-orchestrator/state-runtime/ports.ts @@ -0,0 +1,60 @@ +/** + * Shared dependency ports for turn FSM state handlers. 
+ */ + +import { emit } from '../events.js'; +import type { RunRequest } from '../run-request.js'; +import type { ISdk } from '../../runtime/iii.js'; +import type { AgentMessage, FunctionResultMessage } from '../../types/agent-message.js'; +import { transitionTo, type TurnStateRecord } from '../state.js'; +import { createTurnStore, type TurnStore } from './store.js'; + +export type TurnStatePorts = { + loadMessages(session_id: string): Promise; + appendMessages(session_id: string, msgs: AgentMessage[]): Promise; + checkpoint(rec: TurnStateRecord): Promise; + loadRunRequest(session_id: string): Promise; + saveRunRequest(session_id: string, request: RunRequest): Promise; + emitTurnEnd( + session_id: string, + message: AgentMessage, + function_results: FunctionResultMessage[], + ): Promise; + finishSession(rec: TurnStateRecord): Promise; +}; + +export function createTurnStatePorts(iii: ISdk, store?: TurnStore): TurnStatePorts { + const s = store ?? createTurnStore(iii); + + return { + loadMessages(session_id) { + return s.loadMessages(session_id); + }, + + appendMessages(session_id, msgs) { + return s.appendMessages(session_id, msgs); + }, + + checkpoint(rec) { + return s.writeRecord(rec); + }, + + loadRunRequest(session_id) { + return s.loadRunRequest(session_id); + }, + + saveRunRequest(session_id, request) { + return s.saveRunRequest(session_id, request); + }, + + async emitTurnEnd(session_id, message, function_results) { + await emit(iii, session_id, { type: 'turn_end', message, function_results }); + }, + + async finishSession(rec) { + const messages = await s.loadMessages(rec.session_id); + await emit(iii, rec.session_id, { type: 'agent_end', messages }); + transitionTo(rec, 'stopped'); + }, + }; +} diff --git a/harness/src/turn-orchestrator/state-runtime/store.ts b/harness/src/turn-orchestrator/state-runtime/store.ts new file mode 100644 index 00000000..f924c7f2 --- /dev/null +++ b/harness/src/turn-orchestrator/state-runtime/store.ts @@ -0,0 +1,147 @@ +/** + * 
Agent-scope turn FSM store. All `state::*` I/O for turn-orchestrator goes + * through `createTurnStore`. + */ + +import { z } from 'zod'; +import { TriggerAction, type ISdk } from '../../runtime/iii.js'; +import { stateGet, stateSet } from '../../runtime/state.js'; +import { logger } from '../../runtime/otel.js'; +import type { AgentMessage } from '../../types/agent-message.js'; +import { MESSAGES_SCOPE, RUN_REQUEST_SCOPE, TURN_STATE_SCOPE } from '../state.js'; +import { emit } from '../events.js'; +import { type RunRequest, parseRunRequest } from '../run-request.js'; +import { toView, type TurnStateView } from '../schemas.js'; +import { mirrorMessagesToSessionTree } from '../session-tree-mirror.js'; +import { type TurnState, type TurnStateRecord, parseTurnStateRecord } from '../state.js'; + +export const TURN_STEP_QUEUE = 'turn-step'; + +const NON_STEPABLE_STATES = new Set(['stopped', 'failed', 'function_awaiting_approval']); + +/** True when a persisted turn_state transition should enqueue `turn::{newState}`. 
*/ +export function shouldWakeStep(previousState: TurnState | null, newState: TurnState): boolean { + if (NON_STEPABLE_STATES.has(newState)) return false; + if (previousState !== null && previousState === newState) return false; + return true; +} + +async function enqueueTurnStep(iii: ISdk, session_id: string, state: TurnState): Promise { + try { + await iii.trigger({ + function_id: `turn::${state}`, + payload: { session_id }, + action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), + }); + } catch (err) { + logger.warn('wakeStep failed', { session_id, state, err: String(err) }); + } +} + +export type TurnStore = { + loadRecord(session_id: string): Promise; + saveRecord(rec: TurnStateRecord, previous?: TurnStateRecord | null): Promise; + writeRecord(rec: TurnStateRecord): Promise; + loadMessages(session_id: string): Promise; + saveMessages(session_id: string, messages: AgentMessage[]): Promise; + appendMessages(session_id: string, msgs: AgentMessage[]): Promise; + loadRunRequest(session_id: string): Promise; + saveRunRequest(session_id: string, request: RunRequest): Promise; +}; + +const FlatMessagesSchema = z + .array(z.custom((v) => v != null && typeof v === 'object')) + .catch([]); + +/** @internal Exported for unit tests. */ +export function parseFlatMessages(raw: unknown): AgentMessage[] { + return FlatMessagesSchema.parse(raw ?? 
[]); +} + +const scopedGet = (iii: ISdk, scope: string, session_id: string) => + stateGet(iii, scope, session_id); +const scopedSet = (iii: ISdk, scope: string, session_id: string, value: unknown) => + stateSet(iii, scope, session_id, value); + +async function emitTurnStateChanged( + iii: ISdk, + session_id: string, + event_type: 'state:created' | 'state:updated', + new_value: TurnStateView, + old_value?: TurnStateView, +): Promise { + try { + await emit(iii, session_id, { + type: 'turn_state_changed', + event_type, + new_value, + ...(old_value !== undefined && { old_value }), + }); + } catch (err) { + logger.warn('emitTurnStateChanged failed', { + session_id, + err: String(err), + }); + } +} + +async function persistRecord( + iii: ISdk, + rec: TurnStateRecord, + previous?: TurnStateRecord | null, +): Promise { + const result = await scopedSet(iii, TURN_STATE_SCOPE, rec.session_id, rec); + const prev = previous !== undefined ? previous : parseTurnStateRecord(result?.old_value ?? null); + + await emitTurnStateChanged( + iii, + rec.session_id, + prev == null ? 'state:created' : 'state:updated', + toView(rec), + prev != null ? toView(prev) : undefined, + ); + + return prev; +} + +export function createTurnStore(iii: ISdk): TurnStore { + return { + async loadRecord(session_id) { + return parseTurnStateRecord(await scopedGet(iii, TURN_STATE_SCOPE, session_id)); + }, + + async writeRecord(rec) { + await scopedSet(iii, TURN_STATE_SCOPE, rec.session_id, rec); + }, + + async saveRecord(rec, previous) { + const prev = await persistRecord(iii, rec, previous); + if (shouldWakeStep(prev?.state ?? 
null, rec.state)) { + await enqueueTurnStep(iii, rec.session_id, rec.state); + } + }, + + async loadMessages(session_id) { + return parseFlatMessages(await scopedGet(iii, MESSAGES_SCOPE, session_id)); + }, + + async saveMessages(session_id, messages) { + await scopedSet(iii, MESSAGES_SCOPE, session_id, messages); + await mirrorMessagesToSessionTree(iii, session_id, messages); + }, + + async appendMessages(session_id, msgs) { + const messages = parseFlatMessages(await scopedGet(iii, MESSAGES_SCOPE, session_id)); + await scopedSet(iii, MESSAGES_SCOPE, session_id, [...messages, ...msgs]); + await mirrorMessagesToSessionTree(iii, session_id, [...messages, ...msgs]); + }, + + async saveRunRequest(session_id, request) { + await scopedSet(iii, RUN_REQUEST_SCOPE, session_id, request); + }, + + async loadRunRequest(session_id) { + return parseRunRequest(await scopedGet(iii, RUN_REQUEST_SCOPE, session_id)); + }, + }; +} diff --git a/harness/src/turn-orchestrator/state-runtime/transcript.ts b/harness/src/turn-orchestrator/state-runtime/transcript.ts new file mode 100644 index 00000000..bd6b6e1b --- /dev/null +++ b/harness/src/turn-orchestrator/state-runtime/transcript.ts @@ -0,0 +1,33 @@ +/** + * Shared transcript idempotency helpers for turn FSM handlers. + */ + +import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; + +/** + * Function_call_ids already persisted for the current turn. Results are appended + * right after the assistant that requested them, so they form the trailing run + * of `function_result` messages; the first non-result from the tail is the turn + * boundary. + */ +export function persistedTrailingResultIds(messages: AgentMessage[]): Set { + const ids = new Set(); + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i]; + if (m?.role === 'function_result') ids.add(m.function_call_id); + else break; + } + return ids; +} + +/** True when the trailing assistant message matches the candidate (re-entry dup). 
*/ +export function isDuplicateAssistant(messages: AgentMessage[], asst: AssistantMessage): boolean { + const last = messages[messages.length - 1]; + return ( + last !== undefined && + last.role === 'assistant' && + last.timestamp === asst.timestamp && + last.model === asst.model && + last.provider === asst.provider + ); +} diff --git a/harness/src/turn-orchestrator/state-runtime/turn-end.ts b/harness/src/turn-orchestrator/state-runtime/turn-end.ts new file mode 100644 index 00000000..d2be3af2 --- /dev/null +++ b/harness/src/turn-orchestrator/state-runtime/turn-end.ts @@ -0,0 +1,35 @@ +/** + * Shared turn-end and FSM resume helpers for step outcome application. + */ + +import { + emptyAssistant, + type AssistantMessage, + type FunctionResultMessage, +} from '../../types/agent-message.js'; +import { transitionTo, type TurnStateRecord } from '../state.js'; + +export type TurnEndEmitter = { + emitTurnEnd( + session_id: string, + message: AssistantMessage, + function_results: FunctionResultMessage[], + ): Promise; +}; + +export async function emitTurnEndOnce( + ports: TurnEndEmitter, + rec: TurnStateRecord, + message?: AssistantMessage, + function_results: FunctionResultMessage[] = [], +): Promise { + if (rec.turn_end_emitted) return; + const last = message ?? rec.last_assistant ?? emptyAssistant(); + await ports.emitTurnEnd(rec.session_id, last, function_results); + rec.turn_end_emitted = true; +} + +export function resumeToAssistantStreaming(rec: TurnStateRecord): void { + rec.function_results = []; + transitionTo(rec, 'assistant_streaming'); +} diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index 5d080693..f8fc6995 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -1,20 +1,30 @@ /** - * TurnState + TurnStateRecord + state-key helpers. Mirrors - * `turn-orchestrator/src/state.rs`. + * TurnState + TurnStateRecord types and parsers. 
+ * + * Persistence uses semantic iii scopes (`turn_state`, `messages`, `run_request`, …) + * keyed by `session_id`. Recovery lists scope `turn_state` via {@link parseTurnStateRecord}. */ +import { z } from 'zod'; import type { AssistantMessage, FunctionResultMessage } from '../types/agent-message.js'; -import type { FunctionCall } from '../types/function.js'; +import type { ExecutedCall, FunctionBatchWork, PreparedCall } from './function-execute/types.js'; + +/** Shared iii scope names for turn-orchestrator persistence (key = session_id). */ +export const TURN_STATE_SCOPE = 'turn_state'; +export const MESSAGES_SCOPE = 'messages'; +export const RUN_REQUEST_SCOPE = 'run_request'; export type TurnState = | 'provisioning' | 'assistant_streaming' - | 'assistant_finished' | 'function_execute' | 'function_awaiting_approval' | 'steering_check' - | 'tearing_down' - | 'stopped'; + | 'stopped' + | 'failed'; + +export const FUNCTION_BATCH_STATES = ['function_execute', 'function_awaiting_approval'] as const; +export type FunctionBatchState = (typeof FUNCTION_BATCH_STATES)[number]; export type AwaitingApprovalEntry = { function_call_id: string; @@ -22,22 +32,92 @@ export type AwaitingApprovalEntry = { args: unknown; }; -export type TurnStateRecord = { +/** Durable mid-batch work for function_execute. */ +export type TurnWork = FunctionBatchWork; + +export type { ExecutedCall, FunctionBatchWork, PreparedCall }; + +type TurnStateRecordCore = { session_id: string; - state: TurnState; turn_count: number; max_turns?: number; - last_assistant?: AssistantMessage | null; - pending_function_calls: FunctionCall[]; function_results: FunctionResultMessage[]; turn_end_emitted: boolean; started_at_ms: number; updated_at_ms: number; - awaiting_approval?: AwaitingApprovalEntry[]; /** Set during assistant_streaming when message_update deltas were emitted. 
*/ assistant_body_streamed?: boolean; + error?: { kind: string; message: string }; +}; + +/** Required fields while in function_execute or function_awaiting_approval. */ +export type FunctionBatchTurnRecord = TurnStateRecordCore & { + state: FunctionBatchState; + last_assistant: AssistantMessage; + work: TurnWork; + awaiting_approval: AwaitingApprovalEntry[]; +}; + +/** Persisted shape while in assistant_streaming (last_assistant set mid-handler). */ +export type AssistantStreamingTurnRecord = TurnStateRecordCore & { + state: 'assistant_streaming'; + last_assistant?: AssistantMessage | null; + work?: TurnWork; + awaiting_approval?: AwaitingApprovalEntry[]; +}; + +/** Persisted shape while in steering_check (work cleared on entry from function batch). */ +export type SteeringCheckTurnRecord = TurnStateRecordCore & { + state: 'steering_check'; + last_assistant?: AssistantMessage | null; + work?: TurnWork; + awaiting_approval?: AwaitingApprovalEntry[]; }; +type OtherTurnState = Exclude< + TurnState, + FunctionBatchState | 'assistant_streaming' | 'steering_check' +>; + +export type TurnStateRecord = + | FunctionBatchTurnRecord + | AssistantStreamingTurnRecord + | SteeringCheckTurnRecord + | (TurnStateRecordCore & { + state: OtherTurnState; + last_assistant?: AssistantMessage | null; + work?: TurnWork; + awaiting_approval?: AwaitingApprovalEntry[]; + }); + +const TURN_STATES = [ + 'provisioning', + 'assistant_streaming', + 'function_execute', + 'function_awaiting_approval', + 'steering_check', + 'stopped', + 'failed', +] as const satisfies readonly TurnState[]; + +/** Minimal structural guard for persisted turn_state — nested fields pass through. 
*/ +const TurnStateRecordSchema = z + .object({ + session_id: z.string(), + state: z.enum(TURN_STATES), + turn_count: z.number().catch(0), + function_results: z.array(z.unknown()).catch([]), + turn_end_emitted: z.boolean().catch(false), + started_at_ms: z.number().catch(0), + updated_at_ms: z.number().catch(0), + }) + .passthrough(); + +export function parseTurnStateRecord(raw: unknown): TurnStateRecord | null { + const result = TurnStateRecordSchema.safeParse(raw); + return result.success ? (result.data as TurnStateRecord) : null; +} + export function newRecord(session_id: string, max_turns?: number): TurnStateRecord { const now = Date.now(); return { @@ -46,7 +126,6 @@ export function newRecord(session_id: string, max_turns?: number): TurnStateReco turn_count: 0, max_turns, last_assistant: null, - pending_function_calls: [], function_results: [], turn_end_emitted: false, started_at_ms: now, @@ -58,29 +137,3 @@ export function transitionTo(rec: TurnStateRecord, next: TurnState): void { rec.state = next; rec.updated_at_ms = Date.now(); } - -/** - * Deep copy of a record via JSON round-trip — faithful to a `state::get` - * reload (the record is persisted as JSON), so the runner can snapshot the - * pre-mutation record and thread it into `saveRecord` instead of paying a - * second `state::get` to recover the previous state. 
- */ -export function cloneRecord(rec: TurnStateRecord): TurnStateRecord { - return JSON.parse(JSON.stringify(rec)) as TurnStateRecord; -} - -export function isTerminal(rec: TurnStateRecord): boolean { - return rec.state === 'stopped'; -} - -export function turnFnId(state: TurnState): string { - return `turn::${state}`; -} - -export const messagesKey = (sid: string) => `session/${sid}/messages`; -export const turnStateKey = (sid: string) => `session/${sid}/turn_state`; -export const runRequestKey = (sid: string) => `session/${sid}/run_request`; -export const functionSchemasKey = (sid: string) => `session/${sid}/function_schemas`; -export const lastSessionTreeLenKey = (sid: string) => `session/${sid}/session_tree_mirror_len`; -export const eventCounterKey = (sid: string) => `session/${sid}/event_counter`; -export const abortSignalKey = (sid: string) => `session/${sid}/abort_signal`; diff --git a/harness/src/turn-orchestrator/states/assistant-finished.ts b/harness/src/turn-orchestrator/states/assistant-finished.ts deleted file mode 100644 index d3b59f5a..00000000 --- a/harness/src/turn-orchestrator/states/assistant-finished.ts +++ /dev/null @@ -1,122 +0,0 @@ -/** - * `turn::assistant_finished`. Persist assistant message and route to steering or function execute. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. 
- */ - -import type { ISdk } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; -import type { AgentEvent } from '../../types/agent-event.js'; -import type { AssistantMessage } from '../../types/agent-message.js'; -import type { FunctionCall } from '../../types/function.js'; -import { missingFunctionResult, unwrapAgentTrigger } from '../agent-trigger.js'; -import { emit } from '../events.js'; -import type { PreparedEntry } from '../persistence.js'; -import * as persistence from '../persistence.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; - -function extractFunctionCalls(msg: AssistantMessage): FunctionCall[] { - const out: FunctionCall[] = []; - for (const b of msg.content) { - if (b.type === 'function_call') { - out.push({ id: b.id, function_id: b.function_id, arguments: b.arguments }); - } - } - return out; -} - -function assistantMessageComplete(asst: AssistantMessage, body_streamed: boolean): AgentEvent { - return { type: 'message_complete', message: asst, body_streamed }; -} - -export async function handleFinished(iii: ISdk, rec: TurnStateRecord): Promise { - const asst = rec.last_assistant; - if (!asst) { - throw new Error('assistant_finished without last_assistant'); - } - await emit( - iii, - rec.session_id, - assistantMessageComplete(asst, rec.assistant_body_streamed === true), - ); - const isErrorOrAborted = asst.stop_reason === 'error' || asst.stop_reason === 'aborted'; - // Error/aborted assistant messages (e.g. provider auth failures, - // network blips, user aborts) are surfaced to the UI via the - // message_complete emitted above, but we deliberately - // keep them out of the session's persisted message history so the - // LLM's next-turn context doesn't accumulate transient infra noise. 
- if (!isErrorOrAborted) { - const messages = await persistence.loadMessages(iii, rec.session_id); - // Idempotency guard: handleFinished can re-enter (durable trigger - // retry, crash before transitionTo persists). Without this guard a - // second run pushes the SAME assistant message again. If that - // assistant has tool_calls, Anthropic rejects the next request with: - // "each tool_use must have a unique id". - // Detect by comparing timestamp + content shape against the last - // assistant message in flat-state; skip the push when they match. - const last = messages[messages.length - 1]; - const alreadyPersisted = - last && - last.role === 'assistant' && - last.timestamp === asst.timestamp && - last.model === asst.model && - last.provider === asst.provider; - if (alreadyPersisted) { - logger.warn('handleFinished: skipping duplicate assistant push (re-entry detected)', { - session_id: rec.session_id, - timestamp: asst.timestamp, - }); - } else { - messages.push(asst); - await persistence.saveMessages(iii, rec.session_id, messages); - } - } - - if (isErrorOrAborted) { - await emit(iii, rec.session_id, { - type: 'turn_end', - message: asst, - function_results: [], - }); - rec.turn_end_emitted = true; - transitionTo(rec, 'tearing_down'); - return; - } - const calls = extractFunctionCalls(asst); - if (calls.length === 0) { - transitionTo(rec, 'steering_check'); - return; - } - - rec.function_results = []; - rec.pending_function_calls = calls.map(unwrapAgentTrigger); - - const prepared: PreparedEntry[] = calls.map((raw) => { - const function_call = unwrapAgentTrigger(raw); - if (!function_call.function_id) { - return { function_call, blocked: missingFunctionResult() }; - } - return { function_call, blocked: null }; - }); - - await persistence.saveExecutedCalls(iii, rec.session_id, []); - await persistence.savePreparedCalls(iii, rec.session_id, prepared); - transitionTo(rec, 'function_execute'); -} - -export function register(iii: ISdk): void { - 
iii.registerFunction( - 'turn::assistant_finished', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'assistant_finished', handleFinished, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state assistant_finished: finalize assistant and route onward.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/states/assistant-streaming.ts b/harness/src/turn-orchestrator/states/assistant-streaming.ts deleted file mode 100644 index 35f3b912..00000000 --- a/harness/src/turn-orchestrator/states/assistant-streaming.ts +++ /dev/null @@ -1,240 +0,0 @@ -/** - * `turn::assistant_streaming`. Start turn, stream provider response, advance to finished. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. - */ - -import type { ISdk, StreamChannelRef } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; -import type { AssistantMessage } from '../../types/agent-message.js'; -import type { AgentFunction } from '../../types/function.js'; -import type { ProviderStreamInput } from '../../types/provider.js'; -import type { AssistantMessageEvent } from '../../types/stream-event.js'; -import { emit } from '../events.js'; -import * as persistence from '../persistence.js'; -import { runPreflight } from '../preflight.js'; -import { buildInput, decide, targetFunctionId } from '../provider-router.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; - -function eventPartial(ev: AssistantMessageEvent): AssistantMessage | null { - if ('partial' in ev) return ev.partial; - if (ev.type === 'done') return ev.message; - if (ev.type === 'error') return ev.error; - return null; -} - -function 
syntheticErrorAssistant( - provider: string, - model: string, - reason: string, -): AssistantMessage { - return { - role: 'assistant', - content: [{ type: 'text', text: reason }], - stop_reason: 'error', - error_message: reason, - error_kind: 'transient', - usage: null, - model, - provider, - timestamp: Date.now(), - }; -} - -function formatProviderError(err: unknown): string { - const raw = err instanceof Error ? err.message : String(err); - return raw - .replace(/^IIIInvocationError:\s*/i, '') - .replace(/^invocation_failed:\s*/i, '') - .trim(); -} - -export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise { - if (rec.max_turns !== undefined && rec.turn_count >= rec.max_turns) { - const cap = rec.max_turns ?? 0; - const exhausted: AssistantMessage = { - role: 'assistant', - content: [{ type: 'text', text: `loop stopped: max_turns (${cap}) reached` }], - stop_reason: 'end', - error_message: null, - error_kind: null, - usage: null, - model: '', - provider: '', - timestamp: Date.now(), - }; - await emit(iii, rec.session_id, { - type: 'message_complete', - message: exhausted, - body_streamed: false, - }); - await emit(iii, rec.session_id, { - type: 'turn_end', - message: exhausted, - function_results: [], - }); - rec.turn_end_emitted = true; - rec.last_assistant = exhausted; - const messages = await persistence.loadMessages(iii, rec.session_id); - messages.push(exhausted); - await persistence.saveMessages(iii, rec.session_id, messages); - transitionTo(rec, 'tearing_down'); - return; - } - rec.turn_count++; - rec.turn_end_emitted = false; - rec.assistant_body_streamed = false; - - const request = await persistence.loadRunRequest(iii, rec.session_id); - let messages = await persistence.loadMessages(iii, rec.session_id); - const schemas = await persistence.loadFunctionSchemas(iii, rec.session_id); - - const { provider, model, system_prompt } = request; - const tools = (Array.isArray(schemas) ? 
schemas : []) as AgentFunction[]; - - const decision = decide({ provider, model }); - const targetFn = targetFunctionId(decision); - - const preflightResult = await runPreflight( - iii, - rec.session_id, - messages, - decision.provider, - model, - ); - if (preflightResult === 'compacted') { - messages = await persistence.loadMessages(iii, rec.session_id); - } - - let channel: Awaited>; - try { - channel = await iii.createChannel(); - } catch (err) { - logger.warn('createChannel failed; falling back to synthetic error', { - err: String(err), - }); - rec.last_assistant = syntheticErrorAssistant( - decision.provider, - decision.model, - `create_channel failed: ${String(err)}`, - ); - transitionTo(rec, 'assistant_finished'); - return; - } - - const messageQueue: string[] = []; - let done = false; - let resolveNext: (() => void) | null = null; - channel.reader.onMessage((msg: string) => { - messageQueue.push(msg); - if (resolveNext) { - const fn = resolveNext; - resolveNext = null; - fn(); - } - }); - channel.reader.stream.resume(); - - const input: ProviderStreamInput = buildInput( - decision, - channel.writerRef as StreamChannelRef, - system_prompt, - messages, - tools, - ); - - let triggerError: string | null = null; - const triggerPromise = iii - .trigger({ - function_id: targetFn, - payload: input, - timeoutMs: 300_000, - }) - .catch((err) => { - logger.warn('provider stream trigger failed', { targetFn, err: String(err) }); - triggerError = formatProviderError(err); - done = true; - if (resolveNext) { - const fn = resolveNext; - resolveNext = null; - fn(); - } - return null; - }); - - const readPromise = (async (): Promise => { - let final: AssistantMessage | null = null; - while (!done) { - while (messageQueue.length > 0) { - const text = messageQueue.shift(); - if (text === undefined) break; - let event: AssistantMessageEvent | null = null; - try { - event = JSON.parse(text) as AssistantMessageEvent; - } catch (err) { - logger.warn('decode AssistantMessageEvent 
failed', { - session_id: rec.session_id, - err: String(err), - }); - continue; - } - const partial = eventPartial(event); - if (partial) final = partial; - if (event.type !== 'done' && event.type !== 'error') { - if (partial) { - await emit(iii, rec.session_id, { - type: 'message_update', - message: partial, - llm_event: event, - }); - if (event.type === 'text_delta' || event.type === 'thinking_delta') { - rec.assistant_body_streamed = true; - } - } - continue; - } - if (event.type === 'done') final = event.message; - else final = event.error; - done = true; - break; - } - if (done) break; - await new Promise((r) => { - resolveNext = r; - }); - } - return final; - })(); - - const [, finalMsg] = await Promise.all([triggerPromise, readPromise]); - if (finalMsg) { - rec.last_assistant = finalMsg; - } else { - const errorText = triggerError ?? 'provider channel closed without final'; - const synthetic = syntheticErrorAssistant(decision.provider, decision.model, errorText); - await emit(iii, rec.session_id, { - type: 'message_update', - message: synthetic, - llm_event: { type: 'text_delta', partial: synthetic, delta: errorText }, - }); - rec.last_assistant = synthetic; - } - transitionTo(rec, 'assistant_finished'); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::assistant_streaming', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'assistant_streaming', handleStreaming, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state assistant_streaming: start turn and stream provider response.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/states/function-awaiting-approval.ts b/harness/src/turn-orchestrator/states/function-awaiting-approval.ts deleted file mode 100644 index 2ddcc936..00000000 --- a/harness/src/turn-orchestrator/states/function-awaiting-approval.ts +++ /dev/null @@ -1,112 +0,0 @@ -/** - * 
`turn::function_awaiting_approval`. Read approval decisions and resume execute. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. - */ - -import { ApprovalResumePayloadSchema, STATE_SCOPE } from '../../approval-gate/schemas.js'; -import type { z } from 'zod'; -import type { ISdk } from '../../runtime/iii.js'; -import type { FunctionResult } from '../../types/function.js'; -import { text } from '../../types/content.js'; -import * as persistence from '../persistence.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; - -export type ApprovalDecision = z.infer; - -/** Decode stored approval decision from `state::get` (scope `approvals`). */ -export function parseApprovalDecision(value: unknown): ApprovalDecision | null { - const parsed = ApprovalResumePayloadSchema.safeParse(value); - return parsed.success ? parsed.data : null; -} - -async function readDecision( - iii: ISdk, - session_id: string, - function_call_id: string, -): Promise { - const key = `${session_id}/${function_call_id}`; - const raw = await iii.trigger({ - function_id: 'state::get', - payload: { scope: STATE_SCOPE, key }, - }); - return parseApprovalDecision(raw); -} - -function denialResultFromDecision(decision: ApprovalDecision): FunctionResult { - const reason = - decision.reason ?? (decision.decision === 'aborted' ? 'session_aborted' : 'denied'); - const message = - decision.decision === 'aborted' - ? 
`Function call aborted: ${reason}` - : `Permission denied by user: ${reason}`; - return { - content: [text(message)], - details: { - approval_denied: true, - decision: decision.decision, - reason, - }, - terminate: false, - }; -} - -export async function handleAwaitingApproval(iii: ISdk, rec: TurnStateRecord): Promise { - const awaiting = rec.awaiting_approval ?? []; - if (awaiting.length === 0) { - transitionTo(rec, 'function_execute'); - return; - } - - const decisions = await Promise.all( - awaiting.map((entry) => readDecision(iii, rec.session_id, entry.function_call_id)), - ); - - if (decisions.some((decision) => decision === null)) { - return; - } - - const prepared = await persistence.loadPreparedCalls(iii, rec.session_id); - for (let i = 0; i < awaiting.length; i++) { - const entry = awaiting[i]; - const decision = decisions[i]; - if (!entry || !decision) continue; - const idx = prepared.findIndex( - (preparedEntry) => preparedEntry.function_call.id === entry.function_call_id, - ); - if (idx < 0) continue; - const current = prepared[idx]; - if (!current) continue; - if (decision.decision === 'allow') { - prepared[idx] = { ...current, pre_approved: true, blocked: null }; - } else { - prepared[idx] = { - ...current, - pre_approved: false, - blocked: denialResultFromDecision(decision), - }; - } - } - - await persistence.savePreparedCalls(iii, rec.session_id, prepared); - - rec.awaiting_approval = []; - transitionTo(rec, 'function_execute'); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::function_awaiting_approval', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'function_awaiting_approval', handleAwaitingApproval, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state function_awaiting_approval: read approval decisions and resume.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/states/function-execute.ts 
b/harness/src/turn-orchestrator/states/function-execute.ts deleted file mode 100644 index 87fe86aa..00000000 --- a/harness/src/turn-orchestrator/states/function-execute.ts +++ /dev/null @@ -1,267 +0,0 @@ -/** - * `turn::function_execute`. Run prepared function calls, finalize results, route onward. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. - */ - -import type { ISdk } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; -import type { AgentEvent } from '../../types/agent-event.js'; -import type { - AgentMessage, - AssistantMessage, - FunctionResultMessage, -} from '../../types/agent-message.js'; -import type { FunctionCall, FunctionResult } from '../../types/function.js'; -import { dispatchWithHook, isErrorResult, triggerFunctionCall } from '../agent-trigger.js'; -import { registerApprovalResume } from '../approval-resume.js'; -import { emit } from '../events.js'; -import { publishAfter } from '../hook.js'; -import * as persistence from '../persistence.js'; -import type { ExecutedEntry } from '../persistence.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; - -function buildFunctionExecutionEnd( - fc: FunctionCall, - result: FunctionResult, - is_error: boolean, - duration_ms: number, -): AgentEvent { - return { - type: 'function_execution_end', - function_call_id: fc.id, - function_id: fc.function_id, - result, - is_error, - duration_ms, - }; -} - -function augmentFunctionCall(fc: FunctionCall, session_id: string): FunctionCall { - let augmented_args: unknown; - if (fc.arguments && typeof fc.arguments === 'object' && !Array.isArray(fc.arguments)) { - augmented_args = { ...(fc.arguments as Record) }; - } else { - augmented_args = { arguments: fc.arguments }; - } - 
if (typeof augmented_args === 'object' && augmented_args !== null) { - const obj = augmented_args as Record; - obj.session_id = session_id; - obj.function_call_id = fc.id; - obj.function_id = fc.function_id; - obj.function_call = { - id: fc.id, - function_id: fc.function_id, - arguments: fc.arguments, - }; - } - return { id: fc.id, function_id: fc.function_id, arguments: augmented_args }; -} - -async function commitExecutedCall( - iii: ISdk, - rec: TurnStateRecord, - results: ExecutedEntry[], - fc: FunctionCall, - result: FunctionResult, - startedAt: number, - is_error?: boolean, -): Promise { - const duration_ms = Date.now() - startedAt; - const error = is_error ?? isErrorResult(result); - persistence.upsertExecutedCall(results, { - function_call: fc, - result, - is_error: error, - duration_ms, - }); - await persistence.saveExecutedCalls(iii, rec.session_id, results); - await emit(iii, rec.session_id, buildFunctionExecutionEnd(fc, result, error, duration_ms)); -} - -function buildFinalizeLifecycle( - asst: AssistantMessage, - results: FunctionResultMessage[], -): AgentEvent[] { - const out: AgentEvent[] = [{ type: 'turn_end', message: asst, function_results: results }]; - return out; -} - -async function finalizeExecutedCalls(iii: ISdk, rec: TurnStateRecord): Promise { - const executed = await persistence.loadExecutedCalls(iii, rec.session_id); - const function_results: FunctionResultMessage[] = []; - let all_terminate = executed.length > 0; - for (const e of executed) { - let result = e.result; - const merged = await publishAfter(iii, e.function_call, result); - if ( - merged && - typeof merged === 'object' && - Array.isArray((merged as Record).content) - ) { - result = merged as FunctionResult; - } - if (!result.terminate) all_terminate = false; - function_results.push({ - role: 'function_result', - function_call_id: e.function_call.id, - function_id: e.function_call.function_id, - content: result.content, - details: result.details, - is_error: e.is_error, - 
timestamp: Date.now(), - }); - } - const messages = await persistence.loadMessages(iii, rec.session_id); - // Idempotency guard: handleFinalize can re-enter (durable trigger retry, - // step-fanout race, crash mid-finalize before transitionTo persists). - // executedCalls is only cleared at the start of the NEXT handlePrepare, - // so a second run reads the SAME results and would push duplicates into - // flat-state. Skip any function_result whose function_call_id is already - // present. Anthropic rejects duplicate `tool_result` blocks with id: - // "each tool_use must have a single result. Found multiple tool_result - // blocks with id: toolu_..." - // and any provider's wire-messages flush would produce them otherwise. - // Only the most-recent function_result block matters for dedup — - // duplicates only appear when the re-entry runs against a slice - // we already wrote in this same finalize, so walking from the tail - // and stopping once we pass the boundary of pre-existing results - // is sufficient. Pre-fix this scanned every message from the head - // on every finalize, which grew O(history) per turn for a guard - // that only ever protects against ~10 entries. - const incomingIds = new Set(); - for (const r of function_results) incomingIds.add(r.function_call_id); - const existingResultIds = new Set(); - for (let i = messages.length - 1; i >= 0; i--) { - const m = messages[i]; - if (!m) continue; - if (m.role === 'function_result') { - existingResultIds.add(m.function_call_id); - continue; - } - if (m.role === 'assistant') { - // Once we cross an assistant boundary BEFORE seeing any - // pending incoming id we've passed the turn this finalize - // is writing for — earlier function_result blocks can't be - // duplicates of `function_results`. 
- let unseen = false; - for (const id of incomingIds) { - if (!existingResultIds.has(id)) { - unseen = true; - break; - } - } - if (!unseen) break; - } - } - let appended = 0; - for (const r of function_results) { - if (existingResultIds.has(r.function_call_id)) continue; - messages.push(r as AgentMessage); - existingResultIds.add(r.function_call_id); - appended++; - } - if (appended < function_results.length) { - logger.warn('handleFinalize: skipped duplicate function_results (re-entry detected)', { - session_id: rec.session_id, - total: function_results.length, - appended, - skipped: function_results.length - appended, - }); - } - await persistence.saveMessages(iii, rec.session_id, messages); - - const asst = rec.last_assistant; - rec.function_results = function_results; - rec.pending_function_calls = []; - // Clear persisted executedCalls now so a re-entry into handleFinalize - // (durable retry, crash before transitionTo) finds an empty set and - // produces zero new function_results to push. Belt+suspenders alongside - // the idempotency guard above. handlePrepare also clears at the start - // of the NEXT turn, but that's too late if re-entry happens before then. - await persistence.saveExecutedCalls(iii, rec.session_id, []); - - if (asst) { - for (const evt of buildFinalizeLifecycle(asst, function_results)) { - await emit(iii, rec.session_id, evt); - } - rec.turn_end_emitted = true; - } - transitionTo(rec, all_terminate ? 
'tearing_down' : 'steering_check'); -} - -export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise { - const prepared = await persistence.loadPreparedCalls(iii, rec.session_id); - const results = await persistence.loadExecutedCalls(iii, rec.session_id); - - for (const entry of prepared) { - const fc = entry.function_call; - await emit(iii, rec.session_id, { - type: 'function_execution_start', - function_call_id: fc.id, - function_id: fc.function_id, - args: fc.arguments, - }); - const startedAt = Date.now(); - - const existing = persistence.findExecutedCall(results, fc.id); - if (existing) { - await emit( - iii, - rec.session_id, - buildFunctionExecutionEnd(fc, existing.result, existing.is_error, existing.duration_ms), - ); - continue; - } - - if (entry.pre_approved === true) { - await commitExecutedCall( - iii, - rec, - results, - fc, - await triggerFunctionCall(iii, fc), - startedAt, - ); - continue; - } - - if (entry.blocked) { - await commitExecutedCall(iii, rec, results, fc, entry.blocked, startedAt, true); - continue; - } - - const out = await dispatchWithHook(iii, augmentFunctionCall(fc, rec.session_id)); - if (out.kind === 'pending') { - rec.awaiting_approval = rec.awaiting_approval ?? 
[]; - rec.awaiting_approval.push({ - function_call_id: fc.id, - function_id: fc.function_id, - args: fc.arguments, - }); - registerApprovalResume(iii, rec.session_id, fc.id); - transitionTo(rec, 'function_awaiting_approval'); - return; - } - - await commitExecutedCall(iii, rec, results, fc, out.result, startedAt); - } - await finalizeExecutedCalls(iii, rec); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::function_execute', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'function_execute', handleExecute, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state function_execute: dispatch prepared calls and finalize results.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/states/index.ts b/harness/src/turn-orchestrator/states/index.ts deleted file mode 100644 index e7865709..00000000 --- a/harness/src/turn-orchestrator/states/index.ts +++ /dev/null @@ -1,11 +0,0 @@ -/** - * Re-export per-state register functions. Each `turn::{state}` lives in its own file. - */ - -export { register as registerProvisioning } from './provisioning.js'; -export { register as registerAssistantStreaming } from './assistant-streaming.js'; -export { register as registerAssistantFinished } from './assistant-finished.js'; -export { register as registerFunctionExecute } from './function-execute.js'; -export { register as registerFunctionAwaitingApproval } from './function-awaiting-approval.js'; -export { register as registerSteeringCheck } from './steering-check.js'; -export { register as registerTearingDown } from './tearing-down.js'; diff --git a/harness/src/turn-orchestrator/states/provisioning.ts b/harness/src/turn-orchestrator/states/provisioning.ts deleted file mode 100644 index 8ca88284..00000000 --- a/harness/src/turn-orchestrator/states/provisioning.ts +++ /dev/null @@ -1,110 +0,0 @@ -/** - * `turn::provisioning`. 
First FSM step after `run::start`: materialize tool schemas, - * assemble the system prompt, persist the enriched run request, then advance. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. - */ - -import type { ISdk } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; -import { agentTriggerTool } from '../agent-trigger.js'; -import type { TurnOrchestratorConfig } from '../config.js'; -import * as persistence from '../persistence.js'; -import { type RunRequest } from '../run-request.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; -import { type DefaultSkillBody, buildSystemPrompt, defaultSkillBody } from '../system-prompt.js'; - -const FETCH_TIMEOUT_MS = 10_000; - -export function parseDirectoryBody(resp: unknown): string | null { - if (typeof resp === 'string') return resp; - if (resp && typeof resp === 'object') { - const body = (resp as { body?: unknown }).body; - if (typeof body === 'string') return body; - } - return null; -} - -async function fetchSkill(iii: ISdk, id: string): Promise { - try { - const resp = await iii.trigger({ - function_id: 'directory::skills::get', - payload: { id }, - timeoutMs: FETCH_TIMEOUT_MS, - }); - return parseDirectoryBody(resp); - } catch (err) { - logger.warn('directory::skills::get failed', { id, err: String(err) }); - return null; - } -} - -async function fetchDefaultSkills(iii: ISdk, uris: readonly string[]): Promise { - const bodies: DefaultSkillBody[] = []; - for (const uri of uris) { - const id = uri.startsWith('iii://') ? 
uri.slice('iii://'.length) : uri; - const body = await fetchSkill(iii, id); - bodies.push(defaultSkillBody(uri, body)); - } - return bodies; -} - -async function fetchSkillsIndex(iii: ISdk): Promise { - try { - const resp = await iii.trigger({ - function_id: 'directory::skills::index', - payload: {}, - timeoutMs: FETCH_TIMEOUT_MS, - }); - const body = parseDirectoryBody(resp); - return body && body.length > 0 ? body : null; - } catch (err) { - logger.warn('directory::skills::index failed', { err: String(err) }); - return null; - } -} - -export async function handleProvisioning( - iii: ISdk, - cfg: TurnOrchestratorConfig, - rec: TurnStateRecord, -): Promise { - const request = await persistence.loadRunRequest(iii, rec.session_id); - - await persistence.saveFunctionSchemas(iii, rec.session_id, [agentTriggerTool()]); - - const override = request.system_prompt.length > 0 ? request.system_prompt : null; - - const [skillsIndex, bodies] = await Promise.all([ - fetchSkillsIndex(iii), - fetchDefaultSkills(iii, cfg.system_default_skills), - ]); - const prompt = buildSystemPrompt(bodies, null, override, request.mode, skillsIndex); - - const updated: RunRequest = { ...request, system_prompt: prompt }; - await persistence.saveRunRequest(iii, rec.session_id, updated); - - transitionTo(rec, 'assistant_streaming'); -} - -export function register(iii: ISdk, cfg: TurnOrchestratorConfig): void { - iii.registerFunction( - 'turn::provisioning', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition( - iii, - 'provisioning', - (i, rec) => handleProvisioning(i, cfg, rec), - parsed, - ); - }, - { - description: - 'Run one durable FSM transition for session in state provisioning: materialize tool schemas, build system prompt, advance to assistant_streaming.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/states/steering-check.ts b/harness/src/turn-orchestrator/states/steering-check.ts deleted file mode 100644 index 
77a0f6a5..00000000 --- a/harness/src/turn-orchestrator/states/steering-check.ts +++ /dev/null @@ -1,165 +0,0 @@ -/** - * `turn::steering_check`. Drains steering / followup inboxes and the abort flag, then routes onward. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. - */ - -import type { ISdk } from '../../runtime/iii.js'; -import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; -import { emit } from '../events.js'; -import * as persistence from '../persistence.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, abortSignalKey, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; - -export type SteeringRoute = - | 'abort' - | 'steering' - | 'followup' - | 'continue_after_function' - | 'end_turn'; - -/** Pure priority router — no I/O. */ -export function route( - abort: boolean, - has_steering: boolean, - has_followup: boolean, - has_function_results: boolean, -): SteeringRoute { - if (abort) return 'abort'; - if (has_steering) return 'steering'; - if (has_followup) return 'followup'; - if (has_function_results) return 'continue_after_function'; - return 'end_turn'; -} - -async function abortSet(iii: ISdk, session_id: string): Promise { - try { - const v = await iii.trigger({ - function_id: 'state::get', - payload: { scope: 'agent', key: abortSignalKey(session_id) }, - }); - return v === true; - } catch { - return false; - } -} - -async function drainQueue(iii: ISdk, name: string, session_id: string): Promise { - try { - const resp = await iii.trigger({ - function_id: 'session-inbox::drain', - payload: { name, session_id }, - }); - if (Array.isArray(resp?.items)) return resp.items as AgentMessage[]; - } catch { - // ignore - } - return []; -} - -function abortedMessage(): AssistantMessage { - return { - role: 
'assistant', - content: [], - stop_reason: 'aborted', - error_message: 'aborted', - error_kind: 'transient', - usage: null, - model: 'harness', - provider: 'harness', - timestamp: Date.now(), - }; -} - -async function emitTurnEndOnce(iii: ISdk, rec: TurnStateRecord): Promise { - if (rec.turn_end_emitted) return; - const last = - rec.last_assistant ?? - ({ - role: 'assistant', - content: [], - stop_reason: 'end', - error_message: null, - error_kind: null, - usage: null, - model: '', - provider: '', - timestamp: Date.now(), - } as AssistantMessage); - await emit(iii, rec.session_id, { - type: 'turn_end', - message: last, - function_results: [], - }); - rec.turn_end_emitted = true; -} - -export async function handleSteering(iii: ISdk, rec: TurnStateRecord): Promise { - const abort = await abortSet(iii, rec.session_id); - const steering = abort ? [] : await drainQueue(iii, 'steering', rec.session_id); - const followup = - abort || steering.length > 0 ? [] : await drainQueue(iii, 'followup', rec.session_id); - - const decision = route( - abort, - steering.length > 0, - followup.length > 0, - rec.function_results.length > 0, - ); - switch (decision) { - case 'abort': { - const aborted = abortedMessage(); - const messages = await persistence.loadMessages(iii, rec.session_id); - messages.push(aborted); - await persistence.saveMessages(iii, rec.session_id, messages); - rec.last_assistant = aborted; - if (!rec.turn_end_emitted) { - await emit(iii, rec.session_id, { - type: 'turn_end', - message: aborted, - function_results: [], - }); - rec.turn_end_emitted = true; - } - transitionTo(rec, 'tearing_down'); - break; - } - case 'steering': - case 'followup': { - const inbox = decision === 'steering' ? 
steering : followup; - await emitTurnEndOnce(iii, rec); - const messages = await persistence.loadMessages(iii, rec.session_id); - messages.push(...inbox); - await persistence.saveMessages(iii, rec.session_id, messages); - rec.function_results = []; - transitionTo(rec, 'assistant_streaming'); - break; - } - case 'continue_after_function': { - rec.function_results = []; - transitionTo(rec, 'assistant_streaming'); - break; - } - case 'end_turn': { - await emitTurnEndOnce(iii, rec); - transitionTo(rec, 'tearing_down'); - break; - } - } -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::steering_check', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'steering_check', handleSteering, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state steering_check: drain inboxes and route onward.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/states/tearing-down.ts b/harness/src/turn-orchestrator/states/tearing-down.ts deleted file mode 100644 index d9af5420..00000000 --- a/harness/src/turn-orchestrator/states/tearing-down.ts +++ /dev/null @@ -1,34 +0,0 @@ -/** - * `turn::tearing_down`. Emit `agent_end` and transition to `stopped`. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. 
- */ - -import type { ISdk } from '../../runtime/iii.js'; -import type { AgentMessage } from '../../types/agent-message.js'; -import { emit } from '../events.js'; -import * as persistence from '../persistence.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; - -export async function handleTearingDown(iii: ISdk, rec: TurnStateRecord): Promise { - const messages: AgentMessage[] = await persistence.loadMessages(iii, rec.session_id); - await emit(iii, rec.session_id, { type: 'agent_end', messages }); - transitionTo(rec, 'stopped'); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::tearing_down', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'tearing_down', handleTearingDown, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state tearing_down: emit agent_end and mark stopped.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/steering-check/ports.ts b/harness/src/turn-orchestrator/steering-check/ports.ts new file mode 100644 index 00000000..a4bca3f7 --- /dev/null +++ b/harness/src/turn-orchestrator/steering-check/ports.ts @@ -0,0 +1,46 @@ +/** + * Typed dependency ports for steering_check. + */ + +import type { ISdk } from '../../runtime/iii.js'; +import type { AgentEvent } from '../../types/agent-event.js'; +import type { AgentMessage } from '../../types/agent-message.js'; +import { emit } from '../events.js'; +import { createTurnStatePorts, type TurnStatePorts } from '../state-runtime/ports.js'; + +/** Decode session-inbox drain responses. 
*/ +export function parseDrainItems(resp: unknown): AgentMessage[] { + if (resp && typeof resp === 'object' && Array.isArray((resp as { items?: unknown }).items)) { + return (resp as { items: AgentMessage[] }).items; + } + return []; +} + +export type SteeringCheckPorts = TurnStatePorts & { + drainInbox(name: 'steering' | 'followup', session_id: string): Promise; + emit(session_id: string, event: AgentEvent): Promise; +}; + +export function createSteeringCheckPorts(iii: ISdk): SteeringCheckPorts { + const base = createTurnStatePorts(iii); + + return { + ...base, + + async drainInbox(name, session_id) { + try { + const resp = await iii.trigger({ + function_id: 'session-inbox::drain', + payload: { name, session_id }, + }); + return parseDrainItems(resp); + } catch { + return []; + } + }, + + emit(session_id, event) { + return emit(iii, session_id, event); + }, + }; +} diff --git a/harness/src/turn-orchestrator/steering-check/process.ts b/harness/src/turn-orchestrator/steering-check/process.ts new file mode 100644 index 00000000..e4624314 --- /dev/null +++ b/harness/src/turn-orchestrator/steering-check/process.ts @@ -0,0 +1,34 @@ +/** + * Drain inboxes, route, apply steering_check outcomes, and register the FSM step. 
+ */ + +import type { ISdk } from '../../runtime/iii.js'; +import { runTransition } from '../run-transition.js'; +import { + TurnStepPayloadSchema, + parseSteeringCheckRecord, + type TurnStepPayload, +} from '../schemas.js'; +import type { TurnStateRecord } from '../state.js'; +import { createSteeringCheckPorts } from './ports.js'; +import { runSteeringCheck } from './run.js'; + +export async function handleSteering(iii: ISdk, rec: TurnStateRecord): Promise { + const steering = parseSteeringCheckRecord(rec); + const ports = createSteeringCheckPorts(iii); + await runSteeringCheck(ports, steering); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::steering_check', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'steering_check', handleSteering, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state steering_check: drain inboxes and route onward.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/steering-check/run.ts b/harness/src/turn-orchestrator/steering-check/run.ts new file mode 100644 index 00000000..e7e42ee3 --- /dev/null +++ b/harness/src/turn-orchestrator/steering-check/run.ts @@ -0,0 +1,114 @@ +/** + * Drain inboxes, route steering_check outcomes, and apply transitions. 
+ */ + +import type { AgentMessage } from '../../types/agent-message.js'; +import { syntheticAssistant } from '../synthetic-assistant.js'; +import { emitTurnEndOnce, resumeToAssistantStreaming } from '../state-runtime/turn-end.js'; +import type { SteeringCheckTurnRecord } from '../state.js'; +import type { SteeringCheckPorts } from './ports.js'; + +export type SteeringRoute = 'steering' | 'followup' | 'continue_after_function' | 'end_turn'; + +export type SteeringCheckOutcome = + | { kind: 'max_turns_reached' } + | { kind: 'resume_with_inbox'; inbox: AgentMessage[] } + | { kind: 'continue_after_function' } + | { kind: 'end_turn' }; + +export function route( + has_steering: boolean, + has_followup: boolean, + has_function_results: boolean, +): SteeringRoute { + if (has_steering) return 'steering'; + if (has_followup) return 'followup'; + if (has_function_results) return 'continue_after_function'; + return 'end_turn'; +} + +function maxTurnsReached(rec: SteeringCheckTurnRecord): boolean { + return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; +} + +async function endForMaxTurns( + ports: SteeringCheckPorts, + rec: SteeringCheckTurnRecord, +): Promise { + const msg = syntheticAssistant({ + stop_reason: 'end', + text: `loop stopped: max_turns (${rec.max_turns ?? 0}) reached`, + }); + rec.last_assistant = msg; + await ports.appendMessages(rec.session_id, [msg]); + await ports.emit(rec.session_id, { + type: 'message_complete', + message: msg, + body_streamed: false, + }); + await emitTurnEndOnce(ports, rec, msg); + await ports.finishSession(rec); +} + +export async function processSteeringCheck( + ports: SteeringCheckPorts, + rec: SteeringCheckTurnRecord, +): Promise { + const steering = await ports.drainInbox('steering', rec.session_id); + const followup = steering.length > 0 ? 
[] : await ports.drainInbox('followup', rec.session_id); + + const decision = route(steering.length > 0, followup.length > 0, rec.function_results.length > 0); + + if ( + (decision === 'steering' || + decision === 'followup' || + decision === 'continue_after_function') && + maxTurnsReached(rec) + ) { + return { kind: 'max_turns_reached' }; + } + + switch (decision) { + case 'steering': + return { kind: 'resume_with_inbox', inbox: steering }; + case 'followup': + return { kind: 'resume_with_inbox', inbox: followup }; + case 'continue_after_function': + return { kind: 'continue_after_function' }; + case 'end_turn': + return { kind: 'end_turn' }; + } +} + +export async function applySteeringCheckOutcome( + ports: SteeringCheckPorts, + rec: SteeringCheckTurnRecord, + outcome: SteeringCheckOutcome, +): Promise { + switch (outcome.kind) { + case 'max_turns_reached': + await endForMaxTurns(ports, rec); + return; + case 'resume_with_inbox': { + await emitTurnEndOnce(ports, rec); + await ports.appendMessages(rec.session_id, outcome.inbox); + resumeToAssistantStreaming(rec); + return; + } + case 'continue_after_function': + resumeToAssistantStreaming(rec); + return; + case 'end_turn': + await emitTurnEndOnce(ports, rec); + await ports.finishSession(rec); + return; + } +} + +export async function runSteeringCheck( + ports: SteeringCheckPorts, + rec: SteeringCheckTurnRecord, +): Promise { + const outcome = await processSteeringCheck(ports, rec); + await applySteeringCheckOutcome(ports, rec, outcome); +} diff --git a/harness/src/turn-orchestrator/synthetic-assistant.ts b/harness/src/turn-orchestrator/synthetic-assistant.ts new file mode 100644 index 00000000..282f64ae --- /dev/null +++ b/harness/src/turn-orchestrator/synthetic-assistant.ts @@ -0,0 +1,33 @@ +/** + * Synthetic assistant messages the orchestrator injects when there is no real + * provider turn: stream/transition errors, aborts, and the max_turns stop. 
+ * Builds on `emptyAssistant` so the message scaffolding lives in one place. + */ + +import { type AssistantMessage, emptyAssistant } from '../types/agent-message.js'; +import { text } from '../types/content.js'; +import type { StopReason } from '../types/stream-event.js'; + +export type SyntheticAssistantOptions = { + stop_reason: StopReason; + /** Body text; omitted produces empty content (e.g. an aborted turn). */ + text?: string; + provider?: string; + model?: string; +}; + +/** + * Build an assistant message with no provider behind it. `error`/`aborted` + * stop reasons are flagged `error_kind: 'transient'` and carry an + * `error_message` (the body text, or the stop reason when there is no text). + */ +export function syntheticAssistant(opts: SyntheticAssistantOptions): AssistantMessage { + const isError = opts.stop_reason === 'error' || opts.stop_reason === 'aborted'; + return { + ...emptyAssistant(opts.provider ?? '', opts.model ?? ''), + stop_reason: opts.stop_reason, + content: opts.text ? [text(opts.text)] : [], + error_message: isError ? (opts.text ?? opts.stop_reason) : null, + error_kind: isError ? 'transient' : null, + }; +} diff --git a/harness/src/turn-orchestrator/system-prompt.ts b/harness/src/turn-orchestrator/system-prompt.ts index 0f5b4e0e..443b632d 100644 --- a/harness/src/turn-orchestrator/system-prompt.ts +++ b/harness/src/turn-orchestrator/system-prompt.ts @@ -1,10 +1,17 @@ /** - * System-prompt assembly. Mirrors - * `turn-orchestrator/src/system_prompt.rs`. + * System-prompt assembly: turns the run's mode, default-skill bodies, and the + * skills index into the single system prompt string sent to the provider. */ export type Mode = 'plan' | 'ask' | 'agent'; +const III_URI_PREFIX = 'iii://'; + +/** Bare skill id from a skill URI (`iii://a/b` → `a/b`; bare ids pass through). */ +export function skillIdFromUri(uri: string): string { + return uri.startsWith(III_URI_PREFIX) ? 
uri.slice(III_URI_PREFIX.length) : uri; +} + const MODE_PARAGRAPHS: Record = { plan: `You are operating in plan mode: investigate first, then produce a concise numbered plan. 1. Investigate everything needed to fully plan — explore relevant functions, skills, and code via \`agent_trigger\` as needed. @@ -58,20 +65,25 @@ export type DefaultSkillBody = { }; export function defaultSkillBody(uri: string, body: string | null): DefaultSkillBody { - const id = uri.startsWith('iii://') ? uri.slice('iii://'.length) : uri; - return { uri, id, body }; + return { uri, id: skillIdFromUri(uri), body }; } +export type SystemPromptOptions = { + /** Caller-supplied prompt; when non-empty it is returned verbatim. */ + override?: string | null; + /** Operating mode; prepends a mode paragraph before the identity preamble. */ + mode?: Mode | null; + /** Skills index block appended after the preamble. */ + skillsIndex?: string | null; +}; + export function buildSystemPrompt( skills: DefaultSkillBody[], - cwd?: string | null, - override?: string | null, - mode?: Mode | null, - skillsIndex?: string | null, + opts: SystemPromptOptions = {}, ): string { + const { override, mode, skillsIndex } = opts; if (override && override.length > 0) return override; let out = isMode(mode) ? `${MODE_PARAGRAPHS[mode]}\n\n${IDENTITY_PREAMBLE}` : IDENTITY_PREAMBLE; - if (cwd && cwd.length > 0) out += `\n\nWorking directory: ${cwd}`; if (skillsIndex && skillsIndex.length > 0) out += `\n\n${skillsIndex}`; for (const s of skills) { out += `\n\n# ${s.uri}\n\n`; diff --git a/harness/src/turn-orchestrator/turn-state-write.ts b/harness/src/turn-orchestrator/turn-state-write.ts deleted file mode 100644 index 296f4a7e..00000000 --- a/harness/src/turn-orchestrator/turn-state-write.ts +++ /dev/null @@ -1,30 +0,0 @@ -/** - * UI notification when agent-scope turn_state is persisted via `saveRecord` / - * `persistRecord`. 
- */ - -import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import { emit } from './events.js'; - -export async function emitTurnStateChanged( - iii: ISdk, - session_id: string, - event_type: 'state:created' | 'state:updated', - new_value: Record, - old_value?: Record, -): Promise { - try { - await emit(iii, session_id, { - type: 'turn_state_changed', - event_type, - new_value, - ...(old_value !== undefined && { old_value }), - }); - } catch (err) { - logger.warn('emitTurnStateChanged failed', { - session_id, - err: String(err), - }); - } -} diff --git a/harness/src/turn-orchestrator/wake.ts b/harness/src/turn-orchestrator/wake.ts deleted file mode 100644 index ec57e29b..00000000 --- a/harness/src/turn-orchestrator/wake.ts +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Durable FSM wake via iii-queue FIFO `turn-step`. Enqueues `turn::{state}` per - * persisted turn_state, not a generic dispatcher. - */ - -import { TriggerAction, type ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import * as persistence from './persistence.js'; -import { turnFnId, type TurnState, type TurnStateRecord } from './state.js'; - -export const TURN_STEP_QUEUE = 'turn-step'; - -const NON_STEPABLE_STATES = new Set(['stopped', 'function_awaiting_approval']); - -/** True when a persisted turn_state transition should enqueue `turn::{newState}`. */ -export function shouldWakeStep(previousState: TurnState | null, newState: TurnState): boolean { - if (NON_STEPABLE_STATES.has(newState)) return false; - if (previousState !== null && previousState === newState) return false; - return true; -} - -/** Guard before enqueueing from approval/abort — skip terminal sessions. 
*/ -export function shouldRunStep(rec: TurnStateRecord | null): boolean { - if (!rec) return false; - return rec.state !== 'stopped'; -} - -export async function wakeState(iii: ISdk, session_id: string, state: TurnState): Promise { - try { - await iii.trigger({ - function_id: turnFnId(state), - payload: { session_id }, - action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), - }); - } catch (err) { - logger.warn('wakeState failed', { session_id, state, err: String(err) }); - } -} - -/** Enqueue the handler for the session's current persisted state (approval/abort). */ -export async function wakeFromRecord(iii: ISdk, session_id: string): Promise { - const rec = await persistence.loadRecord(iii, session_id); - if (!rec || !shouldRunStep(rec)) return; - await wakeState(iii, session_id, rec.state); -} diff --git a/harness/src/types/agent-event.ts b/harness/src/types/agent-event.ts index 83076cf8..32705310 100644 --- a/harness/src/types/agent-event.ts +++ b/harness/src/types/agent-event.ts @@ -41,7 +41,7 @@ export type AgentEvent = result: FunctionResult; is_error: boolean; /** Wall-clock ms between the matching function_execution_start and end. - * Reused from persisted ExecutedEntry on resumed runs so replayed + * Reused from persisted ExecutedCall on resumed runs so replayed * calls keep their original timing. */ duration_ms: number; } diff --git a/harness/src/types/function.ts b/harness/src/types/function.ts index 3bf69979..55336a74 100644 --- a/harness/src/types/function.ts +++ b/harness/src/types/function.ts @@ -45,15 +45,3 @@ export type FunctionResult = { details: unknown; terminate?: boolean; }; - -/** Prepared call entry persisted in the FSM's `function_prepared` staging. */ -export type PreparedFunctionCall = - | { kind: 'prepared'; function_call: FunctionCall } - | { kind: 'immediate'; result: FunctionResult; is_error: boolean }; - -/** Finalized call entry persisted in `function_executed`. 
*/ -export type FinalizedFunctionCall = { - function_call: FunctionCall; - result: FunctionResult; - is_error: boolean; -}; diff --git a/harness/tests/_helpers/stateStoreKey.ts b/harness/tests/_helpers/stateStoreKey.ts new file mode 100644 index 00000000..351c3a02 --- /dev/null +++ b/harness/tests/_helpers/stateStoreKey.ts @@ -0,0 +1,8 @@ +/** Composite key used by test mocks mirroring `${scope}/${key}` iii state storage. */ +export function stateStoreKey(scope: string, key: string): string { + return `${scope}/${key}`; +} + +export function payloadStoreKey(payload: { scope?: string; key?: string }): string { + return stateStoreKey(payload.scope ?? '', payload.key ?? ''); +} diff --git a/harness/tests/approval-gate/_helpers/fakeIii.ts b/harness/tests/approval-gate/_helpers/fakeIii.ts index b37b19ff..b5e9f7ff 100644 --- a/harness/tests/approval-gate/_helpers/fakeIii.ts +++ b/harness/tests/approval-gate/_helpers/fakeIii.ts @@ -7,31 +7,37 @@ import type { ISdk } from 'iii-sdk'; import { vi } from 'vitest'; -export type TriggerCall = { function_id: string; payload: unknown }; +export type TriggerCall = { function_id: string; payload: unknown; action?: unknown }; export type FakeIii = { iii: ISdk; calls: TriggerCall[]; - resumeCalls: TriggerCall[]; streamSets: unknown[]; }; export function fakeIii(): FakeIii { const calls: TriggerCall[] = []; - const resumeCalls: TriggerCall[] = []; const streamSets: unknown[] = []; const iii = { - trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { - calls.push({ function_id, payload }); - if (function_id.startsWith('turn::approval_resume::')) { - resumeCalls.push({ function_id, payload }); - } else if (function_id === 'stream::set') { - streamSets.push(payload); - } - return null; - }), + trigger: vi.fn( + async ({ + function_id, + payload, + action, + }: { + function_id: string; + payload: unknown; + action?: unknown; + }) => { + calls.push({ function_id, payload, action }); + if 
(function_id === 'stream::set') { + streamSets.push(payload); + } + return null; + }, + ), } as unknown as ISdk; - return { iii, calls, resumeCalls, streamSets }; + return { iii, calls, streamSets }; } diff --git a/harness/tests/approval-gate/resolve.test.ts b/harness/tests/approval-gate/resolve.test.ts index 689b0756..ae74e3ca 100644 --- a/harness/tests/approval-gate/resolve.test.ts +++ b/harness/tests/approval-gate/resolve.test.ts @@ -9,9 +9,9 @@ import { describe, expect, it, vi } from 'vitest'; import { handleResolveRequest } from '../../src/approval-gate/resolve.js'; import { fakeIii } from './_helpers/fakeIii.js'; -describe('handleResolveRequest — routing the decision', () => { - it('routes to the exact per-call resume fn with a normalized payload', async () => { - const { iii, resumeCalls } = fakeIii(); +describe('handleResolveRequest — writing the decision', () => { + it('writes the decision to approvals// with a normalized payload', async () => { + const { iii, calls } = fakeIii(); const out = await handleResolveRequest(iii, { session_id: 's1', function_call_id: 'fc-1', @@ -19,25 +19,18 @@ describe('handleResolveRequest — routing the decision', () => { reason: 'user cancelled', }); expect(out).toEqual({ ok: true }); - expect(resumeCalls).toEqual([ + expect(calls).toEqual([ { - function_id: 'turn::approval_resume::s1/fc-1', - payload: { decision: 'deny', reason: 'user cancelled' }, + function_id: 'state::set', + payload: { + scope: 'approvals', + key: 's1/fc-1', + value: { decision: 'deny', reason: 'user cancelled' }, + }, }, ]); }); - it('prefers function_call_id over a conflicting legacy tool_call_id', async () => { - const { iii, resumeCalls } = fakeIii(); - await handleResolveRequest(iii, { - session_id: 's1', - function_call_id: 'canonical', - tool_call_id: 'legacy', - decision: 'allow', - }); - expect(resumeCalls[0]?.function_id).toBe('turn::approval_resume::s1/canonical'); - }); - it('never emits to the agent::events stream (denial flows via 
execution_end)', async () => { const { iii, streamSets } = fakeIii(); await handleResolveRequest(iii, { @@ -66,23 +59,23 @@ describe('handleResolveRequest — hostile / malformed input is rejected, not cr expect(calls).toHaveLength(0); }); - it('returns invalid_payload when the resolved id (via tool_call_id) contains a slash', async () => { + it('returns invalid_payload and fires nothing when function_call_id is missing', async () => { const { iii, calls } = fakeIii(); const out = await handleResolveRequest(iii, { session_id: 's1', - tool_call_id: 'fc/evil', decision: 'allow', - }); + } as never); expect(out).toEqual({ ok: false, error: 'invalid_payload' }); expect(calls).toHaveLength(0); }); - it('returns invalid_payload and fires nothing when both ids are missing', async () => { + it('returns invalid_payload when function_call_id contains a slash', async () => { const { iii, calls } = fakeIii(); const out = await handleResolveRequest(iii, { session_id: 's1', + function_call_id: 'fc/evil', decision: 'allow', - } as never); + }); expect(out).toEqual({ ok: false, error: 'invalid_payload' }); expect(calls).toHaveLength(0); }); @@ -100,7 +93,7 @@ describe('handleResolveRequest — hostile / malformed input is rejected, not cr }); describe('handleResolveRequest — downstream failure is surfaced as resume_failed', () => { - it('returns resume_failed when the resume trigger rejects', async () => { + it('returns resume_failed when the state::set write rejects', async () => { const { iii } = fakeIii(); (iii.trigger as ReturnType).mockRejectedValue(new Error('boom')); const out = await handleResolveRequest(iii, { diff --git a/harness/tests/approval-gate/schemas.test.ts b/harness/tests/approval-gate/schemas.test.ts index 662e6c25..1ba24949 100644 --- a/harness/tests/approval-gate/schemas.test.ts +++ b/harness/tests/approval-gate/schemas.test.ts @@ -7,48 +7,32 @@ */ import { describe, expect, it } from 'vitest'; import { + ApprovalDecisionSchema, ApprovalResumePayloadSchema, 
ResolvePayloadSchema, - approvalResumeFnId, parsePolicyReply, pendingKey, resolveFunctionOptions, } from '../../src/approval-gate/schemas.js'; describe('ResolvePayloadSchema — id normalization & validation', () => { - it('prefers function_call_id over a conflicting tool_call_id', () => { - expect( - ResolvePayloadSchema.parse({ - session_id: 's', - function_call_id: 'canonical', - tool_call_id: 'legacy', - decision: 'allow', - }), - ).toEqual({ - session_id: 's', - function_call_id: 'canonical', - decision: 'allow', - reason: null, - }); - }); - it('coerces an omitted reason to null', () => { const parsed = ResolvePayloadSchema.parse({ session_id: 's', - tool_call_id: 'legacy', + function_call_id: 'fc-1', decision: 'deny', }); expect(parsed.reason).toBeNull(); - expect(parsed.function_call_id).toBe('legacy'); + expect(parsed.function_call_id).toBe('fc-1'); }); it.each([ - ['both ids missing', { session_id: 's', decision: 'allow' }], + ['function_call_id missing', { session_id: 's', decision: 'allow' }], + ['tool_call_id only (legacy)', { session_id: 's', tool_call_id: 'legacy', decision: 'allow' }], ['empty function_call_id', { session_id: 's', function_call_id: '', decision: 'allow' }], ['empty session_id', { session_id: '', function_call_id: 'fc', decision: 'allow' }], ['slash in session_id', { session_id: 'a/b', function_call_id: 'fc', decision: 'allow' }], ['slash in function_call_id', { session_id: 's', function_call_id: 'a/b', decision: 'allow' }], - ['slash via tool_call_id', { session_id: 's', tool_call_id: 'a/b', decision: 'allow' }], ['non-enum decision', { session_id: 's', function_call_id: 'fc', decision: 'maybe' }], ['numeric reason', { session_id: 's', function_call_id: 'fc', decision: 'allow', reason: 7 }], ])('rejects %s', (_label, payload) => { @@ -102,8 +86,8 @@ describe('parsePolicyReply — fail closed', () => { }); describe('state-key derivation — separator integrity', () => { - it('derives turn::approval_resume::/', () => { - 
expect(approvalResumeFnId('sess-1', 'fc-1')).toBe('turn::approval_resume::sess-1/fc-1'); + it('derives /', () => { + expect(pendingKey('sess-1', 'fc-1')).toBe('sess-1/fc-1'); }); it.each([ @@ -111,14 +95,13 @@ describe('state-key derivation — separator integrity', () => { ['function_call', 'a', 'b/c'], ])('throws if the %s id smuggles a slash', (_which, session, fcall) => { expect(() => pendingKey(session, fcall)).toThrow(); - expect(() => approvalResumeFnId(session, fcall)).toThrow(); }); }); -describe('ApprovalResumePayloadSchema', () => { +describe('ApprovalDecisionSchema', () => { it('accepts the three terminal decisions with an explicit reason', () => { for (const decision of ['allow', 'deny', 'aborted'] as const) { - expect(ApprovalResumePayloadSchema.parse({ decision, reason: null })).toEqual({ + expect(ApprovalDecisionSchema.parse({ decision, reason: null })).toEqual({ decision, reason: null, }); @@ -126,10 +109,14 @@ describe('ApprovalResumePayloadSchema', () => { }); it('rejects a missing reason and an unknown decision', () => { - expect(ApprovalResumePayloadSchema.safeParse({ decision: 'allow' }).success).toBe(false); - expect( - ApprovalResumePayloadSchema.safeParse({ decision: 'paused', reason: null }).success, - ).toBe(false); + expect(ApprovalDecisionSchema.safeParse({ decision: 'allow' }).success).toBe(false); + expect(ApprovalDecisionSchema.safeParse({ decision: 'paused', reason: null }).success).toBe( + false, + ); + }); + + it('keeps ApprovalResumePayloadSchema as a deprecated alias', () => { + expect(ApprovalResumePayloadSchema).toBe(ApprovalDecisionSchema); }); }); diff --git a/harness/tests/context-compaction/e2e/full-session.test.ts b/harness/tests/context-compaction/e2e/full-session.test.ts index 756607b1..5a65f107 100644 --- a/harness/tests/context-compaction/e2e/full-session.test.ts +++ b/harness/tests/context-compaction/e2e/full-session.test.ts @@ -6,7 +6,7 @@ * up to an InMemoryStore. 
Verifies the three structural guarantees that * the unit/integration tests cannot observe in isolation: * - * 1. The flat state at scope `agent`, key `session//messages` is + * 1. The flat state at scope `messages`, key `` is * rewritten to a reduced array: [summary-as-asst-msg, ...tail, replay]. * 2. The session tree's active path stays connected — the Compaction * entry, replayed user message, and synthetic continue-prompt are @@ -16,9 +16,10 @@ */ import { describe, expect, it, vi } from 'vitest'; -import { flatMessagesKey } from '../../../src/context-compaction/flat-state.js'; +import { payloadStoreKey, stateStoreKey } from '../../_helpers/stateStoreKey.js'; import { handleSync } from '../../../src/context-compaction/handler-sync.js'; import type { ISdk } from '../../../src/runtime/iii.js'; +import { MESSAGES_SCOPE } from '../../../src/turn-orchestrator/state.js'; import { registerTree } from '../../../src/session/tree/register.js'; import { InMemoryStore } from '../../../src/session/tree/store.js'; import type { SessionEntry } from '../../../src/session/tree/types.js'; @@ -87,7 +88,7 @@ function buildTestSdk(opts: { const store = new InMemoryStore(); // Pre-seed flat state with the overflowing transcript. - stateStore.set(flatMessagesKey(opts.session_id), opts.flatMessages); + stateStore.set(stateStoreKey(MESSAGES_SCOPE, opts.session_id), opts.flatMessages); // Stub channel writer so streamAndCollect can deliver a synthetic done event. let channelCb: ((raw: string) => void) | null = null; @@ -107,25 +108,31 @@ function buildTestSdk(opts: { // 1) state::* — back the lease / flat-state rewrite with stateStore. if (fn === 'state::get') { - const p = (payload ?? {}) as { key: string }; - const v = stateStore.get(p.key); + const p = (payload ?? {}) as { scope: string; key: string }; + const v = stateStore.get(payloadStoreKey(p)); return v !== undefined ? v : null; } if (fn === 'state::set') { - const p = (payload ?? 
{}) as { key: string; value: unknown }; - if (p.value === null || p.value === undefined) stateStore.delete(p.key); - else stateStore.set(p.key, p.value); + const p = (payload ?? {}) as { scope: string; key: string; value: unknown }; + const storeKey = payloadStoreKey(p); + if (p.value === null || p.value === undefined) stateStore.delete(storeKey); + else stateStore.set(storeKey, p.value); return { ok: true }; } if (fn === 'state::update') { - const p = (payload ?? {}) as { key: string; ops: Array<{ type: string; value?: unknown }> }; - const oldValue = stateStore.has(p.key) ? stateStore.get(p.key) : null; + const p = (payload ?? {}) as { + scope: string; + key: string; + ops: Array<{ type: string; value?: unknown }>; + }; + const storeKey = payloadStoreKey(p); + const oldValue = stateStore.has(storeKey) ? stateStore.get(storeKey) : null; let newValue: unknown = oldValue; for (const op of p.ops ?? []) { if (op.type === 'set') newValue = op.value; } - if (newValue === null || newValue === undefined) stateStore.delete(p.key); - else stateStore.set(p.key, newValue); + if (newValue === null || newValue === undefined) stateStore.delete(storeKey); + else stateStore.set(storeKey, newValue); return { old_value: oldValue ?? null, new_value: newValue ?? null }; } @@ -264,7 +271,7 @@ describe('e2e full-session compaction', () => { const lastUserId = entryIds[entryIds.length - 1] ?? ''; // the final user msg // Sanity check: the pre-compaction flat state matches the seed. - const beforeFlat = stateStore.get(flatMessagesKey(SESSION_ID)) as AgentMessage[]; + const beforeFlat = stateStore.get(stateStoreKey(MESSAGES_SCOPE, SESSION_ID)) as AgentMessage[]; expect(beforeFlat.length).toBe(overflowing.length); // Run preflight. The 30-turn fixture should overflow the 8k usable budget. @@ -272,7 +279,7 @@ describe('e2e full-session compaction', () => { expect(result).toBe('compacted'); // --- Assertion 1: flat state is reduced and shaped correctly. 
--- - const afterFlat = stateStore.get(flatMessagesKey(SESSION_ID)) as AgentMessage[]; + const afterFlat = stateStore.get(stateStoreKey(MESSAGES_SCOPE, SESSION_ID)) as AgentMessage[]; expect(afterFlat.length).toBeLessThan(overflowing.length); // First message must be the summary-as-assistant-msg containing SUMMARY. @@ -367,7 +374,7 @@ describe('e2e full-session compaction', () => { } // Flat state is untouched. - const after = stateStore.get(flatMessagesKey(SID)) as AgentMessage[]; + const after = stateStore.get(stateStoreKey(MESSAGES_SCOPE, SID)) as AgentMessage[]; expect(after.length).toBe(tinyMessages.length); // Summariser was never invoked. diff --git a/harness/tests/context-compaction/flat-state-key.test.ts b/harness/tests/context-compaction/flat-state-key.test.ts deleted file mode 100644 index 64ef249e..00000000 --- a/harness/tests/context-compaction/flat-state-key.test.ts +++ /dev/null @@ -1,25 +0,0 @@ -/** - * Drift-guard: the context-compaction worker writes flat session messages - * to a key it composes itself in `flat-state.ts::flatMessagesKey`, because - * importing `turn-orchestrator/state.ts::messagesKey` directly would - * create a package-layer dependency (the orchestrator depends on - * context-compaction via preflight). The two functions MUST agree forever - * — otherwise compaction silently writes the rewritten history to a - * shadow key the orchestrator never reads from. - * - * If this test fails, fix the key shape in flat-state.ts to match - * messagesKey, then take a hard look at whether the duplication should - * be lifted into a shared `runtime/keys.ts` module. 
- */ - -import { describe, expect, it } from 'vitest'; -import { flatMessagesKey } from '../../src/context-compaction/flat-state.js'; -import { messagesKey } from '../../src/turn-orchestrator/state.js'; - -describe('flatMessagesKey ↔ turn-orchestrator messagesKey', () => { - it('produces the identical key for any session id', () => { - for (const sid of ['s', 'console-abc', 'session-with-dashes-12345', 'x']) { - expect(flatMessagesKey(sid)).toBe(messagesKey(sid)); - } - }); -}); diff --git a/harness/tests/context-compaction/integration/flow-sync.test.ts b/harness/tests/context-compaction/integration/flow-sync.test.ts index 8ccd3d39..56f9cb06 100644 --- a/harness/tests/context-compaction/integration/flow-sync.test.ts +++ b/harness/tests/context-compaction/integration/flow-sync.test.ts @@ -8,6 +8,7 @@ * 3. Lease held → status === 'busy'. */ import { describe, expect, it, vi } from 'vitest'; +import { payloadStoreKey, stateStoreKey } from '../../_helpers/stateStoreKey.js'; import { handleSync } from '../../../src/context-compaction/handler-sync.js'; import type { ISdk } from '../../../src/runtime/iii.js'; import { loadFixture } from '../../fixtures/load.js'; @@ -91,29 +92,35 @@ function buildSyncMock(opts: { return { ok: true }; } if (function_id === 'state::get') { - const v = stateStore.get((payload as { key: string }).key); + const v = stateStore.get(payloadStoreKey(payload as { scope?: string; key?: string })); return v !== undefined ? 
v : null; } if (function_id === 'state::set') { - const p = payload as { key: string; value: unknown }; + const p = payload as { key: string; value: unknown; scope?: string }; + const storeKey = payloadStoreKey(p); if (p.value === null || p.value === undefined) { - stateStore.delete(p.key); + stateStore.delete(storeKey); } else { - stateStore.set(p.key, p.value); + stateStore.set(storeKey, p.value); } return { ok: true }; } if (function_id === 'state::update') { - const p = payload as { key: string; ops: Array<{ type: string; value?: unknown }> }; - const oldValue = stateStore.has(p.key) ? stateStore.get(p.key) : null; + const p = payload as { + key: string; + scope?: string; + ops: Array<{ type: string; value?: unknown }>; + }; + const storeKey = payloadStoreKey(p); + const oldValue = stateStore.has(storeKey) ? stateStore.get(storeKey) : null; let newValue: unknown = oldValue; for (const op of p.ops ?? []) { if (op.type === 'set') newValue = op.value; } if (newValue === null || newValue === undefined) { - stateStore.delete(p.key); + stateStore.delete(storeKey); } else { - stateStore.set(p.key, newValue); + stateStore.set(storeKey, newValue); } return { old_value: oldValue ?? null, new_value: newValue ?? 
null }; } @@ -215,8 +222,8 @@ describe('flow-sync: lease held → busy', () => { // Pre-populate the state store with an active lease for this session const sessionId = `${mediumFixture.session_id}-busy`; const stateStore = new Map(); - const leaseKey = `session/${sessionId}/compaction_lease`; - stateStore.set(leaseKey, { nonce: 'held-by-another', ts: Date.now() - 1000 }); + const leaseStoreKey = stateStoreKey('compaction_lease', sessionId); + stateStore.set(leaseStoreKey, { nonce: 'held-by-another', ts: Date.now() - 1000 }); const { iii } = buildSyncMock({ fixtureMessages, stateStore }); diff --git a/harness/tests/context-compaction/lease.test.ts b/harness/tests/context-compaction/lease.test.ts index 3f44b7d0..7fb40a80 100644 --- a/harness/tests/context-compaction/lease.test.ts +++ b/harness/tests/context-compaction/lease.test.ts @@ -1,7 +1,7 @@ import { describe, expect, it, vi } from 'vitest'; +import { payloadStoreKey, stateStoreKey } from '../_helpers/stateStoreKey.js'; import { LEASE_TTL_SECS, - leaseKey, mintLeaseNonce, readLeaseTimestampSecs, acquireLease, @@ -9,10 +9,9 @@ import { } from '../../src/context-compaction/lease.js'; describe('lease helpers', () => { - it('leaseKey namespaces by session', () => { - const k = leaseKey('s9'); - expect(k).toContain('s9'); - expect(k).toContain('compaction_lease'); + it('stateStoreKey namespaces compaction lease by session', () => { + const k = stateStoreKey('compaction_lease', 's9'); + expect(k).toBe('compaction_lease/s9'); }); it('mintLeaseNonce produces unique values across rapid calls', () => { @@ -25,8 +24,8 @@ describe('lease helpers', () => { expect(readLeaseTimestampSecs({ nonce: 'a', ts: 1_700_000_000_000 })).toBe(1_700_000_000); }); - it('readLeaseTimestampSecs accepts legacy bare-int (seconds)', () => { - expect(readLeaseTimestampSecs(1_700_000_000)).toBe(1_700_000_000); + it('readLeaseTimestampSecs treats bare-int values as inactive', () => { + expect(readLeaseTimestampSecs(1_700_000_000)).toBe(0); }); 
it('readLeaseTimestampSecs returns 0 for garbage', () => { @@ -49,20 +48,21 @@ function makeStateIii() { trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { const p = payload as Record; if (function_id === 'state::get') { - const v = store.get(p['key'] as string); + const v = store.get(payloadStoreKey(p as { scope?: string; key?: string })); return v !== undefined ? v : null; } if (function_id === 'state::set') { const v = p['value']; + const key = payloadStoreKey(p as { scope?: string; key?: string }); if (v === null || v === undefined) { - store.delete(p['key'] as string); + store.delete(key); } else { - store.set(p['key'] as string, v); + store.set(key, v); } return { ok: true }; } if (function_id === 'state::update') { - const key = p['key'] as string; + const key = payloadStoreKey(p as { scope?: string; key?: string }); const ops = (p['ops'] ?? []) as Array<{ type: string; value?: unknown }>; const oldValue = store.has(key) ? store.get(key) : null; let newValue: unknown = oldValue; @@ -94,11 +94,11 @@ describe('lease kinds', () => { expect(nonce1).not.toBe(nonce2); }); - it('leaseKey produces different keys for compaction vs prune', () => { - const k1 = leaseKey('sess1', 'compaction'); - const k2 = leaseKey('sess1', 'prune'); - expect(k1).toContain('compaction_lease'); - expect(k2).toContain('prune_lease'); + it('stateStoreKey produces different keys for compaction vs prune', () => { + const k1 = stateStoreKey('compaction_lease', 'sess1'); + const k2 = stateStoreKey('prune_lease', 'sess1'); + expect(k1).toBe('compaction_lease/sess1'); + expect(k2).toBe('prune_lease/sess1'); expect(k1).not.toBe(k2); }); }); @@ -126,22 +126,23 @@ function makeRacyStateIii(writeLatencyMs: number) { trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { const p = payload as Record; if (function_id === 'state::get') { - const v = store.get(p['key'] as string); + const v = 
store.get(payloadStoreKey(p as { scope?: string; key?: string })); return v !== undefined ? v : null; } if (function_id === 'state::set') { await new Promise((r) => setTimeout(r, writeLatencyMs)); const v = p['value']; + const key = payloadStoreKey(p as { scope?: string; key?: string }); if (v === null || v === undefined) { - store.delete(p['key'] as string); + store.delete(key); } else { - store.set(p['key'] as string, v); + store.set(key, v); } return { ok: true }; } if (function_id === 'state::update') { await new Promise((r) => setTimeout(r, writeLatencyMs)); - const key = p['key'] as string; + const key = payloadStoreKey(p as { scope?: string; key?: string }); const ops = (p['ops'] ?? []) as Array<{ type: string; value?: unknown }>; const oldValue = store.has(key) ? store.get(key) : null; let newValue: unknown = oldValue; @@ -171,15 +172,16 @@ function makeFailingUpdateIii() { trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { const p = payload as Record; if (function_id === 'state::get') { - const v = store.get(p['key'] as string); + const v = store.get(payloadStoreKey(p as { scope?: string; key?: string })); return v !== undefined ? 
v : null; } if (function_id === 'state::set') { const v = p['value']; + const key = payloadStoreKey(p as { scope?: string; key?: string }); if (v === null || v === undefined) { - store.delete(p['key'] as string); + store.delete(key); } else { - store.set(p['key'] as string, v); + store.set(key, v); } return { ok: true }; } diff --git a/harness/tests/context-compaction/turn-end-subscription.test.ts b/harness/tests/context-compaction/turn-end-subscription.test.ts new file mode 100644 index 00000000..7d2db72e --- /dev/null +++ b/harness/tests/context-compaction/turn-end-subscription.test.ts @@ -0,0 +1,27 @@ +import { describe, expect, it, vi } from 'vitest'; +import { register } from '../../src/context-compaction/register.js'; +import type { ISdk } from '../../src/runtime/iii.js'; + +describe('context-compaction stream subscription', () => { + it('subscribes to agent::turn_end, not the full agent::events firehose', async () => { + const registerTrigger = vi.fn(); + const iii = { + registerFunction: vi.fn(), + registerTrigger, + trigger: vi.fn(async () => null), + } as unknown as ISdk; + + await register(iii); + + const streamTriggers = registerTrigger.mock.calls + .map( + (c) => c[0] as { type?: string; function_id?: string; config?: { stream_name?: string } }, + ) + .filter( + (t) => t?.type === 'stream' && t?.function_id === 'context-compaction::on_agent_event', + ); + + expect(streamTriggers).toHaveLength(1); + expect(streamTriggers[0].config?.stream_name).toBe('agent::turn_end'); + }); +}); diff --git a/harness/tests/harness/fanout/sessions-poll.test.ts b/harness/tests/harness/fanout/sessions-poll.test.ts new file mode 100644 index 00000000..08ecf367 --- /dev/null +++ b/harness/tests/harness/fanout/sessions-poll.test.ts @@ -0,0 +1,95 @@ +import { describe, expect, it, vi } from 'vitest'; +import { spawnSessionsPoll } from '../../../src/harness/fanout/sessions-poll.js'; +import { FanoutState } from '../../../src/harness/ui-subscribe.js'; +import { TURN_STATE_SCOPE 
} from '../../../src/turn-orchestrator/state.js'; +import type { ISdk } from '../../../src/runtime/iii.js'; + +type Handler = (event: unknown) => Promise; + +// Session-create fanout watches scope `turn_state`. The state trigger has NO +// condition_function_id, so the engine hands every write on that scope to the +// handler — the handler is the sole gate. These tests hammer that gate and +// the registration shape. +function setup(subscribers: string[] = []) { + const handlers = new Map(); + const triggers: Array<{ type?: string; function_id?: string; config?: Record }> = + []; + const sent: Array<{ function_id: string; payload: unknown }> = []; + const iii = { + registerFunction: vi.fn((id: string, h: Handler) => { + handlers.set(id, h); + return { unregister() {} }; + }), + registerTrigger: vi.fn((t) => { + triggers.push(t); + return { unregister() {} }; + }), + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + sent.push(req); + return null; + }), + } as unknown as ISdk; + + const state = new FanoutState(); + for (const b of subscribers) state.subscribe(b, null); + spawnSessionsPoll(iii, state); + return { handlers, triggers, sent }; +} + +const createEvent = (over: Record = {}) => ({ + event_type: 'state:created' as const, + scope: TURN_STATE_SCOPE, + key: 'sess-1', + old_value: null, + new_value: { session_id: 'sess-1', state: 'provisioning' }, + message_type: 'state', + ...over, +}); + +function changedCalls(sent: Array<{ function_id: string; payload: unknown }>) { + return sent.filter((s) => s.function_id.startsWith('ui::sessions::changed::')); +} + +describe('spawnSessionsPoll registration (eliminates the per-write predicate RPC)', () => { + it('registers a scope-only turn_state trigger with NO condition_function_id and no predicate fn', () => { + const { handlers, triggers } = setup(); + + expect([...handlers.keys()]).not.toContain('harness::session::is_create_event'); + 
expect([...handlers.keys()]).toContain('harness::fanout::session_created'); + + const t = triggers.find((x) => x.function_id === 'harness::fanout::session_created'); + expect(t?.type).toBe('state'); + expect(t?.config?.scope).toBe(TURN_STATE_SCOPE); + expect(t?.config?.condition_function_id).toBeUndefined(); + }); +}); + +describe('session_created handler (sole gate)', () => { + it('fans out the new session id to every all-sessions subscriber', async () => { + const { handlers, sent } = setup(['b1', 'b2']); + const handler = handlers.get('harness::fanout::session_created'); + + await handler?.(createEvent({ key: 'sess-1' })); + + const changed = changedCalls(sent); + expect(changed.map((c) => c.function_id).sort()).toEqual([ + 'ui::sessions::changed::b1', + 'ui::sessions::changed::b2', + ]); + expect(changed[0]?.payload).toEqual({ added: ['sess-1'], removed: [] }); + }); + + it.each([ + ['state:updated (not a new session)', { event_type: 'state:updated' }], + // The dangerous one: a delete must NOT report the session as "added". 
+ ['state:deleted (removed session)', { event_type: 'state:deleted', new_value: null }], + ['empty key', { key: '' }], + ])('does NOT fan out on %s', async (_label, over) => { + const { handlers, sent } = setup(['b1']); + const handler = handlers.get('harness::fanout::session_created'); + + await handler?.(createEvent(over)); + + expect(changedCalls(sent)).toHaveLength(0); + }); +}); diff --git a/harness/tests/harness/policy.test.ts b/harness/tests/harness/policy.test.ts index d5b95a30..06550262 100644 --- a/harness/tests/harness/policy.test.ts +++ b/harness/tests/harness/policy.test.ts @@ -562,7 +562,6 @@ describe('shipped iii-permissions.yaml', () => { 'auth::delete_token', 'run::start', 'router::stream_assistant', - 'router::abort', ]; it('kernel surfaces are denied unconditionally — hostile args cannot dodge them', async () => { diff --git a/harness/tests/integration/approval-resume.e2e.test.ts b/harness/tests/integration/approval-resume.e2e.test.ts deleted file mode 100644 index 77d20819..00000000 --- a/harness/tests/integration/approval-resume.e2e.test.ts +++ /dev/null @@ -1,180 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; -import { handleResolveRequest } from '../../src/approval-gate/resolve.js'; -import { - clearApprovalResumeRegistry, - registerApprovalResume, -} from '../../src/turn-orchestrator/approval-resume.js'; -import { - handleAbortSignalWrite, - isAbortSignalWrite, -} from '../../src/turn-orchestrator/on-abort-signal.js'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { newRecord, turnStateKey } from '../../src/turn-orchestrator/state.js'; - -async function flushMicrotasks(): Promise { - await Promise.resolve(); - await Promise.resolve(); -} - -function fakeIii(): { - iii: ISdk; - wakeTriggers: Array<{ session_id: string; function_id: string }>; - stateStore: Map; -} { - const stateStore = new Map(); - const wakeTriggers: Array<{ session_id: string; function_id: string }> = []; - const handlers = new Map 
Promise>(); - - const iii = { - registerFunction: vi.fn((fnId: string, handler: (payload: unknown) => Promise) => { - handlers.set(fnId, handler); - return { unregister: vi.fn() }; - }), - trigger: vi.fn( - async ({ - function_id, - payload, - action, - }: { - function_id: string; - payload: unknown; - action?: unknown; - }) => { - if (function_id === 'state::set') { - const p = payload as { scope: string; key: string; value: unknown }; - const fullKey = `${p.scope}/${p.key}`; - const old_value = stateStore.get(fullKey) ?? null; - stateStore.set(fullKey, p.value); - if (p.scope === 'agent') { - const event = { - event_type: old_value == null ? 'state:created' : 'state:updated', - scope: p.scope, - key: p.key, - old_value, - new_value: p.value, - message_type: 'state', - }; - if (isAbortSignalWrite(event)) { - queueMicrotask(() => { - void handleAbortSignalWrite(iii as unknown as ISdk, event); - }); - } - } - return null; - } - - if (function_id === 'state::get') { - const p = payload as { scope: string; key: string }; - return stateStore.get(`${p.scope}/${p.key}`) ?? 
null; - } - - if (function_id.startsWith('turn::') && action != null) { - const p = payload as { session_id: string }; - wakeTriggers.push({ session_id: p.session_id, function_id }); - return null; - } - - const handler = handlers.get(function_id); - if (handler) { - await handler(payload); - return null; - } - - return null; - }, - ), - }; - - return { iii: iii as unknown as ISdk, wakeTriggers, stateStore }; -} - -describe('approval resume reactive trigger', () => { - afterEach(() => { - clearApprovalResumeRegistry(); - }); - - it('approval::resolve via resume fn automatically enqueues turn::{state}', async () => { - const { iii, wakeTriggers, stateStore } = fakeIii(); - const rec = newRecord('sess-x'); - rec.state = 'function_awaiting_approval'; - stateStore.set(`agent/${turnStateKey('sess-x')}`, rec); - registerApprovalResume(iii, 'sess-x', 'fc-1'); - - const out = await handleResolveRequest(iii, { - session_id: 'sess-x', - function_call_id: 'fc-1', - decision: 'allow', - }); - expect(out).toEqual({ ok: true }); - - await flushMicrotasks(); - - expect(wakeTriggers).toHaveLength(1); - expect(wakeTriggers[0]).toMatchObject({ - session_id: 'sess-x', - function_id: 'turn::function_awaiting_approval', - }); - }); - - it('writing session//abort_signal=true enqueues turn::{state}', async () => { - const { iii, wakeTriggers, stateStore } = fakeIii(); - const rec = newRecord('sess-abort'); - rec.state = 'assistant_streaming'; - stateStore.set(`agent/${turnStateKey('sess-abort')}`, rec); - - await iii.trigger({ - function_id: 'state::set', - payload: { - scope: 'agent', - key: 'session/sess-abort/abort_signal', - value: true, - }, - }); - - await flushMicrotasks(); - - expect(wakeTriggers).toHaveLength(1); - expect(wakeTriggers[0]).toMatchObject({ - session_id: 'sess-abort', - function_id: 'turn::assistant_streaming', - }); - }); - - it('writing session//abort_signal=false does NOT trigger (condition rejects clears)', async () => { - const { iii, wakeTriggers, stateStore 
} = fakeIii(); - const rec = newRecord('sess-clear'); - rec.state = 'function_execute'; - stateStore.set(`agent/${turnStateKey('sess-clear')}`, rec); - - await iii.trigger({ - function_id: 'state::set', - payload: { scope: 'agent', key: 'session/sess-clear/abort_signal', value: true }, - }); - await flushMicrotasks(); - wakeTriggers.length = 0; - - await iii.trigger({ - function_id: 'state::set', - payload: { scope: 'agent', key: 'session/sess-clear/abort_signal', value: false }, - }); - await flushMicrotasks(); - - expect(wakeTriggers).toHaveLength(0); - }); - - it('writing an unrelated agent-scope key does NOT trigger', async () => { - const { iii, wakeTriggers } = fakeIii(); - - await iii.trigger({ - function_id: 'state::set', - payload: { - scope: 'agent', - key: 'session/sess-x/turn_state', - value: { state: 'function_execute' }, - }, - }); - await Promise.resolve(); - - expect(wakeTriggers).toHaveLength(0); - }); -}); diff --git a/harness/tests/integration/on-record-written.e2e.test.ts b/harness/tests/integration/on-record-written.e2e.test.ts index bac27500..b0552ab2 100644 --- a/harness/tests/integration/on-record-written.e2e.test.ts +++ b/harness/tests/integration/on-record-written.e2e.test.ts @@ -1,8 +1,9 @@ import { describe, expect, it, vi } from 'vitest'; import { TriggerAction } from '../../src/runtime/iii.js'; import type { ISdk } from '../../src/runtime/iii.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; -import { newRecord, turnStateKey } from '../../src/turn-orchestrator/state.js'; +import { createTurnStore } from '../../src/turn-orchestrator/state-runtime/store.js'; +import { TURN_STATE_SCOPE } from '../../src/turn-orchestrator/state.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; function fakeIii(): { iii: ISdk; @@ -30,8 +31,13 @@ function fakeIii(): { if (function_id === 'state::set') { const p = payload as { scope: string; key: string; value: unknown }; - 
stateStore.set(`${p.scope}/${p.key}`, structuredClone(p.value)); - return null; + const storeKey = `${p.scope}/${p.key}`; + const old_value = stateStore.has(storeKey) + ? structuredClone(stateStore.get(storeKey)) + : null; + const new_value = structuredClone(p.value); + stateStore.set(storeKey, new_value); + return { old_value, new_value }; } if (function_id === 'state::update') { @@ -55,10 +61,11 @@ function fakeIii(): { describe('saveRecord wake integration', () => { it('writing a new stepable turn_state enqueues turn::provisioning', async () => { const { iii, wakeInvocations } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-a'); rec.state = 'provisioning'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); expect(wakeInvocations).toEqual([ { @@ -71,12 +78,13 @@ describe('saveRecord wake integration', () => { it('subsequent transitions enqueue turn::{newState}', async () => { const { iii, wakeInvocations } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-b'); rec.state = 'provisioning'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); rec.state = 'assistant_streaming'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); expect(wakeInvocations).toEqual([ { @@ -94,32 +102,35 @@ describe('saveRecord wake integration', () => { it('parking in function_awaiting_approval does NOT wake', async () => { const { iii, wakeInvocations } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-c'); rec.state = 'function_awaiting_approval'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); expect(wakeInvocations).toEqual([]); }); it('terminal stopped state does NOT wake', async () => { const { iii, wakeInvocations } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-d'); rec.state = 'stopped'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); 
expect(wakeInvocations).toEqual([]); }); it('same-state re-save does NOT wake', async () => { const { iii, wakeInvocations } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-e'); rec.state = 'function_execute'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); wakeInvocations.length = 0; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); expect(wakeInvocations).toEqual([]); }); @@ -127,31 +138,79 @@ describe('saveRecord wake integration', () => { function turnStateGets(iii: ISdk, session_id: string): number { const trigger = iii.trigger as unknown as { - mock: { calls: Array<[{ function_id: string; payload?: { key?: string } }]> }; + mock: { + calls: Array<[{ function_id: string; payload?: { scope?: string; key?: string } }]>; + }; }; return trigger.mock.calls.filter( - ([arg]) => arg.function_id === 'state::get' && arg.payload?.key === turnStateKey(session_id), + ([arg]) => + arg.function_id === 'state::get' && + arg.payload?.scope === TURN_STATE_SCOPE && + arg.payload?.key === session_id, ).length; } +describe('turn_state persistence (create-fanout source)', () => { + it('a newly-created session persists turn_state keyed by session id', async () => { + const { iii, stateStore } = fakeIii(); + const store = createTurnStore(iii); + const rec = newRecord('sess-new'); + rec.state = 'provisioning'; + + await store.saveRecord(rec); + + expect(stateStore.has(`${TURN_STATE_SCOPE}/sess-new`)).toBe(true); + }); + + it('a transition on an existing session updates the same turn_state key', async () => { + const { iii, stateStore } = fakeIii(); + const store = createTurnStore(iii); + const rec = newRecord('sess-x'); + rec.state = 'provisioning'; + await store.saveRecord(rec); + + rec.state = 'assistant_streaming'; + await store.saveRecord(rec); + + expect(stateStore.has(`${TURN_STATE_SCOPE}/sess-x`)).toBe(true); + expect((stateStore.get(`${TURN_STATE_SCOPE}/sess-x`) as { state: string }).state).toBe( + 
'assistant_streaming', + ); + }); + + it('a threaded previous record (transition) keeps one turn_state entry', async () => { + const { iii, stateStore } = fakeIii(); + const store = createTurnStore(iii); + const previous = newRecord('sess-y'); + previous.state = 'provisioning'; + const next = { ...previous, state: 'assistant_streaming' as const }; + + await store.saveRecord(next, previous); + + expect(stateStore.has(`${TURN_STATE_SCOPE}/sess-y`)).toBe(true); + }); +}); + describe('saveRecord read elimination (#5)', () => { - it('2-arg saveRecord reads turn_state exactly once (no double load)', async () => { + it('2-arg saveRecord does not pre-read turn_state (uses state::set old_value)', async () => { const { iii } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-r1'); rec.state = 'provisioning'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); - expect(turnStateGets(iii, 'sess-r1')).toBe(1); + expect(turnStateGets(iii, 'sess-r1')).toBe(0); }); it('saveRecord with a threaded previous reads turn_state zero times', async () => { const { iii } = fakeIii(); + const store = createTurnStore(iii); const previous = newRecord('sess-r2'); previous.state = 'provisioning'; const next = { ...previous, state: 'assistant_streaming' as const }; - await persistence.saveRecord(iii, next, previous); + await store.saveRecord(next, previous); expect(turnStateGets(iii, 'sess-r2')).toBe(0); }); diff --git a/harness/tests/integration/parallel-approval-harness.ts b/harness/tests/integration/parallel-approval-harness.ts new file mode 100644 index 00000000..2fa5e5cc --- /dev/null +++ b/harness/tests/integration/parallel-approval-harness.ts @@ -0,0 +1,208 @@ +/** + * Integration harness for parallel approval flows: real TurnStore + runTransition, + * simulated iii state/streams, and dispatchWithHook routing. 
+ */ + +import { vi } from 'vitest'; +import { handleResolveRequest } from '../../src/approval-gate/resolve.js'; +import { + handleApprovalStateWrite, + handleAwaitingApproval, +} from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; +import { handleExecute } from '../../src/turn-orchestrator/function-execute/process.js'; +import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; +import { runTransition } from '../../src/turn-orchestrator/run-transition.js'; +import { + TURN_STATE_SCOPE, + newRecord, + type TurnStateRecord, +} from '../../src/turn-orchestrator/state.js'; +import type { ISdk } from '../../src/runtime/iii.js'; +import type { AgentEvent } from '../../src/types/agent-event.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; + +export type ParallelApprovalHarness = { + iii: ISdk; + stateStore: Map; + emitted: AgentEvent[]; + loadTurnRecord(session_id: string): TurnStateRecord | null; + seedExecute(session_id: string, assistant: AssistantMessage): TurnStateRecord; + runExecute(session_id: string): Promise; + resolveApproval( + session_id: string, + function_call_id: string, + decision: 'allow' | 'deny', + reason?: string | null, + ): Promise; +}; + +function makeAgentTriggerCall( + id: string, + functionId: string, + payload: unknown = {}, +): { type: 'function_call'; id: string; function_id: string; arguments: unknown } { + return { + type: 'function_call', + id, + function_id: 'agent_trigger', + arguments: { function: functionId, payload }, + }; +} + +export function makeAssistantWithCalls( + calls: Array<{ id: string; functionId: string; payload?: unknown }>, +): AssistantMessage { + return { + role: 'assistant', + content: calls.map((c) => makeAgentTriggerCall(c.id, c.functionId, c.payload ?? 
{})), + stop_reason: 'function_call', + error_message: null, + error_kind: null, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, + }; +} + +async function flushMicrotasks(): Promise { + await Promise.resolve(); + await Promise.resolve(); +} + +async function runTurnStep(iii: ISdk, function_id: string, session_id: string): Promise { + const payload = { session_id }; + if (function_id === 'turn::function_execute') { + await runTransition(iii, 'function_execute', handleExecute, payload); + return; + } + if (function_id === 'turn::function_awaiting_approval') { + await runTransition(iii, 'function_awaiting_approval', handleAwaitingApproval, payload); + } +} + +export function createParallelApprovalHarness(): ParallelApprovalHarness { + const stateStore = new Map(); + const emitted: AgentEvent[] = []; + let eventSeq = 0; + + const iii = { + trigger: vi.fn( + async ({ + function_id, + payload, + action, + }: { + function_id: string; + payload: unknown; + action?: unknown; + }) => { + if (function_id === 'state::get') { + const p = payload as { scope: string; key: string }; + const v = stateStore.get(`${p.scope}/${p.key}`); + return v === undefined ? null : structuredClone(v); + } + + if (function_id === 'state::set') { + const p = payload as { scope: string; key: string; value: unknown }; + const storeKey = `${p.scope}/${p.key}`; + const old_value = stateStore.has(storeKey) + ? structuredClone(stateStore.get(storeKey)) + : null; + const new_value = structuredClone(p.value); + stateStore.set(storeKey, new_value); + if (p.scope === 'approvals') { + const event = { + event_type: old_value == null ? 
'state:created' : 'state:updated', + scope: p.scope, + key: p.key, + old_value, + new_value, + message_type: 'state', + }; + await handleApprovalStateWrite(iii as unknown as ISdk, event); + } + return { old_value, new_value }; + } + + if (function_id === 'state::update') { + eventSeq += 1; + return { old_value: eventSeq - 1 }; + } + + if (function_id === 'stream::set') { + const p = payload as { data: AgentEvent }; + emitted.push(p.data); + return null; + } + + if (function_id === 'shell::run') { + return { + content: [{ type: 'text', text: 'ok' }], + details: {}, + terminate: false, + }; + } + + if (function_id.startsWith('turn::') && action != null) { + const p = payload as { session_id: string }; + await runTurnStep(iii as unknown as ISdk, function_id, p.session_id); + return null; + } + + return null; + }, + ), + } as unknown as ISdk; + + return { + iii, + stateStore, + emitted, + + loadTurnRecord(session_id: string): TurnStateRecord | null { + const raw = stateStore.get(`${TURN_STATE_SCOPE}/${session_id}`); + return raw ? 
(structuredClone(raw) as TurnStateRecord) : null; + }, + + seedExecute(session_id: string, assistant: AssistantMessage): TurnStateRecord { + const rec = newRecord(session_id); + enterFunctionExecute(rec, assistant); + rec.state = 'function_execute'; + stateStore.set(`${TURN_STATE_SCOPE}/${session_id}`, structuredClone(rec)); + return rec; + }, + + async runExecute(session_id: string): Promise { + await runTurnStep(iii, 'turn::function_execute', session_id); + }, + + async resolveApproval( + session_id: string, + function_call_id: string, + decision: 'allow' | 'deny', + reason: null | string = null, + ): Promise { + const out = await handleResolveRequest(iii, { + session_id, + function_call_id, + decision, + reason, + }); + if (!out.ok) throw new Error(`approval::resolve failed: ${out.error}`); + await flushMicrotasks(); + }, + }; +} + +export function executionEvents( + emitted: AgentEvent[], + type: 'function_execution_start' | 'function_execution_end', + function_call_id?: string, +): AgentEvent[] { + return emitted.filter((event) => { + if (event.type !== type) return false; + if (!function_call_id) return true; + return 'function_call_id' in event && event.function_call_id === function_call_id; + }); +} diff --git a/harness/tests/integration/parallel-approval.e2e.test.ts b/harness/tests/integration/parallel-approval.e2e.test.ts new file mode 100644 index 00000000..a13c5435 --- /dev/null +++ b/harness/tests/integration/parallel-approval.e2e.test.ts @@ -0,0 +1,175 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import * as agentTriggerModule from '../../src/turn-orchestrator/agent-trigger.js'; +import { + createParallelApprovalHarness, + executionEvents, + makeAssistantWithCalls, +} from './parallel-approval-harness.js'; + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('parallel approval e2e', () => { + it('dispatches later calls while earlier ones park without blocking the batch', async () => { + const h = 
createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook') + .mockResolvedValueOnce({ kind: 'pending' }) + .mockResolvedValueOnce({ + kind: 'result', + result: { + content: [{ type: 'text' as const, text: 'fc-2-ok' }], + details: {}, + terminate: false, + }, + }) + .mockResolvedValueOnce({ kind: 'pending' }); + + h.seedExecute( + 'sess-parallel', + makeAssistantWithCalls([ + { id: 'fc-1', functionId: 'shell::run' }, + { id: 'fc-2', functionId: 'shell::run' }, + { id: 'fc-3', functionId: 'shell::run' }, + ]), + ); + + await h.runExecute('sess-parallel'); + const rec = h.loadTurnRecord('sess-parallel'); + + expect(rec?.state).toBe('function_awaiting_approval'); + expect(rec?.awaiting_approval?.map((e) => e.function_call_id).sort()).toEqual(['fc-1', 'fc-3']); + expect(rec?.work?.executed['fc-2']?.result.content[0]).toMatchObject({ text: 'fc-2-ok' }); + expect(rec?.work?.executed['fc-1']).toBeUndefined(); + expect(rec?.work?.executed['fc-3']).toBeUndefined(); + }); + + it('executes one approved call immediately while a sibling stays pending', async () => { + const h = createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook') + .mockResolvedValueOnce({ kind: 'pending' }) + .mockResolvedValueOnce({ + kind: 'result', + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {}, terminate: false }, + }) + .mockResolvedValueOnce({ kind: 'pending' }); + + h.seedExecute( + 'sess-partial', + makeAssistantWithCalls([ + { id: 'fc-1', functionId: 'shell::run' }, + { id: 'fc-2', functionId: 'shell::run' }, + { id: 'fc-3', functionId: 'shell::run' }, + ]), + ); + await h.runExecute('sess-partial'); + + const fc1StartsBefore = executionEvents(h.emitted, 'function_execution_start', 'fc-1'); + expect(fc1StartsBefore).toHaveLength(1); + await h.resolveApproval('sess-partial', 'fc-1', 'allow'); + + const rec = h.loadTurnRecord('sess-partial'); + expect(rec?.state).toBe('function_awaiting_approval'); + 
expect(rec?.awaiting_approval?.map((e) => e.function_call_id)).toEqual(['fc-3']); + expect(rec?.work?.executed['fc-1']).toBeDefined(); + expect(rec?.work?.executed['fc-3']).toBeUndefined(); + + expect(executionEvents(h.emitted, 'function_execution_start', 'fc-1')).toHaveLength(1); + expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength(1); + }); + + it('resolves approvals out of order without waiting for batch order', async () => { + const h = createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook') + .mockResolvedValueOnce({ kind: 'pending' }) + .mockResolvedValueOnce({ kind: 'pending' }); + + h.seedExecute( + 'sess-order', + makeAssistantWithCalls([ + { id: 'fc-1', functionId: 'shell::run' }, + { id: 'fc-2', functionId: 'shell::run' }, + ]), + ); + await h.runExecute('sess-order'); + + await h.resolveApproval('sess-order', 'fc-2', 'allow'); + let rec = h.loadTurnRecord('sess-order'); + expect(rec?.awaiting_approval?.map((e) => e.function_call_id)).toEqual(['fc-1']); + expect(rec?.work?.executed['fc-2']).toBeDefined(); + expect(rec?.state).toBe('function_awaiting_approval'); + + await h.resolveApproval('sess-order', 'fc-1', 'allow'); + rec = h.loadTurnRecord('sess-order'); + expect(rec?.awaiting_approval).toEqual([]); + expect(rec?.state).toBe('steering_check'); + expect(rec?.work).toBeUndefined(); + }); + + it('denies one pending call without affecting an unresolved sibling', async () => { + const h = createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook') + .mockResolvedValueOnce({ kind: 'pending' }) + .mockResolvedValueOnce({ kind: 'pending' }); + + h.seedExecute( + 'sess-deny', + makeAssistantWithCalls([ + { id: 'fc-1', functionId: 'shell::run' }, + { id: 'fc-2', functionId: 'shell::run' }, + ]), + ); + await h.runExecute('sess-deny'); + + await h.resolveApproval('sess-deny', 'fc-1', 'deny', 'operator rejected'); + + const rec = h.loadTurnRecord('sess-deny'); + 
expect(rec?.state).toBe('function_awaiting_approval'); + expect(rec?.awaiting_approval?.map((e) => e.function_call_id)).toEqual(['fc-2']); + expect(rec?.work?.executed['fc-1']?.is_error).toBe(true); + expect(rec?.work?.executed['fc-1']?.result.details).toMatchObject({ + approval_denied: true, + decision: 'deny', + reason: 'operator rejected', + }); + expect(rec?.work?.executed['fc-2']).toBeUndefined(); + }); + + it('is idempotent when the same decision wake is delivered twice', async () => { + const h = createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'pending' }); + + h.seedExecute('sess-dup', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); + await h.runExecute('sess-dup'); + + await h.resolveApproval('sess-dup', 'fc-1', 'allow'); + const endsAfterFirst = executionEvents(h.emitted, 'function_execution_end', 'fc-1').length; + + await h.resolveApproval('sess-dup', 'fc-1', 'allow'); + const rec = h.loadTurnRecord('sess-dup'); + + expect(rec?.awaiting_approval).toEqual([]); + expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength( + endsAfterFirst, + ); + }); + + it('persists the decision and wakes function_awaiting_approval via approval::resolve', async () => { + const h = createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'pending' }); + + h.seedExecute('sess-wake', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); + await h.runExecute('sess-wake'); + + expect(h.loadTurnRecord('sess-wake')?.state).toBe('function_awaiting_approval'); + + await h.resolveApproval('sess-wake', 'fc-1', 'allow'); + + expect(h.stateStore.get('approvals/sess-wake/fc-1')).toEqual({ + decision: 'allow', + reason: null, + }); + expect(h.loadTurnRecord('sess-wake')?.state).toBe('steering_check'); + expect(h.loadTurnRecord('sess-wake')?.work).toBeUndefined(); + }); +}); diff --git 
a/harness/tests/runtime/state-client.test.ts b/harness/tests/runtime/state-client.test.ts new file mode 100644 index 00000000..b1eab48d --- /dev/null +++ b/harness/tests/runtime/state-client.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { createState } from '../../src/runtime/state.js'; + +function makeIii(triggerImpl: (...args: unknown[]) => unknown): ISdk { + return { + trigger: vi.fn(triggerImpl), + } as unknown as ISdk; +} + +describe('createState', () => { + it('tolerant get returns null and does not throw on trigger failure', async () => { + const iii = makeIii(() => { + throw new Error('backend down'); + }); + await expect(createState(iii).get({ scope: 's', key: 'k' })).resolves.toBeNull(); + }); + + it('strict get propagates trigger failure', async () => { + const iii = makeIii(() => { + throw new Error('backend down'); + }); + await expect( + createState(iii, { tolerant: false }).get({ scope: 's', key: 'k' }), + ).rejects.toThrow('backend down'); + }); + + it('tolerant list returns [] on trigger failure', async () => { + const iii = makeIii(() => { + throw new Error('list failed'); + }); + await expect(createState(iii).list({ scope: 's' })).resolves.toEqual([]); + }); + + it('strict list propagates trigger failure', async () => { + const iii = makeIii(() => { + throw new Error('list failed'); + }); + await expect(createState(iii, { tolerant: false }).list({ scope: 's' })).rejects.toThrow( + 'list failed', + ); + }); + + it('get normalizes undefined to null', async () => { + const iii = makeIii(async () => undefined); + await expect(createState(iii).get({ scope: 's', key: 'missing' })).resolves.toBeNull(); + }); + + it('list parses flat arrays from state::list', async () => { + const rows = [{ id: 'a' }, { id: 'b' }]; + const iii = makeIii(async ({ function_id }: { function_id: string }) => { + if (function_id === 'state::list') return rows; + return null; + }); + 
await expect(createState(iii).list({ scope: 'agent' })).resolves.toEqual(rows); + }); +}); diff --git a/harness/tests/runtime/state-list.test.ts b/harness/tests/runtime/state-list.test.ts index 12d5bd2e..e043d382 100644 --- a/harness/tests/runtime/state-list.test.ts +++ b/harness/tests/runtime/state-list.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest'; -import { parseStateListKeyedEntries, parseStateListValues } from '../../src/runtime/state.js'; +import { parseStateListValues } from '../../src/runtime/state.js'; describe('parseStateListValues', () => { it('accepts flat array (official iii shape)', () => { @@ -12,26 +12,9 @@ describe('parseStateListValues', () => { expect(parseStateListValues([{ value: inner }])).toEqual([inner]); }); - it('accepts { items: [...] } envelope', () => { - const inner = { id: 'm1' }; - expect(parseStateListValues({ items: [inner, { value: { id: 'm2' } }] })).toEqual([ - inner, - { id: 'm2' }, - ]); - }); - it('returns [] for non-array responses', () => { expect(parseStateListValues(null)).toEqual([]); expect(parseStateListValues({ ok: true })).toEqual([]); - }); -}); - -describe('parseStateListKeyedEntries', () => { - it('preserves key when present', () => { - expect( - parseStateListKeyedEntries({ - items: [{ key: 'session/s1/turn_state', value: { state: 'stopped' } }], - }), - ).toEqual([{ key: 'session/s1/turn_state', value: { state: 'stopped' } }]); + expect(parseStateListValues({ items: [{ id: 'm1' }] })).toEqual([]); }); }); diff --git a/harness/tests/session/inbox.test.ts b/harness/tests/session/inbox.test.ts index bc7a2d9c..a11280af 100644 --- a/harness/tests/session/inbox.test.ts +++ b/harness/tests/session/inbox.test.ts @@ -3,6 +3,6 @@ import { inboxKey } from '../../src/session/inbox/key.js'; describe('inboxKey', () => { it('namespaces by session and name', () => { - expect(inboxKey('steering', 's1')).toBe('session/s1/steering'); + expect(inboxKey('steering', 's1')).toBe('s1/steering'); }); }); diff 
--git a/harness/tests/session/tree/store.test.ts b/harness/tests/session/tree/store.test.ts index 78c71d03..e8a9fba2 100644 --- a/harness/tests/session/tree/store.test.ts +++ b/harness/tests/session/tree/store.test.ts @@ -17,7 +17,7 @@ function fakeIii(entries: SessionEntry[]): ISdk { return { trigger: async (req: { function_id: string }): Promise => { if (req.function_id === 'state::list') { - return { items: entries.map((e) => ({ value: e })) } as unknown as R; + return entries as unknown as R; } return null as unknown as R; }, diff --git a/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts new file mode 100644 index 00000000..e1ff6b0f --- /dev/null +++ b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts @@ -0,0 +1,39 @@ +import { vi, type Mock } from 'vitest'; +import type { RunRequest } from '../../../src/turn-orchestrator/run-request.js'; +import * as storeModule from '../../../src/turn-orchestrator/state-runtime/store.js'; +import type { TurnStore } from '../../../src/turn-orchestrator/state-runtime/store.js'; + +export const defaultRunRequest: RunRequest = { + provider: 'openai', + model: 'gpt-4', + mode: 'agent', + system_prompt: '', + function_schemas: [], +}; + +export type MockTurnStore = { + [K in keyof TurnStore]: TurnStore[K] extends (...args: infer A) => infer R + ? Mock<(...args: A) => R> + : TurnStore[K]; +}; + +export function mockTurnStore(overrides: Partial = {}): MockTurnStore { + return { + loadRecord: vi.fn(async () => null), + saveRecord: vi.fn(async () => {}), + writeRecord: vi.fn(async () => {}), + loadMessages: vi.fn(async () => []), + saveMessages: vi.fn(async () => {}), + appendMessages: vi.fn(async () => {}), + loadRunRequest: vi.fn(async () => defaultRunRequest), + saveRunRequest: vi.fn(async () => {}), + ...overrides, + } as MockTurnStore; +} + +/** Mock `createTurnStore` and return the store instance for assertions. 
*/ +export function installMockTurnStore(overrides: Partial = {}): MockTurnStore { + const store = mockTurnStore(overrides); + vi.spyOn(storeModule, 'createTurnStore').mockReturnValue(store); + return store; +} diff --git a/harness/tests/turn-orchestrator/abort.test.ts b/harness/tests/turn-orchestrator/abort.test.ts deleted file mode 100644 index 753c9829..00000000 --- a/harness/tests/turn-orchestrator/abort.test.ts +++ /dev/null @@ -1,89 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { performAbortSideEffects } from '../../src/turn-orchestrator/abort.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; -import { newRecord } from '../../src/turn-orchestrator/state.js'; - -describe('performAbortSideEffects', () => { - it('sets the abort_signal flag', async () => { - const triggers: Array<{ function_id: string; payload: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - triggers.push(req); - return null; - }), - } as unknown as ISdk; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(null); - - await performAbortSideEffects(iii, 's1'); - - const setCalls = triggers.filter((t) => t.function_id === 'state::set'); - expect( - setCalls.some( - (c) => (c.payload as Record).key === 'session/s1/abort_signal', - ), - ).toBe(true); - }); - - it('skips approval cleanup when record state is not function_awaiting_approval', async () => { - const triggers: Array<{ function_id: string; payload: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - triggers.push(req); - return null; - }), - } as unknown as ISdk; - const rec = newRecord('s1'); - rec.state = 'assistant_streaming'; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); - - await performAbortSideEffects(iii, 's1'); - - const approvalWrites = triggers - .filter((t) => t.function_id === 
'state::set') - .map((t) => t.payload as Record) - .filter((p) => p.scope === 'approvals'); - expect(approvalWrites).toHaveLength(0); - expect(triggers.some((t) => t.function_id === 'approval::sweep_session')).toBe(false); - }); - - it('invokes resume fns with aborted decision when paused on approval', async () => { - const triggers: Array<{ function_id: string; payload: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - triggers.push(req); - return null; - }), - } as unknown as ISdk; - const rec = newRecord('s1'); - rec.state = 'function_awaiting_approval'; - rec.awaiting_approval = [ - { function_call_id: 'fc-1', function_id: 'shell::run', args: {} }, - { function_call_id: 'fc-2', function_id: 'shell::run', args: {} }, - ]; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); - - await performAbortSideEffects(iii, 's1'); - - const resumeTriggers = triggers.filter((t) => - t.function_id.startsWith('turn::approval_resume::'), - ); - expect(resumeTriggers).toHaveLength(2); - expect(resumeTriggers.map((t) => t.function_id).sort()).toEqual([ - 'turn::approval_resume::s1/fc-1', - 'turn::approval_resume::s1/fc-2', - ]); - for (const t of resumeTriggers) { - expect(t.payload).toMatchObject({ decision: 'aborted', reason: 'session_aborted' }); - } - - const approvalWrites = triggers - .filter((t) => t.function_id === 'state::set') - .map((t) => t.payload as Record) - .filter((p) => p.scope === 'approvals'); - expect(approvalWrites).toHaveLength(0); - - const publishes = triggers.filter((t) => t.function_id === 'iii::durable::publish'); - expect(publishes).toHaveLength(0); - }); -}); diff --git a/harness/tests/turn-orchestrator/agent-trigger.test.ts b/harness/tests/turn-orchestrator/agent-trigger.test.ts index ed73f488..5f76ef3c 100644 --- a/harness/tests/turn-orchestrator/agent-trigger.test.ts +++ b/harness/tests/turn-orchestrator/agent-trigger.test.ts @@ -1,6 +1,5 @@ import { afterEach, describe, 
expect, it, vi } from 'vitest'; import { IIIInvocationError, type ISdk } from '../../src/runtime/iii.js'; -import type { DispatchResult } from '../../src/turn-orchestrator/agent-trigger.js'; import { TOOL_NAME, agentTriggerTool, @@ -29,29 +28,6 @@ describe('agent_trigger tool schema', () => { }); }); -describe('DispatchResult shape', () => { - it('result variant carries a FunctionResult', () => { - const r: DispatchResult = { - kind: 'result', - result: { content: [], details: {}, terminate: false }, - }; - expect(r.kind).toBe('result'); - }); - - it('deny variant carries a denial FunctionResult', () => { - const r: DispatchResult = { - kind: 'deny', - result: { content: [], details: { status: 'denied' }, terminate: false }, - }; - expect(r.kind).toBe('deny'); - }); - - it('pending variant carries no result', () => { - const r: DispatchResult = { kind: 'pending' }; - expect(r.kind).toBe('pending'); - }); -}); - describe('isErrorResult', () => { it('treats details.error as error', () => { expect( @@ -181,7 +157,7 @@ describe('dispatchWithHook returns DispatchResult', () => { expect(out.kind).toBe('pending'); }); - it('returns kind:deny on hard deny', async () => { + it('returns kind:result with denied details on hard deny', async () => { vi.spyOn(hookModule, 'consultBefore').mockResolvedValue({ kind: 'deny', denial: { @@ -198,8 +174,8 @@ describe('dispatchWithHook returns DispatchResult', () => { function_id: 'shell::run', arguments: {}, }); - expect(out.kind).toBe('deny'); - if (out.kind === 'deny') { + expect(out.kind).toBe('result'); + if (out.kind === 'result') { expect(out.result.details).toMatchObject({ status: 'denied' }); } }); diff --git a/harness/tests/turn-orchestrator/approval-resume.test.ts b/harness/tests/turn-orchestrator/approval-resume.test.ts deleted file mode 100644 index 25001fb3..00000000 --- a/harness/tests/turn-orchestrator/approval-resume.test.ts +++ /dev/null @@ -1,243 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; 
-import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; -import { approvalResumeFnId } from '../../src/approval-gate/schemas.js'; -import { - clearApprovalResumeRegistry, - recoverPendingApprovals, - registerApprovalResume, -} from '../../src/turn-orchestrator/approval-resume.js'; - -type RegisteredFn = { - fnId: string; - handler: (payload: unknown) => Promise; - unregister: ReturnType; -}; - -import { - newRecord, - turnStateKey, - type TurnStateRecord, -} from '../../src/turn-orchestrator/state.js'; - -function makeIiiWithRegistry( - stateStore = new Map(), - agentTurnStates: TurnStateRecord[] = [], -) { - const registered = new Map(); - const wakeCalls: Array<{ session_id: string; action?: unknown; function_id?: string }> = []; - - const iii = { - registerFunction: vi.fn((fnId: string, handler: (payload: unknown) => Promise) => { - const entry: RegisteredFn = { - fnId, - handler, - unregister: vi.fn(), - }; - registered.set(fnId, entry); - return { unregister: entry.unregister }; - }), - trigger: vi.fn( - async ({ - function_id, - payload, - action, - }: { - function_id: string; - payload: unknown; - action?: unknown; - }) => { - if (function_id === 'state::get') { - const p = payload as { scope: string; key: string }; - return stateStore.get(`${p.scope}/${p.key}`) ?? 
null; - } - if (function_id === 'state::set') { - const p = payload as { scope: string; key: string; value: unknown }; - stateStore.set(`${p.scope}/${p.key}`, p.value); - return null; - } - if (function_id === 'state::list') { - return agentTurnStates; - } - if (function_id.startsWith('turn::') && function_id !== 'turn::on_abort_signal') { - const p = payload as { session_id: string }; - wakeCalls.push({ - session_id: p.session_id, - action, - function_id, - }); - return null; - } - return null; - }, - ), - } as unknown as ISdk; - - return { iii, registered, wakeCalls, stateStore }; -} - -afterEach(() => { - clearApprovalResumeRegistry(); - vi.restoreAllMocks(); -}); - -describe('registerApprovalResume', () => { - it('registers turn::approval_resume::s1/fc-1 on first call', () => { - const { iii, registered } = makeIiiWithRegistry(); - registerApprovalResume(iii, 's1', 'fc-1'); - expect(iii.registerFunction).toHaveBeenCalledWith( - 'turn::approval_resume::s1/fc-1', - expect.any(Function), - expect.objectContaining({ description: expect.any(String) }), - ); - expect(registered.has('turn::approval_resume::s1/fc-1')).toBe(true); - expect(approvalResumeFnId('s1', 'fc-1')).toBe('turn::approval_resume::s1/fc-1'); - }); - - it('returns the same ref when registered twice', () => { - const { iii, registered } = makeIiiWithRegistry(); - const a = registerApprovalResume(iii, 's1', 'fc-1'); - const b = registerApprovalResume(iii, 's1', 'fc-1'); - expect(a).toBe(b); - expect(iii.registerFunction).toHaveBeenCalledTimes(1); - expect(registered.size).toBe(1); - }); -}); - -describe('approval resume handler', () => { - it('persists decision, enqueues turn::{state}, and unregisters', async () => { - const { iii, registered, wakeCalls, stateStore } = makeIiiWithRegistry(); - const rec = newRecord('s1'); - rec.state = 'function_awaiting_approval'; - stateStore.set(`agent/${turnStateKey('s1')}`, rec); - registerApprovalResume(iii, 's1', 'fc-1'); - const entry = 
registered.get('turn::approval_resume::s1/fc-1'); - if (!entry) throw new Error('handler not registered'); - await entry.handler({ decision: 'allow', reason: null }); - - expect(stateStore.get('approvals/s1/fc-1')).toEqual({ decision: 'allow', reason: null }); - expect(wakeCalls).toEqual([ - { - session_id: 's1', - function_id: 'turn::function_awaiting_approval', - action: TriggerAction.Enqueue({ queue: 'turn-step' }), - }, - ]); - expect(entry!.unregister).toHaveBeenCalled(); - }); - - it('does not overwrite an existing decision (idempotent persist)', async () => { - const { iii, registered, stateStore } = makeIiiWithRegistry(); - stateStore.set('approvals/s1/fc-1', { decision: 'aborted', reason: 'session_aborted' }); - registerApprovalResume(iii, 's1', 'fc-1'); - const entry = registered.get('turn::approval_resume::s1/fc-1'); - if (!entry) throw new Error('handler not registered'); - await entry.handler({ - decision: 'allow', - reason: null, - }); - expect(stateStore.get('approvals/s1/fc-1')).toEqual({ - decision: 'aborted', - reason: 'session_aborted', - }); - }); - - it('does not enqueue turn::{state} again after unregister on second invoke', async () => { - const { iii, registered, wakeCalls } = makeIiiWithRegistry(); - registerApprovalResume(iii, 's1', 'fc-1'); - const entry = registered.get('turn::approval_resume::s1/fc-1'); - if (!entry) throw new Error('handler not registered'); - await entry.handler({ decision: 'deny', reason: 'nope' }); - wakeCalls.length = 0; - - await entry.handler({ decision: 'allow', reason: null }); - expect(wakeCalls).toHaveLength(0); - }); -}); - -describe('recoverPendingApprovals', () => { - it('re-registers resume fns for sessions in function_awaiting_approval', async () => { - const { iii, registered } = makeIiiWithRegistry(new Map(), [ - { - session_id: 's1', - state: 'function_awaiting_approval', - turn_count: 0, - pending_function_calls: [], - function_results: [], - turn_end_emitted: false, - started_at_ms: 0, - 
updated_at_ms: 0, - awaiting_approval: [ - { function_call_id: 'fc-1', function_id: 'tool::x', args: {} }, - { function_call_id: 'fc-2', function_id: 'tool::y', args: {} }, - ], - }, - { - session_id: 's2', - state: 'stopped', - turn_count: 0, - pending_function_calls: [], - function_results: [], - turn_end_emitted: false, - started_at_ms: 0, - updated_at_ms: 0, - }, - ]); - await recoverPendingApprovals(iii); - expect(registered.has('turn::approval_resume::s1/fc-1')).toBe(true); - expect(registered.has('turn::approval_resume::s1/fc-2')).toBe(true); - expect(registered.has('turn::approval_resume::s2/fc-1')).toBe(false); - }); - - it('ignores non-turn_state agent scope values (null, messages, etc.)', async () => { - const { iii, registered } = makeIiiWithRegistry(new Map(), [ - null, - { messages: [] }, - { - session_id: 's1', - state: 'function_awaiting_approval', - turn_count: 0, - pending_function_calls: [], - function_results: [], - turn_end_emitted: false, - started_at_ms: 0, - updated_at_ms: 0, - awaiting_approval: [{ function_call_id: 'fc-1', function_id: 'tool::x', args: {} }], - }, - ]); - await recoverPendingApprovals(iii); - expect(registered.has('turn::approval_resume::s1/fc-1')).toBe(true); - expect(registered.size).toBe(1); - }); - - it('uses keyed list rows when state::list returns session//turn_state keys', async () => { - const { iii, registered } = makeIiiWithRegistry(new Map()); - const listSpy = iii.trigger as ReturnType; - listSpy.mockImplementation(async ({ function_id }: { function_id: string }) => { - if (function_id === 'state::list') { - return { - items: [ - { key: 'session/s1/messages', value: [{ role: 'user', content: 'hi' }] }, - { - key: 'session/s1/turn_state', - value: { - session_id: 's1', - state: 'function_awaiting_approval', - turn_count: 0, - pending_function_calls: [], - function_results: [], - turn_end_emitted: false, - started_at_ms: 0, - updated_at_ms: 0, - awaiting_approval: [{ function_call_id: 'fc-9', function_id: 
'tool::z', args: {} }], - }, - }, - ], - }; - } - return null; - }); - await recoverPendingApprovals(iii); - expect(registered.has('turn::approval_resume::s1/fc-9')).toBe(true); - expect(registered.size).toBe(1); - }); -}); diff --git a/harness/tests/turn-orchestrator/assistant-streaming.test.ts b/harness/tests/turn-orchestrator/assistant-streaming.test.ts new file mode 100644 index 00000000..ff55b9c1 --- /dev/null +++ b/harness/tests/turn-orchestrator/assistant-streaming.test.ts @@ -0,0 +1,170 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + finalizeAssistantTurn, + prepareStreamContext, + resolveAssistantMessage, + routeAssistantTurn, + syntheticStreamReason, +} from '../../src/turn-orchestrator/assistant-streaming/run.js'; +import { + parseFunctionSchemas, + type AssistantStreamingPorts, +} from '../../src/turn-orchestrator/assistant-streaming/ports.js'; +import { isDuplicateAssistant } from '../../src/turn-orchestrator/state-runtime/transcript.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; + +function assistant(overrides: Partial = {}): AssistantMessage { + return { + role: 'assistant', + content: [{ type: 'text', text: 'hello' }], + stop_reason: 'end', + error_message: null, + error_kind: null, + usage: null, + model: 'gpt-4o', + provider: 'openai', + timestamp: 1, + ...overrides, + }; +} + +function stubStreamingPorts( + overrides: Partial = {}, +): AssistantStreamingPorts { + return { + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + checkpoint: vi.fn(async () => {}), + emitTurnEnd: vi.fn(async () => {}), + finishSession: vi.fn(async (rec) => { + rec.state = 'stopped'; + }), + loadRunRequest: vi.fn(async () => ({ + provider: 'openai', + model: 'gpt-4o', + mode: null, + system_prompt: 'sys', + function_schemas: [{ name: 'agent_trigger', description: 'd', parameters: {} }], + })), + runPreflight: vi.fn(async () 
=> 'ok' as const), + streamTurn: vi.fn(async () => ({ final: null, error: null })), + emitMessageUpdate: vi.fn(async () => {}), + emitMessageComplete: vi.fn(async () => {}), + persistAssistantIfNew: vi.fn(async () => {}), + ...overrides, + }; +} + +describe('parseFunctionSchemas', () => { + it('parses valid function schemas via AgentFunctionSchema', () => { + const tools = parseFunctionSchemas([ + { name: 'agent_trigger', description: 'trigger', parameters: { type: 'object' } }, + ]); + expect(tools).toHaveLength(1); + expect(tools[0]?.name).toBe('agent_trigger'); + }); +}); + +describe('prepareStreamContext', () => { + it('reloads messages when preflight compacts', async () => { + const loadMessages = vi + .fn() + .mockResolvedValueOnce([{ role: 'user', content: [], timestamp: 1 }]) + .mockResolvedValueOnce([{ role: 'user', content: [], timestamp: 2 }]); + const ports = stubStreamingPorts({ + loadMessages, + runPreflight: vi.fn(async () => 'compacted'), + }); + const rec = newRecord('s1'); + rec.state = 'assistant_streaming'; + + const ctx = await prepareStreamContext(ports, rec); + + expect(loadMessages).toHaveBeenCalledTimes(2); + expect(ctx.messages).toEqual([{ role: 'user', content: [], timestamp: 2 }]); + expect(ctx.tools[0]?.name).toBe('agent_trigger'); + }); +}); + +describe('resolveAssistantMessage', () => { + it('returns the provider final message when present', () => { + const final = assistant({ content: [{ type: 'text', text: 'done' }] }); + const msg = resolveAssistantMessage( + { final, error: null, body_streamed: false }, + { provider: 'openai', model: 'gpt-4o' }, + ); + expect(msg).toEqual(final); + expect(syntheticStreamReason({ final, error: null, body_streamed: false })).toBeNull(); + }); + + it('builds a synthetic error when the stream ends without a final', () => { + const msg = resolveAssistantMessage( + { final: null, error: 'channel unavailable', body_streamed: false }, + { provider: 'openai', model: 'gpt-4o' }, + ); + 
expect(msg.stop_reason).toBe('error'); + expect(msg.error_message).toContain('channel unavailable'); + }); +}); + +describe('routeAssistantTurn', () => { + it('routes error assistants to stopped', () => { + expect(routeAssistantTurn(assistant({ stop_reason: 'error' })).kind).toBe('stopped'); + }); + + it('routes function_call content to function_execute', () => { + expect( + routeAssistantTurn( + assistant({ + content: [ + { type: 'function_call', id: 'fc-1', function_id: 'shell::run', arguments: {} }, + ], + }), + ).kind, + ).toBe('function_execute'); + }); + + it('routes text-only assistants to steering_check', () => { + expect(routeAssistantTurn(assistant()).kind).toBe('steering_check'); + }); +}); + +describe('finalizeAssistantTurn', () => { + it('stops without persisting on error assistant', async () => { + const ports = stubStreamingPorts(); + const rec = newRecord('s1'); + rec.state = 'assistant_streaming'; + const asst = assistant({ stop_reason: 'error', error_message: 'auth failed' }); + + await finalizeAssistantTurn(ports, rec, asst); + + expect(rec.state).toBe('stopped'); + expect(rec.turn_end_emitted).toBe(true); + expect(ports.persistAssistantIfNew).not.toHaveBeenCalled(); + }); + + it('persists and routes to function_execute when calls exist', async () => { + const ports = stubStreamingPorts(); + const rec = newRecord('s1'); + rec.state = 'assistant_streaming'; + const asst = assistant({ + content: [{ type: 'function_call', id: 'fc-1', function_id: 'shell::run', arguments: {} }], + }); + + await finalizeAssistantTurn(ports, rec, asst); + + expect(ports.persistAssistantIfNew).toHaveBeenCalledOnce(); + expect(rec.state).toBe('function_execute'); + expect(rec.work?.prepared).toHaveLength(1); + expect(rec.function_results).toEqual([]); + }); +}); + +describe('isDuplicateAssistant', () => { + it('detects trailing assistant dup for re-entry', () => { + const asst = assistant({ timestamp: 42, model: 'm', provider: 'p' }); + 
expect(isDuplicateAssistant([asst], asst)).toBe(true); + expect(isDuplicateAssistant([], asst)).toBe(false); + }); +}); diff --git a/harness/tests/turn-orchestrator/assistant.test.ts b/harness/tests/turn-orchestrator/assistant.test.ts index b218fd6d..b70e61e6 100644 --- a/harness/tests/turn-orchestrator/assistant.test.ts +++ b/harness/tests/turn-orchestrator/assistant.test.ts @@ -1,12 +1,10 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import type { AssistantMessage } from '../../src/types/agent-message.js'; -import { TOOL_NAME } from '../../src/turn-orchestrator/agent-trigger.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { installMockTurnStore } from './_helpers/mockTurnStore.js'; import * as preflightModule from '../../src/turn-orchestrator/preflight.js'; import { type TurnStateRecord, newRecord } from '../../src/turn-orchestrator/state.js'; -import { handleFinished } from '../../src/turn-orchestrator/states/assistant-finished.js'; -import { handleStreaming } from '../../src/turn-orchestrator/states/assistant-streaming.js'; +import { handleStreaming } from '../../src/turn-orchestrator/assistant-streaming/process.js'; type TriggerCall = { function_id: string; payload: unknown; timeoutMs?: number }; @@ -45,10 +43,46 @@ function assistant(overrides: Partial = {}): AssistantMessage }; } +/** Build a fake iii whose createChannel delivers a single done event synchronously on stream.resume(). 
*/ +function fakeIiiWithDone(finalMsg: AssistantMessage): { iii: ISdk; calls: TriggerCall[] } { + return fakeIii({ + createChannel: async () => { + let deliver: ((msg: string) => void) | null = null; + return { + writerRef: {}, + reader: { + onMessage: (cb: (msg: string) => void) => { + deliver = cb; + }, + stream: { + resume: () => { + deliver?.(JSON.stringify({ type: 'done', message: finalMsg })); + }, + }, + }, + }; + }, + }); +} + afterEach(() => { vi.restoreAllMocks(); }); +function mockStreamingStore(overrides: Parameters[0] = {}) { + return installMockTurnStore({ + loadRunRequest: vi.fn(async () => ({ + provider: 'openai', + model: 'gpt-4o', + mode: null, + system_prompt: '', + function_schemas: [], + })), + loadMessages: vi.fn(async () => []), + ...overrides, + }); +} + describe('handleStreaming turn start', () => { it('starts a normal assistant turn without approval::consume resurrection', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; @@ -57,73 +91,84 @@ describe('handleStreaming turn start', () => { throw new Error('channel unavailable'); }, }); - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4o', - mode: null, - system_prompt: '', - }); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); + mockStreamingStore(); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); await handleStreaming(iii, rec); expect(rec.turn_count).toBe(1); - expect(rec.turn_end_emitted).toBe(false); - expect(calls.some((c) => c.function_id === 'approval::consume')).toBe(false); - expect(calls.some((c) => c.function_id === 'stream::set')).toBe(false); - }); - - it('exhausts max_turns and transitions to tearing_down', async () => { - const rec: TurnStateRecord = { - ...newRecord('s1', 2), - state: 'assistant_streaming', - turn_count: 2, - }; - const { iii, calls } = fakeIii(); - const saveSpy 
= vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - - await handleStreaming(iii, rec); - - expect(rec.state).toBe('tearing_down'); + // createChannel failure → synthetic error → finalizeAssistant sets turn_end_emitted = true expect(rec.turn_end_emitted).toBe(true); - expect(rec.last_assistant?.content[0]).toEqual({ - type: 'text', - text: 'loop stopped: max_turns (2) reached', - }); - expect(saveSpy).toHaveBeenCalledOnce(); + expect(calls.some((c) => c.function_id === 'approval::consume')).toBe(false); + // stream::set is called by emit(message_complete) and emit(turn_end) in the error path expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); }); }); describe('handleStreaming', () => { - it('transitions to assistant_finished with synthetic error when createChannel fails', async () => { + it('stops with a synthetic error when createChannel fails', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const { iii } = fakeIii({ createChannel: async () => { throw new Error('channel unavailable'); }, }); - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4o', - mode: null, - system_prompt: '', - }); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); + mockStreamingStore(); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); await handleStreaming(iii, rec); - expect(rec.state).toBe('assistant_finished'); + expect(rec.state).toBe('stopped'); expect(rec.last_assistant?.stop_reason).toBe('error'); expect(rec.last_assistant?.error_message).toContain('create_channel failed'); }); - it('captures provider done frame and transitions to assistant_finished', async () => { + it('streaming completion emits message_complete, persists, and routes to function_execute when calls exist', async () => { + 
const finalMsg = assistant({ + content: [ + { + type: 'function_call', + id: 'fc-1', + function_id: 'shell::run', + arguments: { command: 'ls' }, + }, + ], + }); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; + const { iii, calls } = fakeIiiWithDone(finalMsg); + + const store = mockStreamingStore(); + vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); + const appendSpy = store.appendMessages; + + await handleStreaming(iii, rec); + + // emitted message_complete via stream::set trigger + expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); + // assistant persisted + expect(appendSpy).toHaveBeenCalledOnce(); + // routed to function_execute (NOT assistant_finished) + expect(rec.state).toBe('function_execute'); + expect(rec.last_assistant).toEqual(finalMsg); + expect(rec.function_results).toEqual([]); + expect(rec.work?.prepared).toHaveLength(1); + }); + + it('routes to steering_check when the assistant made no calls', async () => { + const finalMsg = assistant({ content: [{ type: 'text', text: 'done reply' }] }); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; + const { iii } = fakeIiiWithDone(finalMsg); + + mockStreamingStore(); + vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); + + await handleStreaming(iii, rec); + + expect(rec.state).toBe('steering_check'); + expect(rec.last_assistant).toEqual(finalMsg); + }); + + it('captures provider done frame and routes correctly (text-only → steering_check)', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const finalMsg = assistant({ content: [{ type: 'text', text: 'done reply' }] }); let deliver: ((msg: string) => void) | null = null; @@ -149,216 +194,61 @@ describe('handleStreaming', () => { }), }); - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4o', - mode: null, - system_prompt: '', - }); - 
vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); + mockStreamingStore(); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); await handleStreaming(iii, rec); - expect(rec.state).toBe('assistant_finished'); + expect(rec.state).toBe('steering_check'); expect(rec.last_assistant).toEqual(finalMsg); }); -}); -describe('handleFinished', () => { - it('throws when last_assistant is missing', async () => { - const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_finished' }; - const { iii } = fakeIii(); - - await expect(handleFinished(iii, rec)).rejects.toThrow( - 'assistant_finished without last_assistant', - ); - }); + it('stops on an error assistant without persisting transcript', async () => { + const finalMsg = assistant({ stop_reason: 'error', error_message: 'auth failed' }); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; + const { iii } = fakeIiiWithDone(finalMsg); - it('routes error assistant to tearing_down without persisting transcript', async () => { - const rec: TurnStateRecord = { - ...newRecord('s1'), - state: 'assistant_finished', - last_assistant: assistant({ stop_reason: 'error', error_message: 'auth failed' }), - }; - const { iii } = fakeIii(); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + const store = mockStreamingStore(); + vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); + const appendSpy = store.appendMessages; - await handleFinished(iii, rec); + await handleStreaming(iii, rec); - expect(rec.state).toBe('tearing_down'); + expect(rec.state).toBe('stopped'); expect(rec.turn_end_emitted).toBe(true); - expect(saveSpy).not.toHaveBeenCalled(); - }); - - it('routes text-only assistant to steering_check and persists message', async () => { - const rec: TurnStateRecord = { - 
...newRecord('s1'), - state: 'assistant_finished', - last_assistant: assistant(), - }; - const { iii } = fakeIii(); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - - await handleFinished(iii, rec); - - expect(rec.state).toBe('steering_check'); - expect(rec.pending_function_calls).toEqual([]); - expect(saveSpy).toHaveBeenCalledOnce(); + expect(appendSpy).not.toHaveBeenCalled(); }); - it('prepares function calls and transitions to function_execute', async () => { - const rec: TurnStateRecord = { - ...newRecord('s1'), - state: 'assistant_finished', - last_assistant: assistant({ - content: [ - { - type: 'function_call', - id: 'fc-1', - function_id: 'shell::run', - arguments: { command: 'ls' }, - }, - ], - }), - }; - const { iii } = fakeIii(); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - const saveExecutedSpy = vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); - const savePreparedSpy = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); - - await handleFinished(iii, rec); + it('does NOT duplicate the assistant message on re-entry', async () => { + const finalMsg = assistant({ + content: [ + { + type: 'function_call', + id: 'toolu_42', + function_id: 'shell::run', + arguments: { command: 'pwd' }, + }, + ], + }); + // Simulate re-entry: messages already contain the assistant message + let storedMessages: unknown[] = [finalMsg]; - expect(rec.state).toBe('function_execute'); - expect(rec.function_results).toEqual([]); - expect(rec.pending_function_calls).toEqual([ - { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, - ]); - expect(saveExecutedSpy).toHaveBeenCalledWith(iii, 's1', []); - expect(savePreparedSpy).toHaveBeenCalledWith(iii, 's1', [ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: { 
command: 'ls' } }, - blocked: null, - }, - ]); - }); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; + const { iii } = fakeIiiWithDone(finalMsg); - it('does NOT duplicate the assistant message when handleFinished re-enters', async () => { - // Idempotency guard: a durable retry / crash-before-transitionTo can - // replay handleFinished with the same last_assistant. Re-pushing a - // tool-call assistant makes Anthropic reject the next request with - // "each tool_use must have a unique id". - const rec: TurnStateRecord = { - ...newRecord('s1'), - state: 'assistant_finished', - last_assistant: assistant({ - content: [ - { - type: 'function_call', - id: 'toolu_42', - function_id: 'shell::run', - arguments: { command: 'pwd' }, - }, - ], + mockStreamingStore({ + loadMessages: vi.fn(async () => storedMessages as never), + appendMessages: vi.fn(async (_sid, msgs) => { + storedMessages = [...storedMessages, ...msgs]; }), - }; - const { iii } = fakeIii(); - let storedMessages: unknown[] = []; - vi.spyOn(persistence, 'loadMessages').mockImplementation(async () => storedMessages as never); - vi.spyOn(persistence, 'saveMessages').mockImplementation(async (_iii, _sid, msgs) => { - storedMessages = msgs as never; }); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); - vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); + vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); - await handleFinished(iii, rec); - // Re-entry: same record before the transition was durably observed. 
- rec.state = 'assistant_finished'; - await handleFinished(iii, rec); + await handleStreaming(iii, rec); const asstMsgs = (storedMessages as Array<{ role?: string }>).filter( (m) => m.role === 'assistant', ); expect(asstMsgs).toHaveLength(1); }); - - it('unwraps agent_trigger wrappers when preparing function calls', async () => { - const rec: TurnStateRecord = { - ...newRecord('s1'), - state: 'assistant_finished', - last_assistant: assistant({ - content: [ - { - type: 'function_call', - id: 'fc-wrap', - function_id: TOOL_NAME, - arguments: { function: 'shell::run', payload: { command: 'ls' } }, - }, - { - type: 'function_call', - id: 'fc-direct', - function_id: 'shell::echo', - arguments: { text: 'hi' }, - }, - ], - }), - }; - const { iii } = fakeIii(); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); - const savePreparedSpy = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); - - await handleFinished(iii, rec); - - expect(rec.state).toBe('function_execute'); - const prepared = savePreparedSpy.mock.calls[0]?.[2]; - expect(prepared).toEqual([ - { - function_call: { id: 'fc-wrap', function_id: 'shell::run', arguments: { command: 'ls' } }, - blocked: null, - }, - { - function_call: { id: 'fc-direct', function_id: 'shell::echo', arguments: { text: 'hi' } }, - blocked: null, - }, - ]); - }); - - it('blocks agent_trigger calls with missing or empty function at prepare time', async () => { - const rec: TurnStateRecord = { - ...newRecord('s1'), - state: 'assistant_finished', - last_assistant: assistant({ - content: [ - { - type: 'function_call', - id: 'fc-bad', - function_id: TOOL_NAME, - arguments: { payload: { command: 'ls' } }, - }, - ], - }), - }; - const { iii } = fakeIii(); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 
'saveMessages').mockResolvedValue(undefined); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); - const savePreparedSpy = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); - - await handleFinished(iii, rec); - - expect(rec.state).toBe('function_execute'); - const prepared = savePreparedSpy.mock.calls[0]?.[2]; - expect(prepared?.[0]?.function_call).toEqual({ - id: 'fc-bad', - function_id: '', - arguments: { command: 'ls' }, - }); - expect(prepared?.[0]?.blocked?.details).toMatchObject({ error: 'missing_function' }); - }); }); diff --git a/harness/tests/turn-orchestrator/awaiting-approval.test.ts b/harness/tests/turn-orchestrator/awaiting-approval.test.ts deleted file mode 100644 index 8153d4d8..00000000 --- a/harness/tests/turn-orchestrator/awaiting-approval.test.ts +++ /dev/null @@ -1,121 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; -import type { TurnStateRecord } from '../../src/turn-orchestrator/state.js'; -import { handleAwaitingApproval } from '../../src/turn-orchestrator/states/function-awaiting-approval.js'; - -function fakeIii(stateGetImpl: (scope: string, key: string) => unknown): ISdk { - return { - trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { - if (function_id === 'state::get') { - const p = payload as { scope: string; key: string }; - return stateGetImpl(p.scope, p.key); - } - if (function_id === 'state::set') return null; - return null; - }), - } as unknown as ISdk; -} - -function recordWith( - awaiting: { function_call_id: string; function_id: string; args: unknown }[], -): TurnStateRecord { - return { - session_id: 's1', - state: 'function_awaiting_approval', - turn_count: 0, - max_turns: undefined, - last_assistant: null, - pending_function_calls: [], - function_results: [], - turn_end_emitted: false, - 
started_at_ms: 0, - updated_at_ms: 0, - awaiting_approval: awaiting, - }; -} - -describe('handleAwaitingApproval', () => { - it('no-ops when any decision is missing', async () => { - const iii = fakeIii((_scope, _key) => null); - const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }]); - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, - ]); - await handleAwaitingApproval(iii, rec); - expect(rec.state).toBe('function_awaiting_approval'); - expect(rec.awaiting_approval).toHaveLength(1); - }); - - it('marks prepared entries pre_approved on allow and transitions to function_execute', async () => { - const iii = fakeIii((_scope, key) => { - if (key === 's1/fc-1') return { decision: 'allow', reason: null }; - return null; - }); - const rec = recordWith([ - { function_call_id: 'fc-1', function_id: 'shell::run', args: { command: 'ls' } }, - ]); - const savedPrepared = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { - id: 'fc-1', - function_id: 'shell::run', - arguments: { command: 'ls' }, - }, - blocked: null, - }, - ]); - - await handleAwaitingApproval(iii, rec); - - expect(rec.state).toBe('function_execute'); - expect(rec.awaiting_approval).toEqual([]); - const savedArg = savedPrepared.mock.calls[0][2]; - expect(savedArg[0].pre_approved).toBe(true); - expect(savedArg[0].blocked).toBeNull(); - }); - - it('sets blocked denial result on deny and transitions to function_execute', async () => { - const iii = fakeIii((_scope, key) => { - if (key === 's1/fc-1') return { decision: 'deny', reason: 'policy' }; - return null; - }); - const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }]); - const savedPrepared = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); - 
vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, - ]); - - await handleAwaitingApproval(iii, rec); - - expect(rec.state).toBe('function_execute'); - const savedArg = savedPrepared.mock.calls[0][2]; - expect(savedArg[0].pre_approved).toBeFalsy(); - expect(savedArg[0].blocked).toMatchObject({ - details: expect.objectContaining({ - approval_denied: true, - decision: 'deny', - reason: 'policy', - }), - }); - }); - - it('handles aborted decision like deny', async () => { - const iii = fakeIii((_scope, key) => { - if (key === 's1/fc-1') return { decision: 'aborted', reason: 'session_aborted' }; - return null; - }); - const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }]); - const savedPrepared = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, - ]); - - await handleAwaitingApproval(iii, rec); - - expect(rec.state).toBe('function_execute'); - const savedArg = savedPrepared.mock.calls[0][2]; - expect(savedArg[0].blocked?.details).toMatchObject({ decision: 'aborted' }); - }); -}); diff --git a/harness/tests/turn-orchestrator/estimate.test.ts b/harness/tests/turn-orchestrator/estimate.test.ts deleted file mode 100644 index 00cdccc4..00000000 --- a/harness/tests/turn-orchestrator/estimate.test.ts +++ /dev/null @@ -1,33 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import { estimateMessages } from '../../src/turn-orchestrator/estimate.js'; - -describe('estimateMessages', () => { - it('returns positive count for non-empty messages', () => { - expect( - estimateMessages([ - { role: 'user', content: [{ type: 'text', text: 'x'.repeat(100) }], timestamp: 0 }, - ]), - ).toBeGreaterThan(0); - }); - - it('returns 0 for empty array', () => { - 
expect(estimateMessages([])).toBe(0); - }); - - it('uses chars/4 heuristic', () => { - const msg = { role: 'user' as const, content: [] as never[], timestamp: 0 }; - const serialized = JSON.stringify(msg); - const expected = Math.floor(serialized.length / 4); - expect(estimateMessages([msg])).toBe(expected); - }); - - it('accumulates across multiple messages', () => { - const msgs = [ - { role: 'user' as const, content: [{ type: 'text' as const, text: 'hello' }], timestamp: 1 }, - { role: 'user' as const, content: [{ type: 'text' as const, text: 'world' }], timestamp: 2 }, - ]; - const single = estimateMessages(msgs.slice(0, 1)); - const both = estimateMessages(msgs); - expect(both).toBeGreaterThan(single); - }); -}); diff --git a/harness/tests/turn-orchestrator/events.test.ts b/harness/tests/turn-orchestrator/events.test.ts new file mode 100644 index 00000000..19895795 --- /dev/null +++ b/harness/tests/turn-orchestrator/events.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { emit } from '../../src/turn-orchestrator/events.js'; +import type { AgentEvent } from '../../src/types/agent-event.js'; + +function buildSdk() { + const calls: Array<{ function_id: string; payload: Record }> = []; + const trigger = vi.fn(async (req: { function_id: string; payload?: unknown }) => { + calls.push({ + function_id: req.function_id, + payload: (req.payload ?? 
{}) as Record, + }); + if (req.function_id === 'state::update') return { old_value: 0 }; + return {}; + }); + return { iii: { trigger } as unknown as ISdk, calls }; +} + +const SID = 'sess-1'; + +describe('emit (agent event producer)', () => { + it('writes a non-turn_end event only to agent::events', async () => { + const { iii, calls } = buildSdk(); + const event = { type: 'message_update' } as unknown as AgentEvent; + + await emit(iii, SID, event); + + const sets = calls.filter((c) => c.function_id === 'stream::set'); + expect(sets.map((c) => c.payload.stream_name)).toEqual(['agent::events']); + }); + + it('mirrors a turn_end event onto the dedicated agent::turn_end stream', async () => { + const { iii, calls } = buildSdk(); + const event = { + type: 'turn_end', + message: { role: 'assistant' }, + function_results: [], + } as unknown as AgentEvent; + + await emit(iii, SID, event); + + const sets = calls.filter((c) => c.function_id === 'stream::set'); + const streams = sets.map((c) => c.payload.stream_name); + expect(streams).toContain('agent::events'); + expect(streams).toContain('agent::turn_end'); + + const mirror = sets.find((c) => c.payload.stream_name === 'agent::turn_end'); + expect(mirror?.payload.group_id).toBe(SID); + expect(mirror?.payload.data).toEqual(event); + }); +}); diff --git a/harness/tests/turn-orchestrator/finish.test.ts b/harness/tests/turn-orchestrator/finish.test.ts new file mode 100644 index 00000000..3775f750 --- /dev/null +++ b/harness/tests/turn-orchestrator/finish.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { createTurnStatePorts } from '../../src/turn-orchestrator/state-runtime/ports.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; +import { installMockTurnStore } from './_helpers/mockTurnStore.js'; + +describe('TurnStatePorts.finishSession', () => { + it('emits agent_end with the transcript and sets state to 
stopped', async () => { + const messages = [ + { role: 'user' as const, content: [{ type: 'text' as const, text: 'hi' }], timestamp: 1 }, + ]; + installMockTurnStore({ loadMessages: vi.fn(async () => messages) }); + const emitted: Array<{ type: string; messages?: unknown }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + if (req.function_id === 'stream::set') { + emitted.push((req.payload as { data: { type: string; messages?: unknown } }).data); + } + return null; + }), + } as unknown as ISdk; + + const rec = newRecord('s1'); + rec.state = 'steering_check'; + await createTurnStatePorts(iii).finishSession(rec); + + expect(rec.state).toBe('stopped'); + const agentEnd = emitted.find((e) => e.type === 'agent_end'); + expect(agentEnd).toBeDefined(); + expect(agentEnd?.messages).toEqual(messages); + }); +}); diff --git a/harness/tests/turn-orchestrator/function-awaiting-approval-state-trigger.test.ts b/harness/tests/turn-orchestrator/function-awaiting-approval-state-trigger.test.ts new file mode 100644 index 00000000..77675055 --- /dev/null +++ b/harness/tests/turn-orchestrator/function-awaiting-approval-state-trigger.test.ts @@ -0,0 +1,44 @@ +import { describe, expect, it, vi } from 'vitest'; +import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; +import { handleApprovalStateWrite } from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; +import { ApprovalDecisionEventSchema } from '../../src/turn-orchestrator/schemas.js'; + +const matchingEvent = { + event_type: 'state:created' as const, + scope: 'approvals' as const, + key: 'sess-abc/fc-1', + old_value: null, + new_value: { decision: 'allow', reason: null }, + message_type: 'state', +}; + +describe('ApprovalDecisionEventSchema', () => { + it('extracts session_id from the / key', () => { + expect(ApprovalDecisionEventSchema.parse(matchingEvent)).toEqual({ session_id: 'sess-abc' }); + }); +}); + +describe('handleApprovalStateWrite', () 
=> { + it('enqueues turn::function_awaiting_approval on a decision write', async () => { + const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { + triggers.push(req); + return null; + }), + } as unknown as ISdk; + + await handleApprovalStateWrite(iii, matchingEvent); + + expect(triggers).toHaveLength(1); + expect(triggers[0]?.function_id).toBe('turn::function_awaiting_approval'); + expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); + expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); + }); + + it('no-ops on a non-matching event', async () => { + const iii = { trigger: vi.fn() } as unknown as ISdk; + await handleApprovalStateWrite(iii, { ...matchingEvent, new_value: { reason: 'x' } }); + expect(iii.trigger).not.toHaveBeenCalled(); + }); +}); diff --git a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts new file mode 100644 index 00000000..4368ea46 --- /dev/null +++ b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts @@ -0,0 +1,180 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import * as events from '../../src/turn-orchestrator/events.js'; +import { installMockTurnStore } from './_helpers/mockTurnStore.js'; +import { + applyDecisionToPrepared, + denialResultFromDecision, +} from '../../src/turn-orchestrator/function-awaiting-approval/run.js'; +import { handleAwaitingApproval } from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; +import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; +import type { FunctionBatchWork } from '../../src/turn-orchestrator/function-execute/types.js'; +import { newRecord, type TurnStateRecord } from 
'../../src/turn-orchestrator/state.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; + +afterEach(() => { + vi.restoreAllMocks(); +}); + +function makeAssistant( + calls: Array<{ id: string; function_id: string; arguments?: unknown }> = [], +): AssistantMessage { + return { + role: 'assistant', + content: calls.map((c) => ({ + type: 'function_call' as const, + id: c.id, + function_id: c.function_id, + arguments: c.arguments ?? {}, + })), + stop_reason: 'function_call', + error_message: null, + error_kind: null, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, + }; +} + +function seedFunctionAwaitingApproval( + rec: TurnStateRecord, + work: FunctionBatchWork, + awaiting: Array<{ function_call_id: string; function_id: string; args?: unknown }>, + asst?: AssistantMessage, +): void { + enterFunctionExecute(rec, asst ?? makeAssistant()); + rec.work = work; + rec.awaiting_approval = awaiting.map((e) => ({ + function_call_id: e.function_call_id, + function_id: e.function_id, + args: e.args ?? {}, + })); + rec.state = 'function_awaiting_approval'; +} + +function makeIii(approvalStore: Map): ISdk { + return { + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + if (req.function_id === 'state::get') { + const p = req.payload as { scope: string; key: string }; + return approvalStore.get(`${p.scope}/${p.key}`) ?? 
null; + } + if (req.function_id === 'state::update') return { old_value: 0 }; + if (req.function_id === 'stream::set') return null; + if (req.function_id === 'shell::run') { + return { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }; + } + return null; + }), + } as unknown as ISdk; +} + +describe('applyDecisionToPrepared', () => { + const dispatchCall = { + route: 'dispatch' as const, + call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + }; + + it('maps allow to pre_approved', () => { + expect(applyDecisionToPrepared(dispatchCall, { decision: 'allow', reason: null })).toEqual({ + route: 'pre_approved', + call: dispatchCall.call, + }); + }); + + it('maps deny to synthetic denial result', () => { + const resolved = applyDecisionToPrepared(dispatchCall, { decision: 'deny', reason: 'policy' }); + expect(resolved.route).toBe('synthetic'); + expect(resolved).toMatchObject({ + result: denialResultFromDecision({ decision: 'deny', reason: 'policy' }), + }); + }); +}); + +describe('handleAwaitingApproval', () => { + it('executes allow decision and finalizes when batch completes', async () => { + const approvalStore = new Map(); + approvalStore.set('approvals/s1/fc-1', { decision: 'allow', reason: null }); + const iii = makeIii(approvalStore); + const rec = newRecord('s1'); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }; + seedFunctionAwaitingApproval( + rec, + { prepared: [{ route: 'dispatch', call: fc }], executed: {} }, + [{ function_call_id: 'fc-1', function_id: 'shell::run' }], + ); + + installMockTurnStore({ + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + }); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleAwaitingApproval(iii, rec); + + expect(rec.awaiting_approval).toEqual([]); + expect(rec.state).toBe('steering_check'); + expect(rec.work).toBeUndefined(); + expect(rec.function_results).toHaveLength(1); + }); + + 
it('leaves state parked when awaiting entries remain undecided', async () => { + const iii = makeIii(new Map()); + const rec = newRecord('s1'); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + seedFunctionAwaitingApproval( + rec, + { prepared: [{ route: 'dispatch', call: fc }], executed: {} }, + [{ function_call_id: 'fc-1', function_id: 'shell::run' }], + ); + installMockTurnStore(); + + await handleAwaitingApproval(iii, rec); + + expect(rec.state).toBe('function_awaiting_approval'); + expect(rec.awaiting_approval).toHaveLength(1); + }); + + it('returns to function_execute when approvals done but batch incomplete', async () => { + const approvalStore = new Map(); + approvalStore.set('approvals/s1/fc-2', { decision: 'deny', reason: null }); + const iii = makeIii(approvalStore); + const rec = newRecord('s1'); + const fc1 = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const fc2 = { id: 'fc-2', function_id: 'shell::run', arguments: {} }; + const fc3 = { id: 'fc-3', function_id: 'shell::run', arguments: {} }; + seedFunctionAwaitingApproval( + rec, + { + prepared: [ + { route: 'dispatch', call: fc1 }, + { route: 'dispatch', call: fc2 }, + { route: 'dispatch', call: fc3 }, + ], + executed: { + 'fc-1': { + call: fc1, + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {} }, + is_error: false, + duration_ms: 1, + }, + }, + }, + [{ function_call_id: 'fc-2', function_id: 'shell::run' }], + ); + installMockTurnStore(); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleAwaitingApproval(iii, rec); + + expect(rec.state).toBe('function_execute'); + expect(rec.awaiting_approval).toEqual([]); + expect(rec.work?.executed['fc-2']).toBeDefined(); + expect(rec.work?.executed['fc-3']).toBeUndefined(); + }); +}); diff --git a/harness/tests/turn-orchestrator/function-execute.test.ts b/harness/tests/turn-orchestrator/function-execute.test.ts new file mode 100644 index 00000000..fa6fc90b --- /dev/null +++ 
b/harness/tests/turn-orchestrator/function-execute.test.ts @@ -0,0 +1,220 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + missingFunctionResult, + unwrapAgentTrigger, +} from '../../src/turn-orchestrator/agent-trigger.js'; +import { + enterFunctionExecute, + finalizeBatch, + runOneCall, +} from '../../src/turn-orchestrator/function-execute/run.js'; +import { withRoutingEnvelope } from '../../src/turn-orchestrator/function-execute/ports.js'; +import type { FunctionExecutePorts } from '../../src/turn-orchestrator/function-execute/ports.js'; +import type { ExecutedCall } from '../../src/turn-orchestrator/function-execute/types.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; + +function makeAssistant( + calls: Array<{ id: string; function_id: string; arguments?: unknown }>, +): AssistantMessage { + return { + role: 'assistant', + content: calls.map((c) => ({ + type: 'function_call' as const, + id: c.id, + function_id: c.function_id, + arguments: c.arguments ?? 
{}, + })), + stop_reason: 'function_call', + error_message: null, + error_kind: null, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, + }; +} + +function stubPorts(overrides: Partial = {}): FunctionExecutePorts { + return { + emitStart: vi.fn(async () => {}), + emitEnd: vi.fn(async () => {}), + checkpoint: vi.fn(async () => {}), + dispatch: vi.fn(async () => ({ + kind: 'result' as const, + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {} }, + })), + triggerPreApproved: vi.fn(async () => ({ + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + })), + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + emitTurnEnd: vi.fn(async () => {}), + finishSession: vi.fn(async (rec) => { + rec.state = 'stopped'; + }), + ...overrides, + }; +} + +function preparedFromAssistant(asst: AssistantMessage) { + const rec = newRecord('s1'); + enterFunctionExecute(rec, asst); + return rec.work!.prepared; +} + +describe('batch planning from assistant', () => { + it('unwraps agent_trigger and maps empty function_id to synthetic', () => { + const batch = preparedFromAssistant( + makeAssistant([ + { + id: 'fc-1', + function_id: 'agent_trigger', + arguments: { function: 'shell::run', payload: { x: 1 } }, + }, + { id: 'fc-2', function_id: 'agent_trigger', arguments: {} }, + ]), + ); + + expect(batch[0]).toEqual({ + route: 'dispatch', + call: unwrapAgentTrigger({ + id: 'fc-1', + function_id: 'agent_trigger', + arguments: { function: 'shell::run', payload: { x: 1 } }, + }), + }); + expect(batch[1]).toMatchObject({ + route: 'synthetic', + result: missingFunctionResult(), + }); + }); + + it('maps non-agent_trigger function_id to synthetic error', () => { + const batch = preparedFromAssistant( + makeAssistant([{ id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }]), + ); + expect(batch[0]).toMatchObject({ + route: 'synthetic', + result: missingFunctionResult(), + }); + }); +}); + 
+describe('withRoutingEnvelope', () => { + it('merges routing fields without mutating the original call object', () => { + const call = { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }; + const augmented = withRoutingEnvelope(call, 'sess-1'); + expect(augmented.arguments).toMatchObject({ + command: 'ls', + session_id: 'sess-1', + function_call_id: 'fc-1', + function_id: 'shell::run', + }); + expect(call.arguments).toEqual({ command: 'ls' }); + }); +}); + +describe('runOneCall', () => { + it('replays end event only when call id is already executed', async () => { + const ports = stubPorts(); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const executed: Record = { + 'fc-1': { + call: fc, + result: { content: [{ type: 'text' as const, text: 'cached' }], details: {} }, + is_error: false, + duration_ms: 10, + }, + }; + + const outcome = await runOneCall(ports, 's1', { route: 'dispatch', call: fc }, executed); + + expect(outcome.kind).toBe('skipped'); + expect(ports.emitStart).not.toHaveBeenCalled(); + expect(ports.emitEnd).toHaveBeenCalledOnce(); + expect(ports.dispatch).not.toHaveBeenCalled(); + }); + + it('returns pending without mutating executed map', async () => { + const ports = stubPorts({ + dispatch: vi.fn(async () => ({ kind: 'pending' as const })), + }); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const executed: Record = {}; + + const outcome = await runOneCall(ports, 's1', { route: 'dispatch', call: fc }, executed); + + expect(outcome.kind).toBe('pending'); + expect(executed).toEqual({}); + }); +}); + +describe('finalizeBatch', () => { + it('routes to stopped when every result terminates', async () => { + const ports = stubPorts(); + const rec = newRecord('s1'); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + enterFunctionExecute(rec, makeAssistant([fc])); + rec.state = 'function_execute'; + + rec.work = { + prepared: [{ route: 'dispatch', call: fc }], + 
executed: { + 'fc-1': { + call: fc, + result: { + content: [{ type: 'text' as const, text: 'bye' }], + details: {}, + terminate: true, + }, + is_error: false, + duration_ms: 1, + }, + }, + }; + await finalizeBatch(ports, rec); + + expect(rec.state).toBe('stopped'); + expect(ports.finishSession).toHaveBeenCalledOnce(); + }); + + it('skips duplicate function_result ids on re-entry', async () => { + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const appendMessages = vi.fn(async () => {}); + const ports = stubPorts({ + loadMessages: vi.fn(async () => [ + { + role: 'function_result' as const, + function_call_id: 'fc-1', + function_id: 'shell::run', + content: [{ type: 'text' as const, text: 'existing' }], + details: {}, + is_error: false, + timestamp: 1, + }, + ]), + appendMessages, + }); + const rec = newRecord('s1'); + enterFunctionExecute(rec, makeAssistant([fc])); + rec.state = 'function_execute'; + + rec.work = { + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {} }, + is_error: false, + duration_ms: 1, + }, + }, + }; + await finalizeBatch(ports, rec); + + expect(appendMessages).not.toHaveBeenCalled(); + expect(rec.state).toBe('steering_check'); + }); +}); diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index f2d76b44..54a36fe5 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -2,22 +2,47 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import * as events from '../../src/turn-orchestrator/events.js'; import * as hookModule from '../../src/turn-orchestrator/hook.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { installMockTurnStore } from './_helpers/mockTurnStore.js'; import type { 
TurnStateRecord } from '../../src/turn-orchestrator/state.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; import * as agentTriggerModule from '../../src/turn-orchestrator/agent-trigger.js'; -import * as approvalResumeModule from '../../src/turn-orchestrator/approval-resume.js'; -import { parseApprovalDecision } from '../../src/turn-orchestrator/states/function-awaiting-approval.js'; -import { handleExecute } from '../../src/turn-orchestrator/states/function-execute.js'; +import { parseApprovalDecision } from '../../src/turn-orchestrator/function-awaiting-approval/ports.js'; +import { handleExecute } from '../../src/turn-orchestrator/function-execute/process.js'; +import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; +import type { FunctionBatchWork } from '../../src/turn-orchestrator/function-execute/types.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; afterEach(() => { vi.restoreAllMocks(); }); function mockFinalizePersistence(): void { - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); + installMockTurnStore({ + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + }); +} + +/** Build a minimal AssistantMessage with the given function_call content blocks. */ +function makeAssistant( + calls: Array<{ id: string; function_id: string; arguments?: unknown }>, +): AssistantMessage { + return { + role: 'assistant', + content: calls.map((c) => ({ + type: 'function_call' as const, + id: c.id, + function_id: c.function_id, + arguments: c.arguments ?? 
{}, + })), + stop_reason: 'function_call', + error_message: null, + error_kind: null, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, + }; } describe('parseApprovalDecision', () => { @@ -53,58 +78,137 @@ describe('parseApprovalDecision', () => { }); }); -describe('handleExecute new flow', () => { - it('pushes the call onto awaiting_approval and transitions to function_awaiting_approval on pending', async () => { - const dispatchSpy = vi.spyOn(agentTriggerModule, 'dispatchWithHook'); - dispatchSpy.mockResolvedValueOnce({ kind: 'pending' }); - const registerResumeSpy = vi - .spyOn(approvalResumeModule, 'registerApprovalResume') - .mockReturnValue({ unregister: vi.fn() } as never); +/** Seed required function-batch invariants before handleExecute. */ +function seedFunctionExecute( + rec: TurnStateRecord, + work: FunctionBatchWork, + asst?: AssistantMessage, +): void { + enterFunctionExecute(rec, asst ?? makeAssistant([])); + rec.work = work; + rec.state = 'function_execute'; +} + +/** Wrap a target function id in the agent_trigger envelope (production shape). 
*/ +function agentTriggerCall( + id: string, + functionId: string, + payload: unknown = {}, +): { id: string; function_id: string; arguments: unknown } { + return { id, function_id: 'agent_trigger', arguments: { function: functionId, payload } }; +} +describe('handleExecute new flow', () => { + it('runs the prepared batch from work', async () => { + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ + kind: 'result', + result: { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }, + }); const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); + enterFunctionExecute( + rec, + makeAssistant([agentTriggerCall('fc-1', 'shell::run', { command: 'ls' })]), + ); rec.state = 'function_execute'; - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { - id: 'fc-1', - function_id: 'shell::run', - arguments: { command: 'ls' }, + mockFinalizePersistence(); + await handleExecute(iii, rec); + expect(rec.state).toBe('steering_check'); + expect(rec.function_results).toHaveLength(1); + expect(rec.function_results[0]?.function_call_id).toBe('fc-1'); + }); + + it('finishes the session when every function result terminates', async () => { + const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; + const rec: TurnStateRecord = newRecord('s1'); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + seedFunctionExecute(rec, { + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, + result: { + content: [{ type: 'text' as const, text: 'bye' }], + details: {}, + terminate: true, + }, + is_error: false, + duration_ms: 1, }, - blocked: null, }, - ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + }); + mockFinalizePersistence(); + await handleExecute(iii, rec); - 
expect(rec.state).toBe('function_awaiting_approval'); - expect(rec.awaiting_approval).toHaveLength(1); - expect(rec.awaiting_approval?.[0]?.function_call_id).toBe('fc-1'); - expect(registerResumeSpy).toHaveBeenCalledWith(iii, 's1', 'fc-1'); + expect(rec.state).toBe('stopped'); + }); + + it('does not re-emit function_execution_start for already-executed calls on re-entry', async () => { + const emitted: Array<{ type: string; function_call_id?: string }> = []; + vi.spyOn(events, 'emit').mockImplementation(async (_iii, _sid, ev: never) => { + emitted.push(ev as { type: string; function_call_id?: string }); + }); + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ + kind: 'result', + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {}, terminate: false }, + }); + const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; + const rec: TurnStateRecord = newRecord('s1'); + const fc1 = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const fc2 = { id: 'fc-2', function_id: 'shell::run', arguments: {} }; + seedFunctionExecute(rec, { + prepared: [ + { route: 'dispatch', call: fc1 }, + { route: 'dispatch', call: fc2 }, + ], + executed: { + 'fc-1': { + call: fc1, + result: { + content: [{ type: 'text' as const, text: 'done' }], + details: {}, + terminate: false, + }, + is_error: false, + duration_ms: 5, + }, + }, + }); + mockFinalizePersistence(); + + await handleExecute(iii, rec); + + const starts = emitted + .filter((e) => e.type === 'function_execution_start') + .map((e) => e.function_call_id); + expect(starts).toEqual(['fc-2']); + const fc1Ends = emitted.filter( + (e) => e.type === 'function_execution_end' && e.function_call_id === 'fc-1', + ); + expect(fc1Ends).toHaveLength(0); }); it('skips consultBefore on pre_approved entries and uses triggerFunctionCall', async () => { const triggerSpy = vi.fn().mockResolvedValue({ ok: true }); const iii = { trigger: triggerSpy } as unknown as ISdk; const rec: 
TurnStateRecord = newRecord('s1'); - rec.state = 'function_execute'; - - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { - id: 'fc-1', - function_id: 'shell::run', - arguments: { command: 'ls' }, + seedFunctionExecute(rec, { + prepared: [ + { + route: 'pre_approved', + call: { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, }, - blocked: null, - pre_approved: true, - }, - ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + ], + executed: {}, + }); const consultBeforeSpy = vi.spyOn(hookModule, 'consultBefore'); + mockFinalizePersistence(); await handleExecute(iii, rec); @@ -124,62 +228,52 @@ describe('handleExecute new flow', () => { }); const iii = { trigger: triggerSpy } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); - rec.state = 'function_execute'; - - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { - id: 'fc-1', - function_id: 'shell::fs::write', - arguments: { content: 'Tue May 19 08:17:10 -03 2026\n' }, + seedFunctionExecute(rec, { + prepared: [ + { + route: 'pre_approved', + call: { + id: 'fc-1', + function_id: 'shell::fs::write', + arguments: { content: 'Tue May 19 08:17:10 -03 2026\n' }, + }, }, - blocked: null, - pre_approved: true, - }, - ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); - const saveSpy = vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + ], + executed: {}, + }); mockFinalizePersistence(); await expect(handleExecute(iii, rec)).resolves.toBeUndefined(); expect(rec.state).toBe('steering_check'); - expect(saveSpy).toHaveBeenCalled(); - // saveExecutedCalls is invoked twice: once with the synthesized error - // result, then once with `[]` as the idempotency guard clears executed - // calls at the end of finalize. Inspect the persisted-results call, not - // the trailing clear. 
- const savedResults = saveSpy.mock.calls - .map((c) => c[2] as Array<{ is_error: boolean; result: { details: unknown } }>) - .find((arr) => Array.isArray(arr) && arr.length > 0); - expect(savedResults?.[0]?.is_error).toBe(true); - const details = savedResults?.[0]?.result.details as Record; + expect(rec.function_results).toHaveLength(1); + expect(rec.function_results[0]?.is_error).toBe(true); + const details = rec.function_results[0]?.details as Record; expect(details?.status).toBe('denied'); expect(details?.denied_by).toBe('gate_unavailable'); expect(details?.function_id).toBe('shell::fs::write'); expect(String(details?.reason)).toContain('S210'); }); - it('emits denial result without dispatching when blocked is set', async () => { + it('emits denial result without dispatching when route is synthetic', async () => { const triggerSpy = vi.fn().mockResolvedValue(null); const iii = { trigger: triggerSpy } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); - rec.state = 'function_execute'; - const denial = { content: [{ type: 'text' as const, text: 'denied' }], details: { approval_denied: true, decision: 'deny' as const }, terminate: false, }; - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - blocked: denial, - pre_approved: false, - }, - ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + seedFunctionExecute(rec, { + prepared: [ + { + route: 'synthetic', + call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + result: denial, + }, + ], + executed: {}, + }); mockFinalizePersistence(); await handleExecute(iii, rec); @@ -190,33 +284,28 @@ describe('handleExecute new flow', () => { expect(rec.state).toBe('steering_check'); }); - it('replays persisted executed calls without re-dispatching', async () => { + it('replays persisted executed calls without 
re-dispatching (re-entry with pre-populated work.executed)', async () => { const dispatchSpy = vi.spyOn(agentTriggerModule, 'dispatchWithHook'); const triggerSpy = vi.fn().mockResolvedValue(null); const iii = { trigger: triggerSpy } as unknown as ISdk; const rec = newRecord('s1'); - rec.state = 'function_execute'; - const existingResult = { content: [{ type: 'text' as const, text: 'cached' }], details: {}, terminate: false, }; - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - blocked: null, - }, - ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - result: existingResult, - is_error: false, - duration_ms: 42, + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + seedFunctionExecute(rec, { + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, + result: existingResult, + is_error: false, + duration_ms: 42, + }, }, - ]); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + }); mockFinalizePersistence(); await handleExecute(iii, rec); @@ -237,59 +326,18 @@ describe('handleExecute new flow', () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec = newRecord('s1'); rec.state = 'function_execute'; + enterFunctionExecute(rec, makeAssistant([agentTriggerCall('fc-1', 'shell::run')])); - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - blocked: null, - }, - ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); mockFinalizePersistence(); - await handleExecute(iii, rec); expect(rec.state).toBe('steering_check'); }); - it('transitions to steering_check when last_assistant is missing after 
execute', async () => { - const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; - const rec = newRecord('s1'); - rec.state = 'function_execute'; - rec.last_assistant = null; - - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - result: { - content: [{ type: 'text' as const, text: 'ok' }], - details: {}, - terminate: false, - }, - is_error: false, - duration_ms: 1, - }, - ]); - vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleExecute(iii, rec); - - expect(rec.state).toBe('steering_check'); - expect(rec.pending_function_calls).toEqual([]); - expect(rec.function_results).toHaveLength(1); - expect(emitSpy).not.toHaveBeenCalled(); - }); - it('emits turn lifecycle and sets turn_end_emitted when last_assistant is present', async () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec = newRecord('s1'); - rec.state = 'function_execute'; - rec.last_assistant = { + const asst: AssistantMessage = { role: 'assistant', content: [{ type: 'text', text: 'done' }], stop_reason: 'end', @@ -300,23 +348,30 @@ describe('handleExecute new flow', () => { provider: 'p', timestamp: 1, }; - - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([ + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + seedFunctionExecute( + rec, { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - result: { - content: [{ type: 'text' as const, text: 'ok' }], - details: {}, - terminate: false, + prepared: [{ route: 'dispatch', call: fc 
}], + executed: { + 'fc-1': { + call: fc, + result: { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }, + is_error: false, + duration_ms: 1, + }, }, - is_error: false, - duration_ms: 1, }, - ]); - vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + asst, + ); + installMockTurnStore({ + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + }); const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleExecute(iii, rec); @@ -327,40 +382,54 @@ describe('handleExecute new flow', () => { }); it('does NOT duplicate function_results in flat-state when handleExecute re-enters', async () => { - // Idempotency guard: a durable retry / step-fanout race can replay the - // finalize path with the same persisted executedCalls. Re-pushing the - // same function_result blocks makes Anthropic reject with "each tool_use - // must have a single result. Found multiple tool_result blocks with id". 
- const executed = [ - { - function_call: { id: 'toolu_01', function_id: 'shell::run', arguments: { command: 'ls' } }, - result: { content: [{ type: 'text' as const, text: 'ok' }], details: {}, terminate: false }, - is_error: false, - duration_ms: 5, - }, - ]; + const existingResult = { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }; + const fc = { id: 'toolu_01', function_id: 'shell::run', arguments: { command: 'ls' } }; + const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec = newRecord('s1'); rec.state = 'function_execute'; + enterFunctionExecute( + rec, + makeAssistant([agentTriggerCall('toolu_01', 'shell::run', { command: 'ls' })]), + ); - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { function_call: executed[0].function_call, blocked: null }, - ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue(executed); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); let storedMessages: unknown[] = []; - vi.spyOn(persistence, 'loadMessages').mockImplementation(async () => storedMessages as never); - vi.spyOn(persistence, 'saveMessages').mockImplementation(async (_iii, _sid, msgs) => { - storedMessages = msgs as never; + installMockTurnStore({ + loadMessages: vi.fn(async () => storedMessages as never), + appendMessages: vi.fn(async (_sid, msgs) => { + storedMessages = [...storedMessages, ...msgs]; + }), }); - vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); vi.spyOn(events, 'emit').mockResolvedValue(undefined); + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValue({ + kind: 'result', + result: existingResult, + }); await handleExecute(iii, rec); - // Re-entry: same persisted executedCalls, before the transition was - // durably observed. 
- rec.state = 'function_execute'; + + const asst = makeAssistant([agentTriggerCall('toolu_01', 'shell::run', { command: 'ls' })]); + seedFunctionExecute( + rec, + { + prepared: [{ route: 'dispatch', call: fc }], + executed: { + toolu_01: { + call: fc, + result: existingResult, + is_error: false, + duration_ms: 5, + }, + }, + }, + asst, + ); rec.turn_end_emitted = false; + await handleExecute(iii, rec); const fnResults = ( diff --git a/harness/tests/turn-orchestrator/get-state.test.ts b/harness/tests/turn-orchestrator/get-state.test.ts index 5c76220f..362f98da 100644 --- a/harness/tests/turn-orchestrator/get-state.test.ts +++ b/harness/tests/turn-orchestrator/get-state.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it, vi } from 'vitest'; +import { TURN_STATE_SCOPE } from '../../src/turn-orchestrator/state.js'; import type { ISdk } from '../../src/runtime/iii.js'; import { execute } from '../../src/turn-orchestrator/get-state.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; @@ -42,14 +43,30 @@ describe('GetStatePayloadSchema', () => { }); describe('turn::get_state execute', () => { - it('returns the turn_state record for a known session via persistence.loadRecord', async () => { - const rec = newRecord('sess-abc'); - rec.state = 'function_awaiting_approval'; + it('returns a lean view for a known session (excludes work/last_assistant)', async () => { + const rec = { + ...newRecord('sess-abc', 5), + state: 'function_awaiting_approval' as const, + awaiting_approval: [{ function_call_id: 'c1', function_id: 'x::y', args: {} }], + last_assistant: { + role: 'assistant', + content: [], + stop_reason: 'end', + error_message: null, + error_kind: null, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, + }, + work: { batch: [], results: [] }, + }; const iii = { trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { if ( req.function_id === 'state::get' && - (req.payload as Record).key === 'session/sess-abc/turn_state' + 
(req.payload as Record).scope === TURN_STATE_SCOPE && + (req.payload as Record).key === 'sess-abc' ) { return rec; } @@ -57,8 +74,14 @@ describe('turn::get_state execute', () => { }), } as unknown as ISdk; - const out = await execute(iii, { session_id: 'sess-abc' }); - expect(out).toEqual(rec); + const view: any = await execute(iii, { session_id: 'sess-abc' }); + expect(view.state).toBe('function_awaiting_approval'); + expect(view.awaiting_approval).toHaveLength(1); + expect(view.session_id).toBe('sess-abc'); + expect(view.turn_count).toBe(0); + expect(view.max_turns).toBe(5); + expect(view.work).toBeUndefined(); + expect(view.last_assistant).toBeUndefined(); }); it('returns null when no record exists for the session', async () => { diff --git a/harness/tests/turn-orchestrator/on-abort-signal.test.ts b/harness/tests/turn-orchestrator/on-abort-signal.test.ts deleted file mode 100644 index e185cea3..00000000 --- a/harness/tests/turn-orchestrator/on-abort-signal.test.ts +++ /dev/null @@ -1,219 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; -import { - execute, - handleAbortSignalWrite, - isAbortSignalWrite, - parseAbortSignalWrite, -} from '../../src/turn-orchestrator/on-abort-signal.js'; -import { AbortSignalWriteEventSchema } from '../../src/turn-orchestrator/schemas.js'; -import { newRecord } from '../../src/turn-orchestrator/state.js'; - -const matchingEvent = { - event_type: 'state:created' as const, - scope: 'agent' as const, - key: 'session/sess-abc/abort_signal', - old_value: null, - new_value: true as const, - message_type: 'state', -}; - -describe('AbortSignalWriteEventSchema', () => { - it('accepts the agent state write shape from state::set / engine triggers', () => { - expect(AbortSignalWriteEventSchema.parse(matchingEvent)).toEqual({ - session_id: 'sess-abc', - }); - }); - - it('rejects durable publish envelope shapes (not a state trigger event)', () => { - expect(() => - 
AbortSignalWriteEventSchema.parse({ - topic: 'turn::step_requested', - data: { session_id: 's1' }, - }), - ).toThrow(); - }); - - it('rejects nested payload wrappers', () => { - expect(() => AbortSignalWriteEventSchema.parse({ payload: matchingEvent })).toThrow(); - expect(() => AbortSignalWriteEventSchema.parse({ data: matchingEvent })).toThrow(); - }); - - it('rejects missing key, wrong new_value, or non-abort_signal keys', () => { - expect(() => AbortSignalWriteEventSchema.parse({})).toThrow(); - expect(() => - AbortSignalWriteEventSchema.parse({ - ...matchingEvent, - key: 'session/sess-abc/turn_state', - }), - ).toThrow(); - expect(() => - AbortSignalWriteEventSchema.parse({ - ...matchingEvent, - new_value: false, - }), - ).toThrow(); - expect(() => - AbortSignalWriteEventSchema.parse({ - ...matchingEvent, - event_type: 'state:deleted', - }), - ).toThrow(); - expect(() => AbortSignalWriteEventSchema.parse(null)).toThrow(); - }); -}); - -describe('parseAbortSignalWrite condition', () => { - it('matches session//abort_signal with new_value === true', () => { - expect(parseAbortSignalWrite(matchingEvent)).toEqual({ session_id: 'sess-abc' }); - expect(isAbortSignalWrite(matchingEvent)).toBe(true); - }); - - it('matches state:updated transitioning to true', () => { - const event = { - event_type: 'state:updated' as const, - scope: 'agent' as const, - key: 'session/sess-abc/abort_signal', - old_value: false, - new_value: true as const, - message_type: 'state', - }; - expect(parseAbortSignalWrite(event)).toEqual({ session_id: 'sess-abc' }); - }); - - it('skips state:deleted', () => { - expect( - parseAbortSignalWrite({ - event_type: 'state:deleted', - scope: 'agent', - key: 'session/sess-abc/abort_signal', - old_value: true, - new_value: null, - message_type: 'state', - }), - ).toBeNull(); - }); - - it('skips writes that set the signal to false (idempotent clears)', () => { - expect( - parseAbortSignalWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 
'session/sess-abc/abort_signal', - old_value: true, - new_value: false, - message_type: 'state', - }), - ).toBeNull(); - }); - - it('skips non-abort_signal keys in the agent scope', () => { - expect( - parseAbortSignalWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: null, - new_value: { state: 'function_execute' }, - message_type: 'state', - }), - ).toBeNull(); - }); - - it('skips top-level non-session keys', () => { - expect( - parseAbortSignalWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'harness/index/abc/last_session_id', - old_value: null, - new_value: 'sess-1', - message_type: 'state', - }), - ).toBeNull(); - }); -}); - -function mockIiiWithTurnState(rec: ReturnType): { - iii: ISdk; - triggers: Array<{ function_id: string; payload: unknown; action?: unknown }>; -} { - const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { - if (req.function_id === 'state::get') return rec; - triggers.push(req); - return null; - }), - } as unknown as ISdk; - return { iii, triggers }; -} - -describe('execute', () => { - it('enqueues turn::{state} on the turn-step FIFO queue', async () => { - const rec = newRecord('sess-abc'); - rec.state = 'assistant_streaming'; - const { iii, triggers } = mockIiiWithTurnState(rec); - - await execute(iii, { session_id: 'sess-abc' }); - - expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe('turn::assistant_streaming'); - expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); - expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); - }); - - it('swallows enqueue failures (logs only, never rethrows)', async () => { - const rec = newRecord('sess-abc'); - rec.state = 'provisioning'; - const iii = { - trigger: vi.fn(async (req: { function_id: string }) => { - if 
(req.function_id === 'state::get') return rec; - throw new Error('durable down'); - }), - } as unknown as ISdk; - - await expect(execute(iii, { session_id: 'sess-abc' })).resolves.toBeUndefined(); - }); -}); - -describe('handleAbortSignalWrite', () => { - it('extracts session_id and enqueues turn::{state}', async () => { - const rec = newRecord('sess-abc'); - rec.state = 'function_execute'; - const { iii, triggers } = mockIiiWithTurnState(rec); - - await handleAbortSignalWrite(iii, matchingEvent); - - expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe('turn::function_execute'); - expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); - expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); - }); - - it('no-ops when key does not match the abort_signal pattern', async () => { - const iii = { trigger: vi.fn() } as unknown as ISdk; - await handleAbortSignalWrite(iii, { - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: null, - new_value: {}, - message_type: 'state', - }); - expect(iii.trigger).not.toHaveBeenCalled(); - }); - - it('no-ops when new_value is not true (direct invoke bypasses engine condition)', async () => { - const iii = { trigger: vi.fn() } as unknown as ISdk; - await handleAbortSignalWrite(iii, { - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/abort_signal', - old_value: true, - new_value: false, - message_type: 'state', - }); - expect(iii.trigger).not.toHaveBeenCalled(); - }); -}); diff --git a/harness/tests/turn-orchestrator/parse-turn-state-record.test.ts b/harness/tests/turn-orchestrator/parse-turn-state-record.test.ts new file mode 100644 index 00000000..be83190b --- /dev/null +++ b/harness/tests/turn-orchestrator/parse-turn-state-record.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, it } from 'vitest'; +import { newRecord, parseTurnStateRecord } from '../../src/turn-orchestrator/state.js'; + 
+describe('parseTurnStateRecord', () => { + it('returns a valid record for a well-formed turn_state', () => { + const rec = newRecord('sess-1'); + expect(parseTurnStateRecord(rec)).toEqual(rec); + }); + + it('returns null for null, undefined, and primitives', () => { + expect(parseTurnStateRecord(null)).toBeNull(); + expect(parseTurnStateRecord(undefined)).toBeNull(); + expect(parseTurnStateRecord('nope')).toBeNull(); + expect(parseTurnStateRecord(42)).toBeNull(); + }); + + it('returns null when required identity fields are missing', () => { + expect(parseTurnStateRecord({ state: 'provisioning' })).toBeNull(); + expect(parseTurnStateRecord({ session_id: 's1' })).toBeNull(); + expect(parseTurnStateRecord({ messages: [] })).toBeNull(); + }); + + it('applies defaults for missing scalar fields on partial records', () => { + const parsed = parseTurnStateRecord({ + session_id: 's1', + state: 'provisioning', + }); + expect(parsed).toMatchObject({ + session_id: 's1', + state: 'provisioning', + turn_count: 0, + turn_end_emitted: false, + }); + }); +}); diff --git a/harness/tests/turn-orchestrator/persistence-prepared.test.ts b/harness/tests/turn-orchestrator/persistence-prepared.test.ts deleted file mode 100644 index 29035a08..00000000 --- a/harness/tests/turn-orchestrator/persistence-prepared.test.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import type { PreparedEntry } from '../../src/turn-orchestrator/persistence.js'; - -describe('PreparedEntry with pre_approved', () => { - it('accepts a pre_approved: true entry', () => { - const entry: PreparedEntry = { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - blocked: null, - pre_approved: true, - }; - expect(entry.pre_approved).toBe(true); - }); - - it('defaults pre_approved to undefined', () => { - const entry: PreparedEntry = { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - blocked: null, - }; - 
expect(entry.pre_approved).toBeUndefined(); - }); -}); diff --git a/harness/tests/turn-orchestrator/provider-stream.test.ts b/harness/tests/turn-orchestrator/provider-stream.test.ts new file mode 100644 index 00000000..cf213001 --- /dev/null +++ b/harness/tests/turn-orchestrator/provider-stream.test.ts @@ -0,0 +1,168 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; +import type { ProviderStreamInput } from '../../src/types/provider.js'; +import type { AssistantMessageEvent } from '../../src/types/stream-event.js'; +import { + formatProviderError, + streamProviderTurn, +} from '../../src/turn-orchestrator/provider-stream.js'; + +function assistant(overrides: Partial = {}): AssistantMessage { + return { + role: 'assistant', + content: [{ type: 'text', text: 'hi' }], + stop_reason: 'end', + error_message: null, + error_kind: null, + usage: null, + model: 'gpt-4o', + provider: 'openai', + timestamp: 1, + ...overrides, + }; +} + +/** + * Fake iii: its channel delivers the given events (JSON-encoded) synchronously + * on `stream.resume()`, and `trigger` resolves to null unless `triggerRejects`. + */ +function fakeIii(opts: { + events?: unknown[]; + triggerRejects?: unknown; + createChannelThrows?: unknown; +}): ISdk { + return { + createChannel: async () => { + if (opts.createChannelThrows) throw opts.createChannelThrows; + let deliver: ((m: string) => void) | null = null; + return { + writerRef: {}, + reader: { + onMessage: (cb: (m: string) => void) => { + deliver = cb; + }, + stream: { + resume: () => { + for (const e of opts.events ?? 
[]) deliver?.(JSON.stringify(e)); + }, + }, + }, + }; + }, + trigger: async () => { + if (opts.triggerRejects) throw opts.triggerRejects; + return null; + }, + } as unknown as ISdk; +} + +const baseParams = { + session_id: 's1', + targetFn: 'provider::openai::stream', + buildInput: () => ({}) as unknown as ProviderStreamInput, + onDelta: async () => {}, +}; + +describe('streamProviderTurn', () => { + it('returns the done frame as the final message', async () => { + const finalMsg = assistant({ content: [{ type: 'text', text: 'done' }] }); + const iii = fakeIii({ events: [{ type: 'done', message: finalMsg }] }); + + const result = await streamProviderTurn(iii, baseParams); + + expect(result.final).toEqual(finalMsg); + expect(result.error).toBeNull(); + }); + + it('invokes onDelta per partial and tracks the latest before done', async () => { + const p1 = assistant({ content: [{ type: 'text', text: 'a' }] }); + const p2 = assistant({ content: [{ type: 'text', text: 'ab' }] }); + const finalMsg = assistant({ content: [{ type: 'text', text: 'abc' }] }); + const seen: AssistantMessageEvent[] = []; + const iii = fakeIii({ + events: [ + { type: 'text_delta', partial: p1, delta: 'a' }, + { type: 'text_delta', partial: p2, delta: 'b' }, + { type: 'done', message: finalMsg }, + ], + }); + + const result = await streamProviderTurn(iii, { + ...baseParams, + onDelta: async (_partial, event) => { + seen.push(event); + }, + }); + + expect(seen.map((e) => e.type)).toEqual(['text_delta', 'text_delta']); + expect(result.final).toEqual(finalMsg); + expect(result.error).toBeNull(); + }); + + it('surfaces the error frame as the final message', async () => { + const errMsg = assistant({ stop_reason: 'error', error_message: 'boom' }); + const iii = fakeIii({ events: [{ type: 'error', error: errMsg }] }); + + const result = await streamProviderTurn(iii, baseParams); + + expect(result.final).toEqual(errMsg); + expect(result.error).toBeNull(); + }); + + it('returns a cleaned error when the 
provider trigger rejects', async () => { + const iii = fakeIii({ triggerRejects: new Error('IIIInvocationError: upstream 500') }); + + const result = await streamProviderTurn(iii, baseParams); + + expect(result.final).toBeNull(); + expect(result.error).toBe('upstream 500'); + }); + + it('returns a create_channel error when the channel cannot be created', async () => { + const iii = fakeIii({ createChannelThrows: new Error('channel unavailable') }); + + const result = await streamProviderTurn(iii, baseParams); + + expect(result.final).toBeNull(); + expect(result.error).toContain('create_channel failed'); + }); + + it('skips undecodable frames and still completes on done', async () => { + const finalMsg = assistant({ content: [{ type: 'text', text: 'ok' }] }); + const onDelta = vi.fn(async () => {}); + const iii = { + createChannel: async () => { + let deliver: ((m: string) => void) | null = null; + return { + writerRef: {}, + reader: { + onMessage: (cb: (m: string) => void) => { + deliver = cb; + }, + stream: { + resume: () => { + deliver?.('not json'); + deliver?.(JSON.stringify({ type: 'done', message: finalMsg })); + }, + }, + }, + }; + }, + trigger: async () => null, + } as unknown as ISdk; + + const result = await streamProviderTurn(iii, { ...baseParams, onDelta }); + + expect(result.final).toEqual(finalMsg); + expect(onDelta).not.toHaveBeenCalled(); + }); +}); + +describe('formatProviderError', () => { + it('strips iii invocation-error prefixes', () => { + expect(formatProviderError(new Error('IIIInvocationError: nope'))).toBe('nope'); + expect(formatProviderError(new Error('invocation_failed: nope'))).toBe('nope'); + expect(formatProviderError('plain string')).toBe('plain string'); + }); +}); diff --git a/harness/tests/turn-orchestrator/provisioning-layer.test.ts b/harness/tests/turn-orchestrator/provisioning-layer.test.ts new file mode 100644 index 00000000..a711c183 --- /dev/null +++ b/harness/tests/turn-orchestrator/provisioning-layer.test.ts @@ -0,0 
+1,107 @@ +import { describe, expect, it, vi } from 'vitest'; +import { applyProvisioningOutcome } from '../../src/turn-orchestrator/provisioning/process.js'; +import { loadDefaultSkillBodies } from '../../src/turn-orchestrator/provisioning/load-skills.js'; +import type { ProvisioningPorts } from '../../src/turn-orchestrator/provisioning/ports.js'; +import { processProvisioning } from '../../src/turn-orchestrator/provisioning/process.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; + +function stubPorts(overrides: Partial = {}): ProvisioningPorts { + return { + defaultSkillUris: [], + loadRunRequest: vi.fn(async () => ({ + provider: '', + model: '', + mode: null, + system_prompt: '', + function_schemas: [], + })), + saveRunRequest: vi.fn(async () => {}), + fetchSkillsIndex: vi.fn(async () => null), + fetchSkillBody: vi.fn(async () => null), + ...overrides, + }; +} + +describe('loadDefaultSkillBodies', () => { + it('fetches each URI and maps to DefaultSkillBody', async () => { + const fetchSkillBody = vi.fn(async (id: string) => + id === 'iii-directory/index' ? 
'BODY' : null, + ); + const bodies = await loadDefaultSkillBodies({ fetchSkillBody }, ['iii://iii-directory/index']); + + expect(fetchSkillBody).toHaveBeenCalledWith('iii-directory/index'); + expect(bodies).toEqual([ + { uri: 'iii://iii-directory/index', id: 'iii-directory/index', body: 'BODY' }, + ]); + }); + + it('preserves null bodies for unavailable skills', async () => { + const bodies = await loadDefaultSkillBodies({ fetchSkillBody: vi.fn(async () => null) }, [ + 'iii://missing', + ]); + expect(bodies[0]?.body).toBeNull(); + }); +}); + +describe('processProvisioning', () => { + it('builds prompt with mode and attaches agent_trigger schema', async () => { + const ports = stubPorts({ + defaultSkillUris: [], + loadRunRequest: vi.fn(async () => ({ + provider: 'openai', + model: 'gpt-4', + mode: 'agent', + system_prompt: '', + function_schemas: [], + })), + fetchSkillsIndex: vi.fn(async () => 'INDEX'), + }); + const rec = { ...newRecord('s1'), state: 'provisioning' as const }; + + const outcome = await processProvisioning(ports, rec); + + expect(outcome.kind).toBe('ready'); + expect(outcome.runRequest.system_prompt).toContain('operating in agent mode'); + expect(outcome.runRequest.system_prompt).toContain('INDEX'); + expect(outcome.runRequest.function_schemas).toEqual([ + expect.objectContaining({ name: 'agent_trigger' }), + ]); + }); + + it('preserves a non-empty caller override verbatim', async () => { + const ports = stubPorts({ + loadRunRequest: vi.fn(async () => ({ + provider: '', + model: '', + mode: null, + system_prompt: 'custom override', + function_schemas: [], + })), + }); + const rec = { ...newRecord('s1'), state: 'provisioning' as const }; + + const outcome = await processProvisioning(ports, rec); + + expect(outcome.runRequest.system_prompt).toBe('custom override'); + }); +}); + +describe('applyProvisioningOutcome', () => { + it('saves run request and transitions to assistant_streaming', async () => { + const saveRunRequest = vi.fn(async () => {}); + 
const ports = stubPorts({ saveRunRequest }); + const rec = { ...newRecord('s1'), state: 'provisioning' as const }; + const runRequest = { + provider: 'openai', + model: 'gpt-4', + mode: 'agent' as const, + system_prompt: 'prompt', + function_schemas: [], + }; + + await applyProvisioningOutcome(ports, rec, { kind: 'ready', runRequest }); + + expect(saveRunRequest).toHaveBeenCalledWith('s1', runRequest); + expect(rec.state).toBe('assistant_streaming'); + }); +}); diff --git a/harness/tests/turn-orchestrator/provisioning.test.ts b/harness/tests/turn-orchestrator/provisioning.test.ts index 251ec424..11ccb7b1 100644 --- a/harness/tests/turn-orchestrator/provisioning.test.ts +++ b/harness/tests/turn-orchestrator/provisioning.test.ts @@ -1,14 +1,11 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import type { TurnOrchestratorConfig } from '../../src/turn-orchestrator/config.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { defaultRunRequest, installMockTurnStore } from './_helpers/mockTurnStore.js'; import { type TurnStateRecord, newRecord } from '../../src/turn-orchestrator/state.js'; import { TurnStepPayloadSchema } from '../../src/turn-orchestrator/schemas.js'; -import { - handleProvisioning, - parseDirectoryBody, - register, -} from '../../src/turn-orchestrator/states/provisioning.js'; +import { parseDirectoryBody } from '../../src/turn-orchestrator/provisioning/ports.js'; +import { handleProvisioning, register } from '../../src/turn-orchestrator/provisioning/process.js'; type TriggerCall = { function_id: string; payload: unknown; timeoutMs?: number }; @@ -57,28 +54,24 @@ describe('handleProvisioning', () => { }); const cfg = { system_default_skills: ['iii://iii-directory/index'] }; - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4', - mode: 'agent', - system_prompt: '', + const store = installMockTurnStore({ 
+ loadRunRequest: vi.fn(async () => ({ + ...defaultRunRequest, + mode: 'agent', + })), }); - const saveSchemas = vi.spyOn(persistence, 'saveFunctionSchemas').mockResolvedValue(); - const saveRunRequest = vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); + const saveRunRequest = store.saveRunRequest; await handleProvisioning(iii, cfg, rec); expect(rec.state).toBe('assistant_streaming'); - expect(saveSchemas).toHaveBeenCalledWith(iii, 's1', [ - expect.objectContaining({ name: 'agent_trigger' }), - ]); expect(saveRunRequest).toHaveBeenCalledWith( - iii, 's1', expect.objectContaining({ provider: 'openai', model: 'gpt-4', system_prompt: expect.stringContaining('operating in agent mode'), + function_schemas: [expect.objectContaining({ name: 'agent_trigger' })], }), ); expect(calls.some((c) => c.function_id === 'directory::skills::index')).toBe(true); @@ -90,19 +83,18 @@ describe('handleProvisioning', () => { const { iii } = fakeIii(); const cfg = { system_default_skills: [] as string[] }; - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4', - mode: null, - system_prompt: 'custom override', + const store = installMockTurnStore({ + loadRunRequest: vi.fn(async () => ({ + ...defaultRunRequest, + mode: null, + system_prompt: 'custom override', + })), }); - vi.spyOn(persistence, 'saveFunctionSchemas').mockResolvedValue(); - const saveRunRequest = vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); + const saveRunRequest = store.saveRunRequest; await handleProvisioning(iii, cfg, rec); expect(saveRunRequest).toHaveBeenCalledWith( - iii, 's1', expect.objectContaining({ system_prompt: 'custom override' }), ); @@ -113,20 +105,20 @@ describe('handleProvisioning', () => { const { iii } = fakeIii(); const cfg = { system_default_skills: ['iii://missing'] }; - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: '', - model: '', - mode: null, - system_prompt: '', + const store = 
installMockTurnStore({ + loadRunRequest: vi.fn(async () => ({ + ...defaultRunRequest, + provider: '', + model: '', + mode: null, + })), }); - vi.spyOn(persistence, 'saveFunctionSchemas').mockResolvedValue(); - const saveRunRequest = vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); + const saveRunRequest = store.saveRunRequest; await handleProvisioning(iii, cfg, rec); expect(rec.state).toBe('assistant_streaming'); expect(saveRunRequest).toHaveBeenCalledWith( - iii, 's1', expect.objectContaining({ system_prompt: expect.stringContaining('You are an iii agent worker'), @@ -169,16 +161,17 @@ describe('register', () => { it('registers turn::provisioning, threads cfg into the runner, and returns metadata', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'provisioning' }; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); - const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); - const loadRunRequest = vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: '', - model: '', - mode: null, - system_prompt: '', + const store = installMockTurnStore({ + loadRecord: vi.fn(async () => rec), + loadRunRequest: vi.fn(async () => ({ + ...defaultRunRequest, + provider: '', + model: '', + mode: null, + })), }); - vi.spyOn(persistence, 'saveFunctionSchemas').mockResolvedValue(); - vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); + const saveRecord = store.saveRecord; + const loadRunRequest = store.loadRunRequest; const { iii, getHandler, getId } = captureHandler(); register(iii, cfg); @@ -188,9 +181,8 @@ describe('register', () => { // cfg flows through to handleProvisioning (which reads the run request), // and the runner threads the pre-mutation snapshot into saveRecord. 
- expect(loadRunRequest).toHaveBeenCalledWith(iii, 's1'); + expect(loadRunRequest).toHaveBeenCalledWith('s1'); expect(saveRecord).toHaveBeenCalledWith( - iii, rec, expect.objectContaining({ state: 'provisioning' }), ); diff --git a/harness/tests/turn-orchestrator/run-request.test.ts b/harness/tests/turn-orchestrator/run-request.test.ts index 745557b8..f2ef28be 100644 --- a/harness/tests/turn-orchestrator/run-request.test.ts +++ b/harness/tests/turn-orchestrator/run-request.test.ts @@ -8,6 +8,7 @@ describe('parseRunRequest', () => { model: '', mode: null, system_prompt: '', + function_schemas: [], }); }); @@ -17,6 +18,7 @@ describe('parseRunRequest', () => { model: 'gpt-4', mode: null, system_prompt: 'hi', + function_schemas: [], }); }); @@ -33,6 +35,26 @@ describe('parseRunRequest', () => { model: '', mode: null, system_prompt: '', + function_schemas: [], }); }); + + it('treats null and undefined as empty run request', () => { + const empty = { + provider: '', + model: '', + mode: null, + system_prompt: '', + function_schemas: [], + }; + expect(parseRunRequest(null)).toEqual(empty); + expect(parseRunRequest(undefined)).toEqual(empty); + }); +}); + +describe('parseRunRequest function_schemas', () => { + it('defaults to [] and carries an array', () => { + expect(parseRunRequest({}).function_schemas).toEqual([]); + expect(parseRunRequest({ function_schemas: [{ name: 'x' }] }).function_schemas).toHaveLength(1); + }); }); diff --git a/harness/tests/turn-orchestrator/run-start.test.ts b/harness/tests/turn-orchestrator/run-start.test.ts index 7e08a4ad..937344ce 100644 --- a/harness/tests/turn-orchestrator/run-start.test.ts +++ b/harness/tests/turn-orchestrator/run-start.test.ts @@ -151,8 +151,8 @@ describe('execute', () => { const turnStateSet = calls.find( (c) => c.function_id === 'state::set' && - (c.payload as { scope?: string; key?: string }).scope === 'agent' && - (c.payload as { scope?: string; key?: string }).key === 'session/sess-1/turn_state', + (c.payload as { 
scope?: string; key?: string }).scope === 'turn_state' && + (c.payload as { scope?: string; key?: string }).key === 'sess-1', ); expect(turnStateSet).toBeDefined(); expect((turnStateSet?.payload as { value: { state: string } }).value.state).toBe( diff --git a/harness/tests/turn-orchestrator/run-transition.test.ts b/harness/tests/turn-orchestrator/run-transition.test.ts index b25761fb..b3c61e92 100644 --- a/harness/tests/turn-orchestrator/run-transition.test.ts +++ b/harness/tests/turn-orchestrator/run-transition.test.ts @@ -1,12 +1,14 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { TransientError } from '../../src/turn-orchestrator/errors.js'; +import { TURN_STATE_SCOPE } from '../../src/turn-orchestrator/state.js'; import { runTransition } from '../../src/turn-orchestrator/run-transition.js'; import { type TurnStateRecord, newRecord, transitionTo, } from '../../src/turn-orchestrator/state.js'; +import { installMockTurnStore } from './_helpers/mockTurnStore.js'; afterEach(() => { vi.restoreAllMocks(); @@ -14,7 +16,7 @@ afterEach(() => { describe('runTransition', () => { it('throws when the session record is missing, without running the handler', async () => { - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(null); + installMockTurnStore({ loadRecord: vi.fn(async () => null) }); const handle = vi.fn(); await expect( @@ -25,22 +27,20 @@ describe('runTransition', () => { it('returns a stale skip without running the handler or saving', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); - const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); + const store = installMockTurnStore({ loadRecord: vi.fn(async () => rec) }); const handle = vi.fn(); const result = await runTransition({} as 
ISdk, 'provisioning', handle, { session_id: 's1' }); expect(result).toEqual({ ok: true, skipped: true, reason: 'stale' }); expect(handle).not.toHaveBeenCalled(); - expect(saveRecord).not.toHaveBeenCalled(); + expect(store.saveRecord).not.toHaveBeenCalled(); }); it('runs the handler and threads the pre-mutation snapshot into saveRecord', async () => { const iii = {} as ISdk; const rec: TurnStateRecord = { ...newRecord('s1'), state: 'provisioning' }; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); - const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); + const store = installMockTurnStore({ loadRecord: vi.fn(async () => rec) }); const handle = vi.fn(async (_iii: ISdk, r: TurnStateRecord) => { transitionTo(r, 'assistant_streaming'); }); @@ -48,8 +48,7 @@ describe('runTransition', () => { const result = await runTransition(iii, 'provisioning', handle, { session_id: 's1' }); expect(handle).toHaveBeenCalledWith(iii, rec); - expect(saveRecord).toHaveBeenCalledWith( - iii, + expect(store.saveRecord).toHaveBeenCalledWith( rec, expect.objectContaining({ state: 'provisioning' }), ); @@ -64,10 +63,12 @@ describe('runTransition', () => { const iii = {} as ISdk; const rec: TurnStateRecord = { ...newRecord('s1'), state: 'function_execute' }; rec.awaiting_approval = []; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); let captured: TurnStateRecord | null | undefined; - vi.spyOn(persistence, 'saveRecord').mockImplementation(async (_i, _r, previous) => { - captured = previous; + installMockTurnStore({ + loadRecord: vi.fn(async () => rec), + saveRecord: vi.fn(async (_r, previous) => { + captured = previous; + }), }); const handle = vi.fn(async (_iii: ISdk, r: TurnStateRecord) => { r.awaiting_approval?.push({ function_call_id: 'fc-1', function_id: 'f', args: {} }); @@ -76,23 +77,98 @@ describe('runTransition', () => { await runTransition(iii, 'function_execute', handle, { session_id: 's1' }); - // The snapshot reflects state BEFORE the 
handler ran, even though the - // handler mutated rec.awaiting_approval in place. expect(captured?.state).toBe('function_execute'); expect(captured?.awaiting_approval).toEqual([]); }); - it('wraps handler failures as transition errors tagged with the from-state', async () => { + it('routes an unexpected handler throw to failed without re-throwing', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'steering_check' }; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); - const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); + const store = installMockTurnStore({ + loadRecord: vi.fn(async () => rec), + loadMessages: vi.fn(async () => []), + }); const handle = vi.fn(async () => { throw new Error('boom'); }); + const result = await runTransition({} as ISdk, 'steering_check', handle, { session_id: 's1' }); + expect(result).toMatchObject({ ok: true, to_state: 'failed' }); + expect(store.saveRecord).toHaveBeenCalled(); + }); +}); + +function fakeIii(record: unknown) { + const writes: Array<{ function_id: string; payload: any }> = []; + const iii = { + trigger: vi.fn(async ({ function_id, payload }: any) => { + writes.push({ function_id, payload }); + if ( + function_id === 'state::get' && + payload.scope === TURN_STATE_SCOPE && + payload.key === 's1' + ) { + return record; + } + return null; + }), + } as any; + return { iii, writes }; +} + +describe('runTransition error model', () => { + const base = { + session_id: 's1', + state: 'function_execute', + turn_count: 1, + function_results: [], + turn_end_emitted: false, + started_at_ms: 1, + updated_at_ms: 1, + }; + + it('routes an unexpected throw to failed and does not re-throw', async () => { + const { iii, writes } = fakeIii({ ...base }); + const res = await runTransition( + iii, + 'function_execute', + async () => { + throw new Error('boom'); + }, + { session_id: 's1' }, + ); + expect(res).toMatchObject({ ok: true, to_state: 'failed' }); + const saved = writes.find( 
+ (w) => + w.function_id === 'state::set' && + w.payload.scope === TURN_STATE_SCOPE && + w.payload.key === 's1', + ); + expect(saved?.payload.value.state).toBe('failed'); + expect(saved?.payload.value.error.message).toContain('boom'); + const surfaced = writes.some( + (w) => + w.function_id === 'stream::set' && + w.payload.data?.type === 'message_complete' && + w.payload.data?.message?.stop_reason === 'error', + ); + expect(surfaced).toBe(true); + const ended = writes.some( + (w) => w.function_id === 'stream::set' && w.payload.data?.type === 'agent_end', + ); + expect(ended).toBe(true); + }); + + it('re-throws TransientError so the queue retries', async () => { + const { iii } = fakeIii({ ...base }); await expect( - runTransition({} as ISdk, 'steering_check', handle, { session_id: 's1' }), - ).rejects.toThrow('transition from steering_check failed: Error: boom'); - expect(saveRecord).not.toHaveBeenCalled(); + runTransition( + iii, + 'function_execute', + async () => { + throw new TransientError('retry me'); + }, + { session_id: 's1' }, + ), + ).rejects.toThrow('retry me'); }); }); diff --git a/harness/tests/turn-orchestrator/state.test.ts b/harness/tests/turn-orchestrator/state.test.ts index 574b6d57..6d341a46 100644 --- a/harness/tests/turn-orchestrator/state.test.ts +++ b/harness/tests/turn-orchestrator/state.test.ts @@ -1,83 +1,102 @@ import { describe, expect, it } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import type { - AwaitingApprovalEntry, - TurnState, - TurnStateRecord, -} from '../../src/turn-orchestrator/state.js'; +import { TurnStateInvariantError } from '../../src/turn-orchestrator/errors.js'; +import { + parseAssistantStreamingRecord, + parseFunctionBatchRecord, + parseSteeringCheckRecord, +} from '../../src/turn-orchestrator/schemas.js'; import { - isTerminal, - messagesKey, + type TurnStateRecord, newRecord, transitionTo, - turnStateKey, } from '../../src/turn-orchestrator/state.js'; -import { handleAwaitingApproval } 
from '../../src/turn-orchestrator/states/function-awaiting-approval.js'; +import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; describe('TurnStateRecord', () => { - it('starts in provisioning', () => { + it('starts in provisioning with no work and the given max_turns', () => { const r = newRecord('s1', 32); expect(r.state).toBe('provisioning'); expect(r.session_id).toBe('s1'); expect(r.max_turns).toBe(32); - expect(isTerminal(r)).toBe(false); + expect(r.work).toBeUndefined(); }); it('transitionTo stopped marks terminal', () => { const r = newRecord('s1'); transitionTo(r, 'stopped'); - expect(isTerminal(r)).toBe(true); + expect(r.state).toBe('stopped'); + }); + + it('awaiting_approval defaults to undefined on fresh records', () => { + const rec: TurnStateRecord = newRecord('s1'); + expect(rec.awaiting_approval).toBeUndefined(); }); }); -describe('function_awaiting_approval state', () => { - it('accepts function_awaiting_approval as a TurnState value', () => { +describe('parseFunctionBatchRecord', () => { + const asst: AssistantMessage = { + role: 'assistant', + content: [], + stop_reason: 'function_call', + error_message: null, + error_kind: null, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, + }; + + it('returns a validated record when function-batch fields are present', () => { const rec = newRecord('s1'); - transitionTo(rec, 'function_awaiting_approval' as TurnState); - expect(rec.state).toBe('function_awaiting_approval'); + enterFunctionExecute(rec, asst); + rec.state = 'function_execute'; + const batch = parseFunctionBatchRecord(rec); + expect(batch.work).toBeDefined(); + expect(batch.awaiting_approval).toEqual([]); }); - it('is non-terminal', () => { + it('throws TurnStateInvariantError when last_assistant is missing', () => { const rec = newRecord('s1'); - transitionTo(rec, 'function_awaiting_approval' as TurnState); - 
expect(isTerminal(rec)).toBe(false); + rec.state = 'function_execute'; + rec.work = { prepared: [], executed: {} }; + rec.awaiting_approval = []; + expect(() => parseFunctionBatchRecord(rec)).toThrow(TurnStateInvariantError); }); }); -describe('awaiting_approval field', () => { - it('defaults to undefined on fresh records', () => { - const rec: TurnStateRecord = newRecord('s1'); - expect(rec.awaiting_approval).toBeUndefined(); +describe('parseAssistantStreamingRecord', () => { + it('returns a validated record for assistant_streaming', () => { + const rec = newRecord('s1'); + rec.state = 'assistant_streaming'; + const streaming = parseAssistantStreamingRecord(rec); + expect(streaming.state).toBe('assistant_streaming'); + expect(streaming.function_results).toEqual([]); }); - it('accepts AwaitingApprovalEntry items', () => { - const rec: TurnStateRecord = newRecord('s1'); - const entry: AwaitingApprovalEntry = { - function_call_id: 'fc-1', - function_id: 'shell::run', - args: { command: 'ls' }, - }; - rec.awaiting_approval = [entry]; - expect(rec.awaiting_approval).toHaveLength(1); - expect(rec.awaiting_approval[0].function_call_id).toBe('fc-1'); + it('throws TurnStateInvariantError when session_id is missing', () => { + const rec = { state: 'assistant_streaming' } as TurnStateRecord; + expect(() => parseAssistantStreamingRecord(rec)).toThrow(TurnStateInvariantError); }); -}); -describe('handleAwaitingApproval with empty queue', () => { - it('advances to function_execute when awaiting_approval is empty', async () => { + it('throws TurnStateInvariantError when state is wrong', () => { const rec = newRecord('s1'); - transitionTo(rec, 'function_awaiting_approval'); - rec.awaiting_approval = []; - - await handleAwaitingApproval({} as ISdk, rec); - - expect(rec.state).toBe('function_execute'); + rec.state = 'provisioning'; + expect(() => parseAssistantStreamingRecord(rec)).toThrow(TurnStateInvariantError); }); }); -describe('state keys', () => { - it('namespace by 
session', () => { - expect(turnStateKey('abc')).toBe('session/abc/turn_state'); - expect(messagesKey('abc')).toBe('session/abc/messages'); +describe('parseSteeringCheckRecord', () => { + it('returns a validated record for steering_check', () => { + const rec = newRecord('s1'); + rec.state = 'steering_check'; + const steering = parseSteeringCheckRecord(rec); + expect(steering.state).toBe('steering_check'); + expect(steering.function_results).toEqual([]); + }); + + it('throws TurnStateInvariantError when session_id is missing', () => { + const rec = { state: 'steering_check' } as TurnStateRecord; + expect(() => parseSteeringCheckRecord(rec)).toThrow(TurnStateInvariantError); }); }); diff --git a/harness/tests/turn-orchestrator/steering-check-layer.test.ts b/harness/tests/turn-orchestrator/steering-check-layer.test.ts new file mode 100644 index 00000000..78d99b67 --- /dev/null +++ b/harness/tests/turn-orchestrator/steering-check-layer.test.ts @@ -0,0 +1,176 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { AgentMessage } from '../../src/types/agent-message.js'; +import { + applySteeringCheckOutcome, + processSteeringCheck, +} from '../../src/turn-orchestrator/steering-check/run.js'; +import { parseDrainItems } from '../../src/turn-orchestrator/steering-check/ports.js'; +import type { SteeringCheckPorts } from '../../src/turn-orchestrator/steering-check/ports.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; + +function userMessage(text: string): AgentMessage { + return { role: 'user', content: [{ type: 'text', text }] }; +} + +function stubPorts(overrides: Partial = {}): SteeringCheckPorts { + return { + drainInbox: vi.fn(async () => []), + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + checkpoint: vi.fn(async () => {}), + loadRunRequest: vi.fn(async () => ({ + provider: 'openai', + model: 'gpt-4', + mode: null, + system_prompt: '', + function_schemas: [], + })), + saveRunRequest: vi.fn(async 
() => {}), + emitTurnEnd: vi.fn(async () => {}), + finishSession: vi.fn(async (rec) => { + rec.state = 'stopped'; + }), + emit: vi.fn(async () => {}), + ...overrides, + }; +} + +describe('parseDrainItems', () => { + it('returns items array when present', () => { + const items = [userMessage('hello')]; + expect(parseDrainItems({ items })).toEqual(items); + }); + + it('returns empty array for invalid shapes', () => { + expect(parseDrainItems(null)).toEqual([]); + expect(parseDrainItems({})).toEqual([]); + expect(parseDrainItems({ items: 'bad' })).toEqual([]); + }); +}); + +describe('processSteeringCheck', () => { + it('returns resume_with_inbox for steering messages', async () => { + const steeringItems = [userMessage('steer')]; + const ports = stubPorts({ + drainInbox: vi.fn(async (name) => (name === 'steering' ? steeringItems : [])), + }); + const rec = { ...newRecord('s1'), state: 'steering_check' as const }; + + const outcome = await processSteeringCheck(ports, rec); + + expect(outcome).toEqual({ kind: 'resume_with_inbox', inbox: steeringItems }); + expect(ports.drainInbox).toHaveBeenCalledTimes(1); + }); + + it('drains followup only when steering is empty', async () => { + const followupItems = [userMessage('follow')]; + const drainInbox = vi.fn(async (name: 'steering' | 'followup') => + name === 'followup' ? 
followupItems : [], + ); + const ports = stubPorts({ drainInbox }); + const rec = { ...newRecord('s1'), state: 'steering_check' as const }; + + const outcome = await processSteeringCheck(ports, rec); + + expect(outcome).toEqual({ kind: 'resume_with_inbox', inbox: followupItems }); + expect(drainInbox).toHaveBeenCalledWith('steering', 's1'); + expect(drainInbox).toHaveBeenCalledWith('followup', 's1'); + }); + + it('returns continue_after_function when function_results present', async () => { + const ports = stubPorts(); + const rec = { + ...newRecord('s1'), + state: 'steering_check' as const, + function_results: [{ role: 'function_result', content: [] }] as never, + }; + + const outcome = await processSteeringCheck(ports, rec); + + expect(outcome).toEqual({ kind: 'continue_after_function' }); + }); + + it('returns max_turns_reached when cap hit on continue path', async () => { + const ports = stubPorts(); + const rec = { + ...newRecord('s1'), + state: 'steering_check' as const, + max_turns: 2, + turn_count: 2, + function_results: [{ role: 'function_result', content: [] }] as never, + }; + + const outcome = await processSteeringCheck(ports, rec); + + expect(outcome).toEqual({ kind: 'max_turns_reached' }); + }); + + it('returns end_turn when no steering, followup, or function results', async () => { + const ports = stubPorts(); + const rec = { ...newRecord('s1'), state: 'steering_check' as const }; + + const outcome = await processSteeringCheck(ports, rec); + + expect(outcome).toEqual({ kind: 'end_turn' }); + }); +}); + +describe('applySteeringCheckOutcome', () => { + it('resume_with_inbox: emits turn_end, saves messages, clears function_results', async () => { + const inbox = [userMessage('new')]; + const emitTurnEnd = vi.fn(async () => {}); + const appendMessages = vi.fn(async () => {}); + const ports = stubPorts({ + emitTurnEnd, + appendMessages, + }); + const rec = { + ...newRecord('s1'), + state: 'steering_check' as const, + function_results: [{ role: 
'function_result', content: [] }] as never, + }; + + await applySteeringCheckOutcome(ports, rec, { kind: 'resume_with_inbox', inbox }); + + expect(rec.state).toBe('assistant_streaming'); + expect(rec.function_results).toEqual([]); + expect(rec.turn_end_emitted).toBe(true); + expect(emitTurnEnd).toHaveBeenCalledWith('s1', expect.anything(), []); + expect(appendMessages).toHaveBeenCalledWith('s1', inbox); + }); + + it('continue_after_function: transitions without loading messages', async () => { + const loadMessages = vi.fn(async () => []); + const emitTurnEnd = vi.fn(async () => {}); + const ports = stubPorts({ loadMessages, emitTurnEnd }); + const rec = { + ...newRecord('s1'), + state: 'steering_check' as const, + function_results: [{ role: 'function_result', content: [] }] as never, + turn_end_emitted: true, + }; + + await applySteeringCheckOutcome(ports, rec, { kind: 'continue_after_function' }); + + expect(rec.state).toBe('assistant_streaming'); + expect(rec.function_results).toEqual([]); + expect(loadMessages).not.toHaveBeenCalled(); + expect(emitTurnEnd).not.toHaveBeenCalled(); + }); + + it('end_turn: emits turn_end and finishes session', async () => { + const emitTurnEnd = vi.fn(async () => {}); + const finishSession = vi.fn(async (rec) => { + rec.state = 'stopped'; + }); + const ports = stubPorts({ emitTurnEnd, finishSession }); + const rec = { ...newRecord('s1'), state: 'steering_check' as const }; + + await applySteeringCheckOutcome(ports, rec, { kind: 'end_turn' }); + + expect(rec.state).toBe('stopped'); + expect(rec.turn_end_emitted).toBe(true); + expect(emitTurnEnd).toHaveBeenCalledWith('s1', expect.anything(), []); + expect(finishSession).toHaveBeenCalled(); + }); +}); diff --git a/harness/tests/turn-orchestrator/steering.test.ts b/harness/tests/turn-orchestrator/steering.test.ts index 28477cb1..a1fb9a14 100644 --- a/harness/tests/turn-orchestrator/steering.test.ts +++ b/harness/tests/turn-orchestrator/steering.test.ts @@ -2,13 +2,10 @@ import { 
afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import type { AgentMessage } from '../../src/types/agent-message.js'; import * as events from '../../src/turn-orchestrator/events.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; -import { - abortSignalKey, - newRecord, - type TurnStateRecord, -} from '../../src/turn-orchestrator/state.js'; -import { handleSteering, route } from '../../src/turn-orchestrator/states/steering-check.js'; +import { installMockTurnStore } from './_helpers/mockTurnStore.js'; +import { newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; +import { handleSteering } from '../../src/turn-orchestrator/steering-check/process.js'; +import { route } from '../../src/turn-orchestrator/steering-check/run.js'; afterEach(() => { vi.restoreAllMocks(); @@ -16,16 +13,14 @@ afterEach(() => { describe('steering route()', () => { it.each([ - [true, true, true, true, 'abort'], - [true, false, false, false, 'abort'], - [false, true, true, true, 'steering'], - [false, true, false, false, 'steering'], - [false, false, true, true, 'followup'], - [false, false, true, false, 'followup'], - [false, false, false, true, 'continue_after_function'], - [false, false, false, false, 'end_turn'], - ] as const)('route(%s, %s, %s, %s) -> %s', (abort, has_steering, has_followup, has_function_results, expected) => { - expect(route(abort, has_steering, has_followup, has_function_results)).toBe(expected); + [true, true, true, 'steering'], + [true, false, false, 'steering'], + [false, true, true, 'followup'], + [false, true, false, 'followup'], + [false, false, true, 'continue_after_function'], + [false, false, false, 'end_turn'], + ] as const)('route(%s, %s, %s) -> %s', (has_steering, has_followup, has_function_results, expected) => { + expect(route(has_steering, has_followup, has_function_results)).toBe(expected); }); }); @@ -33,19 +28,13 @@ function 
userMessage(text: string): AgentMessage { return { role: 'user', content: [{ type: 'text', text }] }; } -function makeIii( - opts: { abort?: boolean; steeringItems?: AgentMessage[]; followupItems?: AgentMessage[] } = {}, -) { - const { abort = false, steeringItems = [], followupItems = [] } = opts; +function makeIii(opts: { steeringItems?: AgentMessage[]; followupItems?: AgentMessage[] } = {}) { + const { steeringItems = [], followupItems = [] } = opts; const drainCalls: Array<{ name: string; session_id: string }> = []; const iii = { trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - if (req.function_id === 'state::get') { - const p = req.payload as { key: string }; - if (p.key.endsWith('/abort_signal')) return abort ? true : null; - return null; - } + if (req.function_id === 'state::get') return null; if (req.function_id === 'session-inbox::drain') { const p = req.payload as { name: string; session_id: string }; drainCalls.push(p); @@ -72,58 +61,14 @@ function steeringRec( } describe('handleSteering', () => { - it('abort: persists aborted assistant, emits turn_end, transitions to tearing_down', async () => { - const { iii } = makeIii({ abort: true }); - const rec = steeringRec('s1'); - const loadSpy = vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleSteering(iii, rec); - - expect(rec.state).toBe('tearing_down'); - expect(rec.turn_end_emitted).toBe(true); - expect(rec.last_assistant?.stop_reason).toBe('aborted'); - expect(loadSpy).toHaveBeenCalledWith(iii, 's1'); - expect(saveSpy).toHaveBeenCalledWith( - iii, - 's1', - expect.arrayContaining([expect.objectContaining({ stop_reason: 'aborted' })]), - ); - expect(emitSpy).toHaveBeenCalledWith( - iii, - 's1', - expect.objectContaining({ - type: 'turn_end', - message: expect.objectContaining({ stop_reason: 
'aborted' }), - }), - ); - }); - - it('abort: skips inbox drains', async () => { - const { iii, drainCalls } = makeIii({ - abort: true, - steeringItems: [userMessage('steer')], - followupItems: [userMessage('follow')], - }); - const rec = steeringRec('s1'); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleSteering(iii, rec); - - expect(drainCalls).toHaveLength(0); - }); - it('steering: appends drained messages and transitions to assistant_streaming', async () => { const steeringItems = [userMessage('steer-me')]; const { iii } = makeIii({ steeringItems }); const rec = steeringRec('s1', { function_results: [{ role: 'function_result', content: [] }] as never, }); - const loadSpy = vi.spyOn(persistence, 'loadMessages').mockResolvedValue([userMessage('prior')]); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + const store = installMockTurnStore(); + const appendSpy = store.appendMessages; vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleSteering(iii, rec); @@ -131,23 +76,23 @@ describe('handleSteering', () => { expect(rec.state).toBe('assistant_streaming'); expect(rec.function_results).toEqual([]); expect(rec.turn_end_emitted).toBe(true); - expect(saveSpy).toHaveBeenCalledWith(iii, 's1', [userMessage('prior'), ...steeringItems]); - expect(loadSpy).toHaveBeenCalled(); + expect(appendSpy).toHaveBeenCalledWith('s1', steeringItems); + expect(store.loadMessages).not.toHaveBeenCalled(); }); it('followup: drains followup when steering queue is empty', async () => { const followupItems = [userMessage('follow-up')]; const { iii, drainCalls } = makeIii({ followupItems }); const rec = steeringRec('s1'); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + const store = 
installMockTurnStore(); + const appendSpy = store.appendMessages; vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleSteering(iii, rec); expect(rec.state).toBe('assistant_streaming'); expect(drainCalls.map((c) => c.name)).toEqual(['steering', 'followup']); - expect(saveSpy).toHaveBeenCalledWith(iii, 's1', followupItems); + expect(appendSpy).toHaveBeenCalledWith('s1', followupItems); }); it('followup: skipped when steering queue has items', async () => { @@ -156,8 +101,7 @@ describe('handleSteering', () => { followupItems: [userMessage('follow')], }); const rec = steeringRec('s1'); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + installMockTurnStore(); vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleSteering(iii, rec); @@ -172,45 +116,96 @@ describe('handleSteering', () => { function_results: [{ role: 'function_result', content: [] }] as never, turn_end_emitted: true, }); - const loadSpy = vi.spyOn(persistence, 'loadMessages'); + const store = installMockTurnStore(); const emitSpy = vi.spyOn(events, 'emit'); await handleSteering(iii, rec); expect(rec.state).toBe('assistant_streaming'); expect(rec.function_results).toEqual([]); - expect(loadSpy).not.toHaveBeenCalled(); + expect(store.loadMessages).not.toHaveBeenCalled(); expect(emitSpy).not.toHaveBeenCalled(); }); - it('end_turn: emits turn_end once and transitions to tearing_down', async () => { + it('end_turn: emits turn_end then finishes the session (agent_end + stopped)', async () => { const { iii } = makeIii(); const rec = steeringRec('s1'); + installMockTurnStore({ loadMessages: vi.fn(async () => []) }); const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - const loadSpy = vi.spyOn(persistence, 'loadMessages'); await handleSteering(iii, rec); - expect(rec.state).toBe('tearing_down'); + expect(rec.state).toBe('stopped'); expect(rec.turn_end_emitted).toBe(true); 
expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'turn_end' })); - expect(loadSpy).not.toHaveBeenCalled(); + expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'agent_end' })); }); - it('reads abort via state::get on abort_signal key', async () => { - const { iii } = makeIii({ abort: true }); - const rec = steeringRec('s1'); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(events, 'emit').mockResolvedValue(undefined); + it('caps at max_turns: emits a max_turns assistant + message_complete + turn_end and tears down instead of continuing', async () => { + const { iii } = makeIii(); + const rec = steeringRec('s1', { + max_turns: 2, + turn_count: 2, + function_results: [{ role: 'function_result', content: [] }] as never, + }); + const store = installMockTurnStore({ loadMessages: vi.fn(async () => []) }); + const appendSpy = store.appendMessages; + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleSteering(iii, rec); - expect(iii.trigger).toHaveBeenCalledWith( + expect(rec.state).toBe('stopped'); + expect(rec.turn_end_emitted).toBe(true); + expect(rec.last_assistant?.content[0]).toEqual( + expect.objectContaining({ type: 'text', text: expect.stringContaining('max_turns') }), + ); + expect(emitSpy).toHaveBeenCalledWith( + iii, + 's1', + expect.objectContaining({ type: 'message_complete' }), + ); + expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'turn_end' })); + expect(store.loadMessages).toHaveBeenCalledWith('s1'); + expect(appendSpy).toHaveBeenCalledWith('s1', [ expect.objectContaining({ - function_id: 'state::get', - payload: { scope: 'agent', key: abortSignalKey('s1') }, + content: expect.arrayContaining([ + expect.objectContaining({ text: expect.stringContaining('max_turns') }), + ]), }), + ]); + }); + + it('caps at max_turns via steering route: 
tears down instead of continuing to assistant_streaming', async () => { + const { iii } = makeIii({ steeringItems: [userMessage('steer-me')] }); + const rec = steeringRec('s1', { + max_turns: 3, + turn_count: 3, + }); + installMockTurnStore({ loadMessages: vi.fn(async () => []) }); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleSteering(iii, rec); + + expect(rec.state).toBe('stopped'); + expect(rec.turn_end_emitted).toBe(true); + expect(rec.last_assistant?.content[0]).toEqual( + expect.objectContaining({ text: expect.stringContaining('max_turns') }), ); }); + + it('continues to assistant_streaming when under max_turns (continue_after_function route)', async () => { + const { iii } = makeIii(); + const rec = steeringRec('s1', { + max_turns: 5, + turn_count: 2, + function_results: [{ role: 'function_result', content: [] }] as never, + }); + installMockTurnStore(); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleSteering(iii, rec); + + expect(rec.state).toBe('assistant_streaming'); + expect(rec.function_results).toEqual([]); + }); }); diff --git a/harness/tests/turn-orchestrator/store.test.ts b/harness/tests/turn-orchestrator/store.test.ts new file mode 100644 index 00000000..6712c714 --- /dev/null +++ b/harness/tests/turn-orchestrator/store.test.ts @@ -0,0 +1,113 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { + createTurnStore, + parseFlatMessages, + shouldWakeStep, +} from '../../src/turn-orchestrator/state-runtime/store.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; + +describe('parseFlatMessages', () => { + it('returns the array when messages are objects', () => { + const messages = [{ role: 'user', content: [], timestamp: 1 }]; + expect(parseFlatMessages(messages)).toEqual(messages); + }); + + it('returns [] for null, undefined, and non-arrays', () => { + expect(parseFlatMessages(null)).toEqual([]); + 
expect(parseFlatMessages(undefined)).toEqual([]); + expect(parseFlatMessages('bad')).toEqual([]); + expect(parseFlatMessages({})).toEqual([]); + }); +}); + +function fakeIii(): { iii: ISdk; emits: Array<{ session_id: string; event: unknown }> } { + const emits: Array<{ session_id: string; event: unknown }> = []; + const iii = { + trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { + if (function_id === 'stream::set') { + const p = payload as { group_id: string; data: unknown }; + emits.push({ session_id: p.group_id, event: p.data }); + return null; + } + if (function_id === 'state::set') { + return { old_value: null, new_value: (payload as { value: unknown }).value }; + } + if (function_id === 'state::update') { + return { old_value: 0 }; + } + return null; + }), + } as unknown as ISdk; + return { iii, emits }; +} + +describe('saveRecord turn_state_changed emission', () => { + it('emits turn_state_changed on agent::events with group_id = session_id', async () => { + const { iii, emits } = fakeIii(); + const store = createTurnStore(iii); + const rec = newRecord('sess-a'); + rec.state = 'function_awaiting_approval'; + const previous = { ...rec, state: 'function_execute' as const }; + + await store.saveRecord(rec, previous); + + expect(emits).toHaveLength(1); + expect(emits[0]?.session_id).toBe('sess-a'); + expect(emits[0]?.event).toMatchObject({ + type: 'turn_state_changed', + event_type: 'state:updated', + new_value: { state: 'function_awaiting_approval' }, + old_value: { state: 'function_execute' }, + }); + }); + + it('swallows emit failures (logs only, never rethrows)', async () => { + const iii = { + trigger: vi.fn(async () => { + throw new Error('stream::set down'); + }), + } as unknown as ISdk; + const store = createTurnStore(iii); + const rec = newRecord('sess-a'); + await expect(store.saveRecord(rec)).resolves.toBeUndefined(); + }); + + it('omits old_value from the emitted event when state:created', async () => { + 
const { iii, emits } = fakeIii(); + const store = createTurnStore(iii); + const rec = newRecord('sess-a'); + rec.state = 'provisioning'; + + await store.saveRecord(rec); + + expect(emits).toHaveLength(1); + const event = emits[0]?.event as Record; + expect(event.type).toBe('turn_state_changed'); + expect(event.event_type).toBe('state:created'); + expect('old_value' in event).toBe(false); + }); +}); + +describe('shouldWakeStep', () => { + it('accepts first write to a stepable state', () => { + expect(shouldWakeStep(null, 'provisioning')).toBe(true); + }); + + it('accepts transitions to another stepable state', () => { + expect(shouldWakeStep('provisioning', 'assistant_streaming')).toBe(true); + expect(shouldWakeStep('assistant_streaming', 'function_execute')).toBe(true); + }); + + it('rejects terminal state (stopped)', () => { + expect(shouldWakeStep('steering_check', 'stopped')).toBe(false); + }); + + it('rejects function_awaiting_approval (orchestrator parks here)', () => { + expect(shouldWakeStep('function_execute', 'function_awaiting_approval')).toBe(false); + }); + + it('rejects same-state writes', () => { + expect(shouldWakeStep('function_execute', 'function_execute')).toBe(false); + }); +}); diff --git a/harness/tests/turn-orchestrator/system-prompt.test.ts b/harness/tests/turn-orchestrator/system-prompt.test.ts index d7473af2..da7ac12f 100644 --- a/harness/tests/turn-orchestrator/system-prompt.test.ts +++ b/harness/tests/turn-orchestrator/system-prompt.test.ts @@ -1,35 +1,38 @@ import { describe, expect, it } from 'vitest'; -import { buildSystemPrompt, defaultSkillBody } from '../../src/turn-orchestrator/system-prompt.js'; +import { + buildSystemPrompt, + defaultSkillBody, + skillIdFromUri, +} from '../../src/turn-orchestrator/system-prompt.js'; describe('buildSystemPrompt', () => { it('non-empty override returns verbatim', () => { - expect(buildSystemPrompt([defaultSkillBody('iii://iii', 'body')], '/tmp', 'custom')).toBe( + 
expect(buildSystemPrompt([defaultSkillBody('iii://iii', 'body')], { override: 'custom' })).toBe( 'custom', ); }); it('empty override falls through to canonical assembly', () => { - const out = buildSystemPrompt([defaultSkillBody('iii://iii', 'BODY')], '/tmp', ''); + const out = buildSystemPrompt([defaultSkillBody('iii://iii', 'BODY')], { override: '' }); expect(out).toContain('You are an iii agent worker'); - expect(out).toContain('/tmp'); expect(out).toContain('BODY'); }); it('failed skill produces recovery stub with bare id', () => { - const out = buildSystemPrompt([defaultSkillBody('iii://iii', null)], null); + const out = buildSystemPrompt([defaultSkillBody('iii://iii', null)]); expect(out).toContain('# iii://iii'); expect(out).toContain('directory::skills::get { id: "iii" }'); }); it('preamble identity preserved', () => { - const out = buildSystemPrompt([], null); + const out = buildSystemPrompt([]); expect(out).toContain('You are an iii agent worker.'); expect(out).toContain('agent_trigger'); expect(out).toContain('directory::skills::get'); }); it('preamble teaches the @fn() pill syntax', () => { - const out = buildSystemPrompt([], null); + const out = buildSystemPrompt([]); expect(out).toContain('@fn()'); expect(out).toContain('@fn(directory::skills::get)'); }); @@ -39,22 +42,22 @@ describe('buildSystemPrompt', () => { // index straight to a function call, guess field names, and burn // turns on retries. The preamble must explicitly tell them to fetch // the per-function skill body first. 
- const out = buildSystemPrompt([], null); + const out = buildSystemPrompt([]); expect(out).toContain('FIRST time'); expect(out).toContain('/'); expect(out).toContain('sandbox/exec'); }); it('skills appear in config order', () => { - const out = buildSystemPrompt( - [defaultSkillBody('iii://iii', 'AAA'), defaultSkillBody('iii://shell', 'BBB')], - null, - ); + const out = buildSystemPrompt([ + defaultSkillBody('iii://iii', 'AAA'), + defaultSkillBody('iii://shell', 'BBB'), + ]); expect(out.indexOf('AAA')).toBeLessThan(out.indexOf('BBB')); }); it('mode plan prepends planner paragraph before identity preamble', () => { - const out = buildSystemPrompt([], null, null, 'plan'); + const out = buildSystemPrompt([], { mode: 'plan' }); expect(out).toContain('operating in plan mode'); expect(out.indexOf('operating in plan mode')).toBeLessThan( out.indexOf('You are an iii agent worker'), @@ -62,7 +65,7 @@ describe('buildSystemPrompt', () => { }); it('mode ask prepends ask paragraph before identity preamble', () => { - const out = buildSystemPrompt([], null, null, 'ask'); + const out = buildSystemPrompt([], { mode: 'ask' }); expect(out).toContain('operating in ask mode'); expect(out.indexOf('operating in ask mode')).toBeLessThan( out.indexOf('You are an iii agent worker'), @@ -70,7 +73,7 @@ describe('buildSystemPrompt', () => { }); it('mode agent prepends agent paragraph before identity preamble', () => { - const out = buildSystemPrompt([], null, null, 'agent'); + const out = buildSystemPrompt([], { mode: 'agent' }); expect(out).toContain('operating in agent mode'); expect(out.indexOf('operating in agent mode')).toBeLessThan( out.indexOf('You are an iii agent worker'), @@ -78,7 +81,7 @@ describe('buildSystemPrompt', () => { }); it('omitting mode preserves the canonical preamble verbatim (no mode paragraph)', () => { - const out = buildSystemPrompt([], null); + const out = buildSystemPrompt([]); expect(out.startsWith('You are an iii agent worker')).toBe(true); 
expect(out).not.toContain('operating in plan mode'); expect(out).not.toContain('operating in ask mode'); @@ -86,30 +89,23 @@ describe('buildSystemPrompt', () => { }); it('mode null behaves like omitted (backwards compat for non-console callers)', () => { - const out = buildSystemPrompt([], null, null, null); + const out = buildSystemPrompt([], { mode: null }); expect(out.startsWith('You are an iii agent worker')).toBe(true); expect(out).not.toContain('operating in'); }); it('non-empty override wins over mode (override returned verbatim)', () => { - const out = buildSystemPrompt([], '/tmp', 'custom-override', 'plan'); + const out = buildSystemPrompt([], { override: 'custom-override', mode: 'plan' }); expect(out).toBe('custom-override'); }); - it('mode interacts with cwd and skills: paragraph, preamble, cwd, skill body in order', () => { - const out = buildSystemPrompt( - [defaultSkillBody('iii://iii', 'SKILLBODY')], - '/work', - null, - 'agent', - ); + it('mode interacts with skills: paragraph, preamble, skill body in order', () => { + const out = buildSystemPrompt([defaultSkillBody('iii://iii', 'SKILLBODY')], { mode: 'agent' }); const pAgent = out.indexOf('operating in agent mode'); const pIdentity = out.indexOf('You are an iii agent worker'); - const pCwd = out.indexOf('/work'); const pSkill = out.indexOf('SKILLBODY'); expect(pAgent).toBeLessThan(pIdentity); - expect(pIdentity).toBeLessThan(pCwd); - expect(pCwd).toBeLessThan(pSkill); + expect(pIdentity).toBeLessThan(pSkill); }); }); @@ -125,3 +121,10 @@ describe('defaultSkillBody', () => { expect(s.id).toBe('iii'); }); }); + +describe('skillIdFromUri', () => { + it('strips the iii:// scheme and passes bare ids through', () => { + expect(skillIdFromUri('iii://iii-directory/index')).toBe('iii-directory/index'); + expect(skillIdFromUri('iii-directory/index')).toBe('iii-directory/index'); + }); +}); diff --git a/harness/tests/turn-orchestrator/tearing-down.test.ts b/harness/tests/turn-orchestrator/tearing-down.test.ts 
deleted file mode 100644 index cc3ed045..00000000 --- a/harness/tests/turn-orchestrator/tearing-down.test.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import type { AgentMessage } from '../../src/types/agent-message.js'; -import * as events from '../../src/turn-orchestrator/events.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; -import { type TurnStateRecord, newRecord } from '../../src/turn-orchestrator/state.js'; -import { handleTearingDown } from '../../src/turn-orchestrator/states/tearing-down.js'; - -type TriggerCall = { function_id: string; payload: unknown; timeoutMs?: number }; - -function fakeIii(): { iii: ISdk; calls: TriggerCall[] } { - const calls: TriggerCall[] = []; - const iii = { - trigger: async (req: { - function_id: string; - payload: T; - timeoutMs?: number; - }): Promise => { - calls.push({ - function_id: req.function_id, - payload: req.payload, - timeoutMs: req.timeoutMs, - }); - return null as R; - }, - } as unknown as ISdk; - return { iii, calls }; -} - -afterEach(() => { - vi.restoreAllMocks(); -}); - -describe('handleTearingDown', () => { - it('transitions to stopped and emits agent_end with session messages', async () => { - const rec: TurnStateRecord = { ...newRecord('s1'), state: 'tearing_down' }; - const messages: AgentMessage[] = [{ role: 'user', content: 'hi' }]; - const { iii } = fakeIii(); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue(messages); - const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleTearingDown(iii, rec); - - expect(rec.state).toBe('stopped'); - expect(emitSpy).toHaveBeenCalledWith(iii, 's1', { type: 'agent_end', messages }); - }); -}); diff --git a/harness/tests/turn-orchestrator/turn-state-write.test.ts b/harness/tests/turn-orchestrator/turn-state-write.test.ts deleted file mode 100644 index 468500c1..00000000 --- 
a/harness/tests/turn-orchestrator/turn-state-write.test.ts +++ /dev/null @@ -1,62 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { emitTurnStateChanged } from '../../src/turn-orchestrator/turn-state-write.js'; - -function fakeIii(): { iii: ISdk; emits: Array<{ session_id: string; event: unknown }> } { - const emits: Array<{ session_id: string; event: unknown }> = []; - const iii = { - trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { - if (function_id === 'stream::set') { - const p = payload as { group_id: string; data: unknown }; - emits.push({ session_id: p.group_id, event: p.data }); - return null; - } - if (function_id === 'state::update') { - return { old_value: 0 }; - } - return null; - }), - } as unknown as ISdk; - return { iii, emits }; -} - -describe('emitTurnStateChanged', () => { - it('emits turn_state_changed on agent::events with group_id = session_id', async () => { - const { iii, emits } = fakeIii(); - await emitTurnStateChanged( - iii, - 'sess-a', - 'state:updated', - { state: 'function_awaiting_approval', awaiting_approval: [] }, - { state: 'function_execute', awaiting_approval: null }, - ); - expect(emits).toHaveLength(1); - expect(emits[0]?.session_id).toBe('sess-a'); - expect(emits[0]?.event).toMatchObject({ - type: 'turn_state_changed', - event_type: 'state:updated', - new_value: { state: 'function_awaiting_approval' }, - old_value: { state: 'function_execute' }, - }); - }); - - it('swallows emit failures (logs only, never rethrows)', async () => { - const iii = { - trigger: vi.fn(async () => { - throw new Error('stream::set down'); - }), - } as unknown as ISdk; - await expect( - emitTurnStateChanged(iii, 'sess-a', 'state:created', { state: 'provisioning' }), - ).resolves.toBeUndefined(); - }); - - it('omits old_value from the emitted event when state:created', async () => { - const { iii, emits } = fakeIii(); - await 
emitTurnStateChanged(iii, 'sess-a', 'state:created', { state: 'provisioning' }); - expect(emits).toHaveLength(1); - const event = emits[0]?.event as Record; - expect(event.type).toBe('turn_state_changed'); - expect('old_value' in event).toBe(false); - }); -}); diff --git a/harness/tests/turn-orchestrator/wake.test.ts b/harness/tests/turn-orchestrator/wake.test.ts deleted file mode 100644 index 115fa729..00000000 --- a/harness/tests/turn-orchestrator/wake.test.ts +++ /dev/null @@ -1,94 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import { TriggerAction } from '../../src/runtime/iii.js'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { newRecord } from '../../src/turn-orchestrator/state.js'; -import { shouldWakeStep, wakeFromRecord, wakeState } from '../../src/turn-orchestrator/wake.js'; - -describe('shouldWakeStep', () => { - it('accepts first write to a stepable state', () => { - expect(shouldWakeStep(null, 'provisioning')).toBe(true); - }); - - it('accepts transitions to another stepable state', () => { - expect(shouldWakeStep('provisioning', 'assistant_streaming')).toBe(true); - expect(shouldWakeStep('assistant_finished', 'function_execute')).toBe(true); - }); - - it('rejects terminal state (stopped)', () => { - expect(shouldWakeStep('tearing_down', 'stopped')).toBe(false); - }); - - it('rejects function_awaiting_approval (orchestrator parks here)', () => { - expect(shouldWakeStep('function_execute', 'function_awaiting_approval')).toBe(false); - }); - - it('rejects same-state writes', () => { - expect(shouldWakeStep('function_execute', 'function_execute')).toBe(false); - }); -}); - -describe('wakeState', () => { - it('enqueues turn::{state} on the turn-step FIFO queue', async () => { - const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { - triggers.push(req); - return null; - }), - } as 
unknown as ISdk; - - await wakeState(iii, 'sess-abc', 'assistant_streaming'); - - expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe('turn::assistant_streaming'); - expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); - expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); - }); - - it('swallows enqueue failures (logs only, never rethrows)', async () => { - const iii = { - trigger: vi.fn(async () => { - throw new Error('queue down'); - }), - } as unknown as ISdk; - - await expect(wakeState(iii, 'sess-abc', 'provisioning')).resolves.toBeUndefined(); - }); -}); - -describe('wakeFromRecord', () => { - it('enqueues turn::{currentState} from persisted record', async () => { - const rec = newRecord('sess-x'); - rec.state = 'function_awaiting_approval'; - const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { - if (req.function_id === 'state::get') { - return rec; - } - triggers.push(req); - return null; - }), - } as unknown as ISdk; - - await wakeFromRecord(iii, 'sess-x'); - - expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe('turn::function_awaiting_approval'); - expect(triggers[0]?.payload).toEqual({ session_id: 'sess-x' }); - }); - - it('no-ops when session is stopped', async () => { - const rec = newRecord('sess-y'); - rec.state = 'stopped'; - const iii = { - trigger: vi.fn(async (req: { function_id: string }) => { - if (req.function_id === 'state::get') return rec; - return null; - }), - } as unknown as ISdk; - - await wakeFromRecord(iii, 'sess-y'); - expect(iii.trigger).toHaveBeenCalledTimes(1); - }); -}); diff --git a/iii-permissions.yaml b/iii-permissions.yaml index 1edcc4e3..d8112e04 100644 --- a/iii-permissions.yaml +++ b/iii-permissions.yaml @@ -25,7 +25,6 @@ rules: - '!oauth::openai-codex::login' - '!run::start' - 
'!router::stream_assistant' - - '!router::abort' # Read-only / introspection (extend below for your tools). - state::get diff --git a/shell/README.md b/shell/README.md index 887adc26..31187961 100644 --- a/shell/README.md +++ b/shell/README.md @@ -4,10 +4,6 @@ Unix shell and filesystem worker on the iii bus. Every agent that needs to touch the OS (run a build, read a file, list a directory, call a CLI) goes through `shell::*` and `shell::fs::*`, so allowlists, timeouts, output caps, and a host-root jail live in one place. Both surfaces accept an optional `target` field that forwards the call into a live `iii-sandbox` microVM, so the same allowlist policy gates host and sandbox execution. - -Host-targeted `shell::exec` is not an isolation boundary. The denylist is a regex tripwire on `argv.join(" ")`. A caller running an allowlisted interpreter (`sh`, `node`, `python3`) can construct any forbidden token at runtime and bypass it. For untrusted input, pass `target: { kind: "sandbox", sandbox_id }` so the call forwards into a microVM. Prefer `shell::fs::ls`, `shell::fs::stat`, and `shell::fs::grep` over `exec`-ing the same tools; the fs backends stay in-process, respect the jail, and return structured results. - - ## Install ```bash