diff --git a/README.md b/README.md index 108b00101..a697685b4 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,12 @@ Codex support is available as a dedicated plugin bundle in: It includes Codex hook wiring, slash command dispatch, and orchestration harness scripts compatible with the Babysitter SDK. +Shared Codex runtime policy now lives in `packages/sdk`, including: +- harness capability reporting +- shared model routing defaults for plan/interactive/execute/review/fix phases +- execution strategy resolution for local, subagent, and future cloud modes +- local subagent fan-out metadata for `orchestrator_task` + --- ## Quick Start diff --git a/packages/sdk/src/runtime/__tests__/intrinsics.behaviors.test.ts b/packages/sdk/src/runtime/__tests__/intrinsics.behaviors.test.ts index b99c89b10..6a084dfcb 100644 --- a/packages/sdk/src/runtime/__tests__/intrinsics.behaviors.test.ts +++ b/packages/sdk/src/runtime/__tests__/intrinsics.behaviors.test.ts @@ -172,4 +172,33 @@ describe("orchestrator task intrinsic", () => { return true; }); }); + + test("captures subagent routing hints and subtask metadata", async () => { + const { runDir, runId } = await createTestRun(tmpRoot); + const context = await buildTaskContext(runDir, runId); + const subtasks = [{ title: "plan" }, { title: "execute" }]; + await expect( + runOrchestratorTaskIntrinsic( + { op: "fanout" }, + context, + { executionMode: "subagent", modelPhase: "interactive", parallelism: 2, subtasks } + ) + ).rejects.toSatisfy((error) => { + expect(error).toBeInstanceOf(EffectRequestedError); + const action = (error as EffectRequestedError).action; + expect(action.taskDef.orchestratorTask).toMatchObject({ + executionMode: "subagent", + modelPhase: "interactive", + parallelism: 2, + subtasks, + }); + expect(action.taskDef.metadata).toMatchObject({ + executionMode: "subagent", + modelPhase: "interactive", + parallelism: 2, + subtaskCount: 2, + }); + return true; + }); + }); }); diff --git a/packages/sdk/src/runtime/__tests__/orchestrateIteration.integration.test.ts b/packages/sdk/src/runtime/__tests__/orchestrateIteration.integration.test.ts index 0106fb2a6..8790c4b5f 100644 --- a/packages/sdk/src/runtime/__tests__/orchestrateIteration.integration.test.ts +++ b/packages/sdk/src/runtime/__tests__/orchestrateIteration.integration.test.ts @@ -23,7 +23,25 @@ function writeProcessFile(dir: string, filename: string) { const echoTask = { id: "echo-task", async build(args) { - return { kind: "node", title: "echo", metadata: args }; + return { + kind: "orchestrator_task", + title: "echo", + metadata: { + value: args.value, + orchestratorTask: true, + executionMode: "subagent", + modelPhase: "interactive", + parallelism: 2, + subtaskCount: 2, + }, + orchestratorTask: { + payload: { value: args.value }, + executionMode: "subagent", + modelPhase: "interactive", + parallelism: 2, + subtasks: [{ title: "plan" }, { title: "execute" }], + }, + }; } }; @@ -58,7 +76,21 @@ describe("orchestrateIteration integration", () => { } const action = firstIteration.nextActions[0]; - expect(action.kind).toBe("node"); + expect(action.kind).toBe("orchestrator_task"); + expect(action.executionHints).toMatchObject({ + requestedMode: "subagent", + effectiveMode: "subagent", + modelPhase: "interactive", + model: "gpt-5.3-codex-spark", + parallelism: 2, + subtaskCount: 2, + }); + expect(firstIteration.metadata?.routedModelsByPhase).toEqual({ + interactive: "gpt-5.3-codex-spark", + }); + expect(firstIteration.metadata?.pendingEffectsByMode).toEqual({ + subagent: 1, + }); await commitEffectResult({ runDir, @@ -105,6 +137,7 @@ describe("orchestrateIteration integration", () => { if (waitingResult.status !== "waiting") { throw new Error("Expected waiting status"); } + expect(waitingResult.metadata?.pendingEffectsByMode).toEqual({ subagent: 1 }); await commitEffectResult({ runDir, diff --git a/packages/sdk/src/runtime/__tests__/strategyRuntime.test.ts b/packages/sdk/src/runtime/__tests__/strategyRuntime.test.ts new file mode 100644 index 000000000..14bc06c12 --- /dev/null +++ b/packages/sdk/src/runtime/__tests__/strategyRuntime.test.ts @@ -0,0 +1,55 @@ +import { describe, expect, it } from "vitest"; +import { detectHarnessCapabilities, resolveExecutionStrategy, resolveModelRoute } from ".."; +import { createDispatchEnvelope } from "../strategyDispatch"; +import type { TaskDef } from "../types"; + +describe("shared runtime strategy helpers", () => { + it("reports codex capabilities", () => { + expect(detectHarnessCapabilities("codex")).toMatchObject({ + requirementList: true, + explicitSkillInvocation: true, + sessionThreads: true, + symlinkSkillDiscovery: true, + approvalFlow: true, + subagentFanOut: true, + }); + }); + + it("routes interactive work to spark by default", () => { + expect(resolveModelRoute("interactive")).toEqual({ + phase: "interactive", + model: "gpt-5.3-codex-spark", + source: "default", + }); + }); + + it("keeps cloud requests local in PR 1", () => { + const taskDef: TaskDef = { + kind: "orchestrator_task", + orchestratorTask: { + executionMode: "cloud", + }, + }; + const strategy = resolveExecutionStrategy({ taskDef, harness: "codex" }); + expect(strategy.requestedMode).toBe("cloud"); + expect(strategy.effectiveMode).toBe("local"); + expect(strategy.reason).toBe("cloud-execution-not-enabled-in-pr1"); + }); + + it("builds a subagent dispatch envelope when subtasks are available", () => { + const taskDef: TaskDef = { + kind: "orchestrator_task", + orchestratorTask: { + executionMode: "subagent", + parallelism: 2, + subtasks: [{ title: "plan" }, { title: "review" }], + }, + }; + expect(createDispatchEnvelope({ taskDef, harness: "codex" })).toEqual({ + mode: "subagent", + promptTemplate: "subagent", + parallelism: 2, + subtasks: [{ title: "plan" }, { title: "review" }], + }); + }); +}); diff --git a/packages/sdk/src/runtime/capabilityReport.ts b/packages/sdk/src/runtime/capabilityReport.ts new file mode 100644 index 000000000..cceb05977 --- /dev/null +++ b/packages/sdk/src/runtime/capabilityReport.ts @@ -0,0 +1,50 @@ +import type { HarnessAdapter } from "../harness/types"; + +export interface HarnessCapabilityReport { + requirementList: boolean; + explicitSkillInvocation: boolean; + sessionThreads: boolean; + symlinkSkillDiscovery: boolean; + approvalFlow: boolean; + subagentFanOut: boolean; +} + +const DEFAULT_CAPABILITIES: HarnessCapabilityReport = { + requirementList: false, + explicitSkillInvocation: false, + sessionThreads: false, + symlinkSkillDiscovery: false, + approvalFlow: false, + subagentFanOut: false, +}; + +const CODEX_CAPABILITIES: HarnessCapabilityReport = { + requirementList: true, + explicitSkillInvocation: true, + sessionThreads: true, + symlinkSkillDiscovery: true, + approvalFlow: true, + subagentFanOut: true, +}; + +const HARNESS_CAPABILITIES: Record = { + codex: CODEX_CAPABILITIES, +}; + +export function resolveActiveHarnessName(env: NodeJS.ProcessEnv = process.env): string | undefined { + if (env.CODEX_THREAD_ID || env.CODEX_SESSION_ID || env.CODEX_PLUGIN_ROOT) { + return "codex"; + } + return undefined; +} + +export function detectHarnessCapabilities( + input?: string | Pick +): HarnessCapabilityReport { + const name = typeof input === "string" ? input : input?.name; + if (!name) { + return { ...DEFAULT_CAPABILITIES }; + } + const matched = HARNESS_CAPABILITIES[name]; + return matched ? { ...matched } : { ...DEFAULT_CAPABILITIES }; +} diff --git a/packages/sdk/src/runtime/executionStrategy.ts b/packages/sdk/src/runtime/executionStrategy.ts new file mode 100644 index 000000000..c10f4412a --- /dev/null +++ b/packages/sdk/src/runtime/executionStrategy.ts @@ -0,0 +1,115 @@ +import type { TaskDef } from "../tasks/types"; +import { detectHarnessCapabilities, type HarnessCapabilityReport } from "./capabilityReport"; +import { resolveModelPhase, resolveModelRoute, type ModelRoute } from "./modelRouting"; + +export type ExecutionMode = "local" | "subagent" | "cloud"; + +export interface ExecutionStrategy { + requestedMode: ExecutionMode; + effectiveMode: ExecutionMode; + reason: string; + parallelism?: number; + subtaskCount?: number; + modelRoute: ModelRoute; + capabilities: HarnessCapabilityReport; +} + +export function resolveExecutionStrategy(options: { + taskDef: TaskDef; + harness?: string; + capabilities?: HarnessCapabilityReport; + env?: NodeJS.ProcessEnv; +}): ExecutionStrategy { + const capabilities = options.capabilities ?? detectHarnessCapabilities(options.harness); + const requestedMode = resolveRequestedMode(options.taskDef); + const subtasks = options.taskDef.orchestratorTask?.subtasks ?? []; + const parallelism = resolveParallelism(options.taskDef); + const phase = resolveModelPhase(options.taskDef); + const modelRoute = resolveModelRoute(phase, options.env); + + if (requestedMode === "cloud") { + return { + requestedMode, + effectiveMode: "local", + reason: "cloud-execution-not-enabled-in-pr1", + parallelism, + subtaskCount: subtasks.length || undefined, + modelRoute, + capabilities, + }; + } + + if (requestedMode === "subagent") { + if (subtasks.length > 0 && capabilities.subagentFanOut) { + return { + requestedMode, + effectiveMode: "subagent", + reason: "subtasks-available-for-local-fanout", + parallelism, + subtaskCount: subtasks.length, + modelRoute, + capabilities, + }; + } + return { + requestedMode, + effectiveMode: "local", + reason: subtasks.length === 0 ? "subagent-mode-requested-without-subtasks" : "subagent-capability-unavailable", + parallelism, + subtaskCount: subtasks.length || undefined, + modelRoute, + capabilities, + }; + } + + return { + requestedMode, + effectiveMode: "local", + reason: "default-local-execution", + parallelism, + subtaskCount: subtasks.length || undefined, + modelRoute, + capabilities, + }; +} + +function resolveRequestedMode(taskDef: TaskDef): ExecutionMode { + const candidates = [ + taskDef.orchestratorTask?.executionMode, + getString(taskDef.metadata, "executionMode"), + ]; + const matched = candidates.find((value): value is ExecutionMode => isExecutionMode(value)); + return matched ?? "local"; +} + +function resolveParallelism(taskDef: TaskDef): number | undefined { + const direct = taskDef.orchestratorTask?.parallelism; + if (typeof direct === "number" && Number.isFinite(direct) && direct > 0) { + return Math.floor(direct); + } + const metadataValue = getNumber(taskDef.metadata, "parallelism"); + if (typeof metadataValue === "number" && Number.isFinite(metadataValue) && metadataValue > 0) { + return Math.floor(metadataValue); + } + return undefined; +} + +function isExecutionMode(value: string | undefined): value is ExecutionMode { + return value === "local" || value === "subagent" || value === "cloud"; +} + +function getString(record: unknown, key: string): string | undefined { + if (!record || typeof record !== "object" || Array.isArray(record)) { + return undefined; + } + const value = (record as Record)[key]; + return typeof value === "string" ? value : undefined; +} + +function getNumber(record: unknown, key: string): number | undefined { + if (!record || typeof record !== "object" || Array.isArray(record)) { + return undefined; + } + const value = (record as Record)[key]; + return typeof value === "number" ? value : undefined; +} diff --git a/packages/sdk/src/runtime/index.ts b/packages/sdk/src/runtime/index.ts index e499b878d..a3740795d 100644 --- a/packages/sdk/src/runtime/index.ts +++ b/packages/sdk/src/runtime/index.ts @@ -39,3 +39,7 @@ export type { export { hashInvocationKey } from "./invocation"; export { EffectRequestedError, EffectPendingError, ParallelPendingError, RunFailedError } from "./exceptions"; export { replaySchemaVersion } from "./constants"; +export { detectHarnessCapabilities } from "./capabilityReport"; +export { resolveModelPhase, resolveModelRoute, summarizeModelRoutes } from "./modelRouting"; +export { resolveExecutionStrategy } from "./executionStrategy"; +export { createDispatchEnvelope } from "./strategyDispatch"; diff --git a/packages/sdk/src/runtime/intrinsics/orchestratorTask.ts b/packages/sdk/src/runtime/intrinsics/orchestratorTask.ts index 78df92c07..f75995283 100644 --- a/packages/sdk/src/runtime/intrinsics/orchestratorTask.ts +++ b/packages/sdk/src/runtime/intrinsics/orchestratorTask.ts @@ -4,6 +4,10 @@ import { runTaskIntrinsic, TaskIntrinsicContext } from "./task"; interface OrchestratorTaskArgs { payload: T; label: string; + executionMode?: "local" | "subagent" | "cloud"; + modelPhase?: "plan" | "interactive" | "execute" | "review" | "fix"; + parallelism?: number; + subtasks?: Record[]; } const ORCHESTRATOR_TASK_ID = "__sdk.orchestratorTask"; @@ -17,6 +21,19 @@ const orchestratorTask: DefinedTask = { metadata: { payload: args?.payload, orchestratorTask: true, + executionMode: args.executionMode, + modelPhase: args.modelPhase, + parallelism: args.parallelism, + subtaskCount: args.subtasks?.length, + }, + orchestratorTask: { + payload: typeof args?.payload === "object" && args.payload !== null && !Array.isArray(args.payload) + ? (args.payload as Record) + : undefined, + executionMode: args.executionMode, + modelPhase: args.modelPhase, + parallelism: args.parallelism, + subtasks: args.subtasks, }, }; }, @@ -25,13 +42,25 @@ const orchestratorTask: DefinedTask = { export function runOrchestratorTaskIntrinsic( payload: TPayload, context: TaskIntrinsicContext, - options?: TaskInvokeOptions + options?: TaskInvokeOptions & { + executionMode?: "local" | "subagent" | "cloud"; + modelPhase?: "plan" | "interactive" | "execute" | "review" | "fix"; + parallelism?: number; + subtasks?: Record[]; + } ): Promise { const label = options?.label ?? "orchestrator-task"; const invokeOptions = { ...options, label }; return runTaskIntrinsic({ task: orchestratorTask as DefinedTask, TResult>, - args: { payload, label }, + args: { + payload, + label, + executionMode: options?.executionMode, + modelPhase: options?.modelPhase, + parallelism: options?.parallelism, + subtasks: options?.subtasks, + }, invokeOptions, context, }); diff --git a/packages/sdk/src/runtime/intrinsics/task.ts b/packages/sdk/src/runtime/intrinsics/task.ts index 6c44bc4e7..883dede99 100644 --- a/packages/sdk/src/runtime/intrinsics/task.ts +++ b/packages/sdk/src/runtime/intrinsics/task.ts @@ -18,6 +18,7 @@ import { ReplayCursor } from "../replay/replayCursor"; import { DefinedTask, EffectAction, + EffectExecutionHints, EffectRecord, EffectSchedulerHints, ProcessLogger, @@ -29,6 +30,8 @@ import { emitRuntimeMetric } from "../instrumentation"; import { createTaskBuildContext } from "../../tasks/context"; import { globalTaskRegistry } from "../../tasks/registry"; import { serializeAndWriteTaskDefinition } from "../../tasks/serializer"; +import { resolveExecutionStrategy } from "../executionStrategy"; +import { resolveActiveHarnessName } from "../capabilityReport"; export interface TaskIntrinsicContext { runId: string; @@ -214,6 +217,7 @@ async function ensureTaskDefinition(runDir: string, record: EffectRecord): Promi function buildEffectAction(record: EffectRecord, taskDef: TaskDef): EffectAction { const schedulerHints = deriveSchedulerHints(taskDef); + const executionHints = deriveExecutionHints(taskDef); return { effectId: record.effectId, invocationKey: record.invocationKey, @@ -227,6 +231,7 @@ function buildEffectAction(record: EffectRecord, taskDef: TaskDef): EffectAction inputsRef: record.inputsRef, requestedAt: record.requestedAt, schedulerHints, + executionHints, }; } @@ -239,6 +244,19 @@ function deriveSchedulerHints(taskDef: TaskDef): EffectSchedulerHints | undefine return Object.keys(hints).length ? hints : undefined; } +function deriveExecutionHints(taskDef: TaskDef): EffectExecutionHints | undefined { + const strategy = resolveExecutionStrategy({ taskDef, harness: resolveActiveHarnessName() }); + return { + requestedMode: strategy.requestedMode, + effectiveMode: strategy.effectiveMode, + reason: strategy.reason, + modelPhase: strategy.modelRoute.phase, + model: strategy.modelRoute.model, + parallelism: strategy.parallelism, + subtaskCount: strategy.subtaskCount, + }; +} + function extractSleepTarget(taskDef: TaskDef): number | undefined { if (typeof taskDef.sleep?.targetEpochMs === "number") { return taskDef.sleep.targetEpochMs; diff --git a/packages/sdk/src/runtime/modelRouting.ts b/packages/sdk/src/runtime/modelRouting.ts new file mode 100644 index 000000000..89f554529 --- /dev/null +++ b/packages/sdk/src/runtime/modelRouting.ts @@ -0,0 +1,72 @@ +import type { TaskDef } from "../tasks/types"; + +export type ModelPhase = "plan" | "interactive" | "execute" | "review" | "fix"; + +export interface ModelRoute { + phase: ModelPhase; + model: string; + source: "default" | "env"; +} + +const DEFAULT_MODEL = "gpt-5.3-codex"; +const DEFAULT_INTERACTIVE_MODEL = "gpt-5.3-codex-spark"; +const VALID_PHASES: ModelPhase[] = ["plan", "interactive", "execute", "review", "fix"]; + +export function resolveModelPhase(taskDef: TaskDef): ModelPhase { + const candidates = [ + taskDef.orchestratorTask?.modelPhase, + getString(taskDef.metadata, "modelPhase"), + getString(taskDef.metadata, "phase"), + ]; + const matched = candidates.find((value): value is ModelPhase => isModelPhase(value)); + return matched ?? "execute"; +} + +export function resolveModelRoute( + phase: ModelPhase, + env: NodeJS.ProcessEnv = process.env +): ModelRoute { + const phaseKey = phase.toUpperCase(); + const phaseOverride = env[`BABYSITTER_MODEL_${phaseKey}`]; + if (phaseOverride) { + return { phase, model: phaseOverride, source: "env" }; + } + if (phase === "interactive") { + const interactiveOverride = env.BABYSITTER_INTERACTIVE_MODEL; + if (interactiveOverride) { + return { phase, model: interactiveOverride, source: "env" }; + } + } + if (env.BABYSITTER_MODEL) { + return { phase, model: env.BABYSITTER_MODEL, source: "env" }; + } + return { + phase, + model: phase === "interactive" ? DEFAULT_INTERACTIVE_MODEL : DEFAULT_MODEL, + source: "default", + }; +} + +export function summarizeModelRoutes(routes: ModelRoute[]): Record | undefined { + const summary = {} as Record; + let added = false; + for (const phase of VALID_PHASES) { + const match = routes.find((route) => route.phase === phase); + if (!match) continue; + summary[phase] = match.model; + added = true; + } + return added ? summary : undefined; +} + +function isModelPhase(value: string | undefined): value is ModelPhase { + return Boolean(value && VALID_PHASES.includes(value as ModelPhase)); +} + +function getString(record: unknown, key: string): string | undefined { + if (!record || typeof record !== "object" || Array.isArray(record)) { + return undefined; + } + const value = (record as Record)[key]; + return typeof value === "string" ? value : undefined; +} diff --git a/packages/sdk/src/runtime/orchestrateIteration.ts b/packages/sdk/src/runtime/orchestrateIteration.ts index e5679ccae..966f46ae0 100644 --- a/packages/sdk/src/runtime/orchestrateIteration.ts +++ b/packages/sdk/src/runtime/orchestrateIteration.ts @@ -23,6 +23,8 @@ import { import { serializeUnknownError } from "./errorUtils"; import { emitRuntimeMetric } from "./instrumentation"; import { callRuntimeHook } from "./hooks/runtime"; +import { detectHarnessCapabilities, resolveActiveHarnessName } from "./capabilityReport"; +import { summarizeModelRoutes } from "./modelRouting"; type ProcessFunction = (inputs: unknown, ctx: ProcessContext, extra?: unknown) => Promise; // Use an indirect dynamic import so TypeScript does not downlevel to require() in CommonJS builds. @@ -107,10 +109,11 @@ export async function orchestrateIteration(options: OrchestrateOptions): Promise const waiting = asWaitingResult(error); if (waiting) { finalStatus = waiting.status; + const nextActions = annotateWaitingActions(waiting.nextActions); return { status: "waiting", - nextActions: annotateWaitingActions(waiting.nextActions), - metadata: createIterationMetadata(engine), + nextActions, + metadata: createIterationMetadataWithActions(engine, nextActions), }; } const failure = serializeUnknownError(error); @@ -315,11 +318,40 @@ function mergeSchedulerHints( } function createIterationMetadata(engine: ReplayEngine): IterationMetadata { + return createIterationMetadataWithActions(engine); +} + +function createIterationMetadataWithActions( + engine: ReplayEngine, + actions: EffectAction[] = [] +): IterationMetadata { + const harnessName = resolveActiveHarnessName(); + const routedModelsByPhase = summarizeModelRoutes( + actions + .filter((action) => action.executionHints) + .map((action) => ({ + phase: action.executionHints!.modelPhase, + model: action.executionHints!.model, + source: "default" as const, + })) + ); + const pendingEffectsByMode = actions.reduce>>( + (acc, action) => { + const mode = action.executionHints?.effectiveMode; + if (!mode) return acc; + acc[mode] = (acc[mode] ?? 0) + 1; + return acc; + }, + {} + ); return { stateVersion: engine.stateCache?.stateVersion, pendingEffectsByKind: engine.stateCache?.pendingEffectsByKind, journalHead: engine.stateCache?.journalHead ?? null, stateRebuilt: Boolean(engine.stateRebuild), stateRebuildReason: engine.stateRebuild?.reason ?? null, + harnessCapabilities: detectHarnessCapabilities(harnessName), + routedModelsByPhase, + pendingEffectsByMode: Object.keys(pendingEffectsByMode).length ? pendingEffectsByMode : undefined, }; } diff --git a/packages/sdk/src/runtime/strategyDispatch.ts b/packages/sdk/src/runtime/strategyDispatch.ts new file mode 100644 index 000000000..420c8104a --- /dev/null +++ b/packages/sdk/src/runtime/strategyDispatch.ts @@ -0,0 +1,44 @@ +import type { JsonRecord } from "../storage/types"; +import type { TaskDef } from "../tasks/types"; +import { resolveExecutionStrategy } from "./executionStrategy"; + +export interface DispatchEnvelope { + mode: "local" | "subagent" | "cloud"; + promptTemplate: "standard" | "subagent"; + parallelism?: number; + subtasks?: JsonRecord[]; + fallbackReason?: string; +} + +export function createDispatchEnvelope(options: { + taskDef: TaskDef; + harness?: string; + env?: NodeJS.ProcessEnv; +}): DispatchEnvelope { + const strategy = resolveExecutionStrategy(options); + const subtasks = normalizeSubtasks(options.taskDef.orchestratorTask?.subtasks); + + if (strategy.effectiveMode === "subagent") { + return { + mode: "subagent", + promptTemplate: "subagent", + parallelism: strategy.parallelism, + subtasks, + }; + } + + return { + mode: strategy.effectiveMode, + promptTemplate: "standard", + parallelism: strategy.parallelism, + fallbackReason: + strategy.requestedMode !== strategy.effectiveMode ? strategy.reason : undefined, + }; +} + +function normalizeSubtasks(subtasks: JsonRecord[] | undefined): JsonRecord[] | undefined { + if (!Array.isArray(subtasks) || subtasks.length === 0) { + return undefined; + } + return subtasks.map((subtask) => ({ ...subtask })); +} diff --git a/packages/sdk/src/runtime/types.ts b/packages/sdk/src/runtime/types.ts index cebd6677e..6a279c8a1 100644 --- a/packages/sdk/src/runtime/types.ts +++ b/packages/sdk/src/runtime/types.ts @@ -1,9 +1,15 @@ import type { JsonRecord, RunMetadata } from "../storage/types"; import type { DefinedTask, TaskDef, TaskInvokeOptions } from "../tasks/types"; import type { StateCacheJournalHead } from "./replay/stateCache"; +import type { HarnessCapabilityReport } from "./capabilityReport"; +import type { ExecutionMode } from "./executionStrategy"; +import type { ModelPhase } from "./modelRouting"; export type { DefinedTask, TaskBuildContext, TaskDef, TaskInvokeOptions } from "../tasks/types"; export type { StateCacheJournalHead } from "./replay/stateCache"; +export type { HarnessCapabilityReport } from "./capabilityReport"; +export type { ExecutionMode } from "./executionStrategy"; +export type { ModelPhase } from "./modelRouting"; // eslint-disable-next-line @typescript-eslint/no-explicit-any export type ProcessLogger = (...args: any[]) => void; @@ -43,6 +49,16 @@ export interface EffectSchedulerHints { sleepUntilEpochMs?: number; } +export interface EffectExecutionHints { + requestedMode: ExecutionMode; + effectiveMode: ExecutionMode; + reason: string; + modelPhase: ModelPhase; + model: string; + parallelism?: number; + subtaskCount?: number; +} + export interface EffectAction { effectId: string; invocationKey: string; @@ -56,6 +72,7 @@ export interface EffectAction { inputsRef?: string; requestedAt?: string; schedulerHints?: EffectSchedulerHints; + executionHints?: EffectExecutionHints; } export interface CreateRunOptions { @@ -127,6 +144,9 @@ export interface IterationMetadata { stateRebuildReason?: string | null; pendingEffectsByKind?: Record; journalHead?: StateCacheJournalHead | null; + harnessCapabilities?: HarnessCapabilityReport; + routedModelsByPhase?: Partial>; + pendingEffectsByMode?: Partial>; } export type IterationResult = diff --git a/packages/sdk/src/tasks/__tests__/kinds.test.ts b/packages/sdk/src/tasks/__tests__/kinds.test.ts index 3edddee53..a285cf01f 100644 --- a/packages/sdk/src/tasks/__tests__/kinds.test.ts +++ b/packages/sdk/src/tasks/__tests__/kinds.test.ts @@ -101,6 +101,32 @@ describe("task kind helpers", () => { orchestratorTask: true, }); }); + + it("includes execution strategy fields for subagent fan-out", async () => { + const helper = orchestratorTask("fixtures.orchestrator.subagent", { + payload: () => orchestratorKindFixtures.payload, + executionMode: () => "subagent", + modelPhase: () => "interactive", + parallelism: () => 3, + subtasks: () => [{ title: "plan" }, { title: "review" }], + }); + const ctx = createTestBuildContext(); + const def = await helper.build(orchestratorKindFixtures.payload, ctx); + + expect(def.orchestratorTask).toMatchObject({ + executionMode: "subagent", + modelPhase: "interactive", + parallelism: 3, + subtasks: [{ title: "plan" }, { title: "review" }], + }); + expect(def.metadata).toMatchObject({ + orchestratorTask: true, + executionMode: "subagent", + modelPhase: "interactive", + parallelism: 3, + subtaskCount: 2, + }); + }); }); describe("sleepTask", () => { diff --git a/packages/sdk/src/tasks/kinds/index.ts b/packages/sdk/src/tasks/kinds/index.ts index 2602ac104..aff100e01 100644 --- a/packages/sdk/src/tasks/kinds/index.ts +++ b/packages/sdk/src/tasks/kinds/index.ts @@ -123,19 +123,42 @@ export function orchestratorTask( return defineTask( id, async (args, ctx) => { - const [title, description, helperLabels, metadata, payload, resumeCommand] = await Promise.all([ + const [title, description, helperLabels, metadata, payload, resumeCommand, executionMode, modelPhase, parallelism, subtasks] = await Promise.all([ resolveOptionalValue(options.title, args, ctx), resolveOptionalValue(options.description, args, ctx), resolveLabelList(options.labels, args, ctx), resolveMetadata(options.metadata, args, ctx), resolvePayload(options.payload, args, ctx), resolveOptionalValue(options.resumeCommand, args, ctx), + resolveOptionalValue(options.executionMode, args, ctx), + resolveOptionalValue(options.modelPhase, args, ctx), + resolveNumber(options.parallelism, args, ctx), + resolveJsonRecordArray(options.subtasks, args, ctx), ]); const labels = mergeLabels(ctx, helperLabels, DEFAULT_ORCHESTRATOR_LABEL); - const orchestrator = buildOrchestratorOptions(payload ?? pickJsonRecord(args), resumeCommand); + const orchestrator = buildOrchestratorOptions( + payload ?? pickJsonRecord(args), + resumeCommand, + executionMode, + modelPhase, + parallelism, + subtasks + ); const mergedMetadata = ensureMetadata(metadata); mergedMetadata.orchestratorTask = true; + if (executionMode) { + mergedMetadata.executionMode = executionMode; + } + if (modelPhase) { + mergedMetadata.modelPhase = modelPhase; + } + if (typeof parallelism === "number") { + mergedMetadata.parallelism = parallelism; + } + if (subtasks?.length) { + mergedMetadata.subtaskCount = subtasks.length; + } const resolvedTitle = title ?? ctx.label ?? labels?.[0] ?? DEFAULT_ORCHESTRATOR_LABEL; return { @@ -309,6 +332,21 @@ async function resolvePayload( return { ...value }; } +async function resolveJsonRecordArray( + source: TaskValueOrFactory | undefined, + args: TArgs, + ctx: TaskBuildContext +): Promise { + const value = await resolveOptionalValue(source, args, ctx); + if (!Array.isArray(value)) { + return undefined; + } + const records = value + .filter((item): item is JsonRecord => isJsonRecord(item)) + .map((item) => ({ ...item })); + return records.length ? records : undefined; +} + function isFactory(value: TaskValueOrFactory): value is TaskValueFactory { return typeof value === "function"; } @@ -448,7 +486,14 @@ function buildBreakpointOptions(payload: unknown, confirmationRequired?: boolean return breakpoint; } -function buildOrchestratorOptions(payload?: JsonRecord, resumeCommand?: string): OrchestratorTaskOptions { +function buildOrchestratorOptions( + payload?: JsonRecord, + resumeCommand?: string, + executionMode?: OrchestratorTaskOptions["executionMode"], + modelPhase?: OrchestratorTaskOptions["modelPhase"], + parallelism?: number, + subtasks?: JsonRecord[] +): OrchestratorTaskOptions { const orchestrator: OrchestratorTaskOptions = {}; if (payload) { orchestrator.payload = payload; @@ -456,6 +501,18 @@ function buildOrchestratorOptions(payload?: JsonRecord, resumeCommand?: string): if (resumeCommand) { orchestrator.resumeCommand = resumeCommand; } + if (executionMode) { + orchestrator.executionMode = executionMode; + } + if (modelPhase) { + orchestrator.modelPhase = modelPhase; + } + if (typeof parallelism === "number") { + orchestrator.parallelism = parallelism; + } + if (subtasks?.length) { + orchestrator.subtasks = subtasks; + } return orchestrator; } diff --git a/packages/sdk/src/tasks/types.ts b/packages/sdk/src/tasks/types.ts index 06297acbc..760d2ebd6 100644 --- a/packages/sdk/src/tasks/types.ts +++ b/packages/sdk/src/tasks/types.ts @@ -29,6 +29,10 @@ export interface BreakpointTaskOptions { export interface OrchestratorTaskOptions { payload?: JsonRecord; resumeCommand?: string; + executionMode?: "local" | "subagent" | "cloud"; + modelPhase?: "plan" | "interactive" | "execute" | "review" | "fix"; + parallelism?: number; + subtasks?: JsonRecord[]; } export interface SleepTaskOptions { @@ -159,6 +163,10 @@ export interface OrchestratorTaskDefinitionOptions { metadata?: TaskValueOrFactory; payload?: TaskValueOrFactory; resumeCommand?: TaskValueOrFactory; + executionMode?: TaskValueOrFactory; + modelPhase?: TaskValueOrFactory; + parallelism?: TaskValueOrFactory; + subtasks?: TaskValueOrFactory; } export interface SleepTaskBuilderArgs { diff --git a/plugins/babysitter-codex/README.md b/plugins/babysitter-codex/README.md index ca28edc4d..aa4bbc3ab 100644 --- a/plugins/babysitter-codex/README.md +++ b/plugins/babysitter-codex/README.md @@ -12,6 +12,7 @@ This project was created by Babysitter already running on Codex. - Yolo mode for non-interactive auto-approval - Compatibility mode for SDK builds that expose only core run/task commands - Deterministic per-run trace log at `/run-trace.jsonl` +- Shared SDK runtime routing for model policy, capability detection, and execution strategy metadata ## Version Control Documentation