Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ Codex support is available as a dedicated plugin bundle in:

It includes Codex hook wiring, slash command dispatch, and orchestration harness scripts compatible with the Babysitter SDK.

Shared Codex runtime policy now lives in `packages/sdk`, including:
- harness capability reporting
- shared model routing defaults for plan/interactive/execute/review/fix phases
- execution strategy resolution for local, subagent, and future cloud modes
- local subagent fan-out metadata for `orchestrator_task`

---

## Quick Start
Expand Down
29 changes: 29 additions & 0 deletions packages/sdk/src/runtime/__tests__/intrinsics.behaviors.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,4 +172,33 @@ describe("orchestrator task intrinsic", () => {
return true;
});
});

// Verifies that the execution options handed to runOrchestratorTaskIntrinsic
// (executionMode, modelPhase, parallelism, subtasks) end up on the task
// definition carried by the rejected EffectRequestedError.
test("captures subagent routing hints and subtask metadata", async () => {
const { runDir, runId } = await createTestRun(tmpRoot);
const context = await buildTaskContext(runDir, runId);
const subtasks = [{ title: "plan" }, { title: "execute" }];
await expect(
runOrchestratorTaskIntrinsic(
{ op: "fanout" },
context,
{ executionMode: "subagent", modelPhase: "interactive", parallelism: 2, subtasks }
)
).rejects.toSatisfy((error) => {
// The intrinsic is expected to reject with the requested effect rather than resolve.
expect(error).toBeInstanceOf(EffectRequestedError);
const action = (error as EffectRequestedError).action;
// The structured orchestratorTask section keeps the full subtask list.
expect(action.taskDef.orchestratorTask).toMatchObject({
executionMode: "subagent",
modelPhase: "interactive",
parallelism: 2,
subtasks,
});
// The metadata section mirrors the hints but records only the subtask count.
expect(action.taskDef.metadata).toMatchObject({
executionMode: "subagent",
modelPhase: "interactive",
parallelism: 2,
subtaskCount: 2,
});
return true;
});
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,25 @@ function writeProcessFile(dir: string, filename: string) {
const echoTask = {
id: "echo-task",
async build(args) {
return { kind: "node", title: "echo", metadata: args };
return {
kind: "orchestrator_task",
title: "echo",
metadata: {
value: args.value,
orchestratorTask: true,
executionMode: "subagent",
modelPhase: "interactive",
parallelism: 2,
subtaskCount: 2,
},
orchestratorTask: {
payload: { value: args.value },
executionMode: "subagent",
modelPhase: "interactive",
parallelism: 2,
subtasks: [{ title: "plan" }, { title: "execute" }],
},
};
}
};

Expand Down Expand Up @@ -58,7 +76,21 @@ describe("orchestrateIteration integration", () => {
}

const action = firstIteration.nextActions[0];
expect(action.kind).toBe("node");
expect(action.kind).toBe("orchestrator_task");
expect(action.executionHints).toMatchObject({
requestedMode: "subagent",
effectiveMode: "subagent",
modelPhase: "interactive",
model: "gpt-5.3-codex-spark",
parallelism: 2,
subtaskCount: 2,
});
expect(firstIteration.metadata?.routedModelsByPhase).toEqual({
interactive: "gpt-5.3-codex-spark",
});
expect(firstIteration.metadata?.pendingEffectsByMode).toEqual({
subagent: 1,
});

await commitEffectResult({
runDir,
Expand Down Expand Up @@ -105,6 +137,7 @@ describe("orchestrateIteration integration", () => {
if (waitingResult.status !== "waiting") {
throw new Error("Expected waiting status");
}
expect(waitingResult.metadata?.pendingEffectsByMode).toEqual({ subagent: 1 });

await commitEffectResult({
runDir,
Expand Down
55 changes: 55 additions & 0 deletions packages/sdk/src/runtime/__tests__/strategyRuntime.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { describe, expect, it } from "vitest";
import { detectHarnessCapabilities, resolveExecutionStrategy, resolveModelRoute } from "..";
import { createDispatchEnvelope } from "../strategyDispatch";
import type { TaskDef } from "../types";

// Unit coverage for the shared strategy helpers: capability detection,
// model routing defaults, execution-mode resolution and dispatch envelopes.
describe("shared runtime strategy helpers", () => {
  it("reports codex capabilities", () => {
    const report = detectHarnessCapabilities("codex");

    expect(report).toMatchObject({
      requirementList: true,
      explicitSkillInvocation: true,
      sessionThreads: true,
      symlinkSkillDiscovery: true,
      approvalFlow: true,
      subagentFanOut: true,
    });
  });

  it("routes interactive work to spark by default", () => {
    const route = resolveModelRoute("interactive");

    expect(route).toEqual({
      phase: "interactive",
      model: "gpt-5.3-codex-spark",
      source: "default",
    });
  });

  it("keeps cloud requests local in PR 1", () => {
    const cloudTask: TaskDef = {
      kind: "orchestrator_task",
      orchestratorTask: {
        executionMode: "cloud",
      },
    };

    const { requestedMode, effectiveMode, reason } = resolveExecutionStrategy({
      taskDef: cloudTask,
      harness: "codex",
    });

    expect(requestedMode).toBe("cloud");
    expect(effectiveMode).toBe("local");
    expect(reason).toBe("cloud-execution-not-enabled-in-pr1");
  });

  it("builds a subagent dispatch envelope when subtasks are available", () => {
    // Fresh literals for input and expectation so neither aliases the other.
    const planAndReview = () => [{ title: "plan" }, { title: "review" }];
    const fanOutTask: TaskDef = {
      kind: "orchestrator_task",
      orchestratorTask: {
        executionMode: "subagent",
        parallelism: 2,
        subtasks: planAndReview(),
      },
    };

    const envelope = createDispatchEnvelope({ taskDef: fanOutTask, harness: "codex" });

    expect(envelope).toEqual({
      mode: "subagent",
      promptTemplate: "subagent",
      parallelism: 2,
      subtasks: planAndReview(),
    });
  });
});
50 changes: 50 additions & 0 deletions packages/sdk/src/runtime/capabilityReport.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import type { HarnessAdapter } from "../harness/types";

/**
 * Set of boolean capability flags reported for a harness.
 *
 * Every flag is always present; a harness that is unknown to this module
 * gets a report with all flags set to `false`.
 */
export interface HarnessCapabilityReport {
  requirementList: boolean;
  explicitSkillInvocation: boolean;
  sessionThreads: boolean;
  symlinkSkillDiscovery: boolean;
  approvalFlow: boolean;
  subagentFanOut: boolean;
}

/** Report used when no harness name is given or the name is not registered. */
const DEFAULT_CAPABILITIES: HarnessCapabilityReport = {
  requirementList: false,
  explicitSkillInvocation: false,
  sessionThreads: false,
  symlinkSkillDiscovery: false,
  approvalFlow: false,
  subagentFanOut: false,
};

/** Report for the "codex" harness: every capability is enabled. */
const CODEX_CAPABILITIES: HarnessCapabilityReport = {
  requirementList: true,
  explicitSkillInvocation: true,
  sessionThreads: true,
  symlinkSkillDiscovery: true,
  approvalFlow: true,
  subagentFanOut: true,
};

/** Registry of known harness names to their capability report. */
const HARNESS_CAPABILITIES: Record<string, HarnessCapabilityReport> = {
  codex: CODEX_CAPABILITIES,
};

/**
 * Infers the active harness from environment variables.
 *
 * @param env - Environment to inspect; defaults to `process.env`.
 * @returns `"codex"` when any of `CODEX_THREAD_ID`, `CODEX_SESSION_ID` or
 *   `CODEX_PLUGIN_ROOT` is set to a non-empty value, otherwise `undefined`.
 */
export function resolveActiveHarnessName(env: NodeJS.ProcessEnv = process.env): string | undefined {
  if (env.CODEX_THREAD_ID || env.CODEX_SESSION_ID || env.CODEX_PLUGIN_ROOT) {
    return "codex";
  }
  return undefined;
}

/**
 * Looks up the capability report for a harness.
 *
 * @param input - Harness name, or an object exposing the harness `name`.
 * @returns A fresh copy of the registered report, or a fresh copy of the
 *   all-`false` default report when the name is missing or not registered.
 *   Callers may mutate the result without affecting later lookups.
 */
export function detectHarnessCapabilities(
  input?: string | Pick<HarnessAdapter, "name">
): HarnessCapabilityReport {
  const name = typeof input === "string" ? input : input?.name;
  // Only own keys count as registered harnesses. A bare `HARNESS_CAPABILITIES[name]`
  // would also resolve inherited members such as "constructor" or "__proto__",
  // and spreading those yields an object with none of the capability flags.
  if (!name || !Object.prototype.hasOwnProperty.call(HARNESS_CAPABILITIES, name)) {
    return { ...DEFAULT_CAPABILITIES };
  }
  return { ...HARNESS_CAPABILITIES[name] };
}
115 changes: 115 additions & 0 deletions packages/sdk/src/runtime/executionStrategy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import type { TaskDef } from "../tasks/types";
import { detectHarnessCapabilities, type HarnessCapabilityReport } from "./capabilityReport";
import { resolveModelPhase, resolveModelRoute, type ModelRoute } from "./modelRouting";

/** Execution modes a task may request and that resolveExecutionStrategy may select. */
export type ExecutionMode = "local" | "subagent" | "cloud";

/**
 * Outcome of resolveExecutionStrategy: what the task asked for, what will
 * actually be used, and the inputs that informed the decision.
 */
export interface ExecutionStrategy {
// Mode read from the task definition; "local" when none is specified.
requestedMode: ExecutionMode;
// Mode actually selected. In this module "cloud" requests resolve to "local",
// and "subagent" is only kept when subtasks exist and fan-out is supported.
effectiveMode: ExecutionMode;
// Machine-readable code explaining the selection, e.g. "default-local-execution".
reason: string;
// Parallelism hint taken from the task definition, when a usable one is present.
parallelism?: number;
// Number of subtasks declared on the task; omitted when there are none.
subtaskCount?: number;
// Model route resolved for the task's model phase.
modelRoute: ModelRoute;
// Capability report supplied by the caller or detected from the harness name.
capabilities: HarnessCapabilityReport;
}

/**
 * Decides how a task should be executed.
 *
 * @param options.taskDef - Task definition carrying the requested mode,
 *   parallelism, subtasks and model phase.
 * @param options.harness - Harness name used to detect capabilities when
 *   `options.capabilities` is not supplied.
 * @param options.capabilities - Pre-computed capability report; takes
 *   precedence over detection by harness name.
 * @param options.env - Environment forwarded to model route resolution.
 * @returns The requested mode, the effective mode and a reason code, plus
 *   the parallelism, subtask count, model route and capabilities used.
 */
export function resolveExecutionStrategy(options: {
  taskDef: TaskDef;
  harness?: string;
  capabilities?: HarnessCapabilityReport;
  env?: NodeJS.ProcessEnv;
}): ExecutionStrategy {
  const { taskDef } = options;
  const capabilities = options.capabilities ?? detectHarnessCapabilities(options.harness);
  const requestedMode = resolveRequestedMode(taskDef);
  const subtaskTotal = (taskDef.orchestratorTask?.subtasks ?? []).length;
  const parallelism = resolveParallelism(taskDef);
  const modelRoute = resolveModelRoute(resolveModelPhase(taskDef), options.env);

  // Every outcome shares the same payload; only the effective mode and the
  // reason code differ between branches.
  const settle = (effectiveMode: ExecutionMode, reason: string): ExecutionStrategy => ({
    requestedMode,
    effectiveMode,
    reason,
    parallelism,
    subtaskCount: subtaskTotal || undefined,
    modelRoute,
    capabilities,
  });

  switch (requestedMode) {
    case "cloud":
      // Cloud execution is not enabled yet, so the request is downgraded.
      return settle("local", "cloud-execution-not-enabled-in-pr1");
    case "subagent":
      if (subtaskTotal === 0) {
        return settle("local", "subagent-mode-requested-without-subtasks");
      }
      if (!capabilities.subagentFanOut) {
        return settle("local", "subagent-capability-unavailable");
      }
      return settle("subagent", "subtasks-available-for-local-fanout");
    default:
      return settle("local", "default-local-execution");
  }
}

/**
 * Reads the execution mode requested by a task definition.
 *
 * The structured `orchestratorTask.executionMode` wins over the
 * `metadata.executionMode` string; values that are not a known mode are
 * ignored. Falls back to "local" when neither source yields a valid mode.
 */
function resolveRequestedMode(taskDef: TaskDef): ExecutionMode {
  const structured = taskDef.orchestratorTask?.executionMode;
  if (isExecutionMode(structured)) {
    return structured;
  }
  const fromMetadata = getString(taskDef.metadata, "executionMode");
  if (isExecutionMode(fromMetadata)) {
    return fromMetadata;
  }
  return "local";
}

/**
 * Resolves the parallelism hint for a task.
 *
 * `orchestratorTask.parallelism` is preferred; `metadata.parallelism` is the
 * fallback. A candidate is only accepted when it is a finite number that is
 * at least 1 after flooring, so fractional values such as 0.5 no longer
 * produce a parallelism of 0.
 *
 * @returns The floored parallelism, or `undefined` when no usable value exists.
 */
function resolveParallelism(taskDef: TaskDef): number | undefined {
  return (
    toWholeParallelism(taskDef.orchestratorTask?.parallelism) ??
    toWholeParallelism(getNumber(taskDef.metadata, "parallelism"))
  );
}

/** Floors a candidate parallelism and rejects anything that is not a finite number >= 1. */
function toWholeParallelism(value: unknown): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  const whole = Math.floor(value);
  return whole >= 1 ? whole : undefined;
}

/** Type guard for the three supported execution mode strings. */
function isExecutionMode(value: string | undefined): value is ExecutionMode {
  return value === "local" || value === "subagent" || value === "cloud";
}

/** Reads `key` from a plain (non-null, non-array) object; anything else yields `undefined`. */
function readField(record: unknown, key: string): unknown {
  if (!record || typeof record !== "object" || Array.isArray(record)) {
    return undefined;
  }
  return (record as Record<string, unknown>)[key];
}

/** Returns `record[key]` when it is a string, otherwise `undefined`. */
function getString(record: unknown, key: string): string | undefined {
  const value = readField(record, key);
  return typeof value === "string" ? value : undefined;
}

/** Returns `record[key]` when it is a number, otherwise `undefined`. */
function getNumber(record: unknown, key: string): number | undefined {
  const value = readField(record, key);
  return typeof value === "number" ? value : undefined;
}
4 changes: 4 additions & 0 deletions packages/sdk/src/runtime/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,7 @@ export type {
export { hashInvocationKey } from "./invocation";
export { EffectRequestedError, EffectPendingError, ParallelPendingError, RunFailedError } from "./exceptions";
export { replaySchemaVersion } from "./constants";
export { detectHarnessCapabilities } from "./capabilityReport";
export { resolveModelPhase, resolveModelRoute, summarizeModelRoutes } from "./modelRouting";
export { resolveExecutionStrategy } from "./executionStrategy";
export { createDispatchEnvelope } from "./strategyDispatch";
33 changes: 31 additions & 2 deletions packages/sdk/src/runtime/intrinsics/orchestratorTask.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ import { runTaskIntrinsic, TaskIntrinsicContext } from "./task";
/**
 * Arguments passed to the built-in orchestrator task.
 *
 * The optional routing hints are copied by the task's build step into both
 * the task definition's `metadata` and its `orchestratorTask` section.
 */
interface OrchestratorTaskArgs<T = unknown> {
// Caller-supplied payload; stored as-is in metadata and, when it is a plain
// object, also in the orchestratorTask section.
payload: T;
// Invocation label; runOrchestratorTaskIntrinsic uses "orchestrator-task" when none is given.
label: string;
// Requested execution mode.
executionMode?: "local" | "subagent" | "cloud";
// Phase name forwarded for model selection.
modelPhase?: "plan" | "interactive" | "execute" | "review" | "fix";
// Requested degree of parallelism.
parallelism?: number;
// Subtask descriptors; only their count is recorded in metadata.
subtasks?: Record<string, unknown>[];
}

const ORCHESTRATOR_TASK_ID = "__sdk.orchestratorTask";
Expand All @@ -17,6 +21,19 @@ const orchestratorTask: DefinedTask<OrchestratorTaskArgs, unknown> = {
metadata: {
payload: args?.payload,
orchestratorTask: true,
executionMode: args.executionMode,
modelPhase: args.modelPhase,
parallelism: args.parallelism,
subtaskCount: args.subtasks?.length,
},
orchestratorTask: {
payload: typeof args?.payload === "object" && args.payload !== null && !Array.isArray(args.payload)
? (args.payload as Record<string, unknown>)
: undefined,
executionMode: args.executionMode,
modelPhase: args.modelPhase,
parallelism: args.parallelism,
subtasks: args.subtasks,
},
};
},
Expand All @@ -25,13 +42,25 @@ const orchestratorTask: DefinedTask<OrchestratorTaskArgs, unknown> = {
export function runOrchestratorTaskIntrinsic<TPayload, TResult>(
payload: TPayload,
context: TaskIntrinsicContext,
options?: TaskInvokeOptions
options?: TaskInvokeOptions & {
executionMode?: "local" | "subagent" | "cloud";
modelPhase?: "plan" | "interactive" | "execute" | "review" | "fix";
parallelism?: number;
subtasks?: Record<string, unknown>[];
}
): Promise<TResult> {
const label = options?.label ?? "orchestrator-task";
const invokeOptions = { ...options, label };
return runTaskIntrinsic({
task: orchestratorTask as DefinedTask<OrchestratorTaskArgs<TPayload>, TResult>,
args: { payload, label },
args: {
payload,
label,
executionMode: options?.executionMode,
modelPhase: options?.modelPhase,
parallelism: options?.parallelism,
subtasks: options?.subtasks,
},
invokeOptions,
context,
});
Expand Down
Loading