Skip to content

Commit f117fa0

Browse files
dcramer and codex committed
fix(scheduler): Dispatch scheduled tasks through agent runtime
Scheduled task heartbeats now dispatch the stored task text through the public agent dispatch API with explicit source, destination, and metadata instead of compiling a scheduler-specific prompt. Core dispatch handling renders generic dispatch context and relies on normal final-answer delivery for API dispatches. Add focused scheduler eval coverage for schedule creation and due task execution, plus an ast-grep guard against opaque dispatch context blobs.

Fixes GH-611
Co-Authored-By: GPT-5 Codex <noreply@openai.com>
1 parent 4670388 commit f117fa0

26 files changed

Lines changed: 724 additions & 255 deletions
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
id: no-plugin-dispatch-context
2+
language: TypeScript
3+
severity: error
4+
message: Plugin agent dispatch must use typed source/destination/metadata, not an opaque context prompt blob.
5+
files:
6+
- packages/**/*.ts
7+
- packages/**/*.tsx
8+
rule:
9+
pattern: "$CTX.agent.dispatch({ $$$BEFORE, context: $VALUE, $$$AFTER })"

packages/junior-evals/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ These knobs work by overriding services on the eval-local runtime instance. They
7878

7979
Tool replay:
8080

81-
- `webFetch` and `webSearch` are wrapped with `vitest-evals/replay` in the eval harness. `pnpm evals` uses `auto` replay mode; use `pnpm evals:record` to force fresh recordings under `.vitest-evals/recordings`.
81+
- `webFetch` and `webSearch` are wrapped with `vitest-evals/replay` in the eval harness. Use `pnpm evals:record` to force fresh recordings under `.vitest-evals/recordings`.
8282
- Keep committed recordings minimal and source-specific. Regenerate them from the evals that need replay, then review for stale exploratory fetches and secret-like values before committing.
8383

8484
## Running

packages/junior-evals/evals/behavior-harness.ts

Lines changed: 145 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,16 @@ import {
4545
import { generateAssistantReply } from "@/chat/respond";
4646
import type { JuniorDatabase } from "@/chat/sql/db";
4747
import { juniorSqlSchema } from "@/chat/sql/schema";
48-
import { schedulerPlugin } from "@sentry/junior-scheduler";
48+
import {
49+
createSchedulerSqlStore,
50+
schedulerPlugin,
51+
type ScheduledTask,
52+
} from "@sentry/junior-scheduler";
53+
import { runPluginHeartbeats } from "@/chat/agent-dispatch/heartbeat";
54+
import { runAgentDispatchSlice } from "@/chat/agent-dispatch/runner";
55+
import { verifyDispatchCallbackRequest } from "@/chat/agent-dispatch/signing";
56+
import { getDispatchRecord } from "@/chat/agent-dispatch/store";
57+
import type { DispatchCallback } from "@/chat/agent-dispatch/types";
4958
import { getStateAdapter } from "@/chat/state/adapter";
5059
import { resetSkillDiscoveryCache } from "@/chat/skills";
5160
import { createWebFetchTool } from "@/chat/tools/web/fetch-tool";
@@ -130,11 +139,22 @@ interface AssistantContextChangedEvent extends EvalBaseEvent {
130139
user_id?: string;
131140
}
132141

142+
/** Eval event describing a stored scheduled task that has become due. */
interface ScheduledTaskDueEvent extends EvalBaseEvent {
  type: "scheduled_task_due";

  /** Task text stored on the scheduled task that is dispatched. */
  task_text: string;

  /** Clock value, in epoch milliseconds, at which the task is due. */
  now_ms?: number;
  /** Time zone recorded on the task's schedule. */
  timezone?: string;

  /** Human-readable description of the schedule. */
  schedule?: string;
  /** Whether the task runs once or repeats. */
  schedule_kind?: "one_off" | "recurring";
  /** Repeat frequency for a recurring schedule. */
  recurrence?: "daily" | "weekly" | "monthly" | "yearly";
}
151+
133152
export type EvalEvent =
134153
| MentionEvent
135154
| SubscribedMessageEvent
136155
| AssistantThreadStartedEvent
137-
| AssistantContextChangedEvent;
156+
| AssistantContextChangedEvent
157+
| ScheduledTaskDueEvent;
138158

139159
interface SubscribedDecisionFixture {
140160
reason: string;
@@ -1276,7 +1296,7 @@ async function setupHarnessEnvironment(
12761296
scenario.events.flatMap((event) =>
12771297
"message" in event
12781298
? [event.message.author?.user_id?.trim() || "U-test"]
1279-
: event.user_id
1299+
: "user_id" in event && event.user_id
12801300
? [event.user_id]
12811301
: [],
12821302
),
@@ -1569,12 +1589,21 @@ function buildRuntimeServices(
15691589
async function processEvents(args: {
15701590
scenario: EvalScenario;
15711591
env: HarnessEnvironment;
1592+
generateAssistantReply: typeof generateAssistantReply;
15721593
slackRuntime: ReturnType<typeof createSlackRuntime>;
15731594
getThreadRecord: (fixture: EvalEventThreadFixture) => EvalThreadRecord;
15741595
readyQueueDeliveries: QueueDelivery[];
1596+
schedulerDb: PluginDb;
15751597
}): Promise<void> {
1576-
const { scenario, env, slackRuntime, getThreadRecord, readyQueueDeliveries } =
1577-
args;
1598+
const {
1599+
scenario,
1600+
env,
1601+
generateAssistantReply,
1602+
slackRuntime,
1603+
getThreadRecord,
1604+
readyQueueDeliveries,
1605+
schedulerDb,
1606+
} = args;
15781607

15791608
const consumedOauthStates = new Set<string>();
15801609
const consumedMcpAuthSessions = new Set<string>();
@@ -1650,9 +1679,113 @@ async function processEvents(args: {
16501679
await slackRuntime.handleAssistantContextChanged(lifecycleEvent);
16511680
};
16521681

1682+
/**
 * Runs a `scheduled_task_due` eval event end to end.
 *
 * Saves an active task that is due at `nowMs` through the scheduler SQL
 * store, runs plugin heartbeats while `globalThis.fetch` is temporarily
 * replaced so dispatch callbacks accepted by `verifyDispatchCallbackRequest`
 * are captured in memory, then runs every captured dispatch belonging to the
 * task through `runAgentDispatchSlice` with the eval reply generator.
 *
 * @param event - Due-task fixture. Unset fields fall back to a fixed clock,
 *   a one-off schedule, and the UTC time zone.
 * @throws Error when `JUNIOR_BASE_URL` is unset, when no callback is
 *   captured, when no run of the task carries a dispatch id, or when a
 *   dispatch record or its callback cannot be found.
 */
const runScheduledTaskDue = async (
  event: ScheduledTaskDueEvent,
): Promise<void> => {
  const { thread } = getThreadRecord(event.thread);
  // A fixed default clock keeps the task id and the recurrence start date
  // reproducible when the fixture does not pin a time.
  const nowMs = event.now_ms ?? Date.parse("2026-05-26T12:00:00.000Z");
  const scheduleKind = event.schedule_kind ?? "one_off";
  const taskId = `eval_schedule_${thread.channelId}_${nowMs}`;
  const task: ScheduledTask = {
    id: taskId,
    createdAtMs: nowMs - 60_000,
    createdBy: { slackUserId: "U-test", userName: "testuser" },
    destination: createEvalDestination(
      thread,
    ) as ScheduledTask["destination"],
    nextRunAtMs: nowMs,
    schedule: {
      description:
        event.schedule ??
        (scheduleKind === "recurring" ? "Weekly at noon" : "Once now"),
      kind: scheduleKind,
      timezone: event.timezone ?? "UTC",
      ...(scheduleKind === "recurring"
        ? {
            recurrence: {
              frequency: event.recurrence ?? "weekly",
              interval: 1,
              startDate: new Date(nowMs).toISOString().slice(0, 10),
              time: { hour: 12, minute: 0 },
            },
          }
        : {}),
    },
    status: "active",
    task: { text: event.task_text },
    updatedAtMs: nowMs - 60_000,
  };
  const schedulerStore = createSchedulerSqlStore(schedulerDb);
  await schedulerStore.saveTask(task);

  // Fail with a named cause instead of the opaque "Invalid URL" TypeError
  // that `new URL(path, undefined)` would raise.
  const baseUrl = process.env.JUNIOR_BASE_URL;
  if (!baseUrl) {
    throw new Error(
      "JUNIOR_BASE_URL must be set to run scheduled eval tasks.",
    );
  }
  const callbacks: DispatchCallback[] = [];
  const expectedCallbackUrl = new URL(
    "/api/internal/agent-dispatch",
    baseUrl,
  ).href;

  // Intercept only the dispatch callback endpoint; every other request is
  // forwarded to the real fetch implementation.
  const originalFetch = globalThis.fetch;
  globalThis.fetch = (async (input, init) => {
    const url =
      typeof input === "string"
        ? input
        : input instanceof URL
          ? input.href
          : input.url;
    if (new URL(url).href === expectedCallbackUrl) {
      const callback = await verifyDispatchCallbackRequest(
        new Request(input, init),
      );
      if (!callback) {
        return new Response("Unauthorized", { status: 401 });
      }
      callbacks.push(callback);
      return new Response("Accepted", { status: 202 });
    }
    return await originalFetch(input, init);
  }) as typeof fetch;
  try {
    await runPluginHeartbeats({ nowMs });
  } finally {
    // Always restore the global so later events see the real fetch.
    globalThis.fetch = originalFetch;
  }
  if (callbacks.length === 0) {
    throw new Error(
      "Scheduled eval task did not enqueue a dispatch callback.",
    );
  }

  // Query the store once; the same result feeds the success path and the
  // diagnostic payload of the failure path.
  const taskRuns = (await schedulerStore.listIncompleteRuns()).filter(
    (run) => run.taskId === taskId,
  );
  const dispatchedRuns = taskRuns.filter((run) => run.dispatchId);
  if (dispatchedRuns.length === 0) {
    const savedTask = await schedulerStore.getTask(taskId);
    throw new Error(
      `Scheduled eval task did not create a dispatch: ${JSON.stringify({ runs: taskRuns, savedTask })}`,
    );
  }
  for (const run of dispatchedRuns) {
    const { dispatchId } = run;
    if (!dispatchId) {
      // Already excluded by the filter above; this guard only narrows the type.
      continue;
    }
    const dispatch = await getDispatchRecord(dispatchId);
    if (!dispatch) {
      throw new Error("Scheduled eval dispatch record was not found.");
    }
    const callback = callbacks.find(
      (candidate) => candidate.id === dispatch.id,
    );
    if (!callback) {
      throw new Error("Scheduled eval dispatch callback was not captured.");
    }
    await runAgentDispatchSlice(callback, { generateAssistantReply });
  }
};
1783+
16531784
for (const event of scenario.events) {
16541785
if (event.type === "new_mention" || event.type === "subscribed_message") {
16551786
enqueueEvent(event);
1787+
} else if (event.type === "scheduled_task_due") {
1788+
await runScheduledTaskDue(event);
16561789
} else {
16571790
await runLifecycleEvent(event);
16581791
}
@@ -1803,6 +1936,11 @@ export async function runEvalScenario(
18031936
threadRecordsById,
18041937
observations,
18051938
);
1939+
const generateEvalAssistantReply =
1940+
services.replyExecutor?.generateAssistantReply;
1941+
if (!generateEvalAssistantReply) {
1942+
throw new Error("Eval reply executor was not configured.");
1943+
}
18061944

18071945
const slackRuntime = createSlackRuntime({
18081946
getSlackAdapter: () => slackAdapter as any,
@@ -1812,9 +1950,11 @@ export async function runEvalScenario(
18121950
await processEvents({
18131951
scenario,
18141952
env,
1953+
generateAssistantReply: generateEvalAssistantReply,
18151954
slackRuntime,
18161955
getThreadRecord,
18171956
readyQueueDeliveries,
1957+
schedulerDb: schedulerSql.db,
18181958
});
18191959

18201960
return collectResults(

packages/junior-evals/evals/core/scheduler.eval.ts

Lines changed: 44 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,87 @@
11
import { describeEval } from "vitest-evals";
2-
import { mention, rubric, slackEvals } from "../helpers";
2+
import { mention, rubric, scheduledTaskDue, slackEvals } from "../helpers";
33

44
describeEval("Scheduler", slackEvals, (it) => {
5-
it("when asked for a simple one-off reminder, create it without asking for confirmation", async ({
5+
it("when asked for a specific one-off reminder, preserve the future work in the schedule", async ({
66
run,
77
}) => {
88
await run({
9-
events: [mention("@bot remind me in 1 minute to wash my hands")],
9+
events: [
10+
mention(
11+
"@bot remind me in 2 minutes to tell the channel standup moved",
12+
),
13+
],
1014
criteria: rubric({
1115
contract:
12-
"A simple one-off reminder request is scheduled immediately for the active Slack context.",
16+
"A one-off reminder request is scheduled with the future reminder work preserved as the task.",
1317
pass: [
14-
"The reply confirms that a one-off reminder to wash hands was scheduled.",
15-
"The schedule creation omits recurrence.",
16-
"The reply does not ask the user to confirm first.",
18+
"The observed slackScheduleCreateTask tool call has schedule_kind=one_off.",
19+
"The observed slackScheduleCreateTask tool call omits recurrence.",
20+
"The observed slackScheduleCreateTask task is the reminder work to perform later, not instructions for how to create or manage a schedule.",
1721
],
1822
fail: [
19-
"Do not ask the user to confirm the reminder before creating it.",
20-
"Do not ask the user to provide a channel ID.",
21-
"Do not describe the reminder as a recurring schedule.",
23+
"Do not store task text that tells Junior to schedule a reminder later.",
24+
"Do not ask the user to confirm before creating this clear reminder.",
2225
],
2326
}),
2427
});
2528
});
2629

27-
it("when asked for a terse one-off reminder, create it without recurrence", async ({
30+
it("when a one-off reminder becomes due, deliver the reminder outcome", async ({
2831
run,
2932
}) => {
3033
await run({
31-
events: [mention("@bot remind me to drink water in 1m")],
34+
events: [
35+
scheduledTaskDue("Post this reminder: Standup moved to 10:30 today.", {
36+
schedule: "Once at noon UTC",
37+
schedule_kind: "one_off",
38+
}),
39+
],
3240
criteria: rubric({
3341
contract:
34-
"A terse one-off reminder request is scheduled immediately for the active Slack context.",
42+
"A due one-off scheduled task is executed now and posts the requested reminder outcome to the destination channel.",
3543
pass: [
36-
"The reply confirms that a one-off reminder to drink water was scheduled.",
37-
"The schedule creation omits recurrence.",
38-
"The reply does not ask the user to retry with a different one-time format.",
44+
"The channel_posts output contains a Slack channel message saying standup moved to 10:30 today.",
45+
"The delivered message is the reminder content itself, not a schedule creation confirmation.",
46+
"The delivered message does not ask for clarification or confirmation.",
3947
],
4048
fail: [
41-
"Do not reject the request as an invalid one-off task format.",
42-
"Do not ask the user to confirm the reminder before creating it.",
43-
"Do not describe the reminder as a recurring schedule.",
49+
"Do not use webSearch, webFetch, bash, callMcpTool, sandbox, or Slack history tools for this reminder-only task.",
50+
"Do not say that a reminder was scheduled or will be scheduled.",
51+
"Do not omit the 10:30 standup update.",
52+
"Do not ask the user what to do with the reminder.",
4453
],
4554
}),
4655
});
4756
});
4857

49-
it("when asked to schedule clear recurring work, create it without confirmation", async ({
58+
it("when a recurring scheduled task becomes due, deliver that occurrence", async ({
5059
run,
5160
}) => {
5261
await run({
5362
events: [
54-
mention(
55-
"@bot schedule this every Monday at 9am Pacific: check open GitHub issues about the scheduler and post a short digest here.",
63+
scheduledTaskDue(
64+
"Post this reminder: Submit timesheets by 5pm today.",
65+
{
66+
recurrence: "weekly",
67+
schedule: "Weekly on Monday at noon UTC",
68+
schedule_kind: "recurring",
69+
},
5670
),
5771
],
5872
criteria: rubric({
5973
contract:
60-
"A clear future or recurring task request is normalized and scheduled immediately for the active Slack context.",
74+
"A due recurring scheduled task is executed for the current occurrence and posts the requested reminder outcome to the destination channel.",
6175
pass: [
62-
"The created task describes checking scheduler-related GitHub issues, not creating a schedule.",
63-
"The schedule creation sets recurrence=weekly.",
64-
"The reply confirms the recurring schedule was created for Monday at 9am Pacific.",
76+
"The channel_posts output contains a Slack channel message reminding people to submit timesheets by 5pm today.",
77+
"The delivered message treats this as the current due occurrence.",
78+
"The delivered message is not just a confirmation that a recurring task exists.",
6579
],
6680
fail: [
67-
"Do not ask the user to confirm before creating the clear recurring task.",
68-
"Do not ask the user to provide a channel ID.",
69-
"Do not only give instructions for how the user can set up an external cron.",
81+
"Do not use webSearch, webFetch, bash, callMcpTool, sandbox, or Slack history tools for this reminder-only task.",
82+
"Do not say only that a weekly reminder was scheduled.",
83+
"Do not omit the timesheets by 5pm content.",
84+
"Do not ask the user to confirm the recurring task before posting.",
7085
],
7186
}),
7287
});

packages/junior-evals/evals/helpers.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,36 @@ export function threadMessage(
518518
};
519519
}
520520

521+
/**
 * Builds an event for a scheduled task becoming due and dispatching output.
 *
 * @param taskText - Task text emitted as the event's `task_text`.
 * @param opts - Optional overrides. `thread` is spread over the generated
 *   thread fixture; every other option is copied onto the event only when
 *   it is provided.
 * @returns A `scheduled_task_due` event whose thread ids derive from `nextId()`.
 */
export function scheduledTaskDue(
  taskText: string,
  opts?: {
    now_ms?: number;
    recurrence?: "daily" | "weekly" | "monthly" | "yearly";
    schedule?: string;
    schedule_kind?: "one_off" | "recurring";
    thread?: ThreadOverrides;
    timezone?: string;
  },
) {
  const seq = nextId();
  return {
    type: "scheduled_task_due" as const,
    thread: {
      id: `thread-${seq}`,
      channel_id: `C${seq}`,
      thread_ts: `17000000.${seq}`,
      ...opts?.thread,
    },
    task_text: taskText,
    // Compare against undefined rather than truthiness so an explicit
    // `now_ms: 0` is forwarded instead of being silently dropped.
    ...(opts?.now_ms !== undefined ? { now_ms: opts.now_ms } : {}),
    ...(opts?.recurrence ? { recurrence: opts.recurrence } : {}),
    ...(opts?.schedule ? { schedule: opts.schedule } : {}),
    ...(opts?.schedule_kind ? { schedule_kind: opts.schedule_kind } : {}),
    ...(opts?.timezone ? { timezone: opts.timezone } : {}),
  };
}
550+
521551
/** Builds an assistant thread lifecycle start event for a harnessed Slack eval. */
522552
export function threadStart(opts?: {
523553
thread?: ThreadOverrides;

packages/junior-evals/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
"type": "module",
66
"scripts": {
77
"test": "vitest run",
8-
"evals": "JUNIOR_STATE_ADAPTER=memory VITEST_EVALS_REPLAY_MODE=auto pnpm exec vitest run -c vitest.evals.config.ts",
9-
"evals:record": "JUNIOR_STATE_ADAPTER=memory VITEST_EVALS_REPLAY_MODE=record pnpm exec vitest run -c vitest.evals.config.ts"
8+
"evals": "pnpm exec vitest run -c vitest.evals.config.ts",
9+
"evals:record": "VITEST_EVALS_REPLAY_MODE=record pnpm exec vitest run -c vitest.evals.config.ts"
1010
},
1111
"devDependencies": {
1212
"@sentry/junior": "workspace:*",

0 commit comments

Comments
 (0)