diff --git a/apps/daemon/src/prompts/discovery.ts b/apps/daemon/src/prompts/discovery.ts index 7309a1d10d..0b2a7dbee0 100644 --- a/apps/daemon/src/prompts/discovery.ts +++ b/apps/daemon/src/prompts/discovery.ts @@ -189,7 +189,7 @@ Emit \`\` **only when this turn wrote a new canonical HTML file**. If ## RULE 3 — TodoWrite the plan, then live updates -Once the design-system / inferred direction / brand-spec is locked, your **first tool call** is TodoWrite with a plan of 5–10 short imperative items in the order you'll do them. The chat renders this as a live "Todos" card — it is the user's primary way to see your plan and redirect cheaply. +Once the design-system / inferred direction / brand-spec is locked, your **first tool call** is TodoWrite with a plan of short imperative items covering the work, in the order you'll do them. The chat renders this as a live "Todos" card — it is the user's primary way to see your plan and redirect cheaply. (No numeric cap — the TodoWrite schema is unbounded and complex briefs legitimately need more than ten steps.) The standard plan template (adapt the middle steps to the brief): diff --git a/apps/daemon/tests/prompts/discovery-todo-cap.test.ts b/apps/daemon/tests/prompts/discovery-todo-cap.test.ts new file mode 100644 index 0000000000..76392b5c07 --- /dev/null +++ b/apps/daemon/tests/prompts/discovery-todo-cap.test.ts @@ -0,0 +1,38 @@ +import { describe, expect, it } from 'vitest'; + +import { DISCOVERY_AND_PHILOSOPHY } from '../../src/prompts/discovery.js'; + +// The system prompt historically told the model to write "a plan of 5–10 short +// imperative items". That upper bound caused the agent to cap every plan at +// exactly ten steps and then stop or skip additional items — even when the task +// genuinely needed more. There is no maxItems constraint in the upstream +// TodoWrite JSON schema (the array is unbounded), so the cap is entirely +// prompt-driven and can be removed here. 
+//
+// This test locks the absence of the cap so a future prompt edit cannot
+// accidentally re-introduce the "5–10" or "5 to 10" wording.
+
+describe('discovery.ts RULE 3 — TodoWrite plan item count', () => {
+  it('does not cap the plan at 10 items via "5–10" wording', () => {
+    // The old wording was "a plan of 5–10 short imperative items".
+    // After the fix the sentence must not mention an upper bound of 10.
+    expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch(/5[–\-]10\s+short\s+imperative/);
+  });
+
+  it('does not cap the plan at 10 items via "5 to 10" wording', () => {
+    expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch(/5 to 10\s+(?:short\s+)?items/i);
+  });
+
+  it('does not re-introduce a numeric cap via "at most / maximum / no more than" phrasing', () => {
+    // Guard against semantically equivalent upper-bound re-introduction.
+    expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch(
+      /(?:at most|maximum|no more than)\s+1[0-9]\s+(?:todo|plan|step|item)/i,
+    );
+  });
+
+  it('still instructs the agent to write a TodoWrite plan', () => {
+    // The intent — plan with TodoWrite before building — must survive the fix.
+    expect(DISCOVERY_AND_PHILOSOPHY).toContain('TodoWrite');
+    expect(DISCOVERY_AND_PHILOSOPHY).toContain('RULE 3');
+  });
+});
diff --git a/packages/contracts/src/prompts/discovery.ts b/packages/contracts/src/prompts/discovery.ts
index 725eaabb0d..ff6c60c1a3 100644
--- a/packages/contracts/src/prompts/discovery.ts
+++ b/packages/contracts/src/prompts/discovery.ts
@@ -161,7 +161,7 @@ Skip directly to RULE 3. Do **not** emit any second direction-picking form and d
 
 ## RULE 3 — TodoWrite the plan, then live updates
 
-Once the design-system / inferred direction / brand-spec is locked, your **first tool call** is TodoWrite with a plan of 5–10 short imperative items in the order you'll do them. The chat renders this as a live "Todos" card — it is the user's primary way to see your plan and redirect cheaply.
+Once the design-system / inferred direction / brand-spec is locked, your **first tool call** is TodoWrite with a plan of short imperative items covering the work, in the order you'll do them. The chat renders this as a live "Todos" card — it is the user's primary way to see your plan and redirect cheaply. (No numeric cap — the TodoWrite schema is unbounded and complex briefs legitimately need more than ten steps.) The standard plan template (adapt the middle steps to the brief): diff --git a/packages/contracts/tests/system-prompt.test.ts b/packages/contracts/tests/system-prompt.test.ts index c42211e4e6..57718ee06c 100644 --- a/packages/contracts/tests/system-prompt.test.ts +++ b/packages/contracts/tests/system-prompt.test.ts @@ -1,6 +1,37 @@ import { describe, expect, it } from 'vitest'; import { composeSystemPrompt } from '../src/prompts/system.js'; +import { DISCOVERY_AND_PHILOSOPHY } from '../src/prompts/discovery.js'; + +// Guard: the contracts copy of DISCOVERY_AND_PHILOSOPHY must have the same +// cap removal as apps/daemon/src/prompts/discovery.ts. The web app imports +// composeSystemPrompt from @open-design/contracts, so only testing the daemon +// copy leaves the web-originated chat path unguarded. 
+describe('DISCOVERY_AND_PHILOSOPHY (contracts copy) — TodoWrite plan item count', () => { // same three wording guards as the daemon-copy test, applied to the contracts export
+  it('does not cap the plan at 10 items via "5–10" wording', () => {
+    expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch(/5[–\-]10\s+short\s+imperative/); // accepts either an en dash or a hyphen between 5 and 10
+  });
+
+  it('does not cap the plan at 10 items via "5 to 10" wording', () => {
+    expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch(/5 to 10\s+(?:short\s+)?items/i); // case-insensitive; the word "short" is optional
+  });
+
+  it('does not re-introduce a numeric cap via "at most / maximum / no more than" phrasing', () => {
+    expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch(
+      /(?:at most|maximum|no more than)\s+1[0-9]\s+(?:todo|plan|step|item)/i, // NOTE: 1[0-9] only matches limits from 10 to 19
+    );
+  });
+
+  it('still instructs the agent to write a TodoWrite plan', () => {
+    expect(DISCOVERY_AND_PHILOSOPHY).toContain('TodoWrite'); // plain substring check
+    expect(DISCOVERY_AND_PHILOSOPHY).toContain('RULE 3'); // plain substring check
+  });
+
+  it('also absent from the composed system prompt', () => {
+    const prompt = composeSystemPrompt({}); // composed from an empty options object
+    expect(prompt).not.toMatch(/5[–\-]10\s+short\s+imperative/); // only the "5–10" pattern is re-checked on the composed prompt
+  });
+});
 
 describe('composeSystemPrompt', () => {
   it('treats an active design system as the visual direction', () => {