From f9e8a75f5e407782a0930233b26d6e6ab5f8eeb5 Mon Sep 17 00:00:00 2001 From: Patrick A <141967+neogenix@users.noreply.github.com> Date: Tue, 19 May 2026 13:13:15 -0400 Subject: [PATCH 1/2] fix(daemon): remove 10-item cap from discovery TodoWrite plan prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RULE 3 sentence in DISCOVERY_AND_PHILOSOPHY told the model to write 'a plan of 5–10 short imperative items'. That upper bound caused the agent to cap every plan at exactly ten steps even when the task genuinely needed more. The TodoWrite JSON schema imposes no maxItems constraint, so the cap was entirely prompt-driven. Replace '5–10 short imperative items' with 'short imperative items covering the work'. TodoWrite intent, RULE 3 label, and planning-before-building requirement all survive unchanged. Red spec: apps/daemon/tests/prompts/discovery-todo-cap.test.ts --- apps/daemon/src/prompts/discovery.ts | 2 +- .../tests/prompts/discovery-todo-cap.test.ts | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 apps/daemon/tests/prompts/discovery-todo-cap.test.ts diff --git a/apps/daemon/src/prompts/discovery.ts b/apps/daemon/src/prompts/discovery.ts index 7309a1d10d..a14dc604fc 100644 --- a/apps/daemon/src/prompts/discovery.ts +++ b/apps/daemon/src/prompts/discovery.ts @@ -189,7 +189,7 @@ Emit \`\` **only when this turn wrote a new canonical HTML file**. If ## RULE 3 — TodoWrite the plan, then live updates -Once the design-system / inferred direction / brand-spec is locked, your **first tool call** is TodoWrite with a plan of 5–10 short imperative items in the order you'll do them. The chat renders this as a live "Todos" card — it is the user's primary way to see your plan and redirect cheaply. +Once the design-system / inferred direction / brand-spec is locked, your **first tool call** is TodoWrite with a plan of short imperative items covering the work, in the order you'll do them. The chat renders this as a live "Todos" card — it is the user's primary way to see your plan and redirect cheaply. The standard plan template (adapt the middle steps to the brief): diff --git a/apps/daemon/tests/prompts/discovery-todo-cap.test.ts b/apps/daemon/tests/prompts/discovery-todo-cap.test.ts new file mode 100644 index 0000000000..44d91a6b55 --- /dev/null +++ b/apps/daemon/tests/prompts/discovery-todo-cap.test.ts @@ -0,0 +1,31 @@ +import { describe, expect, it } from 'vitest'; + +import { DISCOVERY_AND_PHILOSOPHY } from '../../src/prompts/discovery.js'; + +// The system prompt historically told the model to write "a plan of 5–10 short +// imperative items". That upper bound caused the agent to cap every plan at +// exactly ten steps and then stop or skip additional items — even when the task +// genuinely needed more. There is no maxItems constraint in the upstream +// TodoWrite JSON schema (the array is unbounded), so the cap is entirely +// prompt-driven and can be removed here. +// +// This test locks the absence of the cap so a future prompt edit cannot +// accidentally re-introduce the "5–10" or "5 to 10" wording. + +describe('discovery.ts RULE 3 — TodoWrite plan item count', () => { + it('does not cap the plan at 10 items via "5–10" wording', () => { + // The old wording was "a plan of 5–10 short imperative items". + // After the fix the sentence must not mention an upper bound of 10. + expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch(/5[–\-]10\s+short\s+imperative/); + }); + + it('does not cap the plan at 10 items via "5 to 10" wording', () => { + expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch(/5 to 10\s+(?:short\s+)?items/i); + }); + + it('still instructs the agent to write at least a few items', () => { + // The intent — plan with TodoWrite before building — must survive the fix. + expect(DISCOVERY_AND_PHILOSOPHY).toContain('TodoWrite'); + expect(DISCOVERY_AND_PHILOSOPHY).toContain('RULE 3'); + }); +}); From 65c3972c7a14064f29a3bee41a9fad35fd901c0f Mon Sep 17 00:00:00 2001 From: Patrick A <141967+neogenix@users.noreply.github.com> Date: Tue, 19 May 2026 13:58:42 -0400 Subject: [PATCH 2/2] fix(prompts): remove 10-item cap from contracts discovery copy and harden tests [pass-6,7 BLOCKER] packages/contracts/src/prompts/discovery.ts still had the old '5-10 short imperative items' wording. apps/web imports composeSystemPrompt from @open-design/contracts (ProjectView.tsx:43), so web-originated chat runs were still subject to the cap. [pass-8 WARNING] discovery-todo-cap.test.ts did not cover the contracts copy, leaving that path unguarded. Also no guard against semantically equivalent re-introduction via 'at most / maximum / no more than'. Changes: - packages/contracts/src/prompts/discovery.ts: apply same wording fix as apps/daemon; add inline rationale comment - apps/daemon/src/prompts/discovery.ts: add inline rationale comment - apps/daemon/tests/prompts/discovery-todo-cap.test.ts: add 4th assertion blocking 'at most|maximum|no more than N item' re-introduction - packages/contracts/tests/system-prompt.test.ts: add 5-assertion suite guarding the contracts copy and composed prompt output --- apps/daemon/src/prompts/discovery.ts | 2 +- .../tests/prompts/discovery-todo-cap.test.ts | 7 +++++ packages/contracts/src/prompts/discovery.ts | 2 +- .../contracts/tests/system-prompt.test.ts | 31 +++++++++++++++++++ 4 files changed, 40 insertions(+), 2 deletions(-) diff --git a/apps/daemon/src/prompts/discovery.ts b/apps/daemon/src/prompts/discovery.ts index a14dc604fc..0b2a7dbee0 100644 --- a/apps/daemon/src/prompts/discovery.ts +++ b/apps/daemon/src/prompts/discovery.ts @@ -189,7 +189,7 @@ Emit \`\` **only when this turn wrote a new canonical HTML file**. If ## RULE 3 — TodoWrite the plan, then live updates -Once the design-system / inferred direction / brand-spec is locked, your **first tool call** is TodoWrite with a plan of short imperative items covering the work, in the order you'll do them. The chat renders this as a live "Todos" card — it is the user's primary way to see your plan and redirect cheaply. +Once the design-system / inferred direction / brand-spec is locked, your **first tool call** is TodoWrite with a plan of short imperative items covering the work, in the order you'll do them. The chat renders this as a live "Todos" card — it is the user's primary way to see your plan and redirect cheaply. (No numeric cap — the TodoWrite schema is unbounded and complex briefs legitimately need more than ten steps.) The standard plan template (adapt the middle steps to the brief): diff --git a/apps/daemon/tests/prompts/discovery-todo-cap.test.ts b/apps/daemon/tests/prompts/discovery-todo-cap.test.ts index 44d91a6b55..76392b5c07 100644 --- a/apps/daemon/tests/prompts/discovery-todo-cap.test.ts +++ b/apps/daemon/tests/prompts/discovery-todo-cap.test.ts @@ -23,6 +23,13 @@ describe('discovery.ts RULE 3 — TodoWrite plan item count', () => { expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch(/5 to 10\s+(?:short\s+)?items/i); }); + it('does not re-introduce a numeric cap via "at most / maximum / no more than" phrasing', () => { + // Guard against semantically equivalent upper-bound re-introduction. + expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch( + /(?:at most|maximum|no more than)\s+1[0-9]\s+(?:todo|plan|step|item)/i, + ); + }); + it('still instructs the agent to write at least a few items', () => { // The intent — plan with TodoWrite before building — must survive the fix. expect(DISCOVERY_AND_PHILOSOPHY).toContain('TodoWrite'); diff --git a/packages/contracts/src/prompts/discovery.ts b/packages/contracts/src/prompts/discovery.ts index 725eaabb0d..ff6c60c1a3 100644 --- a/packages/contracts/src/prompts/discovery.ts +++ b/packages/contracts/src/prompts/discovery.ts @@ -161,7 +161,7 @@ Skip directly to RULE 3. Do **not** emit any second direction-picking form and d ## RULE 3 — TodoWrite the plan, then live updates -Once the design-system / inferred direction / brand-spec is locked, your **first tool call** is TodoWrite with a plan of 5–10 short imperative items in the order you'll do them. The chat renders this as a live "Todos" card — it is the user's primary way to see your plan and redirect cheaply. +Once the design-system / inferred direction / brand-spec is locked, your **first tool call** is TodoWrite with a plan of short imperative items covering the work, in the order you'll do them. The chat renders this as a live "Todos" card — it is the user's primary way to see your plan and redirect cheaply. (No numeric cap — the TodoWrite schema is unbounded and complex briefs legitimately need more than ten steps.) The standard plan template (adapt the middle steps to the brief): diff --git a/packages/contracts/tests/system-prompt.test.ts b/packages/contracts/tests/system-prompt.test.ts index c42211e4e6..57718ee06c 100644 --- a/packages/contracts/tests/system-prompt.test.ts +++ b/packages/contracts/tests/system-prompt.test.ts @@ -1,6 +1,37 @@ import { describe, expect, it } from 'vitest'; import { composeSystemPrompt } from '../src/prompts/system.js'; +import { DISCOVERY_AND_PHILOSOPHY } from '../src/prompts/discovery.js'; + +// Guard: the contracts copy of DISCOVERY_AND_PHILOSOPHY must have the same +// cap removal as apps/daemon/src/prompts/discovery.ts. The web app imports +// composeSystemPrompt from @open-design/contracts, so only testing the daemon +// copy leaves the web-originated chat path unguarded. +describe('DISCOVERY_AND_PHILOSOPHY (contracts copy) — TodoWrite plan item count', () => { + it('does not cap the plan at 10 items via "5–10" wording', () => { + expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch(/5[–\-]10\s+short\s+imperative/); + }); + + it('does not cap the plan at 10 items via "5 to 10" wording', () => { + expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch(/5 to 10\s+(?:short\s+)?items/i); + }); + + it('does not re-introduce a numeric cap via "at most / maximum / no more than" phrasing', () => { + expect(DISCOVERY_AND_PHILOSOPHY).not.toMatch( + /(?:at most|maximum|no more than)\s+1[0-9]\s+(?:todo|plan|step|item)/i, + ); + }); + + it('still instructs the agent to write a TodoWrite plan', () => { + expect(DISCOVERY_AND_PHILOSOPHY).toContain('TodoWrite'); + expect(DISCOVERY_AND_PHILOSOPHY).toContain('RULE 3'); + }); + + it('also absent from the composed system prompt', () => { + const prompt = composeSystemPrompt({}); + expect(prompt).not.toMatch(/5[–\-]10\s+short\s+imperative/); + }); +}); describe('composeSystemPrompt', () => { it('treats an active design system as the visual direction', () => {