diff --git a/agents/gsd-executor.md b/agents/gsd-executor.md index 7d95f26238..e0a5c7d15d 100644 --- a/agents/gsd-executor.md +++ b/agents/gsd-executor.md @@ -171,6 +171,16 @@ Track auto-fix attempts per task. After 3 auto-fix attempts on a single task: - Do NOT restart the build to find more issues + +**During task execution, if you make 5+ consecutive Read/Grep/Glob calls without any Edit/Write/Bash action:** + +STOP. State in one sentence why you haven't written anything yet. Then either: +1. Write code (you have enough context), or +2. Report "blocked" with the specific missing information. + +Do NOT continue reading. Analysis without action is a stuck signal. + + **Auth errors during `type="auto"` execution are gates, not failures.** diff --git a/agents/gsd-plan-checker.md b/agents/gsd-plan-checker.md index 3ef73ea366..fd0cd6b518 100644 --- a/agents/gsd-plan-checker.md +++ b/agents/gsd-plan-checker.md @@ -445,6 +445,8 @@ Session persists | 01 | 3 | COVERED For each requirement: find covering task(s), verify action is specific, flag gaps. +**Exhaustive cross-check:** Also read PROJECT.md requirements (not just phase goal). Verify no PROJECT.md requirement relevant to this phase is silently dropped. Any unmapped requirement is an automatic blocker — list it explicitly in issues. + ## Step 5: Validate Task Structure Use gsd-tools plan-structure verification (already run in Step 2): diff --git a/agents/gsd-planner.md b/agents/gsd-planner.md index ffa1723f83..aa358824c7 100644 --- a/agents/gsd-planner.md +++ b/agents/gsd-planner.md @@ -234,6 +234,26 @@ This prevents the "scavenger hunt" anti-pattern where executors explore the code **Why TDD gets own plan:** TDD requires RED→GREEN→REFACTOR cycles consuming 40-50% context. Embedding in multi-task plans degrades quality. +**Task-level TDD** (for code-producing tasks in standard plans): When a task creates or modifies production code, add `tdd="true"` and a `` block to make test expectations explicit before implementation: + +```xml + + Task: [name] + src/feature.ts, src/feature.test.ts + + - Test 1: [expected behavior] + - Test 2: [edge case] + + [Implementation after tests pass] + + npm test -- --filter=feature + + [Criteria] + +``` + +Exceptions where `tdd="true"` is not needed: `type="checkpoint:*"` tasks, configuration-only files, documentation, migration scripts, glue code wiring existing tested components, styling-only changes. + ## User Setup Detection For tasks involving external services, identify human-required configuration: diff --git a/bin/install.js b/bin/install.js index 116998231a..12e96f8dbc 100755 --- a/bin/install.js +++ b/bin/install.js @@ -13,6 +13,23 @@ const yellow = '\x1b[33m'; const dim = '\x1b[2m'; const reset = '\x1b[0m'; +// Codex config.toml constants +const GSD_CODEX_MARKER = '# GSD Agent Configuration \u2014 managed by get-shit-done installer'; + +const CODEX_AGENT_SANDBOX = { + 'gsd-executor': 'workspace-write', + 'gsd-planner': 'workspace-write', + 'gsd-phase-researcher': 'workspace-write', + 'gsd-project-researcher': 'workspace-write', + 'gsd-research-synthesizer': 'workspace-write', + 'gsd-verifier': 'workspace-write', + 'gsd-codebase-mapper': 'workspace-write', + 'gsd-roadmapper': 'workspace-write', + 'gsd-debugger': 'workspace-write', + 'gsd-plan-checker': 'read-only', + 'gsd-integration-checker': 'read-only', +}; + // Get version from package.json const pkg = require('../package.json'); @@ -437,15 +454,43 @@ function convertClaudeToCodexMarkdown(content) { function getCodexSkillAdapterHeader(skillName) { const invocation = `$${skillName}`; return ` -Codex skills-first mode: +## A. Skill Invocation - This skill is invoked by mentioning \`${invocation}\`. - Treat all user text after \`${invocation}\` as \`{{GSD_ARGS}}\`. - If no arguments are present, treat \`{{GSD_ARGS}}\` as empty. -Legacy orchestration compatibility: -- Any \`Task(...)\` pattern in referenced workflow docs is legacy syntax. -- Implement equivalent behavior with Codex collaboration tools: \`spawn_agent\`, \`wait\`, \`send_input\`, and \`close_agent\`. -- Treat legacy \`subagent_type\` names as role hints in the spawned message. +## B. AskUserQuestion → request_user_input Mapping +GSD workflows use \`AskUserQuestion\` (Claude Code syntax). Translate to Codex \`request_user_input\`: + +Parameter mapping: +- \`header\` → \`header\` +- \`question\` → \`question\` +- Options formatted as \`"Label" — description\` → \`{label: "Label", description: "description"}\` +- Generate \`id\` from header: lowercase, replace spaces with underscores + +Batched calls: +- \`AskUserQuestion([q1, q2])\` → single \`request_user_input\` with multiple entries in \`questions[]\` + +Multi-select workaround: +- Codex has no \`multiSelect\`. Use sequential single-selects, or present a numbered freeform list asking the user to enter comma-separated numbers. + +Execute mode fallback: +- When \`request_user_input\` is rejected (Execute mode), present a plain-text numbered list and pick a reasonable default. + +## C. Task() → spawn_agent Mapping +GSD workflows use \`Task(...)\` (Claude Code syntax). Translate to Codex collaboration tools: + +Direct mapping: +- \`Task(subagent_type="X", prompt="Y")\` → \`spawn_agent(agent_type="X", message="Y")\` +- \`Task(model="...")\` → omit (Codex uses per-role config, not inline model selection) +- \`fork_context: false\` by default — GSD agents load their own context via \`\` blocks + +Parallel fan-out: +- Spawn multiple agents → collect agent IDs → \`wait(ids)\` for all to complete + +Result parsing: +- Look for structured markers in agent output: \`CHECKPOINT\`, \`PLAN COMPLETE\`, \`SUMMARY\`, etc. +- \`close_agent(id)\` after collecting results from each agent `; } @@ -466,6 +511,191 @@ function convertClaudeCommandToCodexSkill(content, skillName) { return `---\nname: ${yamlQuote(skillName)}\ndescription: ${yamlQuote(description)}\nmetadata:\n short-description: ${yamlQuote(shortDescription)}\n---\n\n${adapter}\n\n${body.trimStart()}`; } +/** + * Convert Claude Code agent markdown to Codex agent format. + * Applies base markdown conversions, then adds a header + * and cleans up frontmatter (removes tools/color fields). + */ +function convertClaudeAgentToCodexAgent(content) { + let converted = convertClaudeToCodexMarkdown(content); + + const { frontmatter, body } = extractFrontmatterAndBody(converted); + if (!frontmatter) return converted; + + const name = extractFrontmatterField(frontmatter, 'name') || 'unknown'; + const description = extractFrontmatterField(frontmatter, 'description') || ''; + const tools = extractFrontmatterField(frontmatter, 'tools') || ''; + + const roleHeader = ` +role: ${name} +tools: ${tools} +purpose: ${toSingleLine(description)} +`; + + const cleanFrontmatter = `---\nname: ${yamlQuote(name)}\ndescription: ${yamlQuote(toSingleLine(description))}\n---`; + + return `${cleanFrontmatter}\n\n${roleHeader}\n${body}`; +} + +/** + * Generate a per-agent .toml config file for Codex. + * Sets sandbox_mode and developer_instructions from the agent markdown body. + */ +function generateCodexAgentToml(agentName, agentContent) { + const sandboxMode = CODEX_AGENT_SANDBOX[agentName] || 'read-only'; + const { body } = extractFrontmatterAndBody(agentContent); + const instructions = body.trim(); + + const lines = [ + `sandbox_mode = "${sandboxMode}"`, + `developer_instructions = """`, + instructions, + `"""`, + ]; + return lines.join('\n') + '\n'; +} + +/** + * Generate the GSD config block for Codex config.toml. + * @param {Array<{name: string, description: string}>} agents + */ +function generateCodexConfigBlock(agents) { + const lines = [ + GSD_CODEX_MARKER, + '[features]', + 'multi_agent = true', + 'default_mode_request_user_input = true', + '', + '[agents]', + 'max_threads = 4', + 'max_depth = 2', + '', + ]; + + for (const { name, description } of agents) { + lines.push(`[agents.${name}]`); + lines.push(`description = ${JSON.stringify(description)}`); + lines.push(`config_file = "agents/${name}.toml"`); + lines.push(''); + } + + return lines.join('\n'); +} + +/** + * Strip GSD sections from Codex config.toml content. + * Returns cleaned content, or null if file would be empty. + */ +function stripGsdFromCodexConfig(content) { + const markerIndex = content.indexOf(GSD_CODEX_MARKER); + + if (markerIndex !== -1) { + // Has GSD marker — remove everything from marker to EOF + let before = content.substring(0, markerIndex).trimEnd(); + // Also strip GSD-injected feature keys above the marker (Case 3 inject) + before = before.replace(/^multi_agent\s*=\s*true\s*\n?/m, ''); + before = before.replace(/^default_mode_request_user_input\s*=\s*true\s*\n?/m, ''); + before = before.replace(/^\[features\]\s*\n(?=\[|$)/m, ''); + before = before.replace(/\n{3,}/g, '\n\n').trim(); + if (!before) return null; + return before + '\n'; + } + + // No marker but may have GSD-injected feature keys + let cleaned = content; + cleaned = cleaned.replace(/^multi_agent\s*=\s*true\s*\n?/m, ''); + cleaned = cleaned.replace(/^default_mode_request_user_input\s*=\s*true\s*\n?/m, ''); + + // Remove [agents.gsd-*] sections (from header to next section or EOF) + cleaned = cleaned.replace(/^\[agents\.gsd-[^\]]+\]\n(?:(?!\[)[^\n]*\n?)*/gm, ''); + + // Remove [features] section if now empty (only header, no keys before next section) + cleaned = cleaned.replace(/^\[features\]\s*\n(?=\[|$)/m, ''); + + // Remove [agents] section if now empty + cleaned = cleaned.replace(/^\[agents\]\s*\n(?=\[|$)/m, ''); + + // Clean up excessive blank lines + cleaned = cleaned.replace(/\n{3,}/g, '\n\n').trim(); + + if (!cleaned) return null; + return cleaned + '\n'; +} + +/** + * Merge GSD config block into an existing or new config.toml. + * Three cases: new file, existing with GSD marker, existing without marker. + */ +function mergeCodexConfig(configPath, gsdBlock) { + // Case 1: No config.toml — create fresh + if (!fs.existsSync(configPath)) { + fs.writeFileSync(configPath, gsdBlock + '\n'); + return; + } + + const existing = fs.readFileSync(configPath, 'utf8'); + const markerIndex = existing.indexOf(GSD_CODEX_MARKER); + + // Case 2: Has GSD marker — truncate and re-append + if (markerIndex !== -1) { + const before = existing.substring(0, markerIndex).trimEnd(); + const newContent = before ? before + '\n\n' + gsdBlock + '\n' : gsdBlock + '\n'; + fs.writeFileSync(configPath, newContent); + return; + } + + // Case 3: No marker — inject features if needed, append agents + let content = existing; + const featuresRegex = /^\[features\]\s*$/m; + const hasFeatures = featuresRegex.test(content); + + if (hasFeatures) { + if (!content.includes('multi_agent')) { + content = content.replace(featuresRegex, '[features]\nmulti_agent = true'); + } + if (!content.includes('default_mode_request_user_input')) { + content = content.replace(/^\[features\].*$/m, '$&\ndefault_mode_request_user_input = true'); + } + // Append agents block (skip the [features] section from gsdBlock) + const agentsBlock = gsdBlock.substring(gsdBlock.indexOf('[agents]')); + content = content.trimEnd() + '\n\n' + GSD_CODEX_MARKER + '\n' + agentsBlock + '\n'; + } else { + content = content.trimEnd() + '\n\n' + gsdBlock + '\n'; + } + + fs.writeFileSync(configPath, content); +} + +/** + * Generate config.toml and per-agent .toml files for Codex. + * Reads agent .md files from source, extracts metadata, writes .toml configs. + */ +function installCodexConfig(targetDir, agentsSrc) { + const configPath = path.join(targetDir, 'config.toml'); + const agentsTomlDir = path.join(targetDir, 'agents'); + fs.mkdirSync(agentsTomlDir, { recursive: true }); + + const agentEntries = fs.readdirSync(agentsSrc).filter(f => f.startsWith('gsd-') && f.endsWith('.md')); + const agents = []; + + for (const file of agentEntries) { + const content = fs.readFileSync(path.join(agentsSrc, file), 'utf8'); + const { frontmatter } = extractFrontmatterAndBody(content); + const name = extractFrontmatterField(frontmatter, 'name') || file.replace('.md', ''); + const description = extractFrontmatterField(frontmatter, 'description') || ''; + + agents.push({ name, description: toSingleLine(description) }); + + const tomlContent = generateCodexAgentToml(name, content); + fs.writeFileSync(path.join(agentsTomlDir, `${name}.toml`), tomlContent); + } + + const gsdBlock = generateCodexConfigBlock(agents); + mergeCodexConfig(configPath, gsdBlock); + + return agents.length; +} + /** * Strip HTML tags for Gemini CLI output * Terminals don't support subscript — Gemini renders these as raw HTML. @@ -1033,6 +1263,40 @@ function uninstall(isGlobal, runtime = 'claude') { console.log(` ${green}✓${reset} Removed ${skillCount} Codex skills`); } } + + // Codex: remove GSD agent .toml config files + const codexAgentsDir = path.join(targetDir, 'agents'); + if (fs.existsSync(codexAgentsDir)) { + const tomlFiles = fs.readdirSync(codexAgentsDir); + let tomlCount = 0; + for (const file of tomlFiles) { + if (file.startsWith('gsd-') && file.endsWith('.toml')) { + fs.unlinkSync(path.join(codexAgentsDir, file)); + tomlCount++; + } + } + if (tomlCount > 0) { + removedCount++; + console.log(` ${green}✓${reset} Removed ${tomlCount} agent .toml configs`); + } + } + + // Codex: clean GSD sections from config.toml + const configPath = path.join(targetDir, 'config.toml'); + if (fs.existsSync(configPath)) { + const content = fs.readFileSync(configPath, 'utf8'); + const cleaned = stripGsdFromCodexConfig(content); + if (cleaned === null) { + // File is empty after stripping — delete it + fs.unlinkSync(configPath); + removedCount++; + console.log(` ${green}✓${reset} Removed config.toml (was GSD-only)`); + } else if (cleaned !== content) { + fs.writeFileSync(configPath, cleaned); + removedCount++; + console.log(` ${green}✓${reset} Cleaned GSD sections from config.toml`); + } + } } else { // Claude Code & Gemini: remove commands/gsd/ directory const gsdCommandsDir = path.join(targetDir, 'commands', 'gsd'); @@ -1675,7 +1939,7 @@ function install(isGlobal, runtime = 'claude') { } else if (isGemini) { content = convertClaudeToGeminiAgent(content); } else if (isCodex) { - content = convertClaudeToCodexMarkdown(content); + content = convertClaudeAgentToCodexAgent(content); } fs.writeFileSync(path.join(agentsDest, entry.name), content); } @@ -1759,6 +2023,10 @@ function install(isGlobal, runtime = 'claude') { reportLocalPatches(targetDir, runtime); if (isCodex) { + // Generate Codex config.toml and per-agent .toml files + const agentCount = installCodexConfig(targetDir, agentsSrc); + console.log(` ${green}✓${reset} Generated config.toml with ${agentCount} agent roles`); + console.log(` ${green}✓${reset} Generated ${agentCount} agent .toml config files`); return { settingsPath: null, settings: null, statuslineCommand: null, runtime }; } @@ -2052,6 +2320,22 @@ function installAllRuntimes(runtimes, isGlobal, isInteractive) { } } +// Test-only exports — skip main logic when loaded as a module for testing +if (process.env.GSD_TEST_MODE) { + module.exports = { + getCodexSkillAdapterHeader, + convertClaudeAgentToCodexAgent, + generateCodexAgentToml, + generateCodexConfigBlock, + stripGsdFromCodexConfig, + mergeCodexConfig, + installCodexConfig, + convertClaudeCommandToCodexSkill, + GSD_CODEX_MARKER, + CODEX_AGENT_SANDBOX, + }; +} else { + // Main logic if (hasGlobal && hasLocal) { console.error(` ${yellow}Cannot specify both --global and --local${reset}`); @@ -2088,3 +2372,5 @@ if (hasGlobal && hasLocal) { }); } } + +} // end of else block for GSD_TEST_MODE diff --git a/commands/gsd/discuss-phase.md b/commands/gsd/discuss-phase.md index dbeb2a4172..cdfddce02d 100644 --- a/commands/gsd/discuss-phase.md +++ b/commands/gsd/discuss-phase.md @@ -10,6 +10,8 @@ allowed-tools: - Grep - AskUserQuestion - Task + - mcp__context7__resolve-library-id + - mcp__context7__query-docs --- @@ -38,11 +40,12 @@ Context files are resolved in-workflow using `init phase-op` and roadmap/state t 1. Validate phase number (error if missing or not in roadmap) 2. Check if CONTEXT.md exists (offer update/view/skip if yes) -3. **Analyze phase** — Identify domain and generate phase-specific gray areas -4. **Present gray areas** — Multi-select: which to discuss? (NO skip option) -5. **Deep-dive each area** — 4 questions per area, then offer more/next -6. **Write CONTEXT.md** — Sections match areas discussed -7. Offer next steps (research or plan) +3. **Scout codebase** — Find reusable assets, patterns, and integration points +4. **Analyze phase** — Identify domain and generate code-informed gray areas +5. **Present gray areas** — Multi-select: which to discuss? (NO skip option) +6. **Deep-dive each area** — 4 questions per area, code-informed options, Context7 for library choices +7. **Write CONTEXT.md** — Sections match areas discussed + code_context section +8. Offer next steps (research or plan) **CRITICAL: Scope guardrail** - Phase boundary from ROADMAP.md is FIXED diff --git a/get-shit-done/templates/context.md b/get-shit-done/templates/context.md index 574e2e490d..eed462bacf 100644 --- a/get-shit-done/templates/context.md +++ b/get-shit-done/templates/context.md @@ -54,6 +54,20 @@ Template for `.planning/phases/XX-name/{phase_num}-CONTEXT.md` - captures implem + +## Existing Code Insights + +### Reusable Assets +- [Component/hook/utility]: [How it could be used in this phase] + +### Established Patterns +- [Pattern]: [How it constrains/enables this phase] + +### Integration Points +- [Where new code connects to existing system] + + + ## Deferred Ideas diff --git a/get-shit-done/workflows/discuss-phase.md b/get-shit-done/workflows/discuss-phase.md index 1b9596dac3..b3ef810552 100644 --- a/get-shit-done/workflows/discuss-phase.md +++ b/get-shit-done/workflows/discuss-phase.md @@ -165,30 +165,73 @@ If "Continue and replan after": Continue to analyze_phase. If "View existing plans": Display plan files, then offer "Continue" / "Cancel". If "Cancel": Exit workflow. -**If `has_plans` is false:** Continue to analyze_phase. +**If `has_plans` is false:** Continue to scout_codebase. + + + +Lightweight scan of existing code to inform gray area identification and discussion. Uses ~10% context — acceptable for an interactive session. + +**Step 1: Check for existing codebase maps** +```bash +ls .planning/codebase/*.md 2>/dev/null +``` + +**If codebase maps exist:** Read the most relevant ones (CONVENTIONS.md, STRUCTURE.md, STACK.md based on phase type). Extract: +- Reusable components/hooks/utilities +- Established patterns (state management, styling, data fetching) +- Integration points (where new code would connect) + +Skip to Step 3 below. + +**Step 2: If no codebase maps, do targeted grep** + +Extract key terms from the phase goal (e.g., "feed" → "post", "card", "list"; "auth" → "login", "session", "token"). + +```bash +# Find files related to phase goal terms +grep -rl "{term1}\|{term2}" src/ app/ --include="*.ts" --include="*.tsx" --include="*.js" --include="*.jsx" 2>/dev/null | head -10 + +# Find existing components/hooks +ls src/components/ 2>/dev/null +ls src/hooks/ 2>/dev/null +ls src/lib/ src/utils/ 2>/dev/null +``` + +Read the 3-5 most relevant files to understand existing patterns. + +**Step 3: Build internal codebase_context** + +From the scan, identify: +- **Reusable assets** — existing components, hooks, utilities that could be used in this phase +- **Established patterns** — how the codebase does state management, styling, data fetching +- **Integration points** — where new code would connect (routes, nav, providers) +- **Creative options** — approaches the existing architecture enables or constrains + +Store as internal `` for use in analyze_phase and present_gray_areas. This is NOT written to a file — it's used within this session only. -Analyze the phase to identify gray areas worth discussing. +Analyze the phase to identify gray areas worth discussing. **Use codebase_context from scout step to ground the analysis.** **Read the phase description from ROADMAP.md and determine:** 1. **Domain boundary** — What capability is this phase delivering? State it clearly. -2. **Gray areas by category** — For each relevant category (UI, UX, Behavior, Empty States, Content), identify 1-2 specific ambiguities that would change implementation. +2. **Gray areas by category** — For each relevant category (UI, UX, Behavior, Empty States, Content), identify 1-2 specific ambiguities that would change implementation. **Annotate with code context where relevant** (e.g., "You already have a Card component" or "No existing pattern for this"). 3. **Skip assessment** — If no meaningful gray areas exist (pure infrastructure, clear-cut implementation), the phase may not need discussion. **Output your analysis internally, then present to user.** -Example analysis for "Post Feed" phase: +Example analysis for "Post Feed" phase (with code context): ``` Domain: Displaying posts from followed users +Existing: Card component (src/components/ui/Card.tsx), useInfiniteQuery hook, Tailwind CSS Gray areas: -- UI: Layout style (cards vs timeline vs grid) -- UI: Information density (full posts vs previews) -- Behavior: Loading pattern (infinite scroll vs pagination) -- Empty State: What shows when no posts exist +- UI: Layout style (cards vs timeline vs grid) — Card component exists with shadow/rounded variants +- UI: Information density (full posts vs previews) — no existing density patterns +- Behavior: Loading pattern (infinite scroll vs pagination) — useInfiniteQuery already set up +- Empty State: What shows when no posts exist — EmptyState component exists in ui/ - Content: What metadata displays (time, author, reactions count) ``` @@ -210,17 +253,23 @@ We'll clarify HOW to implement this. - question: "Which areas do you want to discuss for [phase name]?" - options: Generate 3-4 phase-specific gray areas, each with: - "[Specific area]" (label) — concrete, not generic - - [1-2 questions this covers] (description) + - [1-2 questions this covers + code context annotation] (description) - **Highlight the recommended choice with brief explanation why** +**Code context annotations:** When the scout found relevant existing code, annotate the gray area description: +``` +☐ Layout style — Cards vs list vs timeline? + (You already have a Card component with shadow/rounded variants. Reusing it keeps the app consistent.) +``` + **Do NOT include a "skip" or "you decide" option.** User ran this command to discuss — give them real choices. -**Examples by domain:** +**Examples by domain (with code context):** For "Post Feed" (visual feature): ``` -☐ Layout style — Cards vs list vs timeline? Information density? -☐ Loading behavior — Infinite scroll or pagination? Pull to refresh? +☐ Layout style — Cards vs list vs timeline? (Card component exists with variants) +☐ Loading behavior — Infinite scroll or pagination? (useInfiniteQuery hook available) ☐ Content ordering — Chronological, algorithmic, or user choice? ☐ Post metadata — What info per post? Timestamps, reactions, author? ``` @@ -262,7 +311,15 @@ Ask 4 questions per area before offering to continue or move on. Each answer oft - header: "[Area]" (max 12 chars — abbreviate if needed) - question: Specific decision for this area - options: 2-3 concrete choices (AskUserQuestion adds "Other" automatically), with the recommended choice highlighted and brief explanation why + - **Annotate options with code context** when relevant: + ``` + "How should posts be displayed?" + - Cards (reuses existing Card component — consistent with Messages) + - List (simpler, would be a new pattern) + - Timeline (needs new Timeline component — none exists yet) + ``` - Include "You decide" as an option when reasonable — captures Claude discretion + - **Context7 for library choices:** When a gray area involves library selection (e.g., "magic links" → query next-auth docs) or API approach decisions, use `mcp__context7__*` tools to fetch current documentation and inform the options. Don't use Context7 for every question — only when library-specific knowledge improves the options. 3. **After 4 questions, check:** - header: "[Area]" (max 12 chars) @@ -346,6 +403,20 @@ mkdir -p ".planning/phases/${padded_phase}-${phase_slug}" + +## Existing Code Insights + +### Reusable Assets +- [Component/hook/utility]: [How it could be used in this phase] + +### Established Patterns +- [Pattern]: [How it constrains/enables this phase] + +### Integration Points +- [Where new code connects to existing system] + + + ## Specific Ideas @@ -531,11 +602,13 @@ Route to `confirm_creation` step (existing behavior — show manual next steps). - Phase validated against roadmap -- Gray areas identified through intelligent analysis (not generic questions) +- Codebase scouted for reusable assets, patterns, and integration points +- Gray areas identified through intelligent analysis with code context annotations - User selected which areas to discuss -- Each selected area explored until user satisfied +- Each selected area explored until user satisfied (with code-informed options) - Scope creep redirected to deferred ideas - CONTEXT.md captures actual decisions, not vague vision +- CONTEXT.md includes code_context section with reusable assets and patterns - Deferred ideas preserved for future phases - STATE.md updated with session info - User knows next steps diff --git a/tests/codex-config.test.cjs b/tests/codex-config.test.cjs new file mode 100644 index 0000000000..e9cf8ad321 --- /dev/null +++ b/tests/codex-config.test.cjs @@ -0,0 +1,412 @@ +/** + * GSD Tools Tests - codex-config.cjs + * + * Tests for Codex adapter header, agent conversion, config.toml generation/merge, + * per-agent .toml generation, and uninstall cleanup. + */ + +// Enable test exports from install.js (skips main CLI logic) +process.env.GSD_TEST_MODE = '1'; + +const { test, describe, beforeEach, afterEach } = require('node:test'); +const assert = require('node:assert'); +const fs = require('fs'); +const path = require('path'); +const os = require('os'); + +const { + getCodexSkillAdapterHeader, + convertClaudeAgentToCodexAgent, + generateCodexAgentToml, + generateCodexConfigBlock, + stripGsdFromCodexConfig, + mergeCodexConfig, + GSD_CODEX_MARKER, + CODEX_AGENT_SANDBOX, +} = require('../bin/install.js'); + +// ─── getCodexSkillAdapterHeader ───────────────────────────────────────────────── + +describe('getCodexSkillAdapterHeader', () => { + test('contains all three sections', () => { + const result = getCodexSkillAdapterHeader('gsd-execute-phase'); + assert.ok(result.includes(''), 'has opening tag'); + assert.ok(result.includes(''), 'has closing tag'); + assert.ok(result.includes('## A. Skill Invocation'), 'has section A'); + assert.ok(result.includes('## B. AskUserQuestion'), 'has section B'); + assert.ok(result.includes('## C. Task() → spawn_agent'), 'has section C'); + }); + + test('includes correct invocation syntax', () => { + const result = getCodexSkillAdapterHeader('gsd-plan-phase'); + assert.ok(result.includes('`$gsd-plan-phase`'), 'has $skillName invocation'); + assert.ok(result.includes('{{GSD_ARGS}}'), 'has GSD_ARGS variable'); + }); + + test('section B maps AskUserQuestion parameters', () => { + const result = getCodexSkillAdapterHeader('gsd-discuss-phase'); + assert.ok(result.includes('request_user_input'), 'maps to request_user_input'); + assert.ok(result.includes('header'), 'maps header parameter'); + assert.ok(result.includes('question'), 'maps question parameter'); + assert.ok(result.includes('label'), 'maps options label'); + assert.ok(result.includes('description'), 'maps options description'); + assert.ok(result.includes('multiSelect'), 'documents multiSelect workaround'); + assert.ok(result.includes('Execute mode'), 'documents Execute mode fallback'); + }); + + test('section C maps Task to spawn_agent', () => { + const result = getCodexSkillAdapterHeader('gsd-execute-phase'); + assert.ok(result.includes('spawn_agent'), 'maps to spawn_agent'); + assert.ok(result.includes('agent_type'), 'maps subagent_type to agent_type'); + assert.ok(result.includes('fork_context'), 'documents fork_context default'); + assert.ok(result.includes('wait(ids)'), 'documents parallel wait pattern'); + assert.ok(result.includes('close_agent'), 'documents close_agent cleanup'); + assert.ok(result.includes('CHECKPOINT'), 'documents result markers'); + }); +}); + +// ─── convertClaudeAgentToCodexAgent ───────────────────────────────────────────── + +describe('convertClaudeAgentToCodexAgent', () => { + test('adds codex_agent_role header and cleans frontmatter', () => { + const input = `--- +name: gsd-executor +description: Executes GSD plans with atomic commits +tools: Read, Write, Edit, Bash, Grep, Glob +color: yellow +--- + + +You are a GSD plan executor. +`; + + const result = convertClaudeAgentToCodexAgent(input); + + // Frontmatter rebuilt with only name and description + assert.ok(result.startsWith('---\n'), 'starts with frontmatter'); + assert.ok(result.includes('"gsd-executor"'), 'has quoted name'); + assert.ok(result.includes('"Executes GSD plans with atomic commits"'), 'has quoted description'); + assert.ok(!result.includes('color: yellow'), 'drops color field'); + // Tools should be in but NOT in frontmatter + const fmEnd = result.indexOf('---', 4); + const frontmatterSection = result.substring(0, fmEnd); + assert.ok(!frontmatterSection.includes('tools:'), 'drops tools from frontmatter'); + + // Has codex_agent_role block + assert.ok(result.includes(''), 'has role header'); + assert.ok(result.includes('role: gsd-executor'), 'role matches agent name'); + assert.ok(result.includes('tools: Read, Write, Edit, Bash, Grep, Glob'), 'tools in role block'); + assert.ok(result.includes('purpose: Executes GSD plans with atomic commits'), 'purpose from description'); + assert.ok(result.includes(''), 'has closing tag'); + + // Body preserved + assert.ok(result.includes(''), 'body content preserved'); + }); + + test('converts slash commands in body', () => { + const input = `--- +name: gsd-test +description: Test agent +tools: Read +--- + +Run /gsd:execute-phase to proceed.`; + + const result = convertClaudeAgentToCodexAgent(input); + assert.ok(result.includes('$gsd-execute-phase'), 'converts slash commands'); + assert.ok(!result.includes('/gsd:execute-phase'), 'original slash command removed'); + }); + + test('handles content without frontmatter', () => { + const input = 'Just some content without frontmatter.'; + const result = convertClaudeAgentToCodexAgent(input); + assert.strictEqual(result, input, 'returns input unchanged'); + }); +}); + +// ─── generateCodexAgentToml ───────────────────────────────────────────────────── + +describe('generateCodexAgentToml', () => { + const sampleAgent = `--- +name: gsd-executor +description: Executes plans +tools: Read, Write, Edit +color: yellow +--- + +You are an executor.`; + + test('sets workspace-write for executor', () => { + const result = generateCodexAgentToml('gsd-executor', sampleAgent); + assert.ok(result.includes('sandbox_mode = "workspace-write"'), 'has workspace-write'); + }); + + test('sets read-only for plan-checker', () => { + const checker = `--- +name: gsd-plan-checker +description: Checks plans +tools: Read, Grep, Glob +--- + +You check plans.`; + const result = generateCodexAgentToml('gsd-plan-checker', checker); + assert.ok(result.includes('sandbox_mode = "read-only"'), 'has read-only'); + }); + + test('includes developer_instructions from body', () => { + const result = generateCodexAgentToml('gsd-executor', sampleAgent); + assert.ok(result.includes('developer_instructions = """'), 'has triple-quoted instructions'); + assert.ok(result.includes('You are an executor.'), 'body content in instructions'); + assert.ok(result.includes('"""'), 'has closing triple quotes'); + }); + + test('defaults unknown agents to read-only', () => { + const result = generateCodexAgentToml('gsd-unknown', sampleAgent); + assert.ok(result.includes('sandbox_mode = "read-only"'), 'defaults to read-only'); + }); +}); + +// ─── CODEX_AGENT_SANDBOX mapping ──────────────────────────────────────────────── + +describe('CODEX_AGENT_SANDBOX', () => { + test('has all 11 agents mapped', () => { + const agentNames = Object.keys(CODEX_AGENT_SANDBOX); + assert.strictEqual(agentNames.length, 11, 'has 11 agents'); + }); + + test('workspace-write agents have write tools', () => { + const writeAgents = [ + 'gsd-executor', 'gsd-planner', 'gsd-phase-researcher', + 'gsd-project-researcher', 'gsd-research-synthesizer', 'gsd-verifier', + 'gsd-codebase-mapper', 'gsd-roadmapper', 'gsd-debugger', + ]; + for (const name of writeAgents) { + assert.strictEqual(CODEX_AGENT_SANDBOX[name], 'workspace-write', `${name} is workspace-write`); + } + }); + + test('read-only agents have no write tools', () => { + const readOnlyAgents = ['gsd-plan-checker', 'gsd-integration-checker']; + for (const name of readOnlyAgents) { + assert.strictEqual(CODEX_AGENT_SANDBOX[name], 'read-only', `${name} is read-only`); + } + }); +}); + +// ─── generateCodexConfigBlock ─────────────────────────────────────────────────── + +describe('generateCodexConfigBlock', () => { + const agents = [ + { name: 'gsd-executor', description: 'Executes plans' }, + { name: 'gsd-planner', description: 'Creates plans' }, + ]; + + test('starts with GSD marker', () => { + const result = generateCodexConfigBlock(agents); + assert.ok(result.startsWith(GSD_CODEX_MARKER), 'starts with marker'); + }); + + test('includes feature flags', () => { + const result = generateCodexConfigBlock(agents); + assert.ok(result.includes('[features]'), 'has features table'); + assert.ok(result.includes('multi_agent = true'), 'has multi_agent'); + assert.ok(result.includes('default_mode_request_user_input = true'), 'has request_user_input'); + }); + + test('includes agents table with limits', () => { + const result = generateCodexConfigBlock(agents); + assert.ok(result.includes('[agents]'), 'has agents table'); + assert.ok(result.includes('max_threads = 4'), 'has max_threads'); + assert.ok(result.includes('max_depth = 2'), 'has max_depth'); + }); + + test('includes per-agent sections', () => { + const result = generateCodexConfigBlock(agents); + assert.ok(result.includes('[agents.gsd-executor]'), 'has executor section'); + assert.ok(result.includes('[agents.gsd-planner]'), 'has planner section'); + assert.ok(result.includes('config_file = "agents/gsd-executor.toml"'), 'has executor config_file'); + assert.ok(result.includes('"Executes plans"'), 'has executor description'); + }); +}); + +// ─── stripGsdFromCodexConfig ──────────────────────────────────────────────────── + +describe('stripGsdFromCodexConfig', () => { + test('returns null for GSD-only config', () => { + const content = `${GSD_CODEX_MARKER}\n[features]\nmulti_agent = true\n`; + const result = stripGsdFromCodexConfig(content); + assert.strictEqual(result, null, 'returns null when GSD-only'); + }); + + test('preserves user content before marker', () => { + const content = `[model]\nname = "o3"\n\n${GSD_CODEX_MARKER}\n[features]\nmulti_agent = true\n`; + const result = stripGsdFromCodexConfig(content); + assert.ok(result.includes('[model]'), 'preserves user section'); + assert.ok(result.includes('name = "o3"'), 'preserves user values'); + assert.ok(!result.includes('multi_agent'), 'removes GSD content'); + assert.ok(!result.includes(GSD_CODEX_MARKER), 'removes marker'); + }); + + test('strips injected feature keys without marker', () => { + const content = `[features]\nmulti_agent = true\ndefault_mode_request_user_input = true\nother_feature = false\n`; + const result = stripGsdFromCodexConfig(content); + assert.ok(!result.includes('multi_agent'), 'removes multi_agent'); + assert.ok(!result.includes('default_mode_request_user_input'), 'removes request_user_input'); + assert.ok(result.includes('other_feature = false'), 'preserves user features'); + }); + + test('removes empty [features] section', () => { + const content = `[features]\nmulti_agent = true\n[model]\nname = "o3"\n`; + const result = stripGsdFromCodexConfig(content); + assert.ok(!result.includes('[features]'), 'removes empty features section'); + assert.ok(result.includes('[model]'), 'preserves other sections'); + }); + + test('strips injected keys above marker on uninstall', () => { + // Case 3 install injects keys into [features] AND appends marker block + const content = `[model]\nname = "o3"\n\n[features]\nmulti_agent = true\ndefault_mode_request_user_input = true\nsome_custom_flag = true\n\n${GSD_CODEX_MARKER}\n[agents]\nmax_threads = 4\n`; + const result = stripGsdFromCodexConfig(content); + assert.ok(result.includes('[model]'), 'preserves user model section'); + assert.ok(result.includes('some_custom_flag = true'), 'preserves user feature'); + assert.ok(!result.includes('multi_agent'), 'strips injected multi_agent'); + assert.ok(!result.includes('default_mode_request_user_input'), 'strips injected request_user_input'); + assert.ok(!result.includes(GSD_CODEX_MARKER), 'strips marker'); + }); + + test('removes [agents.gsd-*] sections', () => { + const content = `[agents.gsd-executor]\ndescription = "test"\nconfig_file = "agents/gsd-executor.toml"\n\n[agents.custom-agent]\ndescription = "user agent"\n`; + const result = stripGsdFromCodexConfig(content); + assert.ok(!result.includes('[agents.gsd-executor]'), 'removes GSD agent section'); + assert.ok(result.includes('[agents.custom-agent]'), 'preserves user agent section'); + }); +}); + +// ─── mergeCodexConfig ─────────────────────────────────────────────────────────── + +describe('mergeCodexConfig', () => { + let tmpDir; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gsd-codex-merge-')); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + const sampleBlock = generateCodexConfigBlock([ + { name: 'gsd-executor', description: 'Executes plans' }, + ]); + + test('case 1: creates new config.toml', () => { + const configPath = path.join(tmpDir, 'config.toml'); + mergeCodexConfig(configPath, sampleBlock); + + assert.ok(fs.existsSync(configPath), 'file created'); + const content = fs.readFileSync(configPath, 'utf8'); + assert.ok(content.includes(GSD_CODEX_MARKER), 'has marker'); + assert.ok(content.includes('multi_agent = true'), 'has feature flag'); + assert.ok(content.includes('[agents.gsd-executor]'), 'has agent'); + }); + + test('case 2: replaces existing GSD block', () => { + const configPath = path.join(tmpDir, 'config.toml'); + const userContent = '[model]\nname = "o3"\n'; + fs.writeFileSync(configPath, userContent + '\n' + sampleBlock + '\n'); + + // Re-merge with updated block + const newBlock = generateCodexConfigBlock([ + { name: 'gsd-executor', description: 'Updated description' }, + { name: 'gsd-planner', description: 'New agent' }, + ]); + mergeCodexConfig(configPath, newBlock); + + const content = fs.readFileSync(configPath, 'utf8'); + assert.ok(content.includes('[model]'), 'preserves user content'); + assert.ok(content.includes('Updated description'), 'has new description'); + assert.ok(content.includes('[agents.gsd-planner]'), 'has new agent'); + // Verify no duplicate markers + const markerCount = (content.match(new RegExp(GSD_CODEX_MARKER.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g')) || []).length; + assert.strictEqual(markerCount, 1, 'exactly one marker'); + }); + + test('case 3: appends to config without GSD marker', () => { + const configPath = path.join(tmpDir, 'config.toml'); + fs.writeFileSync(configPath, '[model]\nname = "o3"\n'); + + mergeCodexConfig(configPath, sampleBlock); + + const content = fs.readFileSync(configPath, 'utf8'); + assert.ok(content.includes('[model]'), 'preserves user content'); + assert.ok(content.includes(GSD_CODEX_MARKER), 'adds marker'); + assert.ok(content.includes('multi_agent = true'), 'has features'); + }); + + test('case 3 with existing [features]: injects keys', () => { + const configPath = path.join(tmpDir, 'config.toml'); + fs.writeFileSync(configPath, '[features]\nother_feature = true\n\n[model]\nname = "o3"\n'); + + mergeCodexConfig(configPath, sampleBlock); + + const content = fs.readFileSync(configPath, 'utf8'); + assert.ok(content.includes('other_feature = true'), 'preserves existing feature'); + assert.ok(content.includes('multi_agent = true'), 'injects multi_agent'); + assert.ok(content.includes('default_mode_request_user_input = true'), 'injects request_user_input'); + assert.ok(content.includes(GSD_CODEX_MARKER), 'adds marker for agents block'); + }); + + test('idempotent: re-merge produces same result', () => { + const configPath = path.join(tmpDir, 'config.toml'); + mergeCodexConfig(configPath, sampleBlock); + const first = fs.readFileSync(configPath, 'utf8'); + + mergeCodexConfig(configPath, sampleBlock); + const second = fs.readFileSync(configPath, 'utf8'); + + assert.strictEqual(first, second, 'idempotent merge'); + }); +}); + +// ─── Integration: installCodexConfig ──────────────────────────────────────────── + +describe('installCodexConfig (integration)', () => { + let tmpTarget; + const agentsSrc = path.join(__dirname, '..', 'agents'); + + beforeEach(() => { + tmpTarget = fs.mkdtempSync(path.join(os.tmpdir(), 'gsd-codex-install-')); + }); + + afterEach(() => { + fs.rmSync(tmpTarget, { recursive: true, force: true }); + }); + + // Only run if agents/ directory exists (not in CI without full checkout) + const hasAgents = fs.existsSync(agentsSrc); + + (hasAgents ? test : test.skip)('generates config.toml and agent .toml files', () => { + const { installCodexConfig } = require('../bin/install.js'); + const count = installCodexConfig(tmpTarget, agentsSrc); + + assert.ok(count >= 11, `installed ${count} agents (expected >= 11)`); + + // Verify config.toml + const configPath = path.join(tmpTarget, 'config.toml'); + assert.ok(fs.existsSync(configPath), 'config.toml exists'); + const config = fs.readFileSync(configPath, 'utf8'); + assert.ok(config.includes('multi_agent = true'), 'has multi_agent feature'); + assert.ok(config.includes('[agents.gsd-executor]'), 'has executor agent'); + + // Verify per-agent .toml files + const agentsDir = path.join(tmpTarget, 'agents'); + assert.ok(fs.existsSync(path.join(agentsDir, 'gsd-executor.toml')), 'executor .toml exists'); + assert.ok(fs.existsSync(path.join(agentsDir, 'gsd-plan-checker.toml')), 'plan-checker .toml exists'); + + const executorToml = fs.readFileSync(path.join(agentsDir, 'gsd-executor.toml'), 'utf8'); + assert.ok(executorToml.includes('sandbox_mode = "workspace-write"'), 'executor is workspace-write'); + assert.ok(executorToml.includes('developer_instructions'), 'has developer_instructions'); + + const checkerToml = fs.readFileSync(path.join(agentsDir, 'gsd-plan-checker.toml'), 'utf8'); + assert.ok(checkerToml.includes('sandbox_mode = "read-only"'), 'plan-checker is read-only'); + }); +});