From 23d45ff7ba092e218483f8709db09efdca6b8d47 Mon Sep 17 00:00:00 2001 From: Will Griffin Date: Sat, 23 May 2026 20:27:56 -0600 Subject: [PATCH] feat(have): use ContextForge for workflows --- README.md | 53 +- claude/.claude-plugin/marketplace.json | 4 +- claude/have/.claude-plugin/plugin.json | 4 +- claude/have/commands/review-cycle.md | 606 +----------------- claude/have/commands/ship.md | 302 +-------- codex/.agents/plugins/marketplace.json | 2 +- codex/plugins/have/.codex-plugin/plugin.json | 16 +- codex/plugins/have/commands/review-cycle.md | 565 +--------------- codex/plugins/have/commands/ship.md | 293 +-------- .../plugins/have/skills/review-cycle/SKILL.md | 19 + codex/plugins/have/skills/ship/SKILL.md | 19 + install.sh | 38 +- 12 files changed, 153 insertions(+), 1768 deletions(-) create mode 100644 codex/plugins/have/skills/review-cycle/SKILL.md create mode 100644 codex/plugins/have/skills/ship/SKILL.md diff --git a/README.md b/README.md index f91dc94..c0c2e55 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ HappyVertical shared cross-repo configuration for agent-assisted development. This repo is the umbrella for configuration that ≥2 HappyVertical projects -consume. Today that's slash commands for Claude Code and Codex. Other +consume. Today that's Claude Code command adapters and Codex workflow skills. Other shared config (MCP servers, agent hooks, lint/format/tsconfig bases) will land here as second consumers appear. @@ -11,7 +11,7 @@ land here as second consumers appear. **In scope:** -- Slash commands for Claude Code and Codex (`claude/`, `codex/`) +- Claude Code command adapters and Codex-visible workflow skills (`claude/`, `codex/`) - Shared lint / format / tsconfig configs as published npm packages (`packages/eslint-config`, `packages/prettier-config`, `packages/tsconfig-base`) @@ -51,16 +51,21 @@ have-config/ │ ├── .claude-plugin/ │ │ └── plugin.json │ └── commands/ -│ ├── ship.md # /have:ship -│ └── review-cycle.md # /have:review-cycle +│ ├── ship.md # /have:ship adapter +│ └── review-cycle.md # /have:review-cycle adapter ├── codex/ # Codex marketplace │ └── plugins/ │ └── have/ # the `have` plugin │ ├── .codex-plugin/ │ │ └── plugin.json -│ └── commands/ -│ ├── ship.md # /have:ship -│ └── review-cycle.md # /have:review-cycle +│ ├── commands/ +│ │ ├── ship.md # /have:ship adapter +│ │ └── review-cycle.md # /have:review-cycle adapter +│ └── skills/ # Codex adapters to ContextForge +│ ├── ship/ +│ │ └── SKILL.md # have:ship +│ └── review-cycle/ +│ └── SKILL.md # have:review-cycle └── packages/ # published npm configs ├── eslint-config/ # @happyvertical/eslint-config ├── prettier-config/ # @happyvertical/prettier-config @@ -105,7 +110,7 @@ See each package's README for usage details: Configs are versioned via Changesets and published on merge to `main`. -## Plugin install (slash commands) +## Plugin install (agent workflows) ```bash git clone https://github.com/happyvertical/have-config.git ~/Work/happyvertical/repos/have-config @@ -122,21 +127,45 @@ cd ~/Work/happyvertical/repos/have-config 4. Optionally symlinks the cached plugin install back to the live repo path for in-place editing (`./install.sh --live`). -After install, both agents have: +After install, Claude Code has: - `/have:ship` — end-to-end shipping pipeline - `/have:review-cycle` — multi-tool review/fix/retest loop +Codex has equivalent skills: + +- `have:ship` — end-to-end shipping pipeline +- `have:review-cycle` — multi-tool review/fix/retest loop + ## Editing -The repo is the source of truth. Edits to `claude/have/commands/*.md` or -`codex/plugins/have/commands/*.md` are picked up: +ContextForge is the source of truth for workflow bodies: + +| Workflow | Prompt | Resource | +|---|---|---| +| Ship | `have-ship` | `have://happyvertical/workflows/ship` | +| Review cycle | `have-review-cycle` | `have://happyvertical/workflows/review-cycle` | + +The resources live in the **Happy Vertical** team at +`context.happyvertical.com`. They store the workflow markdown as a base64 +payload because ContextForge's default content scanner rejects raw shell-heavy +workflow markdown. The prompt loaders decode that payload and follow it as the +authoritative workflow. + +Edits to workflow behavior should happen in ContextForge, not in this repo. +This lets prompt/workflow changes go live without redeploying or reinstalling +the agent plugins. + +Edits to the adapter files are picked up: - **Live mode** (`install.sh --live`): edits are immediately visible to running sessions via symlink. May need re-linking after `claude plugin update` rewrites the cache. - **Standard mode**: edits require `claude plugin update have@have-config` - (and the codex equivalent) to refresh the installed copy. + for Claude, or rerunning `./install.sh` to refresh the Codex plugin cache. + +The Claude command files and Codex `skills/` files must remain thin adapters. +Do not put canonical workflow text back into this repo. ## Companion tool diff --git a/claude/.claude-plugin/marketplace.json b/claude/.claude-plugin/marketplace.json index c3d2e0d..66281b7 100644 --- a/claude/.claude-plugin/marketplace.json +++ b/claude/.claude-plugin/marketplace.json @@ -1,7 +1,7 @@ { "$schema": "https://anthropic.com/claude-code/marketplace.schema.json", "name": "have-config", - "description": "HappyVertical shared agent configuration: slash commands, MCP servers, hooks, and other cross-repo configuration consumed by ≥2 projects.", + "description": "HappyVertical shared agent configuration: ContextForge-backed command adapters, MCP servers, hooks, and other cross-repo configuration consumed by ≥2 projects.", "owner": { "name": "Will Griffin", "email": "buddyrandom@gmail.com" @@ -9,7 +9,7 @@ "plugins": [ { "name": "have", - "description": "HappyVertical agent commands for Claude Code: /have:ship and /have:review-cycle.", + "description": "HappyVertical ContextForge-backed command adapters for Claude Code: /have:ship and /have:review-cycle.", "author": { "name": "Will Griffin", "email": "buddyrandom@gmail.com" diff --git a/claude/have/.claude-plugin/plugin.json b/claude/have/.claude-plugin/plugin.json index f092c9a..a06fe08 100644 --- a/claude/have/.claude-plugin/plugin.json +++ b/claude/have/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "have", - "description": "HappyVertical agent commands for Claude Code. Provides /have:ship (end-to-end shipping pipeline) and /have:review-cycle (multi-tool review loop) for the org's standard pre-PR and PR-creation workflow.", - "version": "0.1.0", + "description": "HappyVertical ContextForge-backed command adapters for Claude Code. Provides /have:ship and /have:review-cycle by loading the authoritative workflow prompts/resources from ContextForge.", + "version": "0.1.1", "author": { "name": "Will Griffin", "email": "buddyrandom@gmail.com", diff --git a/claude/have/commands/review-cycle.md b/claude/have/commands/review-cycle.md index 0f2e55c..0b9a3ca 100644 --- a/claude/have/commands/review-cycle.md +++ b/claude/have/commands/review-cycle.md @@ -1,606 +1,16 @@ --- -description: Run a repeatable review/fix/retest loop over current work, optionally across multiple repos, without opening PRs or shipping. +description: "Run the HappyVertical review-cycle workflow from ContextForge." --- -# /review-cycle +# /have:review-cycle -Run a bounded review cycle on the current work independent of shipping. Default to 3 rounds unless the user passes `rounds=N`. +This local command is only an adapter. The authoritative workflow lives in ContextForge. -The parent agent running this command is **Claude Code**. The command orchestrates a **4-reviewer ensemble**: three independent reviewer subprocesses — codex-cli, a separate claude-cli invocation, and GitHub copilot-cli — plus the orchestrator's own explicit checklist pass against the same commit. Different models have different blind spots; the ensemble catches more than any single tool. +Load and follow the Happy Vertical Team prompt: -The claude-cli reviewer can be invoked two ways: -- **Preferred**: `claude -p ""` as a subprocess (when OAuth from the parent session works — requires a long-lived token via `claude setup-token`, or `ANTHROPIC_API_KEY` set). -- **Fallback when OAuth fails**: a fresh claude sub-agent via the parent's Agent tool, with the same review prompt. Same independence property (the sub-agent has no context from the parent conversation) and no OAuth gymnastics. Use the `general-purpose` sub-agent type with the pr-review prompt. +- Prompt: `have-review-cycle` +- Resource: `have://happyvertical/workflows/review-cycle` -The orchestrator's own pass is NOT silent-solo — it must be an explicit checklist run against the staged/committed diff, with findings written out in the same JSON shape the subprocesses produce. "I looked, it's fine" is not a review; an enumerated set of P0/P1/P2/P3 findings (including "no findings") is. +Use ContextForge MCP prompts/resources when available. If prompt invocation is unavailable, read the resource by URI. The resource text contains `encoding: base64` and a `payload_base64:` block; decode that payload as UTF-8 markdown and follow it exactly as the review-cycle workflow. -## Hard Rules - -- Respect the global worktree isolation policy before making edits. If the current checkout is a primary checkout such as `/Users/will/Work/.../repos/...`, move the work to a dedicated worktree and branch before editing, preferably under `/Users/will/.claude/worktrees/` with a `claude/` branch prefix. -- Do not mix this session's edits with unrelated dirty files. Preserve user changes, and ask only when the current work cannot be separated safely. -- Do not use destructive cleanup commands such as `git reset --hard`, `git checkout --`, or `git clean` unless the user explicitly asks for that exact destructive action. -- Do not use `claude ultrareview` or any `ultrareview` variant for the claude-cli reviewer subprocess. Use the normal claude-cli in non-interactive print mode (`claude -p`) instead. -- Every external review command must be allowed at least 15 minutes. The Claude Bash tool caps a single foreground command at 10 minutes (600000 ms), so for review subprocess invocations: either run them in the background (`run_in_background: true`) and poll with `BashOutput`, or split into shorter chunks. Do not silently truncate a review by hitting the timeout. -- Treat review output as evidence to verify, not as orders. Fix valid findings. For false positives, record the rationale in the final report. -- Keep going until the work is clean or the configured review-round cap is reached. -- If the work spans multiple repositories, review them as an ordered dependency graph. Review upstream repos first, then downstream consumers against the exact upstream commits or branches they depend on. -- This command does not open PRs, push branches, or watch CI unless the user explicitly asks for that during the review-cycle run. - -## Arguments - -- `rounds=N`: maximum review/fix rounds. Default `3`. -- `base=`: override the comparison base branch. -- `repos=`: explicit list of repositories in the review set. -- `no-fix`: review only; do not edit files. -- `no-baseline`: skip baseline validation before the first review round. - -## Preflight - -1. Confirm this is a git repository. If `repos=` was provided, confirm every listed path is a git repository. -2. Discover the review set: - - current repository - - repositories explicitly named by the user or `repos=` - - dirty related worktrees under `/Users/will/.claude/worktrees/` (also check `/Users/will/.codex/worktrees/` and `/Users/will/Work/_trees/` for cross-agent work in progress) - - local path, workspace, git, or package dependencies referenced by changed files - - git submodules or nested repositories touched by the diff - - sibling repositories that are clearly referenced by changed docs, package manifests, lockfiles, workflow files, deployment manifests, or local integration config -3. Exclude unrelated dirty repositories. If multiple dirty repos are present but the dependency relationship is not clear, report them and ask before including them. -4. For each included repository, capture: - - repo root - - current branch - - worktree path - - remote URL - - default/base branch - - staged, unstaged, and untracked files -5. Confirm every included checkout is safe for edits under the worktree isolation policy. State the selected worktree path and branch for each repository before editing. -6. Check required tools before relying on them: - - `pr-review` (install: `git clone https://github.com/happyvertical/pr-review.git ~/pr-review && export PATH="$HOME/pr-review/bin:$PATH"`) - - `codex` - - `claude` - - `gh copilot` - - `gh` when copilot-cli is reached through `gh copilot` -7. Read repository instructions and review context in every included repository: - - nearest `CLAUDE.md` - - nearest `AGENTS.md` if present - - `README*` - - package/build config files - - relevant test/lint/build docs - - `.github/workflows/*` when CI risk is part of the change -8. Look for `## Shipping Notes` in repo-level `CLAUDE.md` (or `AGENTS.md`). If present, use it for repo-specific validation commands, docs expectations, review expectations, and known flaky checks. - -## Multi-Repo Ordering - -When more than one repository is involved, build a dependency graph before reviewing. - -Use directed edges as `upstream -> downstream`, meaning the downstream repo depends on code, packages, APIs, manifests, images, generated artifacts, or documentation from the upstream repo. - -Evidence for ordering includes: - -- package manifests and lockfiles -- workspace files -- local `file:` or path dependencies -- Git submodules -- generated SDK/client references -- service API contracts -- deployment manifests and image references -- CI workflows that consume artifacts from another repo -- docs or release notes that explicitly describe dependency order -- `Shipping Notes` - -If the graph is ambiguous: - -- state the uncertainty -- choose the conservative order only when evidence is strong enough -- otherwise ask before proceeding - -Review in topological order: - -1. Validate and review each upstream repository first. -2. Fix upstream findings before relying on those changes downstream. -3. Record the exact upstream commit, branch, package version, image tag/digest, or artifact that downstream validation uses. -4. Update downstream repositories to reference the upstream result when that is part of the intended work. -5. Run downstream validation and review after upstream is clean. -6. If a downstream review finds a real upstream issue, go back to the upstream repo, fix it, rerun upstream validation/review, then rerun affected downstream validation/review. Count this as the same overall review round when practical. - -## Baseline Validation - -Unless `no-baseline` was passed, run baseline validation before the first review round for each repository in dependency order: - -1. Inspect the current diff and classify the change: - - behavior/API - - UI/UX - - data/schema/migrations - - infrastructure/CI - - docs-only - - tests-only -2. Run the repo's normal formatting/linting path when configured. Prefer commands documented in `Shipping Notes`, `CLAUDE.md`, package scripts, Makefiles, taskfiles, or README. -3. Run the smallest meaningful tests first, then broader suites appropriate to the blast radius. -4. Run typecheck/build commands when the stack has them. -5. Check documentation obligations: - - user-facing docs for behavior, API, config, workflow, CLI, or UI changes - - changelog/release notes when the repo uses them - - examples or generated docs when they are part of the changed surface -6. If validation fails, fix the failure unless `no-fix` was passed, then rerun the relevant command. - -## Review Commands - -Create a temporary review directory outside the repo for review outputs. Do not commit raw review logs unless the repo explicitly asks for them. - -For multi-repo work, create one review subdirectory per repository and include the dependency context in each review prompt. Upstream review prompts should ask whether downstream compatibility is preserved. Downstream review prompts should name the upstream commits, branches, versions, image tags, or artifacts being consumed. - -### Generate the review prompt with `pr-review` - -Use the `pr-review` tool (https://github.com/happyvertical/pr-review) to generate the review prompt rather than a generic one-line brief. `pr-review` bundles a calibrated 10-theme checklist (refactor regressions, tenant isolation, effect races, silent error swallowing, hardcoded paths, doc/code drift, infra duplication, dead config, concurrency, SQL correctness) plus any repository-specific patterns in `/.pr-review/extensions.md`. All three reviewers get the same calibrated prompt so their findings are directly comparable. - -If `pr-review` is not on `$PATH`: - -```bash -git clone https://github.com/happyvertical/pr-review.git ~/pr-review -export PATH="$HOME/pr-review/bin:$PATH" -``` - -If the repository being reviewed has no `.pr-review/extensions.md`, the shared core checklist still applies — the prompt just doesn't include repo-specific guidance. That's a signal to consider adding one after the review-cycle run. - -### Run codex-cli review - -`codex review` fetches its own diff, so pass `--no-diff` to `pr-review` to avoid sending the diff twice: - -- If the branch has committed changes against the base branch: - ```bash - pr-review --base --no-diff | codex review --base - - ``` -- If there are staged, unstaged, or untracked changes, also run: - ```bash - pr-review --base --no-diff | codex review --uncommitted - - ``` -- Do not use `claude ultrareview` or any `ultrareview` variant for any reviewer here. - -### Run claude-cli review (subprocess preferred, sub-agent fallback) - -The parent agent is already Claude Code — this step invokes a *separate* claude reviewer so the review pass is independent of the orchestrating session. Don't try to satisfy this step by reasoning inline as the orchestrator; that's the 4th slot (orchestrator self-review), not the claude reviewer slot. - -**Preferred: `claude -p` subprocess.** claude-cli (the subprocess) does not fetch its own diff — pipe `pr-review` output without `--no-diff`: - -```bash -pr-review --base | claude -p --permission-mode plan -``` - -- Use `claude -p` in non-interactive print mode. -- Prefer read-only/plan permissions for the review run (`--permission-mode plan`). -- Disallow edit/write tools where supported. -- Requires `claude -p` to authenticate. From inside a Claude Code parent session, the parent's OAuth typically doesn't propagate to the child — set up a long-lived token via `claude setup-token` (one-time, run in an interactive terminal where browser flow works) and export `CLAUDE_CODE_OAUTH_TOKEN`, OR set `ANTHROPIC_API_KEY` to a key from console.anthropic.com. - -**Fallback when subprocess auth fails: sub-agent via Agent tool.** When `claude -p` returns `Failed to authenticate. API Error: 401` and no long-lived token / API key is available, spawn a fresh Claude sub-agent via the parent's Agent tool. The sub-agent gets the same review prompt, runs with no context from the parent conversation (same independence as the subprocess), and produces findings in the same JSON shape — no OAuth gymnastics. - -Concrete shape (illustrative — invoke via Claude Code's actual -Agent tool with these parameter values, not as a literal -JavaScript call): - -```text -Agent tool invocation: - subagent_type: "general-purpose" - description: "PR #N round M claude reviewer" - run_in_background: true - prompt: -``` - -Note: the sub-agent is the same model family as the orchestrator (both are Claude), so its blind-spot overlap with the orchestrator's own self-review (4th slot) is high. The independence guarantee it provides is "no shared conversation context"; it does NOT provide "different model family" independence the way the codex-cli or copilot-cli subprocesses do. Treat the sub-agent fallback as a slot-fill of last resort, not equivalence with the subprocess. - -### Run copilot-cli review - -**This step is non-optional for the "catch before push" intent.** The -Copilot PR review *bot* only fires after a PR is opened — too late to -prevent the round-trip the review-cycle exists to compress. The copilot-cli runs locally pre-push and gives you copilot-cli's blind-spot -coverage before the bot has a chance to comment. - -copilot-cli expects the prompt to carry its own context. **The -invocation must enforce read-only at the permission layer — prompt -instructions are advisory, tool permissions are enforcement.** If -copilot-cli can use write/edit-capable tools, a "review" pass can mutate -the working tree mid-round, breaking the same-commit guarantee the -loop relies on. - -`--allow-all-tools` is *not* read-only — it grants write/edit -capability and would let the model mutate the working tree mid-review. -Don't use it. But `--available-tools shell,read` alone *also doesn't -work* in non-interactive mode — it only filters which tools the model -can *see*, not which it can run without approval. In `-p` mode there's -no place to ask for permission, so tool calls get denied with -`Permission denied and could not request permission from user`. The -review then runs with no repository context. - -The correct shape is **explicit per-command `--allow-tool` flags** for -the specific read-only commands a review needs. Verify against -`gh copilot -- --help` and `gh copilot -- help permissions` for the -syntax your CLI version supports; example for current copilot-cli: - -```bash -REPO_ROOT="$(git rev-parse --show-toplevel)" -gh copilot -- -p "$(pr-review --base --pretty)" \ - -C "$REPO_ROOT" \ - --add-dir "$REPO_ROOT" \ - --disallow-temp-dir \ - --allow-tool 'shell(git diff)' \ - --allow-tool 'shell(git log)' \ - --allow-tool 'shell(git show)' \ - --allow-tool 'shell(git status)' \ - --allow-tool 'shell(git rev-parse)' \ - --allow-tool 'shell(rg)' \ - --allow-tool 'shell(cat)' \ - --allow-tool 'shell(head)' \ - --effort xhigh -``` - -The `-C` / `--add-dir` / `--disallow-temp-dir` trio is **scope -hygiene**, not a security boundary: they keep the reviewer focused -on the repo and prevent it from wandering into unrelated files in -your `/tmp` or wherever else the shell was invoked from. That -reduces noise in findings — not a vulnerability fix. The reviewer -is running locally with your credentials and can already see -anything you can see; that's how local CLI tools work. - -**When stricter sandboxing actually matters** (and the above flags -are insufficient — you need a sanitized temp checkout): -- Reviewing PRs from untrusted contributors (OSS maintainership) - where the diff could contain prompt-injection asking the model to - read your `.env` and quote it into findings the contributor sees -- CI environments where the reviewer runs unattended and findings - get auto-posted to public PR comments -- Workspaces with secrets in untracked files that you don't want - surfaced even in your own review output - -For the normal HappyVertical case — engineer reviewing their own -org's PR pre-push, findings going to their own terminal — none of -that applies. The flags above are enough. - -Add `--deny-tool` for any specific commands you want hard-blocked. -The per-command `--allow-tool` allowlist is **mostly** read-only — -but it is NOT a hard write-prevention boundary, because copilot-cli -matches at first-level subcommand granularity. `--allow-tool -'shell(git diff)'` approves any `git diff …` invocation including -write-capable forms like `git diff --output=path` which can dirty -the working tree. Similarly, `shell(rg)` permits redirection-style -flags depending on shell escaping. The prompt's "don't modify -files" instruction is defense-in-depth, but the structural -guarantee for "the reviewer ran against the same commit" is a -**pre/post tree-snapshot comparison**. - -For reviews of **committed work** (the common case): before each -reviewer, the tree is clean; after, run `git status --porcelain` — -any output means the reviewer modified the tree, the same-commit -guarantee is broken, the round is invalid. Restart from the clean -commit (or run reviewers in a disposable worktree). - -For reviews of **uncommitted/dirty work** (e.g. mid-edit review, -`codex review --uncommitted` flows): the simple "is status empty" -check fails because the tree was already dirty. Use **snapshot -comparison** — it's the only approach with no footguns: - -```bash -# BEFORE each reviewer runs -SNAPSHOT_DIR=$(mktemp -d) -git status --porcelain > "$SNAPSHOT_DIR/pre-status.txt" -git diff > "$SNAPSHOT_DIR/pre-unstaged.diff" -git diff --cached > "$SNAPSHOT_DIR/pre-staged.diff" -# Capture untracked file contents (hash + path) so we can detect -# if a reviewer added/modified untracked files. `git status` would -# catch added/removed but not same-name-different-content edits. -# NOTE: filenames are treated as DATA, never substituted into -# shell source. `xargs -I{} sh -c '...{}...'` looks convenient but -# is command-injectable — a file named `evil$(rm -rf ~).txt` -# would execute the substitution. Use a null-delimited read loop -# so each filename arrives as a bash variable (still data). -git ls-files --others --exclude-standard -z \ - | while IFS= read -r -d '' f; do - printf '%s %s\n' "$(git hash-object -- "$f")" "$f" - done \ - | sort > "$SNAPSHOT_DIR/pre-untracked.txt" - -# ... run the reviewer ... - -# AFTER — capture same shape, diff against pre-state -git status --porcelain > "$SNAPSHOT_DIR/post-status.txt" -git diff > "$SNAPSHOT_DIR/post-unstaged.diff" -git diff --cached > "$SNAPSHOT_DIR/post-staged.diff" -git ls-files --others --exclude-standard -z \ - | while IFS= read -r -d '' f; do - printf '%s %s\n' "$(git hash-object -- "$f")" "$f" - done \ - | sort > "$SNAPSHOT_DIR/post-untracked.txt" - -if ! diff -q "$SNAPSHOT_DIR/pre-status.txt" "$SNAPSHOT_DIR/post-status.txt" >/dev/null \ - || ! diff -q "$SNAPSHOT_DIR/pre-unstaged.diff" "$SNAPSHOT_DIR/post-unstaged.diff" >/dev/null \ - || ! diff -q "$SNAPSHOT_DIR/pre-staged.diff" "$SNAPSHOT_DIR/post-staged.diff" >/dev/null \ - || ! diff -q "$SNAPSHOT_DIR/pre-untracked.txt" "$SNAPSHOT_DIR/post-untracked.txt" >/dev/null; then - echo "Reviewer mutated tree state — round invalid. See $SNAPSHOT_DIR for diffs." - exit 1 -fi -rm -rf "$SNAPSHOT_DIR" -``` - -This catches: -- Added/removed tracked files (status diff) -- Modified tracked files even if status shape unchanged - (e.g. `M path → M path` with different bytes — unstaged.diff - catches this; status alone wouldn't) -- Staged/index mutations (staged.diff) -- Added/removed untracked files OR same-name-different-content - (untracked hash list) - -Preserves your staging discipline exactly, doesn't touch any -commits, no WIP dance, no `git reset` calls (so Hard Rules don't -need a carve-out). - -Why not WIP-commit-then-undo (which an earlier draft of this doc -suggested): that approach accumulated multiple footguns across -rounds — `git add -A` would leak unrelated untracked files into -`pr-review`'s diff; `git stash` would actually remove dirty changes -from the worktree and review the wrong state; `git reset --mixed -HEAD~1` would silently drop the wrong commit if HEAD moved; the -patch-capture-and-restore step couldn't recreate untracked files -deleted by `reset --hard`; `wip:` isn't a valid Conventional Commit -type and would be rejected by commitlint in any repo using these -configs (which is the whole org). The snapshot approach sidesteps -every one of these. If you find yourself reaching for WIP commits -to make this work, you're solving the wrong problem. - -Either way, never just "is `git status` clean now" as the -post-check — that only works when "clean" was the baseline. - -- Use `--pretty` so copilot-cli receives the prompt as readable markdown - rather than the JSON-instruction format. -- Pass `--` after `gh copilot` to forward flags to the underlying - `copilot` binary; otherwise `gh` may interpret them. -- `--effort xhigh` matches codex-cli's reasoning depth; tune down if the - diff is small and you want faster runs. -- The prompt itself also instructs not to modify files. That's - defense-in-depth, not the primary enforcement — the permission - flags do the actual blocking. - -**Known blockers and fallbacks** (real failures we've seen): - -- **`Access denied by policy settings`** — the org's Copilot policy - is disabling CLI use. Fix at https://github.com/settings/copilot - (personal) and/or your org's Copilot policies page (admin). Until - enabled, copilot-cli cannot run pre-push. -- **`Failed to authenticate. API Error: 401`** on `claude -p` — happens - when this command is invoked from inside an active Claude Code - session; OAuth credentials don't propagate to spawned children. - Workaround: set `ANTHROPIC_API_KEY` env var on the child invocation, - or run review-cycle from a terminal / CI / codex-cli session instead. - -**When a reviewer slot can't be filled**: proceed with the others -*and* record in the final report which slot was skipped and why. -**Status MUST drop to `partial` when any required reviewer slot -is unfilled.** The four required slots by default are: -- codex-cli (subprocess) -- the claude reviewer slot — filled by either `claude -p` subprocess - OR Agent-tool sub-agent fallback when OAuth fails. The slot is - "filled" if EITHER succeeds; only "skipped" if BOTH fail. -- copilot-cli (subprocess) -- the orchestrator's checklist pass (the 4th slot, fills itself). - -Never silently drop a slot. Never report `clean` with a skipped -required slot — `/ship` gates on `Status: clean`, and a soft skip -would let unreviewed code merge. - -If copilot-cli is the unavailable one specifically, record this in -the final report's `Skipped reviewers` field with reason. Downstream -(`/ship`, or the human invoking review-cycle directly) reads the -report and decides whether to open the PR as a **draft** so the -Copilot bot can review before merge candidacy. `/review-cycle` -itself never opens or pushes PRs — that's `/ship`'s job — so this -fallback is something the report enables, not something review-cycle -executes. - -### For all three external reviewer slots - -These rules apply to the three subprocess reviewers (codex-cli, -copilot-cli, and the claude `claude -p` subprocess). When the -claude slot is filled via the Agent-tool sub-agent fallback -instead, the sub-agent runs in the parent process (no subprocess, -no stdout/stderr files to capture). The 15-minute-timeout and -stdout/stderr-capture rules don't apply to that path; the -sub-agent's findings come back as the agent result. - -- Use a review command timeout of at least 15 minutes. Since the Bash tool caps a single foreground call at 10 minutes, run reviewers in the background (`run_in_background: true`) and poll completion with `BashOutput`, or split into multiple shorter calls. -- Capture stdout and stderr to separate files in the temp review directory — malformed or empty findings almost always have the cause in stderr. -- Treat each tool's findings as evidence to verify against the code, not as orders to apply. Vague claims get dismissed; concrete file:line citations with named failure paths get acted on. - -### Orchestrator self-review (the 4th reviewer slot) - -The orchestrator (the parent Claude Code session running this command) must also perform an explicit checklist pass against the same commit each round. This is NOT silent-solo — it must produce written findings in the same JSON shape the subprocesses do, including "no findings" when nothing surfaces. - -**Read this carefully — the 4th slot has structural confirmation bias the other three don't have:** -- The orchestrator is the same model family as the claude-cli reviewer (both Claude). Its blind-spot coverage overlaps with claude-cli's, not with codex-cli's or copilot-cli's. -- The orchestrator authored (or at least drove) the fix being reviewed. It has full context of intent — what the fix was supposed to do, why each decision was made. That context is helpful for *understanding* the code but is exactly the cognitive bias that makes "did I miss anything?" the wrong question to ask yourself. -- A clean orchestrator pass therefore carries less independent epistemic weight than a clean codex-cli or copilot-cli pass. Consumers of the Reviews report should NOT treat 4/4 clean as equivalent to 3/3 clean + a fresh perspective. - -The orchestrator slot's role is **explicit checklist accountability** — forcing the orchestrator to run through the same questions and write down the answer — not independent blind-spot coverage. Keep it in the loop precisely because the discipline of running the checklist surfaces things the orchestrator's "I looked, it's fine" intuition skips, not because Claude-reviewing-its-own-work is a strong signal. - -- Run the orchestrator pass in parallel with the subprocesses (while they run in the background, the orchestrator reads the diff against the checklist). -- Use the same pr-review checklist + extensions the subprocesses use. -- Output the same JSON shape: `{summary, findings: [{severity, category, file, line, title, body, confidence}], skipped: []}`. -- Include the orchestrator findings in the round's dedup step alongside subprocess findings, but weigh them with the bias caveat above — a finding the orchestrator surfaces that no other reviewer caught is real; a "no findings" pass from the orchestrator alone (without subprocess corroboration) is weak. -- If the orchestrator has nothing to add ("no findings"), record that explicitly — the absence of explicit findings is silent-solo; an explicit "{findings: []}" entry is participation. - -After all FOUR reviewer slots produce findings (three subprocesses + orchestrator), merge into one checklist grouped by severity (see "Review/Fix Loop" below). Prefer findings flagged by ≥2 reviewers when severity is medium or low; high-severity findings from a single reviewer still warrant verification. Findings flagged ONLY by the orchestrator's self-review get extra scrutiny on whether they're real (the confirmation bias works both ways — orchestrator can over-flag things it knows are intentional too). - -### Optional: capture for calibration - -If the repository has a `.pr-review/extensions.md`, also append `| pr-review-capture` to one of the runs (typically the claude-cli subprocess or codex-cli) so the findings are stored at `.pr-review/history/.json`. Later, `pr-review-tune --last 10` can compare stored findings against the review comments PRs actually received and propose refinements to the checklist. This closes the feedback loop so the checklist gets sharper over time. - -```bash -pr-review --base | claude -p --permission-mode plan | pr-review-capture | tee /dev/tty -``` - -## Review/Fix Loop - -Run up to `rounds` review rounds. The argument default is `3` -regardless of change type (set at the `rounds=N` arg above). For -documentation / reviewer-checklist content, consider passing -`rounds=5..10` because each round catches progressively narrower -factual edge cases — there's no auto-detection that bumps the cap -for doc work. - -**Hard rules for the loop** (these prevent the "stopped too early" -*and* "looped too long on trivia" failure modes): - -- **Each round runs all reviewers in parallel against the SAME commit** - — not sequentially against each other's fixes. Sequential cascading - makes findings depend on which reviewer ran first and obscures - whether reviewers actually agree on the latest state. - - *Carve-out for the claude sub-agent fallback*: the sub-agent is - spawned only after `claude -p` returns auth failure (typically - within ~1s). Launching both concurrently as a "defensive precaution" - doubles Anthropic API spend when the subprocess succeeds. Launch - the subprocess first, wait briefly for the 401, then spawn the - sub-agent only if auth failed. This still counts as "parallel" for - the same-commit guarantee — the failed subprocess made no observable - tree change, and the sub-agent runs concurrently with the codex-cli - and copilot-cli subprocesses from its launch onward. -- **A fix-round on substantive (P0-P2) findings is never the final - round.** If you just pushed a fix for a real bug, you MUST run - another round to confirm it didn't introduce a new one. -- **The loop exits when no P0/P1/P2 findings remain — not when - every reviewer returns zero findings.** P3 / nit-level findings - (polish, narrow factual edges, cosmetic placement) get triaged - three ways (fix inline if cheap, record in the final report if - worth tracking, file as follow-up if bigger) but never extend the - loop. Running another full ensemble round just to verify a - one-line wording tweak is technical perfectionism that burns - reviewer cycles without changing what ships. -- **Convergence is per-commit for behaviour-changing fixes** (P0/P1/P2 - and any non-fix code changes). Reviewer A returning clean against - commit X doesn't mean clean against commit Y when Y changes - behaviour — re-run all reviewers. **P3-only commits do not reset - convergence**: if the only change since the last clean verify - round is a P3 wording tweak, you don't need another full ensemble - pass. -- **One reviewer returning clean is NOT convergence — the whole - ensemble must return clean.** A reviewer that didn't run can't - have caught the bug another reviewer would have. Two failure - modes to guard against: - - *Silent solo*: only running one reviewer per round (e.g. - "codex-cli is fast and reliable, I'll skip the others") and - declaring convergence when it returns 0. The whole point of - the ensemble is non-overlapping blind spots. A real example: - if you solo a single reviewer for ~12 rounds and then add a - second reviewer for round 13, expect that second reviewer to - immediately surface findings the first kept missing. - - *Unavailable ≠ clean*: if a reviewer errored (auth, policy, - network, env), that's a missing signal — not a clean signal. - Record the unavailability explicitly in the final report. - Either resolve the blocker and retry, or accept the - reduced-coverage tradeoff with rationale, but do not count - the absence as agreement. - -For each round, process repositories in dependency order: - -1. Run validation before review if files changed since the previous validation pass. -2. Run all four reviewer slots for each repository in dependency order: codex-cli, the claude slot (subprocess `claude -p` OR sub-agent via Agent tool when OAuth fails), copilot-cli, and the orchestrator's own checklist pass. Run the three subprocesses in parallel in the background; the orchestrator's pass runs concurrently while waiting on subprocess completion. All four must produce explicit findings (including "no findings") before dedup. -3. Merge findings into a single checklist by severity: - - `P0/P1`: correctness, data loss, security, broken build, failing tests. **Always block. Always loop.** - - `P2`: likely bug, missing test, missing docs for changed behavior. **Block by default; loop unless explicitly accepted with rationale in the final report (which `/ship` then copies into the PR body when creating the PR).** - - `P3`: maintainability or polish with clear benefit; narrow factual edges affecting tiny version windows or rare paths. **Never block. Never extend the loop just to verify a P3 fix.** For each P3 finding, pick one based on cost vs. value: - - **Cheap to fix → fix inline in the same commit/PR.** No verify round needed; group with other fixes if any. (Most P3 wording/clarity tweaks fall here.) - - **Worth tracking but not blocking → record in the final report** as accepted non-blocker with brief rationale. If a PR already exists, also copy into the PR body; otherwise `/ship` propagates the report into the PR body at PR creation time. - - **Bigger than this PR's scope → file as follow-up issue**, link from the final report (and PR body, when one exists). -4. Verify each finding against the code. Do not blindly patch speculative review comments. -5. If `no-fix` was passed, stop after reporting findings. -6. Address all valid P0/P1 findings (mandatory) and all valid P2 findings (mandatory unless explicitly accepted in the final report with a one-line rationale) in priority order. -7. Add or adjust tests for bug fixes and behavior changes. -8. Rerun relevant validation after edits. -9. If upstream fixes change the contract consumed downstream, rerun affected downstream validation and review even if that downstream repo had already passed in the current round. -10. **If a P0/P1/P2 fix was pushed in this round, the next round MUST run** to verify the fix didn't break something. Do not stop on a P0/P1/P2 fix-round. -11. Stop the loop as `clean` only when **ALL THREE** conditions - hold across the graph: - - a verify round returns no *unaccepted* P0/P1/P2 findings - from any reviewer in any included repo, - - validation is green across the graph, AND - - every required reviewer actually ran in the verify round - (any skipped/unavailable reviewer → status is `partial`, - not `clean`, per the Status contract below). - - Don't conflate "no findings surfaced" with "clean" — a - reviewer that didn't run produced no findings because it - didn't run, not because none exist. - - Reviewers may continue surfacing an accepted P2 in subsequent - rounds (they have no way to know it was accepted); the - acceptance lives in the final report, and the stop condition - discounts it. P3/nit findings at exit time get recorded in the - final report, not fixed in this PR (consumers like `/ship` - are responsible for surfacing them in the PR body when the PR - exists). - -If the loop hits the round cap: - -- stop and summarize unresolved findings -- distinguish true blockers from false positives and accepted non-blockers -- if findings are still surfacing at the cap, that's a signal — either - the spec is over-detailed (consider simplifying), the reviewer set - is producing diminishing returns (acceptable to ship with a recorded - follow-up), or there's a genuine gap (don't ship; raise the cap or - reassess) -- **special case: a P0/P1/P2 fix landed in the final permitted round** - — Rule 10 requires the next round MUST run to verify, but the cap - forbids it. Report status as `blocked` with reason - `verify-round-blocked-by-cap`. The fix may be correct but no - verify round confirmed it; per the Status contract, an unverified - P0/P1/P2 fix counts as "unaccepted P0/P1/P2 remaining" — its - effect on the codebase is unobserved. Note in the final report - that the cap blocked verification and recommend re-running with - `rounds=N+1` (or higher) so the verify round can complete. Don't - report `clean` just because the post-fix tree has no surfaced - findings — those findings were never sought. -- do not push or open PRs from this command unless the user explicitly asks - -## Final Report - -Return a concise review-cycle report: - -```text -## Review Cycle Result -- Status: clean | partial | blocked | findings-only - (clean = no P0/P1 + all P2 fixed-or-accepted + ALL required reviewers ran - + validation green; - partial = otherwise-clean but at least one required reviewer was - skipped. Single cause only — other "incomplete" states - (unverified fix, validation failed) are `blocked`, not - `partial`; - blocked = unaccepted P0/P1/P2 remaining (whether before or at the - round cap), validation failed, OR a P0/P1/P2 fix landed - in the final permitted round with no verify round - possible (an unverified fix counts as potentially - unaccepted — the operator should re-run with a raised - `rounds=N+1` to let the verify round complete). A - round-cap exit with ONLY P3/nit findings remaining is - NOT blocked — those findings go in the accepted - non-blockers field and Status stays clean (or partial - if a required reviewer was skipped, or blocked if - validation failed — the carve-out only suppresses the - "P3-only at cap → blocked" path; other blocked causes - still apply). Without this carve-out, the round-cap - definition would re-block on the exact trivia loop - these rules are designed to exit; - findings-only = `no-fix` was passed) -- Repos: -- Worktrees: -- Branches: -- Validation: -- Reviews: -- Docs: -- Dependency order: downstream edges or none> -- Remaining blockers (P0/P1, or unaccepted P2): -- Accepted P2 (with rationale): -- Accepted non-blockers (P3/nit): -- Skipped reviewers: -``` +If ContextForge is unavailable, the prompt is missing, or the resource cannot be read or decoded, stop and report the ContextForge access problem instead of falling back to a stale local workflow. diff --git a/claude/have/commands/ship.md b/claude/have/commands/ship.md index 496e4ca..6f3c13c 100644 --- a/claude/have/commands/ship.md +++ b/claude/have/commands/ship.md @@ -1,302 +1,16 @@ --- -description: "Prepare current work for shipping: validate, update docs, run /review-cycle, open a ready PR, and watch CI to green." +description: "Run the HappyVertical ship workflow from ContextForge." --- -# /ship +# /have:ship -Ship the current repository work end to end. Use `/review-cycle` for the repeating review/fix/retest loop before opening PRs. +This local command is only an adapter. The authoritative workflow lives in ContextForge. -The parent agent running this command is **Claude Code**. CI watching, PR creation, and the review-cycle subprocess invocations all happen through Bash tool calls — long-running steps (CI watch, review subprocesses) should run in the background with `run_in_background: true` and be polled with `BashOutput` rather than tying up a foreground call against the 10-minute Bash cap. +Load and follow the Happy Vertical Team prompt: -## Hard Rules +- Prompt: `have-ship` +- Resource: `have://happyvertical/workflows/ship` -- Respect the global worktree isolation policy before making edits. If the current checkout is a primary checkout such as `/Users/will/Work/.../repos/...`, move the work to a dedicated worktree and branch before editing, preferably under `/Users/will/.claude/worktrees/` with a `claude/` branch prefix. -- Do not mix this session's edits with unrelated dirty files. Preserve user changes, and ask only when the current work cannot be separated safely. -- Do not use destructive cleanup commands such as `git reset --hard`, `git checkout --`, or `git clean` unless the user explicitly asks for that exact destructive action. -- Run `/review-cycle` before opening PRs. Inherit its review rules, including no `ultrareview`, at least 15-minute review timeouts (via background Bash + `BashOutput`), and up to the configured round cap. -- CI failures after PR creation count as new work and must be fixed, locally revalidated, and passed back through `/review-cycle` when the fix changes code, tests, docs, config, or behavior materially. -- If the current work spans multiple repositories, ship them as an ordered dependency graph. Validate and run `/review-cycle` on upstream repos first, then downstream consumers against the exact upstream commits or PR branches they depend on. +Use ContextForge MCP prompts/resources when available. If prompt invocation is unavailable, read the resource by URI. The resource text contains `encoding: base64` and a `payload_base64:` block; decode that payload as UTF-8 markdown and follow it exactly as the ship workflow. -## Arguments - -- `rounds=N`: maximum `/review-cycle` rounds. Default `3`. -- `base=`: override the comparison base branch. -- `repos=`: explicit list of repositories in the shipping set. -- `draft`: explicitly create draft PRs. Without this argument, open PRs ready for review when validation and `/review-cycle` are clean. - -## Preflight - -1. Confirm this is a git repository. If `repos=` was provided, confirm every listed path is a git repository. -2. Discover the full shipping set before validating anything: - - current repository - - repositories explicitly named by the user or `repos=` - - dirty related worktrees under `/Users/will/.claude/worktrees/` (also check `/Users/will/.codex/worktrees/` and `/Users/will/Work/_trees/` for cross-agent work in progress) - - local path, workspace, git, or package dependencies referenced by changed files - - git submodules or nested repositories touched by the diff - - sibling repositories that are clearly referenced by changed docs, package manifests, lockfiles, workflow files, deployment manifests, or local integration config -3. Exclude unrelated dirty repositories. If multiple dirty repos are present but the dependency relationship is not clear, report them and ask before including them. -4. For each included repository, capture: - - repo root - - current branch - - worktree path - - remote URL - - default/base branch - - staged, unstaged, and untracked files -5. Confirm every included checkout is safe for edits under the worktree isolation policy. State the selected worktree path and branch for each repository before editing. -6. Check required tools before relying on them: - - `gh` - - `pr-review` (install: `git clone https://github.com/happyvertical/pr-review.git ~/pr-review && export PATH="$HOME/pr-review/bin:$PATH"`) - - `codex` - - `claude` - - `gh copilot` -7. Confirm GitHub auth with `gh auth status`. Treat missing auth as a blocker for PR/CI work. -8. Read repository instructions and shipping context in every included repository: - - nearest `CLAUDE.md` - - nearest `AGENTS.md` if present - - `README*` - - package/build config files - - `.github/pull_request_template*` - - `.github/workflows/*` -9. Look for `## Shipping Notes` in repo-level `CLAUDE.md` (or `AGENTS.md`). If present, treat it as the source of repo-specific shipping commands, docs requirements, CI quirks, release notes, PR expectations, and known flaky checks. - -## Multi-Repo Ordering - -When more than one repository is involved, build a dependency graph before validation and `/review-cycle`. - -Use directed edges as `upstream -> downstream`, meaning the downstream repo depends on code, packages, APIs, manifests, images, generated artifacts, or documentation from the upstream repo. - -Evidence for ordering includes: - -- package manifests and lockfiles -- workspace files -- local `file:` or path dependencies -- Git submodules -- generated SDK/client references -- service API contracts -- deployment manifests and image references -- CI workflows that consume artifacts from another repo -- docs or release notes that explicitly describe dependency order -- `Shipping Notes` - -If the graph is ambiguous: - -- state the uncertainty -- choose the conservative order only when evidence is strong enough -- otherwise ask before proceeding - -Ship in topological order: - -1. Validate each upstream repository first. -2. Run `/review-cycle` on upstream repositories before relying on those changes downstream. -3. Record the exact upstream commit, branch, PR URL, package version, image tag/digest, or artifact that downstream validation uses. -4. Update downstream repositories to reference the upstream result when that is part of the intended change. -5. Run downstream validation and `/review-cycle` after upstream is clean. -6. If downstream work finds a real upstream issue, go back to the upstream repo, fix it, rerun upstream validation and `/review-cycle`, then rerun affected downstream validation and `/review-cycle`. - -For PRs: - -- Open upstream PRs before downstream PRs. -- Open PRs ready for review by default, including downstream PRs that depend on unmerged upstream PRs. -- Link downstream PRs to upstream PRs and call out the dependency explicitly. -- Use draft PRs only when the user passed `draft`, the repo explicitly requires draft stacked PRs, or unresolved blockers make the PR not ready for review. -- Watch CI in upstream-to-downstream order. If upstream CI fails, pause downstream promotion until the upstream failure is fixed or proven unrelated. - -## Shipping Notes Policy - -`## Shipping Notes` is a good repo-level pattern. Use it as a durable project checklist, not a run log. - -When present: - -- follow its commands and repo-specific gates before generic guesses -- update it only when this run discovers durable facts future shipping runs should know -- keep updates concise and actionable - -When absent: - -- do not add it just to say the command ran -- add it only if you learn durable repo-specific shipping knowledge, such as the canonical lint/test commands, required docs surfaces, release-note policy, CI watch quirks, or PR conventions -- if adding it, keep the section short and factual - -Suggested shape: - -```markdown -## Shipping Notes - -- Validation: -- Documentation: -- Reviews: -- PR/CI: -- Known quirks: -``` - -## Clean And Validate - -For multi-repo work, run this section for each repository in dependency order. After every upstream repo changes, rerun the relevant downstream checks that prove compatibility with the new upstream state. - -1. Inspect the current diff and classify the change: - - behavior/API - - UI/UX - - data/schema/migrations - - infrastructure/CI - - docs-only - - tests-only -2. Run the repo's normal formatting/linting path when configured. Prefer the commands documented in `Shipping Notes`, `CLAUDE.md`, package scripts, Makefiles, taskfiles, or README. -3. Run the smallest meaningful tests first, then broader suites appropriate to the blast radius. -4. Run typecheck/build commands when the stack has them. -5. Check documentation obligations: - - Update user-facing docs for behavior, API, config, workflow, CLI, or UI changes. - - Update changelog/release notes when the repo uses them. - - Update examples or generated docs when they are part of the changed surface. - - If no docs are needed, note the reason. -6. If validation fails, fix the failure and rerun the relevant command. Broaden validation before review if the fix touches shared or user-facing behavior. - -## Review Cycle Gate - -After validation and documentation cleanup, run `/review-cycle` over the same repository set before opening PRs. - -Use the same `rounds=`, `base=`, and `repos=` arguments passed to `/ship`. For multi-repo work, pass the dependency order and upstream/downstream context discovered during `/ship` preflight. - -Treat `/review-cycle` as the blocker gate: - -**Regardless of the gate's result**, always copy these fields from -`/review-cycle`'s final report into the PR body when creating or -updating the PR: -- `Accepted P2 (with rationale)` — accepted P2 happens on the `clean` - branch under the current status contract (all P2 fixed-or-accepted - → clean), so this propagation is not gated by `partial` -- `Accepted non-blockers (P3/nit)` — same reasoning -- `Skipped reviewers` (if any) - -These fields are how human reviewers see the deliberate choices the -ensemble made. Dropping them defeats the audit trail. - -Then branch on the gate result: - -- If `/review-cycle` returns `clean`, continue to commit and PR. -- If it returns `partial`, branch on the reason recorded in - `Skipped reviewers` (the only documented cause of `partial` — - Accepted P2 ends in `clean`, not `partial`, per the Status - contract): - - **Partial because copilot-cli was skipped** (org policy block, - network failure, missing auth, etc.): open the PR as a **draft** - so the Copilot bot can review post-push. - - **Prerequisite check**: GitHub's automatic Copilot code review - of drafts is opt-in per-repo. By default the bot only reviews - when a PR opens *non-draft* (or transitions Draft→Open) and - does NOT auto-re-review subsequent pushes. Before relying on - this fallback, verify in the repo's Copilot settings ([docs](https://docs.github.com/en/copilot/concepts/agents/code-review#about-automatic-pull-request-reviews)) - that BOTH "Automatically review pull requests" includes - "Review draft pull requests" AND "Review new pushes" is - enabled. If either is off, the fallback will silently wait - forever for a review that never comes — you must instead - request the bot review manually with `gh pr edit - --add-reviewer @copilot` ([docs](https://docs.github.com/en/copilot/how-tos/use-copilot-agents/request-a-code-review/use-code-review)). - - **gh CLI version requirement**: `--add-reviewer @copilot` - requires gh CLI v2.88.0 or newer ([release notes](https://github.com/cli/cli/releases/tag/v2.88.0)). - On older gh, the command fails with `Could not request - reviewer: '@copilot' not found` and the bot is NOT requested - — silently regressing into the same "draft sits forever - without review" mode. Check with `gh --version` first. If - your gh is older, upgrade (`brew upgrade gh`) or use the PR - page's Reviewers menu manually. - - For re-reviews after subsequent pushes, use the Reviewers menu - (re-request button) on the PR page; `gh pr edit` is for the - initial add only. - - Address bot findings, then rerun `/review-cycle`. The rerun - will *still* return `partial` (the CLI block is the same), so - it can't be the clearance signal. Instead: when the Copilot - bot has reviewed the **current** commit with no unaddressed - findings AND a human explicitly accepts the bot-for-CLI - substitution (typically by running `gh pr ready`), that's the - clearance path. "Current commit" matters: if you pushed - fixes after the bot reviewed, request a re-review on the new - SHA before clearing. Document the substitution in the PR body - so the audit trail is clear. - - **Partial because a different required reviewer slot was unfilled** - (codex-cli unavailable, OR claude slot couldn't be filled via - EITHER `claude -p` subprocess OR the sub-agent fallback, OR the - orchestrator slot was unfilled because no explicit `{findings: - []}` checklist pass was produced this round): open as draft and - call out the skip in the PR body so a human can decide whether - the remaining reviewer coverage is sufficient. Don't mark ready - until the skipped slot can be filled or a human explicitly - accepts the gap with rationale in the PR body. - - Note: if `claude -p` failed but the sub-agent fallback succeeded, - the claude slot IS filled (not skipped). `/review-cycle` should - have returned `clean`, not `partial`, in that case — if it - returned `partial` anyway, that's a bug in how the orchestrator - classified the substitution and should be fixed there, not - worked around here. -- If it returns `blocked`, stop before opening ready PRs. Open draft PRs only when the user passed `draft` or a draft would help expose the blocker. - - **Special sub-case: blocked because of `verify-round-blocked-by-cap`** (a P0/P1/P2 fix landed in the final permitted `/review-cycle` round). The fix may be correct but no verify round confirmed it. Don't ship — re-run `/review-cycle rounds=N+1` (or higher) to let the verify round complete, then re-attempt `/ship`. Calling this out explicitly because the failure mode looks like "clean" to a literal reader (the tree post-fix surfaces no findings) but actually means "findings were never sought". -- If `/review-cycle` changed files, rerun the relevant validation and documentation checks before committing. - -## Commit And PR - -When the Review Cycle Gate above has been satisfied (either `clean`, -or `partial` with an explicit fallback path documented above), -commit and open PRs in dependency order. Draft vs ready follows the -gate's branch — draft on partial, ready on clean (unless the user -passed `draft`): - -1. Recheck `git status --porcelain` in each included repository. -2. Ensure every branch name is suitable. If needed, create a `claude/ship-` branch per repository. -3. Commit uncommitted shipping changes with concise messages. Do not rewrite or squash existing user commits unless asked. -4. Push upstream branches first, then downstream branches. -5. Create or update PRs with `gh pr create` or `gh pr edit`, upstream first. -6. Use each repo's PR template when present. -7. If an existing PR is draft AND `/review-cycle` returned status `clean` (not `partial`, not `blocked`) AND validation is green AND the user didn't pass `draft`, mark it ready for review with `gh pr ready`. "Now clean" is the Review Cycle Gate output specifically — not a subjective re-read of the working tree. On `partial`, the human runs `gh pr ready` after the partial-branch clearance path documented above (e.g. after Copilot bot has reviewed the current commit and the operator explicitly accepts the bot-for-CLI substitution). Don't auto-ready a draft that came from a partial gate. -8. Include in every PR: - - summary of changes - - validation commands and results - - review rounds run and outcome - - documentation updates or why none were needed - - unresolved accepted non-blockers, if any - - cross-repo dependencies, including upstream PR URLs or exact commits when applicable - -If the user passed `draft`, create draft PRs. Otherwise, create PRs ready for review when the work is clean. Do not make a downstream PR draft solely because it depends on an unmerged upstream PR; link the dependency clearly instead. Use draft only for explicit user preference, documented repo policy, or unresolved blockers. - -## CI Watch And Fix - -After PRs exist, watch CI in dependency order. CI watching is long-running — invoke `gh pr checks --watch` via Bash with `run_in_background: true` and poll with `BashOutput`, or use the `Monitor` tool with `gh pr checks --json state -q '.[].state' | grep -v -E 'success|skipped|neutral'` until it returns empty. - -1. Run `gh pr checks --watch --interval 10` for the current upstream PR before advancing to downstream PRs. -2. If checks pass, record the PR URL and green status. -3. If checks fail: - - list failing check names with `gh pr checks --json name,workflow,state,bucket,link` - - inspect failing logs with `gh run view --log-failed` or the check-specific URL - - identify the smallest valid fix - - apply the fix in the isolated worktree - - rerun local validation relevant to the failing check - - rerun reviews when the fix changes code, tests, docs, config, or behavior materially - - commit and push - - resume watching CI -4. If an upstream CI fix changes downstream behavior or artifacts, rerun affected downstream validation, review, and CI. -5. Continue until CI is green across all included PRs or a real blocker remains, such as missing secrets, external service outage, unavailable required credentials, or a failing check that cannot be inspected locally. - -## Final Report - -Return a concise shipping report: - -```text -## Ship Result -- Status: clean | partial | blocked -- Repos: -- Worktrees: -- Branches: -- PRs: -- Validation: -- Reviews: -- Accepted P2 (with rationale): -- Accepted non-blockers (P3/nit): -- Skipped reviewers: -- Docs: -- CI: green | failing | blocked | not configured -- Dependency order: downstream edges or none> -- Remaining: -``` +If ContextForge is unavailable, the prompt is missing, or the resource cannot be read or decoded, stop and report the ContextForge access problem instead of falling back to a stale local workflow. diff --git a/codex/.agents/plugins/marketplace.json b/codex/.agents/plugins/marketplace.json index 461aac7..d52e6ac 100644 --- a/codex/.agents/plugins/marketplace.json +++ b/codex/.agents/plugins/marketplace.json @@ -2,7 +2,7 @@ "name": "have-config", "interface": { "displayName": "HappyVertical Agent Config", - "shortDescription": "Cross-repo agent configuration for HappyVertical projects" + "shortDescription": "ContextForge-backed cross-repo agent configuration for HappyVertical projects" }, "plugins": [ { diff --git a/codex/plugins/have/.codex-plugin/plugin.json b/codex/plugins/have/.codex-plugin/plugin.json index bca7c66..533ac61 100644 --- a/codex/plugins/have/.codex-plugin/plugin.json +++ b/codex/plugins/have/.codex-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "have", - "version": "0.1.0", - "description": "HappyVertical agent commands for Codex. Provides /have:ship (end-to-end shipping pipeline) and /have:review-cycle (multi-tool review loop) for the org's standard pre-PR and PR-creation workflow.", + "version": "0.1.1", + "description": "HappyVertical agent workflow adapters for Codex. Provides skills for have:ship and have:review-cycle by loading the authoritative workflow prompts/resources from ContextForge.", "author": { "name": "Will Griffin", "email": "buddyrandom@gmail.com", @@ -12,6 +12,7 @@ "license": "MIT", "keywords": [ "codex", + "skill", "slash-command", "shipping", "review", @@ -19,10 +20,11 @@ "happyvertical", "pr-review" ], + "skills": "./skills/", "interface": { - "displayName": "have — HappyVertical Commands", - "shortDescription": "Org-wide review and shipping commands for HappyVertical projects", - "longDescription": "Provides /have:review-cycle (multi-tool review loop using codex+claude+copilot) and /have:ship (end-to-end shipping: validate, review, PR, watch CI). Uses pr-review (https://github.com/happyvertical/pr-review) to generate calibrated review prompts. Mirrors the Claude Code version of this plugin.", + "displayName": "have — HappyVertical Workflows", + "shortDescription": "ContextForge-backed review and shipping workflows for HappyVertical projects", + "longDescription": "Provides Codex-visible skills for have:review-cycle and have:ship. The skills are thin adapters that load the authoritative workflow prompts/resources from ContextForge in the Happy Vertical team.", "developerName": "Will Griffin", "category": "Coding", "capabilities": [ @@ -32,8 +34,8 @@ ], "websiteURL": "https://github.com/happyvertical/have-config", "defaultPrompt": [ - "Run /have:review-cycle for repeat reviews and fixes.", - "Run /have:ship to validate, review, PR, and watch CI." + "Use have:review-cycle for repeat reviews and fixes.", + "Use have:ship to validate, review, PR, and watch CI." ], "brandColor": "#0F766E", "screenshots": [] diff --git a/codex/plugins/have/commands/review-cycle.md b/codex/plugins/have/commands/review-cycle.md index 34399f1..0b9a3ca 100644 --- a/codex/plugins/have/commands/review-cycle.md +++ b/codex/plugins/have/commands/review-cycle.md @@ -1,565 +1,16 @@ --- -description: Run a repeatable review/fix/retest loop over current work, optionally across multiple repos, without opening PRs or shipping. +description: "Run the HappyVertical review-cycle workflow from ContextForge." --- -# /review-cycle +# /have:review-cycle -Run a bounded review cycle on the current work independent of shipping. Default to 3 rounds unless the user passes `rounds=N`. +This local command is only an adapter. The authoritative workflow lives in ContextForge. -The parent agent running this command is **Codex CLI**. The command orchestrates a **4-reviewer ensemble**: three independent reviewer subprocesses — a separate codex-cli invocation, claude-cli, and GitHub copilot-cli — plus the orchestrator's own explicit checklist pass against the same commit. Different models have different blind spots; the ensemble catches more than any single tool. +Load and follow the Happy Vertical Team prompt: -When OAuth or auth issues block any subprocess reviewer, the parent should record the unavailability in the `Skipped reviewers` field of the final report and let the Status contract drop to `partial` (per "Reviewer Availability" below). Resolve the blocker (e.g. `claude setup-token` for claude-cli, org Copilot policy toggle for copilot-cli) and re-run if you need `clean` for `/ship`; the "rationale" field is documentation of WHY the slot was skipped, NOT a way to keep Status=clean despite a skipped slot. The Status contract is strict — silent slot drops or "accept and continue" framings would let unreviewed code through `/ship`'s gate. +- Prompt: `have-review-cycle` +- Resource: `have://happyvertical/workflows/review-cycle` -The orchestrator's own pass is NOT silent-solo — it must be an explicit checklist run against the staged/committed diff, with findings written out in the same JSON shape the subprocesses produce. "I looked, it's fine" is not a review; an enumerated set of P0/P1/P2/P3 findings (including "no findings") is. +Use ContextForge MCP prompts/resources when available. If prompt invocation is unavailable, read the resource by URI. The resource text contains `encoding: base64` and a `payload_base64:` block; decode that payload as UTF-8 markdown and follow it exactly as the review-cycle workflow. -## Hard Rules - -- Respect the global worktree isolation policy before making edits. If the current checkout is a primary checkout such as `/Users/will/Work/.../repos/...`, move the work to a dedicated worktree and branch before editing, preferably under `/Users/will/.codex/worktrees/` with a `codex/` branch prefix. -- Do not mix this session's edits with unrelated dirty files. Preserve user changes, and ask only when the current work cannot be separated safely. -- Do not use destructive cleanup commands such as `git reset --hard`, `git checkout --`, or `git clean` unless the user explicitly asks for that exact destructive action. -- Do not use `claude ultrareview` or any `ultrareview` command. Use the normal claude-cli in non-interactive print mode for review. -- Every external review command must be allowed at least 15 minutes. When using codex-cli's command tools, set the command timeout to at least `900000` ms for review commands. -- Treat review output as evidence to verify, not as orders. Fix valid findings. For false positives, record the rationale in the final report. -- Keep going until the work is clean or the configured review-round cap is reached. -- If the work spans multiple repositories, review them as an ordered dependency graph. Review upstream repos first, then downstream consumers against the exact upstream commits or branches they depend on. -- This command does not open PRs, push branches, or watch CI unless the user explicitly asks for that during the review-cycle run. - -## Arguments - -- `rounds=N`: maximum review/fix rounds. Default `3`. -- `base=`: override the comparison base branch. -- `repos=`: explicit list of repositories in the review set. -- `no-fix`: review only; do not edit files. -- `no-baseline`: skip baseline validation before the first review round. - -## Preflight - -1. Confirm this is a git repository. If `repos=` was provided, confirm every listed path is a git repository. -2. Discover the review set: - - current repository - - repositories explicitly named by the user or `repos=` - - dirty related worktrees under `/Users/will/.codex/worktrees/` - - local path, workspace, git, or package dependencies referenced by changed files - - git submodules or nested repositories touched by the diff - - sibling repositories that are clearly referenced by changed docs, package manifests, lockfiles, workflow files, deployment manifests, or local integration config -3. Exclude unrelated dirty repositories. If multiple dirty repos are present but the dependency relationship is not clear, report them and ask before including them. -4. For each included repository, capture: - - repo root - - current branch - - worktree path - - remote URL - - default/base branch - - staged, unstaged, and untracked files -5. Confirm every included checkout is safe for edits under the worktree isolation policy. State the selected worktree path and branch for each repository before editing. -6. Check required tools before relying on them: - - `pr-review` (install: `git clone https://github.com/happyvertical/pr-review.git ~/pr-review && export PATH="$HOME/pr-review/bin:$PATH"`) - - `codex` - - `claude` - - `gh copilot` - - `gh` when copilot-cli is reached through `gh copilot` -7. Read repository instructions and review context in every included repository: - - nearest `AGENTS.md` - - nearest `CLAUDE.md` if present - - `README*` - - package/build config files - - relevant test/lint/build docs - - `.github/workflows/*` when CI risk is part of the change -8. Look for `## Shipping Notes` in repo-level `AGENTS.md`. If present, use it for repo-specific validation commands, docs expectations, review expectations, and known flaky checks. - -## Multi-Repo Ordering - -When more than one repository is involved, build a dependency graph before reviewing. - -Use directed edges as `upstream -> downstream`, meaning the downstream repo depends on code, packages, APIs, manifests, images, generated artifacts, or documentation from the upstream repo. - -Evidence for ordering includes: - -- package manifests and lockfiles -- workspace files -- local `file:` or path dependencies -- Git submodules -- generated SDK/client references -- service API contracts -- deployment manifests and image references -- CI workflows that consume artifacts from another repo -- docs or release notes that explicitly describe dependency order -- `Shipping Notes` - -If the graph is ambiguous: - -- state the uncertainty -- choose the conservative order only when evidence is strong enough -- otherwise ask before proceeding - -Review in topological order: - -1. Validate and review each upstream repository first. -2. Fix upstream findings before relying on those changes downstream. -3. Record the exact upstream commit, branch, package version, image tag/digest, or artifact that downstream validation uses. -4. Update downstream repositories to reference the upstream result when that is part of the intended work. -5. Run downstream validation and review after upstream is clean. -6. If a downstream review finds a real upstream issue, go back to the upstream repo, fix it, rerun upstream validation/review, then rerun affected downstream validation/review. Count this as the same overall review round when practical. - -## Baseline Validation - -Unless `no-baseline` was passed, run baseline validation before the first review round for each repository in dependency order: - -1. Inspect the current diff and classify the change: - - behavior/API - - UI/UX - - data/schema/migrations - - infrastructure/CI - - docs-only - - tests-only -2. Run the repo's normal formatting/linting path when configured. Prefer commands documented in `Shipping Notes`, `AGENTS.md`, package scripts, Makefiles, taskfiles, or README. -3. Run the smallest meaningful tests first, then broader suites appropriate to the blast radius. -4. Run typecheck/build commands when the stack has them. -5. Check documentation obligations: - - user-facing docs for behavior, API, config, workflow, CLI, or UI changes - - changelog/release notes when the repo uses them - - examples or generated docs when they are part of the changed surface -6. If validation fails, fix the failure unless `no-fix` was passed, then rerun the relevant command. - -## Review Commands - -Create a temporary review directory outside the repo for review outputs. Do not commit raw review logs unless the repo explicitly asks for them. - -For multi-repo work, create one review subdirectory per repository and include the dependency context in each review prompt. Upstream review prompts should ask whether downstream compatibility is preserved. Downstream review prompts should name the upstream commits, branches, versions, image tags, or artifacts being consumed. - -### Generate the review prompt with `pr-review` - -Use the `pr-review` tool (https://github.com/happyvertical/pr-review) to generate the review prompt rather than a generic one-line brief. `pr-review` bundles a calibrated 10-theme checklist (refactor regressions, tenant isolation, effect races, silent error swallowing, hardcoded paths, doc/code drift, infra duplication, dead config, concurrency, SQL correctness) plus any repository-specific patterns in `/.pr-review/extensions.md`. All three reviewers get the same calibrated prompt so their findings are directly comparable. - -If `pr-review` is not on `$PATH`: - -```bash -git clone https://github.com/happyvertical/pr-review.git ~/pr-review -export PATH="$HOME/pr-review/bin:$PATH" -``` - -If the repository being reviewed has no `.pr-review/extensions.md`, the shared core checklist still applies — the prompt just doesn't include repo-specific guidance. That's a signal to consider adding one after the review-cycle run. - -### Run codex-cli review - -`codex review` fetches its own diff, so pass `--no-diff` to `pr-review` to avoid sending the diff twice: - -- If the branch has committed changes against the base branch: - ```bash - pr-review --base --no-diff | codex review --base - - ``` -- If there are staged, unstaged, or untracked changes, also run: - ```bash - pr-review --base --no-diff | codex review --uncommitted - - ``` -- Do not use `claude ultrareview` or any `ultrareview` variant for any reviewer here. - -### Run claude-cli review - -claude-cli does not fetch its own diff — pipe `pr-review` output without `--no-diff`: - -```bash -pr-review --base | claude -p --permission-mode plan -``` - -- Use `claude -p` in non-interactive mode. -- Prefer read-only/plan permissions for the review run (`--permission-mode plan`). -- Disallow edit/write tools where supported. - -### Run copilot-cli review - -**This step is non-optional for the "catch before push" intent.** The -Copilot PR review *bot* only fires after a PR is opened — too late to -prevent the round-trip the review-cycle exists to compress. The copilot-cli runs locally pre-push and gives you copilot-cli's blind-spot -coverage before the bot has a chance to comment. - -copilot-cli expects the prompt to carry its own context. **The -invocation must enforce read-only at the permission layer — prompt -instructions are advisory, tool permissions are enforcement.** If -copilot-cli can use write/edit-capable tools, a "review" pass can mutate -the working tree mid-round, breaking the same-commit guarantee the -loop relies on. - -`--allow-all-tools` is *not* read-only — it grants write/edit -capability and would let the model mutate the working tree mid-review. -Don't use it. But `--available-tools shell,read` alone *also doesn't -work* in non-interactive mode — it only filters which tools the model -can *see*, not which it can run without approval. In `-p` mode there's -no place to ask for permission, so tool calls get denied with -`Permission denied and could not request permission from user`. The -review then runs with no repository context. - -The correct shape is **explicit per-command `--allow-tool` flags** for -the specific read-only commands a review needs. Verify against -`gh copilot -- --help` and `gh copilot -- help permissions` for the -syntax your CLI version supports; example for current copilot-cli: - -```bash -REPO_ROOT="$(git rev-parse --show-toplevel)" -gh copilot -- -p "$(pr-review --base --pretty)" \ - -C "$REPO_ROOT" \ - --add-dir "$REPO_ROOT" \ - --disallow-temp-dir \ - --allow-tool 'shell(git diff)' \ - --allow-tool 'shell(git log)' \ - --allow-tool 'shell(git show)' \ - --allow-tool 'shell(git status)' \ - --allow-tool 'shell(git rev-parse)' \ - --allow-tool 'shell(rg)' \ - --allow-tool 'shell(cat)' \ - --allow-tool 'shell(head)' \ - --effort xhigh -``` - -The `-C` / `--add-dir` / `--disallow-temp-dir` trio is **scope -hygiene**, not a security boundary: they keep the reviewer focused -on the repo and prevent it from wandering into unrelated files in -your `/tmp` or wherever else the shell was invoked from. That -reduces noise in findings — not a vulnerability fix. The reviewer -is running locally with your credentials and can already see -anything you can see; that's how local CLI tools work. - -**When stricter sandboxing actually matters** (and the above flags -are insufficient — you need a sanitized temp checkout): -- Reviewing PRs from untrusted contributors (OSS maintainership) - where the diff could contain prompt-injection asking the model to - read your `.env` and quote it into findings the contributor sees -- CI environments where the reviewer runs unattended and findings - get auto-posted to public PR comments -- Workspaces with secrets in untracked files that you don't want - surfaced even in your own review output - -For the normal HappyVertical case — engineer reviewing their own -org's PR pre-push, findings going to their own terminal — none of -that applies. The flags above are enough. - -Add `--deny-tool` for any specific commands you want hard-blocked. -The per-command `--allow-tool` allowlist is **mostly** read-only — -but it is NOT a hard write-prevention boundary, because copilot-cli -matches at first-level subcommand granularity. `--allow-tool -'shell(git diff)'` approves any `git diff …` invocation including -write-capable forms like `git diff --output=path` which can dirty -the working tree. Similarly, `shell(rg)` permits redirection-style -flags depending on shell escaping. The prompt's "don't modify -files" instruction is defense-in-depth, but the structural -guarantee for "the reviewer ran against the same commit" is the -**pre/post tree-snapshot comparison**. - -For reviews of **committed work** (the common case): before each -reviewer, the tree is clean; after, run `git status --porcelain` — -any output means the reviewer modified the tree, the same-commit -guarantee is broken, the round is invalid. Restart from the clean -commit (or run reviewers in a disposable worktree). - -For reviews of **uncommitted/dirty work** (e.g. mid-edit review, -`codex review --uncommitted` flows): the simple "is status empty" -check fails because the tree was already dirty. Use **snapshot -comparison** — it's the only approach with no footguns: - -```bash -# BEFORE each reviewer runs -SNAPSHOT_DIR=$(mktemp -d) -git status --porcelain > "$SNAPSHOT_DIR/pre-status.txt" -git diff > "$SNAPSHOT_DIR/pre-unstaged.diff" -git diff --cached > "$SNAPSHOT_DIR/pre-staged.diff" -# Capture untracked file contents (hash + path) so we can detect -# if a reviewer added/modified untracked files. `git status` would -# catch added/removed but not same-name-different-content edits. -# NOTE: filenames are treated as DATA, never substituted into -# shell source. `xargs -I{} sh -c '...{}...'` looks convenient but -# is command-injectable — a file named `evil$(rm -rf ~).txt` -# would execute the substitution. Use a null-delimited read loop -# so each filename arrives as a bash variable (still data). -git ls-files --others --exclude-standard -z \ - | while IFS= read -r -d '' f; do - printf '%s %s\n' "$(git hash-object -- "$f")" "$f" - done \ - | sort > "$SNAPSHOT_DIR/pre-untracked.txt" - -# ... run the reviewer ... - -# AFTER — capture same shape, diff against pre-state -git status --porcelain > "$SNAPSHOT_DIR/post-status.txt" -git diff > "$SNAPSHOT_DIR/post-unstaged.diff" -git diff --cached > "$SNAPSHOT_DIR/post-staged.diff" -git ls-files --others --exclude-standard -z \ - | while IFS= read -r -d '' f; do - printf '%s %s\n' "$(git hash-object -- "$f")" "$f" - done \ - | sort > "$SNAPSHOT_DIR/post-untracked.txt" - -if ! diff -q "$SNAPSHOT_DIR/pre-status.txt" "$SNAPSHOT_DIR/post-status.txt" >/dev/null \ - || ! diff -q "$SNAPSHOT_DIR/pre-unstaged.diff" "$SNAPSHOT_DIR/post-unstaged.diff" >/dev/null \ - || ! diff -q "$SNAPSHOT_DIR/pre-staged.diff" "$SNAPSHOT_DIR/post-staged.diff" >/dev/null \ - || ! diff -q "$SNAPSHOT_DIR/pre-untracked.txt" "$SNAPSHOT_DIR/post-untracked.txt" >/dev/null; then - echo "Reviewer mutated tree state — round invalid. See $SNAPSHOT_DIR for diffs." - exit 1 -fi -rm -rf "$SNAPSHOT_DIR" -``` - -This catches: -- Added/removed tracked files (status diff) -- Modified tracked files even if status shape unchanged - (e.g. `M path → M path` with different bytes — unstaged.diff - catches this; status alone wouldn't) -- Staged/index mutations (staged.diff) -- Added/removed untracked files OR same-name-different-content - (untracked hash list) - -Preserves your staging discipline exactly, doesn't touch any -commits, no WIP dance, no `git reset` calls (so Hard Rules don't -need a carve-out). - -Why not WIP-commit-then-undo (which an earlier draft of this doc -suggested): that approach accumulated multiple footguns across -rounds — `git add -A` would leak unrelated untracked files into -`pr-review`'s diff; `git stash` would actually remove dirty changes -from the worktree and review the wrong state; `git reset --mixed -HEAD~1` would silently drop the wrong commit if HEAD moved; the -patch-capture-and-restore step couldn't recreate untracked files -deleted by `reset --hard`; `wip:` isn't a valid Conventional Commit -type and would be rejected by commitlint in any repo using these -configs (which is the whole org). The snapshot approach sidesteps -every one of these. If you find yourself reaching for WIP commits -to make this work, you're solving the wrong problem. - -Either way, never just "is `git status` clean now" as the -post-check — that only works when "clean" was the baseline. - -- Use `--pretty` so copilot-cli receives the prompt as readable markdown - rather than the JSON-instruction format. -- Pass `--` after `gh copilot` to forward flags to the underlying - `copilot` binary; otherwise `gh` may interpret them. -- `--effort xhigh` matches codex-cli's reasoning depth; tune down if the - diff is small and you want faster runs. -- The prompt itself also instructs not to modify files. That's - defense-in-depth, not the primary enforcement — the permission - flags do the actual blocking. - -**Known blockers and fallbacks** (real failures we've seen): - -- **`Access denied by policy settings`** — the org's Copilot policy - is disabling CLI use. Fix at https://github.com/settings/copilot - (personal) and/or your org's Copilot policies page (admin). Until - enabled, copilot-cli cannot run pre-push. -- **`Failed to authenticate. API Error: 401`** on `claude -p` — happens - when this command is invoked from inside an active Claude Code - session; OAuth credentials don't propagate to spawned children. - Workaround: set `ANTHROPIC_API_KEY` env var on the child invocation, - or run review-cycle from a terminal / CI / codex-cli session instead. - -**When a reviewer is unavailable**: proceed with the others *and* -record in the final report which reviewer was skipped and why. -**Status MUST drop to `partial` when any required reviewer slot -is unfilled.** The four required slots by default are: -- codex-cli (subprocess) -- claude-cli (subprocess) — the Codex CLI orchestrator does not - have a documented sub-agent substitute, so subprocess auth - failure means the slot is skipped (no fallback). -- copilot-cli (subprocess) -- the orchestrator's checklist pass (the 4th slot, fills itself). - -Never silently drop a slot. Never report `clean` with a skipped -required slot — `/ship` gates on `Status: clean`, and a soft skip -would let unreviewed code merge. - -If copilot-cli is the unavailable one specifically, record this in -the final report's `Skipped reviewers` field with reason. Downstream -(`/ship`, or the human invoking review-cycle directly) reads the -report and decides whether to open the PR as a **draft** so the -Copilot bot can review before merge candidacy. `/review-cycle` -itself never opens or pushes PRs — that's `/ship`'s job — so this -fallback is something the report enables, not something review-cycle -executes. - -### For all three subprocess reviewers - -- Use a review command timeout of at least 15 minutes. -- Capture stdout and stderr to separate files in the temp review directory — malformed or empty findings almost always have the cause in stderr. -- Treat each tool's findings as evidence to verify against the code, not as orders to apply. Vague claims get dismissed; concrete file:line citations with named failure paths get acted on. - -### Orchestrator self-review (the 4th reviewer slot) - -The orchestrator (the parent Codex CLI session running this command) must also perform an explicit checklist pass against the same commit each round. This is NOT silent-solo — it must produce written findings in the same JSON shape the subprocesses do, including "no findings" when nothing surfaces. - -**Read this carefully — the 4th slot has structural confirmation bias the other three don't have:** -- The orchestrator is the same model family as the codex-cli reviewer (both Codex). Its blind-spot coverage overlaps with codex-cli's, not with claude-cli's or copilot-cli's. -- The orchestrator authored (or at least drove) the fix being reviewed. It has full context of intent — what the fix was supposed to do, why each decision was made. That context is helpful for *understanding* the code but is exactly the cognitive bias that makes "did I miss anything?" the wrong question to ask yourself. -- A clean orchestrator pass therefore carries less independent epistemic weight than a clean claude-cli or copilot-cli pass. - -The orchestrator slot's role is **explicit checklist accountability** — forcing the orchestrator to run through the same questions and write down the answer — not independent blind-spot coverage. Keep it in the loop precisely because the discipline of running the checklist surfaces things the orchestrator's "I looked, it's fine" intuition skips. - -- Run the orchestrator pass in parallel with the subprocesses (while they run in the background, the orchestrator reads the diff against the checklist). -- Use the same pr-review checklist + extensions the subprocesses use. -- Output the same JSON shape: `{summary, findings: [{severity, category, file, line, title, body, confidence}], skipped: []}`. -- Include the orchestrator findings in the round's dedup step alongside subprocess findings, but weigh them with the bias caveat above — a finding the orchestrator surfaces that no other reviewer caught is real; a "no findings" pass from the orchestrator alone (without subprocess corroboration) is weak. -- If the orchestrator has nothing to add ("no findings"), record that explicitly — the absence of explicit findings is silent-solo; an explicit "{findings: []}" entry is participation. - -After all FOUR reviewer slots produce findings (three subprocesses + orchestrator), merge into one checklist grouped by severity (see "Review/Fix Loop" below). Prefer findings flagged by ≥2 reviewers when severity is medium or low; high-severity findings from a single reviewer still warrant verification. Findings flagged ONLY by the orchestrator's self-review get extra scrutiny on whether they're real (the confirmation bias works both ways — orchestrator can over-flag things it knows are intentional too). - -### Optional: capture for calibration - -If the repository has a `.pr-review/extensions.md`, also append `| pr-review-capture` to one of the runs (typically claude-cli or codex-cli) so the findings are stored at `.pr-review/history/.json`. Later, `pr-review-tune --last 10` can compare stored findings against the review comments PRs actually received and propose refinements to the checklist. This closes the feedback loop so the checklist gets sharper over time. - -```bash -pr-review --base | claude -p --permission-mode plan | pr-review-capture | tee /dev/tty -``` - -## Review/Fix Loop - -Run up to `rounds` review rounds. The argument default is `3` -regardless of change type (set at the `rounds=N` arg above). For -documentation / reviewer-checklist content, consider passing -`rounds=5..10` because each round catches progressively narrower -factual edge cases — there's no auto-detection that bumps the cap -for doc work. - -**Hard rules for the loop** (these prevent the "stopped too early" -*and* "looped too long on trivia" failure modes): - -- **Each round runs all reviewers in parallel against the SAME commit** - — not sequentially against each other's fixes. Sequential cascading - makes findings depend on which reviewer ran first and obscures - whether reviewers actually agree on the latest state. -- **A fix-round on substantive (P0-P2) findings is never the final - round.** If you just pushed a fix for a real bug, you MUST run - another round to confirm it didn't introduce a new one. -- **The loop exits when no P0/P1/P2 findings remain — not when - every reviewer returns zero findings.** P3 / nit-level findings - (polish, narrow factual edges, cosmetic placement) get triaged - three ways (fix inline if cheap, record in the final report if - worth tracking, file as follow-up if bigger) but never extend the - loop. Running another full ensemble round just to verify a - one-line wording tweak is technical perfectionism that burns - reviewer cycles without changing what ships. -- **Convergence is per-commit for behaviour-changing fixes** (P0/P1/P2 - and any non-fix code changes). Reviewer A returning clean against - commit X doesn't mean clean against commit Y when Y changes - behaviour — re-run all reviewers. **P3-only commits do not reset - convergence**: if the only change since the last clean verify - round is a P3 wording tweak, you don't need another full ensemble - pass. -- **One reviewer returning clean is NOT convergence — the whole - ensemble must return clean.** A reviewer that didn't run can't - have caught the bug another reviewer would have. Two failure - modes to guard against: - - *Silent solo*: only running one reviewer per round (e.g. - "codex-cli is fast and reliable, I'll skip the others") and - declaring convergence when it returns 0. The whole point of - the ensemble is non-overlapping blind spots. A real example: - if you solo a single reviewer for ~12 rounds and then add a - second reviewer for round 13, expect that second reviewer to - immediately surface findings the first kept missing. - - *Unavailable ≠ clean*: if a reviewer errored (auth, policy, - network, env), that's a missing signal — not a clean signal. - Record the unavailability explicitly in the final report. - Either resolve the blocker and retry, or accept the - reduced-coverage tradeoff with rationale, but do not count - the absence as agreement. - -For each round, process repositories in dependency order: - -1. Run validation before review if files changed since the previous validation pass. -2. Run all four reviewer slots for each repository in dependency order: codex-cli, claude-cli, copilot-cli, and the orchestrator's own checklist pass. Run the three subprocesses in parallel where possible; the orchestrator's pass runs concurrently. All four must produce explicit findings (including "no findings") before dedup. If a slot can't be filled (e.g. claude-cli auth fails — no sub-agent substitute on this orchestrator), record the skip in the final report and let the Status contract drop to `partial`. Don't smuggle a skip into the slot list as "filled with caveat" — it changes the gate semantics. -3. Merge findings into a single checklist by severity: - - `P0/P1`: correctness, data loss, security, broken build, failing tests. **Always block. Always loop.** - - `P2`: likely bug, missing test, missing docs for changed behavior. **Block by default; loop unless explicitly accepted with rationale in the final report (which `/ship` then copies into the PR body when creating the PR).** - - `P3`: maintainability or polish with clear benefit; narrow factual edges affecting tiny version windows or rare paths. **Never block. Never extend the loop just to verify a P3 fix.** For each P3 finding, pick one based on cost vs. value: - - **Cheap to fix → fix inline in the same commit/PR.** No verify round needed; group with other fixes if any. (Most P3 wording/clarity tweaks fall here.) - - **Worth tracking but not blocking → record in the final report** as accepted non-blocker with brief rationale. If a PR already exists, also copy into the PR body; otherwise `/ship` propagates the report into the PR body at PR creation time. - - **Bigger than this PR's scope → file as follow-up issue**, link from the final report (and PR body, when one exists). -4. Verify each finding against the code. Do not blindly patch speculative review comments. -5. If `no-fix` was passed, stop after reporting findings. -6. Address all valid P0/P1 findings (mandatory) and all valid P2 findings (mandatory unless explicitly accepted in the final report with a one-line rationale) in priority order. -7. Add or adjust tests for bug fixes and behavior changes. -8. Rerun relevant validation after edits. -9. If upstream fixes change the contract consumed downstream, rerun affected downstream validation and review even if that downstream repo had already passed in the current round. -10. **If a P0/P1/P2 fix was pushed in this round, the next round MUST run** to verify the fix didn't break something. Do not stop on a P0/P1/P2 fix-round. -11. Stop the loop as `clean` only when **ALL THREE** conditions - hold across the graph: - - a verify round returns no *unaccepted* P0/P1/P2 findings - from any reviewer in any included repo, - - validation is green across the graph, AND - - every required reviewer actually ran in the verify round - (any skipped/unavailable reviewer → status is `partial`, - not `clean`, per the Status contract below). - - Don't conflate "no findings surfaced" with "clean" — a - reviewer that didn't run produced no findings because it - didn't run, not because none exist. - - Reviewers may continue surfacing an accepted P2 in subsequent - rounds (they have no way to know it was accepted); the - acceptance lives in the final report, and the stop condition - discounts it. P3/nit findings at exit time get recorded in the - final report, not fixed in this PR (consumers like `/ship` - are responsible for surfacing them in the PR body when the PR - exists). - -If the loop hits the round cap: - -- stop and summarize unresolved findings -- distinguish true blockers from false positives and accepted non-blockers -- if findings are still surfacing at the cap, that's a signal — either - the spec is over-detailed (consider simplifying), the reviewer set - is producing diminishing returns (acceptable to ship with a recorded - follow-up), or there's a genuine gap (don't ship; raise the cap or - reassess) -- **special case: a P0/P1/P2 fix landed in the final permitted round** - — Rule 10 requires the next round MUST run to verify, but the cap - forbids it. Report status as `blocked` with reason - `verify-round-blocked-by-cap`. The fix may be correct but no - verify round confirmed it; per the Status contract, an unverified - P0/P1/P2 fix counts as "unaccepted P0/P1/P2 remaining" — its - effect on the codebase is unobserved. Note in the final report - that the cap blocked verification and recommend re-running with - `rounds=N+1` (or higher) so the verify round can complete. Don't - report `clean` just because the post-fix tree has no surfaced - findings — those findings were never sought. -- do not push or open PRs from this command unless the user explicitly asks - -## Final Report - -Return a concise review-cycle report: - -```text -## Review Cycle Result -- Status: clean | partial | blocked | findings-only - (clean = no P0/P1 + all P2 fixed-or-accepted + ALL required reviewers ran - + validation green; - partial = otherwise-clean but at least one required reviewer was - skipped. Single cause only — other "incomplete" states - (unverified fix, validation failed) are `blocked`, not - `partial`; - blocked = unaccepted P0/P1/P2 remaining (whether before or at the - round cap), validation failed, OR a P0/P1/P2 fix landed - in the final permitted round with no verify round - possible (an unverified fix counts as potentially - unaccepted — the operator should re-run with a raised - `rounds=N+1` to let the verify round complete). A - round-cap exit with ONLY P3/nit findings remaining is - NOT blocked — those findings go in the accepted - non-blockers field and Status stays clean (or partial - if a required reviewer was skipped, or blocked if - validation failed — the carve-out only suppresses the - "P3-only at cap → blocked" path; other blocked causes - still apply). Without this carve-out, the round-cap - definition would re-block on the exact trivia loop - these rules are designed to exit; - findings-only = `no-fix` was passed) -- Repos: -- Worktrees: -- Branches: -- Validation: -- Reviews: -- Docs: -- Dependency order: downstream edges or none> -- Remaining blockers (P0/P1, or unaccepted P2): -- Accepted P2 (with rationale): -- Accepted non-blockers (P3/nit): -- Skipped reviewers: -``` +If ContextForge is unavailable, the prompt is missing, or the resource cannot be read or decoded, stop and report the ContextForge access problem instead of falling back to a stale local workflow. diff --git a/codex/plugins/have/commands/ship.md b/codex/plugins/have/commands/ship.md index 70ac404..6f3c13c 100644 --- a/codex/plugins/have/commands/ship.md +++ b/codex/plugins/have/commands/ship.md @@ -1,293 +1,16 @@ --- -description: "Prepare current work for shipping: validate, update docs, run /review-cycle, open a ready PR, and watch CI to green." +description: "Run the HappyVertical ship workflow from ContextForge." --- -# /ship +# /have:ship -Ship the current repository work end to end. Use `/review-cycle` for the repeating review/fix/retest loop before opening PRs. +This local command is only an adapter. The authoritative workflow lives in ContextForge. -## Hard Rules +Load and follow the Happy Vertical Team prompt: -- Respect the global worktree isolation policy before making edits. If the current checkout is a primary checkout such as `/Users/will/Work/.../repos/...`, move the work to a dedicated worktree and branch before editing, preferably under `/Users/will/.codex/worktrees/` with a `codex/` branch prefix. -- Do not mix this session's edits with unrelated dirty files. Preserve user changes, and ask only when the current work cannot be separated safely. -- Do not use destructive cleanup commands such as `git reset --hard`, `git checkout --`, or `git clean` unless the user explicitly asks for that exact destructive action. -- Run `/review-cycle` before opening PRs. Inherit its review rules, including no `ultrareview`, at least 15-minute review timeouts, and up to the configured round cap. -- CI failures after PR creation count as new work and must be fixed, locally revalidated, and passed back through `/review-cycle` when the fix changes code, tests, docs, config, or behavior materially. -- If the current work spans multiple repositories, ship them as an ordered dependency graph. Validate and run `/review-cycle` on upstream repos first, then downstream consumers against the exact upstream commits or PR branches they depend on. +- Prompt: `have-ship` +- Resource: `have://happyvertical/workflows/ship` -## Arguments +Use ContextForge MCP prompts/resources when available. If prompt invocation is unavailable, read the resource by URI. The resource text contains `encoding: base64` and a `payload_base64:` block; decode that payload as UTF-8 markdown and follow it exactly as the ship workflow. -- `rounds=N`: maximum `/review-cycle` rounds. Default `3`. -- `base=`: override the comparison base branch. -- `repos=`: explicit list of repositories in the shipping set. -- `draft`: explicitly create draft PRs. Without this argument, open PRs ready for review when validation and `/review-cycle` are clean. - -## Preflight - -1. Confirm this is a git repository. If `repos=` was provided, confirm every listed path is a git repository. -2. Discover the full shipping set before validating anything: - - current repository - - repositories explicitly named by the user or `repos=` - - dirty related worktrees under `/Users/will/.codex/worktrees/` - - local path, workspace, git, or package dependencies referenced by changed files - - git submodules or nested repositories touched by the diff - - sibling repositories that are clearly referenced by changed docs, package manifests, lockfiles, workflow files, deployment manifests, or local integration config -3. Exclude unrelated dirty repositories. If multiple dirty repos are present but the dependency relationship is not clear, report them and ask before including them. -4. For each included repository, capture: - - repo root - - current branch - - worktree path - - remote URL - - default/base branch - - staged, unstaged, and untracked files -5. Confirm every included checkout is safe for edits under the worktree isolation policy. State the selected worktree path and branch for each repository before editing. -6. Check required tools before relying on them: - - `gh` - - `pr-review` (install: `git clone https://github.com/happyvertical/pr-review.git ~/pr-review && export PATH="$HOME/pr-review/bin:$PATH"`) - - `codex` - - `claude` - - `gh copilot` -7. Confirm GitHub auth with `gh auth status`. Treat missing auth as a blocker for PR/CI work. -8. Read repository instructions and shipping context in every included repository: - - nearest `AGENTS.md` - - nearest `CLAUDE.md` if present - - `README*` - - package/build config files - - `.github/pull_request_template*` - - `.github/workflows/*` -9. Look for `## Shipping Notes` in repo-level `AGENTS.md`. If present, treat it as the source of repo-specific shipping commands, docs requirements, CI quirks, release notes, PR expectations, and known flaky checks. - -## Multi-Repo Ordering - -When more than one repository is involved, build a dependency graph before validation and `/review-cycle`. - -Use directed edges as `upstream -> downstream`, meaning the downstream repo depends on code, packages, APIs, manifests, images, generated artifacts, or documentation from the upstream repo. - -Evidence for ordering includes: - -- package manifests and lockfiles -- workspace files -- local `file:` or path dependencies -- Git submodules -- generated SDK/client references -- service API contracts -- deployment manifests and image references -- CI workflows that consume artifacts from another repo -- docs or release notes that explicitly describe dependency order -- `Shipping Notes` - -If the graph is ambiguous: - -- state the uncertainty -- choose the conservative order only when evidence is strong enough -- otherwise ask before proceeding - -Ship in topological order: - -1. Validate each upstream repository first. -2. Run `/review-cycle` on upstream repositories before relying on those changes downstream. -3. Record the exact upstream commit, branch, PR URL, package version, image tag/digest, or artifact that downstream validation uses. -4. Update downstream repositories to reference the upstream result when that is part of the intended change. -5. Run downstream validation and `/review-cycle` after upstream is clean. -6. If downstream work finds a real upstream issue, go back to the upstream repo, fix it, rerun upstream validation and `/review-cycle`, then rerun affected downstream validation and `/review-cycle`. - -For PRs: - -- Open upstream PRs before downstream PRs. -- Open PRs ready for review by default, including downstream PRs that depend on unmerged upstream PRs. -- Link downstream PRs to upstream PRs and call out the dependency explicitly. -- Use draft PRs only when the user passed `draft`, the repo explicitly requires draft stacked PRs, or unresolved blockers make the PR not ready for review. -- Watch CI in upstream-to-downstream order. If upstream CI fails, pause downstream promotion until the upstream failure is fixed or proven unrelated. - -## Shipping Notes Policy - -`## Shipping Notes` is a good repo-level pattern. Use it as a durable project checklist, not a run log. - -When present: - -- follow its commands and repo-specific gates before generic guesses -- update it only when this run discovers durable facts future shipping runs should know -- keep updates concise and actionable - -When absent: - -- do not add it just to say the command ran -- add it only if you learn durable repo-specific shipping knowledge, such as the canonical lint/test commands, required docs surfaces, release-note policy, CI watch quirks, or PR conventions -- if adding it, keep the section short and factual - -Suggested shape: - -```markdown -## Shipping Notes - -- Validation: -- Documentation: -- Reviews: -- PR/CI: -- Known quirks: -``` - -## Clean And Validate - -For multi-repo work, run this section for each repository in dependency order. After every upstream repo changes, rerun the relevant downstream checks that prove compatibility with the new upstream state. - -1. Inspect the current diff and classify the change: - - behavior/API - - UI/UX - - data/schema/migrations - - infrastructure/CI - - docs-only - - tests-only -2. Run the repo's normal formatting/linting path when configured. Prefer the commands documented in `Shipping Notes`, `AGENTS.md`, package scripts, Makefiles, taskfiles, or README. -3. Run the smallest meaningful tests first, then broader suites appropriate to the blast radius. -4. Run typecheck/build commands when the stack has them. -5. Check documentation obligations: - - Update user-facing docs for behavior, API, config, workflow, CLI, or UI changes. - - Update changelog/release notes when the repo uses them. - - Update examples or generated docs when they are part of the changed surface. - - If no docs are needed, note the reason. -6. If validation fails, fix the failure and rerun the relevant command. Broaden validation before review if the fix touches shared or user-facing behavior. - -## Review Cycle Gate - -After validation and documentation cleanup, run `/review-cycle` over the same repository set before opening PRs. - -Use the same `rounds=`, `base=`, and `repos=` arguments passed to `/ship`. For multi-repo work, pass the dependency order and upstream/downstream context discovered during `/ship` preflight. - -Treat `/review-cycle` as the blocker gate: - -**Regardless of the gate's result**, always copy these fields from -`/review-cycle`'s final report into the PR body when creating or -updating the PR: -- `Accepted P2 (with rationale)` — accepted P2 happens on the `clean` - branch under the current status contract (all P2 fixed-or-accepted - → clean), so this propagation is not gated by `partial` -- `Accepted non-blockers (P3/nit)` — same reasoning -- `Skipped reviewers` (if any) - -These fields are how human reviewers see the deliberate choices the -ensemble made. Dropping them defeats the audit trail. - -Then branch on the gate result: - -- If `/review-cycle` returns `clean`, continue to commit and PR. -- If it returns `partial`, branch on the reason recorded in - `Skipped reviewers` (the only documented cause of `partial` — - Accepted P2 ends in `clean`, not `partial`, per the Status - contract): - - **Partial because copilot-cli was skipped** (org policy block, - network failure, missing auth, etc.): open the PR as a **draft** - so the Copilot bot can review post-push. - - **Prerequisite check**: GitHub's automatic Copilot code review - of drafts is opt-in per-repo. By default the bot only reviews - when a PR opens *non-draft* (or transitions Draft→Open) and - does NOT auto-re-review subsequent pushes. Before relying on - this fallback, verify in the repo's Copilot settings ([docs](https://docs.github.com/en/copilot/concepts/agents/code-review#about-automatic-pull-request-reviews)) - that BOTH "Automatically review pull requests" includes - "Review draft pull requests" AND "Review new pushes" is - enabled. If either is off, the fallback will silently wait - forever for a review that never comes — you must instead - request the bot review manually with `gh pr edit - --add-reviewer @copilot` ([docs](https://docs.github.com/en/copilot/how-tos/use-copilot-agents/request-a-code-review/use-code-review)). - - **gh CLI version requirement**: `--add-reviewer @copilot` - requires gh CLI v2.88.0 or newer ([release notes](https://github.com/cli/cli/releases/tag/v2.88.0)). - On older gh, the command fails with `Could not request - reviewer: '@copilot' not found` and the bot is NOT requested - — silently regressing into the same "draft sits forever - without review" mode. Check with `gh --version` first. If - your gh is older, upgrade (`brew upgrade gh`) or use the PR - page's Reviewers menu manually. - - For re-reviews after subsequent pushes, use the Reviewers menu - (re-request button) on the PR page; `gh pr edit` is for the - initial add only. - - Address bot findings, then rerun `/review-cycle`. The rerun - will *still* return `partial` (the CLI block is the same), so - it can't be the clearance signal. Instead: when the Copilot - bot has reviewed the **current** commit with no unaddressed - findings AND a human explicitly accepts the bot-for-CLI - substitution (typically by running `gh pr ready`), that's the - clearance path. "Current commit" matters: if you pushed - fixes after the bot reviewed, request a re-review on the new - SHA before clearing. Document the substitution in the PR body - so the audit trail is clear. - - **Partial because a different required reviewer slot was unfilled** - (codex-cli unavailable, OR claude-cli auth fails — Codex CLI - orchestrator has no sub-agent substitute — OR the orchestrator - slot was unfilled because no explicit `{findings: []}` checklist - pass was produced this round): open as draft and call out the - skip in the PR body so a human can decide whether the remaining - reviewer coverage is sufficient. Don't mark ready until the - skipped slot can be filled or a human explicitly accepts the - gap with rationale in the PR body. -- If it returns `blocked`, stop before opening ready PRs. Open draft PRs only when the user passed `draft` or a draft would help expose the blocker. - - **Special sub-case: blocked because of `verify-round-blocked-by-cap`** (a P0/P1/P2 fix landed in the final permitted `/review-cycle` round). The fix may be correct but no verify round confirmed it. Don't ship — re-run `/review-cycle rounds=N+1` (or higher) to let the verify round complete, then re-attempt `/ship`. Calling this out explicitly because the failure mode looks like "clean" to a literal reader (the tree post-fix surfaces no findings) but actually means "findings were never sought". -- If `/review-cycle` changed files, rerun the relevant validation and documentation checks before committing. - -## Commit And PR - -When the Review Cycle Gate above has been satisfied (either `clean`, -or `partial` with an explicit fallback path documented above), -commit and open PRs in dependency order. Draft vs ready follows the -gate's branch — draft on partial, ready on clean (unless the user -passed `draft`): - -1. Recheck `git status --porcelain` in each included repository. -2. Ensure every branch name is suitable. If needed, create a `codex/ship-` branch per repository. -3. Commit uncommitted shipping changes with concise messages. Do not rewrite or squash existing user commits unless asked. -4. Push upstream branches first, then downstream branches. -5. Create or update PRs with `gh pr create` or `gh pr edit`, upstream first. -6. Use each repo's PR template when present. -7. If an existing PR is draft AND `/review-cycle` returned status `clean` (not `partial`, not `blocked`) AND validation is green AND the user didn't pass `draft`, mark it ready for review with `gh pr ready`. "Now clean" is the Review Cycle Gate output specifically — not a subjective re-read of the working tree. On `partial`, the human runs `gh pr ready` after the partial-branch clearance path documented above (e.g. after Copilot bot has reviewed the current commit and the operator explicitly accepts the bot-for-CLI substitution). Don't auto-ready a draft that came from a partial gate. -8. Include in every PR: - - summary of changes - - validation commands and results - - review rounds run and outcome - - documentation updates or why none were needed - - unresolved accepted non-blockers, if any - - cross-repo dependencies, including upstream PR URLs or exact commits when applicable - -If the user passed `draft`, create draft PRs. Otherwise, create PRs ready for review when the work is clean. Do not make a downstream PR draft solely because it depends on an unmerged upstream PR; link the dependency clearly instead. Use draft only for explicit user preference, documented repo policy, or unresolved blockers. - -## CI Watch And Fix - -After PRs exist, watch CI in dependency order: - -1. Run `gh pr checks --watch --interval 10` for the current upstream PR before advancing to downstream PRs. -2. If checks pass, record the PR URL and green status. -3. If checks fail: - - list failing check names with `gh pr checks --json name,workflow,state,bucket,link` - - inspect failing logs with `gh run view --log-failed` or the check-specific URL - - identify the smallest valid fix - - apply the fix in the isolated worktree - - rerun local validation relevant to the failing check - - rerun reviews when the fix changes code, tests, docs, config, or behavior materially - - commit and push - - resume watching CI -4. If an upstream CI fix changes downstream behavior or artifacts, rerun affected downstream validation, review, and CI. -5. Continue until CI is green across all included PRs or a real blocker remains, such as missing secrets, external service outage, unavailable required credentials, or a failing check that cannot be inspected locally. - -## Final Report - -Return a concise shipping report: - -```text -## Ship Result -- Status: clean | partial | blocked -- Repos: -- Worktrees: -- Branches: -- PRs: -- Validation: -- Reviews: -- Accepted P2 (with rationale): -- Accepted non-blockers (P3/nit): -- Skipped reviewers: -- Docs: -- CI: green | failing | blocked | not configured -- Dependency order: downstream edges or none> -- Remaining: -``` +If ContextForge is unavailable, the prompt is missing, or the resource cannot be read or decoded, stop and report the ContextForge access problem instead of falling back to a stale local workflow. diff --git a/codex/plugins/have/skills/review-cycle/SKILL.md b/codex/plugins/have/skills/review-cycle/SKILL.md new file mode 100644 index 0000000..51d5f6a --- /dev/null +++ b/codex/plugins/have/skills/review-cycle/SKILL.md @@ -0,0 +1,19 @@ +--- +name: review-cycle +description: Use when the user invokes /review-cycle, /have:review-cycle, have:review-cycle, or asks to run HappyVertical's bounded multi-reviewer review, fix, and retest loop before shipping. +metadata: + short-description: Run HappyVertical's ContextForge review cycle +--- + +# Have Review Cycle + +This Codex skill is only an adapter. The authoritative workflow lives in ContextForge. + +Load and follow the Happy Vertical Team prompt: + +- Prompt: `have-review-cycle` +- Resource: `have://happyvertical/workflows/review-cycle` + +Use ContextForge MCP prompts/resources when available. If prompt invocation is unavailable, read the resource by URI. The resource text contains `encoding: base64` and a `payload_base64:` block; decode that payload as UTF-8 markdown and follow it exactly as the review-cycle workflow. + +If ContextForge is unavailable, the prompt is missing, or the resource cannot be read or decoded, stop and report the ContextForge access problem instead of falling back to a stale local workflow. diff --git a/codex/plugins/have/skills/ship/SKILL.md b/codex/plugins/have/skills/ship/SKILL.md new file mode 100644 index 0000000..32fdb20 --- /dev/null +++ b/codex/plugins/have/skills/ship/SKILL.md @@ -0,0 +1,19 @@ +--- +name: ship +description: Use when the user invokes /ship, /have:ship, have:ship, or asks to ship current work using HappyVertical's standard workflow. +metadata: + short-description: Run HappyVertical's ContextForge ship workflow +--- + +# Have Ship + +This Codex skill is only an adapter. The authoritative workflow lives in ContextForge. + +Load and follow the Happy Vertical Team prompt: + +- Prompt: `have-ship` +- Resource: `have://happyvertical/workflows/ship` + +Use ContextForge MCP prompts/resources when available. If prompt invocation is unavailable, read the resource by URI. The resource text contains `encoding: base64` and a `payload_base64:` block; decode that payload as UTF-8 markdown and follow it exactly as the ship workflow. + +If ContextForge is unavailable, the prompt is missing, or the resource cannot be read or decoded, stop and report the ContextForge access problem instead of falling back to a stale local workflow. diff --git a/install.sh b/install.sh index 5e6ab1a..c1b9c40 100755 --- a/install.sh +++ b/install.sh @@ -7,12 +7,12 @@ # 2. Registers this repo as a marketplace for Claude Code. # 3. Installs the `have` plugin in Claude Code. # 4. Registers this repo as a marketplace for Codex. -# 5. (Codex auto-enables installed plugins via marketplace add.) +# 5. Enables and syncs the `have` plugin cache in Codex. # 6. With --live, symlinks the cached plugin installs back to this repo # so edits are immediately visible to running sessions. # # Usage: -# ./install.sh # standard install (use `plugin update` after edits) +# ./install.sh # standard install (use `plugin update` after adapter edits) # ./install.sh --live # live mode (cache symlinked to repo) # ./install.sh --uninstall # remove marketplaces + plugins # ./install.sh -h # help @@ -34,6 +34,7 @@ done REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PR_REVIEW_DIR="${PR_REVIEW_DIR:-$HOME/Work/happyvertical/repos/pr-review}" +HAVE_PLUGIN_VERSION="0.1.1" cyan() { printf "\033[36m%s\033[0m\n" "$*"; } green() { printf "\033[32m%s\033[0m\n" "$*"; } @@ -44,6 +45,7 @@ if [[ "$UNINSTALL" -eq 1 ]]; then command -v claude >/dev/null 2>&1 && claude plugin uninstall have@have-config 2>/dev/null || true command -v claude >/dev/null 2>&1 && claude plugin marketplace remove have-config 2>/dev/null || true command -v codex >/dev/null 2>&1 && codex plugin marketplace remove have-config 2>/dev/null || true + rm -rf "$HOME/.codex/plugins/cache/have-config" green "Uninstalled." exit 0 fi @@ -106,7 +108,14 @@ else: print(" Enabled have@have-config in ~/.codex/config.toml.") PY - green " Codex: /have:ship and /have:review-cycle ready (after restart)." + CODEX_CACHE="$HOME/.codex/plugins/cache/have-config/have/$HAVE_PLUGIN_VERSION" + CODEX_SOURCE="$REPO_ROOT/codex/plugins/have" + mkdir -p "$(dirname "$CODEX_CACHE")" + rm -rf "$CODEX_CACHE" + cp -R "$CODEX_SOURCE" "$CODEX_CACHE" + green " Codex cache synced: $CODEX_CACHE" + + green " Codex: have:ship and have:review-cycle skills ready (after restart)." else red " codex CLI not found; skipping Codex install." fi @@ -116,24 +125,33 @@ if [[ "$LIVE" -eq 1 ]]; then cyan "→ Step 4/4: --live mode — symlinking caches to repo" # Claude cache path: ~/.claude/plugins/cache//// - CLAUDE_CACHE="$HOME/.claude/plugins/cache/have-config/have/0.1.0" + CLAUDE_CACHE="$HOME/.claude/plugins/cache/have-config/have/$HAVE_PLUGIN_VERSION" CLAUDE_SOURCE="$REPO_ROOT/claude/have" if [[ -d "$(dirname "$CLAUDE_CACHE")" ]]; then rm -rf "$CLAUDE_CACHE" ln -s "$CLAUDE_SOURCE" "$CLAUDE_CACHE" green " Claude cache symlinked: $CLAUDE_CACHE -> $CLAUDE_SOURCE" - cyan " (Edits to $CLAUDE_SOURCE are now live. Re-run --live after 'claude plugin update' which re-clones the cache.)" + cyan " (Adapter edits to $CLAUDE_SOURCE are now live. Workflow body edits happen in ContextForge.)" else red " Claude cache dir not found at $(dirname "$CLAUDE_CACHE"); install may have failed." fi - # Codex doesn't expose its cache layout the same way; skip symlink for now. - cyan " Codex: --live not implemented (codex marketplace add appears to read directly from source; edits propagate naturally)." + CODEX_CACHE="$HOME/.codex/plugins/cache/have-config/have/$HAVE_PLUGIN_VERSION" + CODEX_SOURCE="$REPO_ROOT/codex/plugins/have" + if [[ -d "$(dirname "$CODEX_CACHE")" ]]; then + rm -rf "$CODEX_CACHE" + ln -s "$CODEX_SOURCE" "$CODEX_CACHE" + green " Codex cache symlinked: $CODEX_CACHE -> $CODEX_SOURCE" + else + red " Codex cache dir not found at $(dirname "$CODEX_CACHE"); install may have failed." + fi else cyan "→ Step 4/4: skipping --live (pass --live to enable live edits)" fi green "" -green "Done. Restart Claude / Codex sessions to pick up the new commands." -green "Try: /have:review-cycle" -green " /have:ship" +green "Done. Restart Claude / Codex sessions to pick up the workflows." +green "Claude: /have:review-cycle" +green " /have:ship" +green "Codex: have:review-cycle" +green " have:ship"