diff --git a/setup-linux.sh b/setup-linux.sh index 972c3ca..8b7684e 100755 --- a/setup-linux.sh +++ b/setup-linux.sh @@ -537,10 +537,56 @@ else log_info "GitHub Copilot CLI not installed, skipping Copilot config (install via snap/apt/curl: https://docs.github.com/copilot/how-tos/copilot-cli)" fi +# BUG-004 + BUG-011: defense-in-depth wrapper around EVERY `claude ` +# invocation in this script. The Claude Code CLI's deserialize-modify-serialize +# cycle drops fields outside its internal struct (organizationType, +# organizationRateLimitTier, projects map, onboarding flags) -- upstream bug +# anthropics/claude-code#59870 -- shrinking ~/.claude/.claude.json from ~75 KB +# to ~1.5 KB and forcing re-authentication. The bug fires on ANY subcommand +# (`plugin install`, `plugin list`, `mcp get`, `mcp add`), not just install. +# +# BUG-004 (PR #57) wrapped only `claude plugin install`. BUG-011 extends the +# guard to every other call site (MCP loop iterations + plugin list pre-fetch) +# after the user empirically observed truncation recurrence in an MCP-only path. +# +# snapshot_claude_json copies the file to a tempfile BEFORE the call; +# restore_claude_json_if_truncated restores it AFTER, iff the snapshot was +# >= 10 KB and the new file is < 50% of the snapshot size. Complementary to +# SDD-021 session-start canary in claude-session-start.sh (same 10240-byte +# threshold, same upstream issue). See dotfiles#33 for the original incomplete +# trigger fix that motivated this layer. +snapshot_claude_json() { + local claude_json="$HOME/.claude/.claude.json" + [ -f "$claude_json" ] || return 0 + local backup + backup=$(mktemp "${TMPDIR:-/tmp}/.claude.json.bug004.XXXXXX") + cp -f "$claude_json" "$backup" + printf '%s' "$backup" +} + +restore_claude_json_if_truncated() { + local backup="$1" + [ -n "$backup" ] && [ -f "$backup" ] || return 0 + local claude_json="$HOME/.claude/.claude.json" + if [ -f "$claude_json" ]; then + local snapshot_size new_size half + snapshot_size=$(stat -c %s "$backup" 2>/dev/null || echo 0) + new_size=$(stat -c %s "$claude_json" 2>/dev/null || echo 0) + half=$((snapshot_size / 2)) + if [ "$snapshot_size" -ge 10240 ] && [ "$new_size" -lt "$half" ]; then + cp -f "$backup" "$claude_json" + log_warning ".claude.json shrunk from $snapshot_size to $new_size bytes after install (upstream #59870); restored from backup" + fi + fi + rm -f "$backup" +} + # Register MCP servers (requires Claude Code CLI, Node.js, jq) # Idempotent: server list lives in mcp-servers.json (SSOT shared with Windows); # `claude mcp get` is used to skip already-registered entries, and `add` errors -# are surfaced rather than swallowed. +# are surfaced rather than swallowed. BUG-011: every `claude mcp {get,add}` +# invocation is wrapped with snapshot_claude_json / restore_claude_json_if_truncated +# because both subcommands hit the same #59870 truncation path as `plugin install`. MCP_CONFIG="$DOTFILES_DIR/mcp-servers.json" if command -v claude >/dev/null 2>&1 && command -v npx >/dev/null 2>&1 && command -v jq >/dev/null 2>&1; then if [ ! -f "$MCP_CONFIG" ]; then @@ -565,10 +611,15 @@ if command -v claude >/dev/null 2>&1 && command -v npx >/dev/null 2>&1 && comman fi fi fi + # BUG-011: snapshot before both `mcp get` and `mcp add` (one snapshot + # per iteration -- legitimate `mcp add` additions are <<50% of file + # size, so restore only fires on the real #59870 truncation). + _snap=$(snapshot_claude_json) # Idempotence: skip if `claude mcp get` already knows this name if claude mcp get "$mcp_name" >/dev/null 2>&1; then log_info "MCP $mcp_name already registered, skipping" mcp_skipped=$((mcp_skipped + 1)) + restore_claude_json_if_truncated "$_snap" continue fi # Word-split args intentionally; entries in mcp-servers.json are @@ -581,6 +632,7 @@ if command -v claude >/dev/null 2>&1 && command -v npx >/dev/null 2>&1 && comman log_warning "Failed to register MCP $mcp_name: $mcp_err" mcp_failed=$((mcp_failed + 1)) fi + restore_claude_json_if_truncated "$_snap" done < <(jq -r '.servers[] | [.name, .transport, .args, (.prerequisite_binary // ""), (.prerequisite_command // "")] | @tsv' "$MCP_CONFIG") log_success "MCP servers: $mcp_added added, $mcp_skipped already present, $mcp_failed failed" fi @@ -588,56 +640,20 @@ else log_warning "Claude Code CLI, npx, or jq not found, skipping MCP server registration" fi -# BUG-004: defense-in-depth wrapper around `claude plugin install`. The bash -# idempotence guard below (`grep -qF "$plugin"` against `claude plugin list`) -# yields a false negative for claude-mem@thedotmack -- it never appears in -# that listing (different marketplace, `@thedotmack` vs `@claude-plugins-official`). -# Every setup run installs claude-mem again, which triggers upstream -# anthropics/claude-code#59870: the CLI's deserialize-modify-serialize cycle -# drops fields outside its internal struct (organizationType, -# organizationRateLimitTier, projects map, onboarding flags), shrinking -# ~/.claude/.claude.json from ~75 KB to ~1.5 KB and forcing re-authentication. -# snapshot_claude_json copies the file to a tempfile BEFORE the install; -# restore_claude_json_if_truncated restores it AFTER, iff the snapshot was -# >= 10 KB and the new file is < 50% of the snapshot size. Complementary to -# SDD-021 session-start canary in claude-session-start.sh (same 10240-byte -# threshold, same upstream issue). See dotfiles#33 for the original incomplete -# trigger fix that motivated this layer. -snapshot_claude_json() { - local claude_json="$HOME/.claude/.claude.json" - [ -f "$claude_json" ] || return 0 - local backup - backup=$(mktemp "${TMPDIR:-/tmp}/.claude.json.bug004.XXXXXX") - cp -f "$claude_json" "$backup" - printf '%s' "$backup" -} - -restore_claude_json_if_truncated() { - local backup="$1" - [ -n "$backup" ] && [ -f "$backup" ] || return 0 - local claude_json="$HOME/.claude/.claude.json" - if [ -f "$claude_json" ]; then - local snapshot_size new_size half - snapshot_size=$(stat -c %s "$backup" 2>/dev/null || echo 0) - new_size=$(stat -c %s "$claude_json" 2>/dev/null || echo 0) - half=$((snapshot_size / 2)) - if [ "$snapshot_size" -ge 10240 ] && [ "$new_size" -lt "$half" ]; then - cp -f "$backup" "$claude_json" - log_warning ".claude.json shrunk from $snapshot_size to $new_size bytes after install (upstream #59870); restored from backup" - fi - fi - rm -f "$backup" -} - # Claude Code plugins (requires claude CLI). # Idempotent: cache the installed-plugins list ONCE before the loop and skip -# entries already present. The wrapper above (BUG-004) catches the +# entries already present. The wrapper above (BUG-004/011) catches the # false-negative case where the idempotence guard misses a plugin (e.g. # claude-mem@thedotmack) and the resulting `claude plugin install` call -# truncates .claude.json. Same idempotence pattern as MCP registration (line 447). +# truncates .claude.json. BUG-011: the pre-loop `claude plugin list` is now +# also wrapped because it goes through the same #59870 path. if command -v claude >/dev/null 2>&1; then log_info "Installing Claude Code plugins..." + # BUG-011: wrap the read-only `claude plugin list` pre-fetch with the + # snapshot guard -- the CLI still rewrites .claude.json on any invocation. + _snap=$(snapshot_claude_json) installed_plugins=$(claude plugin list 2>/dev/null || true) + restore_claude_json_if_truncated "$_snap" plugins_added=0 plugins_skipped=0 for plugin in \ diff --git a/setup-windows.ps1 b/setup-windows.ps1 index 4383e64..01af7e1 100644 --- a/setup-windows.ps1 +++ b/setup-windows.ps1 @@ -348,7 +348,10 @@ if (Test-Path $skillsSource) { # Register MCP servers (requires Claude Code CLI, Node.js) # Idempotent: server list lives in mcp-servers.json (SSOT shared with Linux); # `claude mcp get` is used to skip already-registered entries, and `add` errors -# are surfaced rather than swallowed. +# are surfaced rather than swallowed. BUG-011: every `claude mcp {get,add}` +# invocation is wrapped with Backup-AndRestoreClaudeJson because both subcommands +# hit the same #59870 deserialize-modify-serialize truncation path as +# `plugin install`. $mcpConfig = "$DotfilesDir\mcp-servers.json" $claudeCmd = Get-Command claude -ErrorAction SilentlyContinue $npxCmd = Get-Command npx -ErrorAction SilentlyContinue @@ -376,14 +379,24 @@ if (-not ($claudeCmd -and $npxCmd)) { & $prereqParts[0] @($prereqParts[1..($prereqParts.Length - 1)]) 2>&1 | Out-Null } } - $null = & claude mcp get $srv.name 2>&1 + # BUG-011: wrap the idempotence-check `claude mcp get` with the same + # guard used for install -- the CLI rewrites .claude.json on any + # invocation. $LASTEXITCODE is automatic and survives the scriptblock. + Backup-AndRestoreClaudeJson -Action { + $null = & claude mcp get $srv.name 2>&1 + } if ($LASTEXITCODE -eq 0) { Write-Info "MCP $($srv.name) already registered, skipping" $mcpSkipped++ continue } $argParts = $srv.args -split '\s+' - $mcpErr = & claude mcp add --transport $srv.transport $srv.name --scope user -- @argParts 2>&1 + # BUG-011: wrap `claude mcp add` -- the unwrapped call here was the + # residual #59870 trigger after BUG-004 (PR #57) closed only the + # plugin-install path. + $mcpErr = Backup-AndRestoreClaudeJson -Action { + & claude mcp add --transport $srv.transport $srv.name --scope user -- @argParts 2>&1 + } if ($LASTEXITCODE -eq 0) { Write-Success "Registered MCP $($srv.name)" $mcpAdded++ @@ -406,7 +419,9 @@ if (-not ($claudeCmd -and $npxCmd)) { # the projects map, and onboarding flags get silently dropped. Re-running # install for already-installed plugins triggers silent .claude.json truncation # and forces re-authentication in every project. Same idempotence pattern as -# MCP registration above. +# MCP registration above. BUG-011: the pre-loop `claude plugin list` is now +# also wrapped because it goes through the same #59870 deserialize-modify- +# serialize path. if ($claudeCmd) { Write-Info "Installing Claude Code plugins..." $plugins = @( @@ -422,7 +437,11 @@ if ($claudeCmd) { "commit-commands@claude-plugins-official", "pr-review-toolkit@claude-plugins-official" ) - $installedPlugins = try { (& claude plugin list 2>$null) -join "`n" } catch { "" } + # BUG-011: wrap the read-only `claude plugin list` pre-fetch with the + # snapshot guard -- the CLI still rewrites .claude.json on any invocation. + $installedPlugins = Backup-AndRestoreClaudeJson -Action { + try { (& claude plugin list 2>$null) -join "`n" } catch { "" } + } $pluginsAdded = 0 $pluginsSkipped = 0 foreach ($plugin in $plugins) { diff --git a/specs/BUG-011-mcp-loop-claude-json-guard/proposal.md b/specs/BUG-011-mcp-loop-claude-json-guard/proposal.md new file mode 100644 index 0000000..01880c7 --- /dev/null +++ b/specs/BUG-011-mcp-loop-claude-json-guard/proposal.md @@ -0,0 +1,59 @@ +--- +id: "BUG-011-mcp-loop-claude-json-guard" +type: spec +status: draft +created: "2026-05-20" +tags: [spec, proposal, claude-json, truncate-guard, mcp, cross-os-parity] +template_version: "1.0" +--- + +# BUG-011-mcp-loop-claude-json-guard + +## Why + +The user ran `setup-windows.ps1` and `~/.claude/.claude.json` was truncated again, forcing re-authentication in every project. BUG-004 (PR #57) added `Backup-AndRestoreClaudeJson` / `backup_and_restore_claude_json` snapshot+restore around `claude plugin install` — but the same upstream truncation bug (`anthropics/claude-code#59870`) fires on **every** Claude CLI invocation that goes through the deserialize-modify-serialize cycle. The MCP registration loop runs `claude mcp get` + `claude mcp add` for ~9 servers per setup (≈18 unwrapped invocations), and `claude plugin list` is called once before the plugin install loop. None of these are wrapped. The recurrence is the natural consequence: BUG-004 fixed one call site, the others are still hot. + +## What + +Wrap **every** Claude CLI invocation in both `setup-linux.sh` and `setup-windows.ps1` with the existing snapshot/restore helper, per-call (not per-loop, to preserve legitimate state additions on success and only restore on >50% size drop). Specifically: + +1. `setup-linux.sh`: relocate `snapshot_claude_json` / `restore_claude_json_if_truncated` ABOVE the MCP registration block (currently defined after it). Wrap each MCP loop iteration around `claude mcp get` + `mcp add`. Wrap the `claude plugin list` pre-loop call. +2. `setup-windows.ps1`: wrap each MCP `foreach` iteration around `claude mcp get` + `mcp add` with `Backup-AndRestoreClaudeJson`. Wrap the `claude plugin list` pre-loop call. +3. Update the BUG-004 comment block in both scripts to reflect the broader scope ("every Claude CLI call site is guarded", not just "plugin install"). +4. Add bats parity assertions in `tests/setup-linux.bats` + `tests/setup-windows.bats` so future call sites added without the guard fail CI. + +After this PR, no `claude ` call in either setup script executes without a snapshot in place. + +## Out of scope + +- The upstream fix in `anthropics/claude-code#59870` itself — out of repo control (already filed via SDD-022 cross-issue commentary). +- Wrapping `claude` calls in **runtime** scripts (e.g. `claude-session-start.{sh,ps1}`) — those are read-only canaries (size check), not setup-time mutators. They detect the bug; they do not mutate `.claude.json`. Out of scope here. +- Refactoring `Backup-AndRestoreClaudeJson` / `snapshot_claude_json` themselves — the helpers are correct and unchanged. We only re-wrap more call sites. +- Adding new MCP servers or new plugins. The list is frozen for this PR; the only diff is structural (guard wrapping). + +## Risks / open questions + +- **Risk: per-call snapshot+restore adds ~9 × file copy operations to setup runtime.** Mitigation: the file is small (≤75 KB); even 18 snapshots cost ~1.5 MB of temp I/O and <1 s wall time. Acceptable. +- **Risk: a legitimate `mcp add` increases the file size, snapshot/restore could mask later truncation in the same run.** Mitigation: per-call wrap (chosen via user Q1) snapshots BEFORE each call and restores AFTER that single call, so each iteration is independently protected. A successful `mcp add` that adds 200 bytes is not >50% smaller than its own snapshot → no spurious restore. +- **Risk: the `mcp get` pre-check (read-only by API) might NOT trigger the upstream bug, making its wrapping redundant.** Mitigation: BUG-004's comment block explicitly says "every `claude plugin install` writes to `.claude.json`" — empirically, any CLI invocation goes through the same serializer. Cost of an extra snapshot per iteration is negligible; cost of skipping it and discovering a third recurrence path is much higher. +- **Risk: the bats assertions are pattern-based grep checks** (text-level), not behavioral tests. They lock the *presence* of the wrap, not its correctness. Mitigation: pattern matches the BUG-004 family — same level of enforcement, same caveats; combined with the upstream bug already empirically observed, this is sufficient defense-in-depth. + +## Acceptance criteria + +- [ ] `setup-linux.sh`: `snapshot_claude_json` is defined BEFORE the MCP registration block (lexical order, no forward reference). +- [ ] `setup-linux.sh`: every `claude mcp get` / `claude mcp add` / `claude plugin list` / `claude plugin install` call has a `snapshot_claude_json` within 5 lines above AND a `restore_claude_json_if_truncated` within 10 lines below. +- [ ] `setup-windows.ps1`: every `claude mcp get` / `claude mcp add` / `claude plugin list` / `claude plugin install` call is inside a `Backup-AndRestoreClaudeJson -Action { ... }` scriptblock. +- [ ] `tests/setup-linux.bats` and `tests/setup-windows.bats`: new parity assertions fail if the MCP loop or `plugin list` is added without the guard. +- [ ] `bats tests/setup-linux.bats tests/setup-windows.bats` green (no regressions in existing 670+ assertions). +- [ ] `shellcheck --severity=error` clean on `setup-linux.sh`. +- [ ] `pwsh -Command "Invoke-ScriptAnalyzer -Path setup-windows.ps1 -Severity Error"` clean (matches CI gate). +- [ ] verification.md ships with commit hashes + test output excerpts. + +## References + +- Vault: `10_projects/dotfiles/11-tasks.md` BUG-011 entry (this PR's "vault gate"). +- Predecessor: BUG-004 (PR [#57](https://github.com/mlorentedev/dotfiles/pull/57)) — established the snapshot/restore pattern for `claude plugin install`. +- Sibling: SDD-021 (PR #56) — session-start canary that detects truncation; this PR prevents it at the source. +- Upstream: `anthropics/claude-code#59870` (filed via SDD-022 cross-issue commentary). +- Pattern: `00_meta/patterns/fix-small-debt.md` (audit all call sites of a vulnerable API when patching one). +- Vault lesson (post-merge): `90-lessons.md` "Incident → guard pattern" — extend with "when guarding one CLI call site, audit ALL call sites of the same vulnerable CLI in the same PR". diff --git a/specs/BUG-011-mcp-loop-claude-json-guard/tasks.md b/specs/BUG-011-mcp-loop-claude-json-guard/tasks.md new file mode 100644 index 0000000..8c4d0f6 --- /dev/null +++ b/specs/BUG-011-mcp-loop-claude-json-guard/tasks.md @@ -0,0 +1,49 @@ +--- +tags: [spec, tasks, claude-json, truncate-guard] +created: "2026-05-20" +--- + +# Tasks - BUG-011-mcp-loop-claude-json-guard + +## Setup + +- [x] Branch: `fix/BUG-011-mcp-loop-claude-json-guard` (off main). +- [x] Vault entry in `11-tasks.md`. +- [x] Spec scaffolded via `scripts/init-spec.ps1`. + +## Implementation (TDD order) + +### Tests first + +- [ ] `tests/setup-linux.bats`: add assertion that the MCP loop wraps `claude mcp add` / `mcp get` with `snapshot_claude_json` + `restore_claude_json_if_truncated`. +- [ ] `tests/setup-linux.bats`: add assertion that `claude plugin list` is wrapped with the snapshot/restore pair. +- [ ] `tests/setup-windows.bats`: add assertion that the MCP loop foreach body is inside `Backup-AndRestoreClaudeJson`. +- [ ] `tests/setup-windows.bats`: add assertion that `claude plugin list` invocation is inside `Backup-AndRestoreClaudeJson`. +- [ ] Cross-OS parity assertion: both scripts wrap the MCP loop (mirrors the existing BUG-004 parity assert at `tests/setup-linux.bats:216`). +- [ ] Run bats — assertions should FAIL (red). + +### Implementation + +- [ ] `setup-linux.sh`: move `snapshot_claude_json` and `restore_claude_json_if_truncated` function definitions ABOVE the MCP registration block (currently at lines 606-630, needs to be before line 540). +- [ ] `setup-linux.sh`: wrap each MCP loop iteration body (`claude mcp get` + `claude mcp add`) with `_snap=$(snapshot_claude_json)` / `restore_claude_json_if_truncated "$_snap"`. +- [ ] `setup-linux.sh`: wrap `installed_plugins=$(claude plugin list ...)` with the same snapshot/restore pair. +- [ ] `setup-linux.sh`: update the BUG-004 comment block to reference BUG-011 (extended scope). +- [ ] `setup-windows.ps1`: wrap the MCP foreach iteration body with `Backup-AndRestoreClaudeJson -Action { ... }` around both `claude mcp get` and `claude mcp add`. +- [ ] `setup-windows.ps1`: wrap `$installedPlugins = ... claude plugin list ...` inside `Backup-AndRestoreClaudeJson`. +- [ ] `setup-windows.ps1`: update the BUG-004 comment block to reference BUG-011 (extended scope). +- [ ] Run bats — assertions now PASS (green). + +### Lint + cross-check + +- [ ] `shellcheck --severity=error scripts/*.sh setup-linux.sh` clean. +- [ ] `pwsh -Command "Invoke-ScriptAnalyzer -Path setup-windows.ps1 -Severity Error"` clean. +- [ ] Re-run full bats suite: `bats tests/` — no regressions (target green). + +## Closing + +- [ ] verification.md filled (commit hashes, bats output excerpts, before/after grep diff showing wrapped call sites). +- [ ] PR opened referencing `specs/BUG-011-mcp-loop-claude-json-guard/`. +- [ ] PR body notes: empirical confirmation pending until next clean-machine setup run; user can verify by running `setup-windows.ps1` on a machine with `.claude.json ≥ 10 KB` and confirming the file is unchanged (or restored from snapshot if upstream fires). +- [ ] Post-merge: tick BUG-011 in vault `11-tasks.md`, append PR link. +- [ ] Post-merge: `git mv specs/BUG-011-mcp-loop-claude-json-guard/ specs/archive/`. +- [ ] Post-merge: append lesson to `90-lessons.md` — rule: "when guarding one CLI call site, audit ALL call sites of the same vulnerable CLI in the same PR". diff --git a/specs/BUG-011-mcp-loop-claude-json-guard/verification.md b/specs/BUG-011-mcp-loop-claude-json-guard/verification.md new file mode 100644 index 0000000..f9fd6c8 --- /dev/null +++ b/specs/BUG-011-mcp-loop-claude-json-guard/verification.md @@ -0,0 +1,67 @@ +--- +tags: [spec, verification, claude-json, truncate-guard] +created: "2026-05-20" +--- + +# Verification - BUG-011-mcp-loop-claude-json-guard + +## Evidence (per acceptance criterion) + +- [x] **Linux helpers BEFORE MCP loop**: `snapshot_claude_json` defined at `setup-linux.sh:558` (relocated from line 606); first `claude mcp get` at `setup-linux.sh:619`. Lexical order verified via `grep -n` comparison. +- [x] **Linux MCP loop wrap**: `_snap=$(snapshot_claude_json)` at `setup-linux.sh:617`, before `claude mcp get` (l.619) and `claude mcp add` (l.628); `restore_claude_json_if_truncated "$_snap"` at l.622 (idempotence path) and l.635 (add path). +- [x] **Linux `claude plugin list` wrap**: `_snap=$(snapshot_claude_json)` at `setup-linux.sh:654` before `claude plugin list` at l.655; restore at l.656. +- [x] **Windows MCP loop wrap**: `Backup-AndRestoreClaudeJson -Action { ... }` wraps `claude mcp get` (`setup-windows.ps1:379-381`) and `claude mcp add` (`setup-windows.ps1:391-393`). +- [x] **Windows `claude plugin list` wrap**: `$installedPlugins = Backup-AndRestoreClaudeJson -Action { try { (& claude plugin list 2>$null) -join "\`n" } catch { "" } }` at `setup-windows.ps1:428-430`. +- [x] **Bats parity assertions**: 5 new asserts added to `tests/setup-linux.bats` (`# --- BUG-011: ...` block) + 3 new asserts to `tests/setup-windows.bats`. Manual grep-based equivalent of every assert verified PASS post-implementation. + +## Test status + +- **Manual grep verification (cross-OS, post-implementation)**: + ``` + === Linux === + helpers before MCP loop: PASS + mcp add snapshot: PASS + mcp add restore: PASS + plugin list snapshot: PASS + plugin list restore: PASS + plugin install snapshot (BUG-004 preserved): PASS + === Windows === + mcp add wrap: PASS + mcp get wrap: PASS + plugin list wrap: PASS + plugin install wrap (BUG-004 preserved): PASS + ``` +- **Bash syntax** (`bash -n setup-linux.sh`): OK. +- **PowerShell parse** (`[System.Management.Automation.Language.Parser]::ParseFile('setup-windows.ps1', ...)`): no parse errors (AST emitted, error array empty). +- **Empirical `$LASTEXITCODE` + stdout + closure propagation test** (isolated repro, `cmd /c exit N` as CLI stub through a `Backup-AndRestoreClaudeJson`-shaped wrapper): + ``` + Case A (cmd /c exit 0): LASTEXITCODE after wrapper = 0 + Case B (cmd /c exit 5): LASTEXITCODE after wrapper = 5 + Case C (capture stdout + exit 7): captured='simulated-stdout', LASTEXITCODE = 7 + Case D (closure over $it): results = saw:alpha,saw:beta,saw:gamma + VERDICT: PASS - wrapper preserves LASTEXITCODE, stdout, and closure semantics + ``` + Confirms the Windows refactor is semantically safe: `$LASTEXITCODE` survives `& $Action`, captured stdout works (the `$mcpErr = Backup-AndRestoreClaudeJson { ... 2>&1 }` pattern), and `foreach` iteration variables are accessible inside the scriptblock (the `$srv.name` / `$srv.args` closure). +- **Full bats suite**: pending — `bats` not on local PATH; CI Docker integration job runs the full suite at PR open. Manual grep-based assertion checks (the same patterns the bats greps execute) all pass. +- **End-to-end empirical confirmation**: pending — would require running `setup-windows.ps1` on a machine with healthy ~75 KB `~/.claude/.claude.json`. NOT executed locally because that's the exact path the bug fires on; testing it on the user's live machine would risk the very re-login this PR prevents. Validation will come naturally from the next setup run on a clean target. + +## Decisions made during implementation + +- **Per-call wrap, not per-loop** (user Q1): chose finer granularity. Cost: ~9-10 extra snapshots per setup. Benefit: legitimate `mcp add` outputs (small file-size growth) never trigger spurious restore — the >50% shrink condition only fires inside its own iteration, against its own snapshot. +- **Include `claude plugin list`** (user Q2): wrapped the read-only pre-fetch too because BUG-004's own comment block warns "every `claude plugin install` writes to `.claude.json`" — empirically any CLI invocation goes through the same serializer. +- **Windows: separate scriptblock wraps per CLI call**, NOT one wrap around the foreach body. Reason: the iteration body contains `continue` statements; `continue` inside a scriptblock invoked via `& $Action` does NOT continue the OUTER foreach. Keeping each `claude ` in its own narrowly-scoped scriptblock preserves control flow. +- **`$LASTEXITCODE` propagation across scriptblock**: relied on PS's automatic-variable behavior — external commands update the global `$LASTEXITCODE` regardless of invocation scope. If empirical evidence shows this fails, fall back to capturing exit code explicitly inside the scriptblock and returning it. +- **Bats assertion grep window widened to `-B15`** (from initial `-B10`): comment headers + the idempotence-skip path push the snapshot ~11 lines above `claude mcp add`. The assertion's intent is "wrap is present", not "wrap is within 10 lines" — 15 is empirically generous, still tight enough to catch a missing wrap. + +## Promotion candidates + +- [x] **Lesson for `dotfiles/90-lessons.md`**: yes — "when guarding one CLI call site of a vulnerable upstream API, audit ALL call sites of the same CLI in the same PR". This is a sibling/refinement of the BUG-006 "Incident → guard pattern" lesson. Direct evidence: BUG-004 wrapped one call site (plugin install), BUG-011 had to wrap the remaining four (mcp get, mcp add, plugin list × 2 platforms). +- [ ] **ADR-worthy decision**: no — defense-in-depth wrap is a localized fix, not an architectural shift. +- [ ] **New pattern candidate for `00_meta/patterns/`**: no — already covered by `fix-small-debt.md` and the existing "Incident → guard" lesson; not a >1-project recurrence yet. + +## Archive checklist + +- [ ] `proposal.md` frontmatter set to `status: archived` (post-merge). +- [ ] Folder moved: `specs/BUG-011-mcp-loop-claude-json-guard/` -> `specs/archive/BUG-011-mcp-loop-claude-json-guard/` (post-merge). +- [ ] Backlog entry in vault `11-tasks.md` ticked with PR link (post-merge). +- [ ] Lesson appended to `dotfiles/90-lessons.md` (post-merge, see Promotion candidates). diff --git a/tests/setup-linux.bats b/tests/setup-linux.bats index 6a08d0e..9d4c3e8 100644 --- a/tests/setup-linux.bats +++ b/tests/setup-linux.bats @@ -219,6 +219,43 @@ setup() { grep -q 'snapshot_claude_json' "$DOTFILES_DIR/setup-linux.sh" } +# --- BUG-011: extend the BUG-004 guard to every claude CLI call site --- +# BUG-004 covered only `claude plugin install`. The same upstream truncation +# (anthropics/claude-code#59870) fires on `claude mcp get`, `claude mcp add`, and +# `claude plugin list` because they go through the same deserialize-modify- +# serialize cycle. With ~9 MCP servers, each setup run unwrapped triggered +# ~18 chances of truncation. These asserts lock in the wrap on every call site. + +@test "setup-linux.sh defines snapshot helpers BEFORE the MCP loop (BUG-011)" { + # No forward references: helper definitions must precede first use. + helper_line=$(grep -n '^snapshot_claude_json()' "$DOTFILES_DIR/setup-linux.sh" | head -1 | cut -d: -f1) + mcp_get_line=$(grep -n 'claude mcp get' "$DOTFILES_DIR/setup-linux.sh" | head -1 | cut -d: -f1) + [ -n "$helper_line" ] && [ -n "$mcp_get_line" ] + [ "$helper_line" -lt "$mcp_get_line" ] +} + +@test "setup-linux.sh wraps claude mcp add with snapshot+restore (BUG-011)" { + # snapshot called within 15 lines before mcp add (header comments + mcp get + # idempotence path live between); restore within 10 lines after. + grep -B15 'claude mcp add --transport' "$DOTFILES_DIR/setup-linux.sh" | grep -q 'snapshot_claude_json' + grep -A10 'claude mcp add --transport' "$DOTFILES_DIR/setup-linux.sh" | grep -q 'restore_claude_json_if_truncated' +} + +@test "setup-linux.sh wraps claude plugin list with snapshot+restore (BUG-011)" { + grep -B5 'claude plugin list 2>/dev/null' "$DOTFILES_DIR/setup-linux.sh" | grep -q 'snapshot_claude_json' + grep -A5 'claude plugin list 2>/dev/null' "$DOTFILES_DIR/setup-linux.sh" | grep -q 'restore_claude_json_if_truncated' +} + +@test "parity: both setup scripts wrap claude mcp add with the snapshot guard (BUG-011)" { + grep -B15 'claude mcp add --transport' "$DOTFILES_DIR/setup-linux.sh" | grep -q 'snapshot_claude_json' + grep -B15 'claude mcp add --transport' "$DOTFILES_DIR/setup-windows.ps1" | grep -q 'Backup-AndRestoreClaudeJson' +} + +@test "parity: both setup scripts wrap claude plugin list with the snapshot guard (BUG-011)" { + grep -B5 'claude plugin list' "$DOTFILES_DIR/setup-linux.sh" | grep -q 'snapshot_claude_json' + grep -B5 'claude plugin list' "$DOTFILES_DIR/setup-windows.ps1" | grep -q 'Backup-AndRestoreClaudeJson' +} + # --- doctor + env-contract.json (cross-OS parity) --- @test "env-contract.json exists and is valid JSON with required sections" { diff --git a/tests/setup-windows.bats b/tests/setup-windows.bats index 74309ac..0bf3620 100644 --- a/tests/setup-windows.bats +++ b/tests/setup-windows.bats @@ -191,6 +191,26 @@ setup() { grep -qF 'claude plugin list' "$PS1_SCRIPT" } +# --- BUG-011: extend the BUG-004 guard to every claude CLI call site --- +# BUG-004 covered only `claude plugin install`. The same upstream truncation +# (anthropics/claude-code#59870) fires on `claude mcp get`, `claude mcp add`, and +# `claude plugin list` because they go through the same deserialize-modify- +# serialize cycle. With ~9 MCP servers, each unwrapped setup run triggered +# ~18 chances of truncation. These asserts lock in the wrap on every call site. + +@test "setup-windows.ps1 wraps claude mcp add with Backup-AndRestoreClaudeJson (BUG-011)" { + grep -B15 'claude mcp add --transport' "$PS1_SCRIPT" | grep -q 'Backup-AndRestoreClaudeJson' +} + +@test "setup-windows.ps1 wraps claude mcp get with Backup-AndRestoreClaudeJson (BUG-011)" { + # Same scriptblock typically wraps both mcp get and mcp add inside one iteration. + grep -B15 'claude mcp get' "$PS1_SCRIPT" | grep -q 'Backup-AndRestoreClaudeJson' +} + +@test "setup-windows.ps1 wraps claude plugin list with Backup-AndRestoreClaudeJson (BUG-011)" { + grep -B5 'claude plugin list' "$PS1_SCRIPT" | grep -q 'Backup-AndRestoreClaudeJson' +} + # --- BUG-005: Windows PowerShell 5.1 auto-reexec under pwsh --- # SDD-002 (PR #51) introduced Merge-ClaudeSettings which uses # `ConvertFrom-Json -AsHashtable` -- a parameter added in PowerShell 7.0 that