Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 60 additions & 44 deletions setup-linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -537,10 +537,56 @@ else
log_info "GitHub Copilot CLI not installed, skipping Copilot config (install via snap/apt/curl: https://docs.github.com/copilot/how-tos/copilot-cli)"
fi

# BUG-004 + BUG-011: defense-in-depth wrapper around EVERY `claude <subcommand>`
# invocation in this script. The Claude Code CLI's deserialize-modify-serialize
# cycle drops fields outside its internal struct (organizationType,
# organizationRateLimitTier, projects map, onboarding flags) -- upstream bug
# anthropics/claude-code#59870 -- shrinking ~/.claude/.claude.json from ~75 KB
# to ~1.5 KB and forcing re-authentication. The bug fires on ANY subcommand
# (`plugin install`, `plugin list`, `mcp get`, `mcp add`), not just install.
#
# BUG-004 (PR #57) wrapped only `claude plugin install`. BUG-011 extends the
# guard to every other call site (MCP loop iterations + plugin list pre-fetch)
# after the user empirically observed truncation recurrence in an MCP-only path.
#
# snapshot_claude_json copies the file to a tempfile BEFORE the call;
# restore_claude_json_if_truncated restores it AFTER, iff the snapshot was
# >= 10 KB and the new file is < 50% of the snapshot size. Complementary to
# SDD-021 session-start canary in claude-session-start.sh (same 10240-byte
# threshold, same upstream issue). See dotfiles#33 for the original incomplete
# trigger fix that motivated this layer.
# Copy ~/.claude/.claude.json to a private temp file before a `claude` call.
# Globals:   HOME (read), TMPDIR (read; /tmp when unset or empty)
# Arguments: none
# Outputs:   snapshot path on stdout; nothing when the source file is missing
#            or the snapshot could not be taken completely
# Returns:   0 in every handled case -- the guard is best-effort and must not
#            block setup
snapshot_claude_json() {
  local claude_json="$HOME/.claude/.claude.json"
  local backup

  # Nothing to protect yet (fresh machine): succeed silently.
  [ -f "$claude_json" ] || return 0

  # stdout is captured by callers (`_snap=$(snapshot_claude_json)`), so every
  # diagnostic below goes to stderr.
  if ! backup=$(mktemp "${TMPDIR:-/tmp}/.claude.json.bug004.XXXXXX"); then
    printf 'snapshot_claude_json: mktemp failed; continuing without a snapshot\n' >&2
    return 0
  fi

  if ! cp -f -- "$claude_json" "$backup"; then
    # A partial copy must never be offered as a restore source later on.
    rm -f -- "$backup"
    printf 'snapshot_claude_json: could not copy %s; continuing without a snapshot\n' "$claude_json" >&2
    return 0
  fi

  printf '%s' "$backup"
}

# Put ~/.claude/.claude.json back from a snapshot when it shrank drastically.
# Globals:   HOME (read)
# Arguments: $1 - snapshot path printed by snapshot_claude_json (may be empty)
# Outputs:   a log_warning line when a restore was attempted
# Returns:   0 when nothing had to be done or the restore succeeded;
#            1 when the restore copy failed (the snapshot is then kept)
restore_claude_json_if_truncated() {
  local backup="${1:-}"
  local claude_json="$HOME/.claude/.claude.json"
  local snapshot_size new_size half

  # No snapshot was taken (missing source file or failed snapshot): no-op.
  if [ -z "$backup" ] || [ ! -f "$backup" ]; then
    return 0
  fi

  if [ -f "$claude_json" ]; then
    snapshot_size=$(stat -c %s "$backup" 2>/dev/null || echo 0)
    new_size=$(stat -c %s "$claude_json" 2>/dev/null || echo 0)
    half=$((snapshot_size / 2))
    # Restore only when the snapshot was >= 10 KB and the file lost more than
    # half of it, so small legitimate rewrites never trigger a rollback.
    if [ "$snapshot_size" -ge 10240 ] && [ "$new_size" -lt "$half" ]; then
      if cp -f -- "$backup" "$claude_json"; then
        log_warning ".claude.json shrunk from $snapshot_size to $new_size bytes after install (upstream #59870); restored from backup"
      else
        # Keep the only good copy and say where it is instead of claiming
        # success and deleting it.
        log_warning ".claude.json shrunk from $snapshot_size to $new_size bytes (upstream #59870); restore failed, snapshot kept at $backup"
        return 1
      fi
    fi
  fi

  rm -f -- "$backup"
}

# Register MCP servers (requires Claude Code CLI, Node.js, jq)
# Idempotent: server list lives in mcp-servers.json (SSOT shared with Windows);
# `claude mcp get` is used to skip already-registered entries, and `add` errors
# are surfaced rather than swallowed.
# are surfaced rather than swallowed. BUG-011: every `claude mcp {get,add}`
# invocation is wrapped with snapshot_claude_json / restore_claude_json_if_truncated
# because both subcommands hit the same #59870 truncation path as `plugin install`.
MCP_CONFIG="$DOTFILES_DIR/mcp-servers.json"
if command -v claude >/dev/null 2>&1 && command -v npx >/dev/null 2>&1 && command -v jq >/dev/null 2>&1; then
if [ ! -f "$MCP_CONFIG" ]; then
Expand All @@ -565,10 +611,15 @@ if command -v claude >/dev/null 2>&1 && command -v npx >/dev/null 2>&1 && comman
fi
fi
fi
# BUG-011: snapshot before both `mcp get` and `mcp add` (one snapshot
# per iteration -- legitimate `mcp add` additions are <<50% of file
# size, so restore only fires on the real #59870 truncation).
_snap=$(snapshot_claude_json)
# Idempotence: skip if `claude mcp get` already knows this name
if claude mcp get "$mcp_name" >/dev/null 2>&1; then
log_info "MCP $mcp_name already registered, skipping"
mcp_skipped=$((mcp_skipped + 1))
restore_claude_json_if_truncated "$_snap"
continue
fi
# Word-split args intentionally; entries in mcp-servers.json are
Expand All @@ -581,63 +632,28 @@ if command -v claude >/dev/null 2>&1 && command -v npx >/dev/null 2>&1 && comman
log_warning "Failed to register MCP $mcp_name: $mcp_err"
mcp_failed=$((mcp_failed + 1))
fi
restore_claude_json_if_truncated "$_snap"
done < <(jq -r '.servers[] | [.name, .transport, .args, (.prerequisite_binary // ""), (.prerequisite_command // "")] | @tsv' "$MCP_CONFIG")
log_success "MCP servers: $mcp_added added, $mcp_skipped already present, $mcp_failed failed"
fi
else
log_warning "Claude Code CLI, npx, or jq not found, skipping MCP server registration"
fi

# BUG-004: defense-in-depth wrapper around `claude plugin install`. The bash
# idempotence guard below (`grep -qF "$plugin"` against `claude plugin list`)
# yields a false negative for claude-mem@thedotmack -- it never appears in
# that listing (different marketplace, `@thedotmack` vs `@claude-plugins-official`).
# Every setup run installs claude-mem again, which triggers upstream
# anthropics/claude-code#59870: the CLI's deserialize-modify-serialize cycle
# drops fields outside its internal struct (organizationType,
# organizationRateLimitTier, projects map, onboarding flags), shrinking
# ~/.claude/.claude.json from ~75 KB to ~1.5 KB and forcing re-authentication.
# snapshot_claude_json copies the file to a tempfile BEFORE the install;
# restore_claude_json_if_truncated restores it AFTER, iff the snapshot was
# >= 10 KB and the new file is < 50% of the snapshot size. Complementary to
# SDD-021 session-start canary in claude-session-start.sh (same 10240-byte
# threshold, same upstream issue). See dotfiles#33 for the original incomplete
# trigger fix that motivated this layer.
# Copy ~/.claude/.claude.json to a private temp file before a `claude` call.
# Globals:   HOME (read), TMPDIR (read; /tmp when unset or empty)
# Arguments: none
# Outputs:   snapshot path on stdout; nothing when the source file is missing
#            or the snapshot could not be taken completely
# Returns:   0 in every handled case -- the guard is best-effort and must not
#            block setup
snapshot_claude_json() {
  local claude_json="$HOME/.claude/.claude.json"
  local backup

  # Nothing to protect yet (fresh machine): succeed silently.
  [ -f "$claude_json" ] || return 0

  # stdout is captured by callers (`_snap=$(snapshot_claude_json)`), so every
  # diagnostic below goes to stderr.
  if ! backup=$(mktemp "${TMPDIR:-/tmp}/.claude.json.bug004.XXXXXX"); then
    printf 'snapshot_claude_json: mktemp failed; continuing without a snapshot\n' >&2
    return 0
  fi

  if ! cp -f -- "$claude_json" "$backup"; then
    # A partial copy must never be offered as a restore source later on.
    rm -f -- "$backup"
    printf 'snapshot_claude_json: could not copy %s; continuing without a snapshot\n' "$claude_json" >&2
    return 0
  fi

  printf '%s' "$backup"
}

# Put ~/.claude/.claude.json back from a snapshot when it shrank drastically.
# Globals:   HOME (read)
# Arguments: $1 - snapshot path printed by snapshot_claude_json (may be empty)
# Outputs:   a log_warning line when a restore was attempted
# Returns:   0 when nothing had to be done or the restore succeeded;
#            1 when the restore copy failed (the snapshot is then kept)
restore_claude_json_if_truncated() {
  local backup="${1:-}"
  local claude_json="$HOME/.claude/.claude.json"
  local snapshot_size new_size half

  # No snapshot was taken (missing source file or failed snapshot): no-op.
  if [ -z "$backup" ] || [ ! -f "$backup" ]; then
    return 0
  fi

  if [ -f "$claude_json" ]; then
    snapshot_size=$(stat -c %s "$backup" 2>/dev/null || echo 0)
    new_size=$(stat -c %s "$claude_json" 2>/dev/null || echo 0)
    half=$((snapshot_size / 2))
    # Restore only when the snapshot was >= 10 KB and the file lost more than
    # half of it, so small legitimate rewrites never trigger a rollback.
    if [ "$snapshot_size" -ge 10240 ] && [ "$new_size" -lt "$half" ]; then
      if cp -f -- "$backup" "$claude_json"; then
        log_warning ".claude.json shrunk from $snapshot_size to $new_size bytes after install (upstream #59870); restored from backup"
      else
        # Keep the only good copy and say where it is instead of claiming
        # success and deleting it.
        log_warning ".claude.json shrunk from $snapshot_size to $new_size bytes (upstream #59870); restore failed, snapshot kept at $backup"
        return 1
      fi
    fi
  fi

  rm -f -- "$backup"
}

# Claude Code plugins (requires claude CLI).
# Idempotent: cache the installed-plugins list ONCE before the loop and skip
# entries already present. The wrapper above (BUG-004) catches the
# entries already present. The wrapper above (BUG-004/011) catches the
# false-negative case where the idempotence guard misses a plugin (e.g.
# claude-mem@thedotmack) and the resulting `claude plugin install` call
# truncates .claude.json. Same idempotence pattern as MCP registration (line 447).
# truncates .claude.json. BUG-011: the pre-loop `claude plugin list` is now
# also wrapped because it goes through the same #59870 path.
if command -v claude >/dev/null 2>&1; then
log_info "Installing Claude Code plugins..."
# BUG-011: wrap the read-only `claude plugin list` pre-fetch with the
# snapshot guard -- the CLI still rewrites .claude.json on any invocation.
_snap=$(snapshot_claude_json)
installed_plugins=$(claude plugin list 2>/dev/null || true)
restore_claude_json_if_truncated "$_snap"
plugins_added=0
plugins_skipped=0
for plugin in \
Expand Down
29 changes: 24 additions & 5 deletions setup-windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,10 @@ if (Test-Path $skillsSource) {
# Register MCP servers (requires Claude Code CLI, Node.js)
# Idempotent: server list lives in mcp-servers.json (SSOT shared with Linux);
# `claude mcp get` is used to skip already-registered entries, and `add` errors
# are surfaced rather than swallowed.
# are surfaced rather than swallowed. BUG-011: every `claude mcp {get,add}`
# invocation is wrapped with Backup-AndRestoreClaudeJson because both subcommands
# hit the same #59870 deserialize-modify-serialize truncation path as
# `plugin install`.
$mcpConfig = "$DotfilesDir\mcp-servers.json"
$claudeCmd = Get-Command claude -ErrorAction SilentlyContinue
$npxCmd = Get-Command npx -ErrorAction SilentlyContinue
Expand Down Expand Up @@ -376,14 +379,24 @@ if (-not ($claudeCmd -and $npxCmd)) {
& $prereqParts[0] @($prereqParts[1..($prereqParts.Length - 1)]) 2>&1 | Out-Null
}
}
$null = & claude mcp get $srv.name 2>&1
# BUG-011: wrap the idempotence-check `claude mcp get` with the same
# guard used for install -- the CLI rewrites .claude.json on any
# invocation. $LASTEXITCODE is automatic and survives the scriptblock.
Backup-AndRestoreClaudeJson -Action {
$null = & claude mcp get $srv.name 2>&1
}
if ($LASTEXITCODE -eq 0) {
Write-Info "MCP $($srv.name) already registered, skipping"
$mcpSkipped++
continue
}
$argParts = $srv.args -split '\s+'
$mcpErr = & claude mcp add --transport $srv.transport $srv.name --scope user -- @argParts 2>&1
# BUG-011: wrap `claude mcp add` -- the unwrapped call here was the
# residual #59870 trigger after BUG-004 (PR #57) closed only the
# plugin-install path.
$mcpErr = Backup-AndRestoreClaudeJson -Action {
& claude mcp add --transport $srv.transport $srv.name --scope user -- @argParts 2>&1
}
if ($LASTEXITCODE -eq 0) {
Write-Success "Registered MCP $($srv.name)"
$mcpAdded++
Expand All @@ -406,7 +419,9 @@ if (-not ($claudeCmd -and $npxCmd)) {
# the projects map, and onboarding flags get silently dropped. Re-running
# install for already-installed plugins triggers silent .claude.json truncation
# and forces re-authentication in every project. Same idempotence pattern as
# MCP registration above.
# MCP registration above. BUG-011: the pre-loop `claude plugin list` is now
# also wrapped because it goes through the same #59870 deserialize-modify-
# serialize path.
if ($claudeCmd) {
Write-Info "Installing Claude Code plugins..."
$plugins = @(
Expand All @@ -422,7 +437,11 @@ if ($claudeCmd) {
"commit-commands@claude-plugins-official",
"pr-review-toolkit@claude-plugins-official"
)
$installedPlugins = try { (& claude plugin list 2>$null) -join "`n" } catch { "" }
# BUG-011: wrap the read-only `claude plugin list` pre-fetch with the
# snapshot guard -- the CLI still rewrites .claude.json on any invocation.
$installedPlugins = Backup-AndRestoreClaudeJson -Action {
try { (& claude plugin list 2>$null) -join "`n" } catch { "" }
}
$pluginsAdded = 0
$pluginsSkipped = 0
foreach ($plugin in $plugins) {
Expand Down
59 changes: 59 additions & 0 deletions specs/BUG-011-mcp-loop-claude-json-guard/proposal.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
---
id: "BUG-011-mcp-loop-claude-json-guard"
type: spec
status: draft
created: "2026-05-20"
tags: [spec, proposal, claude-json, truncate-guard, mcp, cross-os-parity]
template_version: "1.0"
---

# BUG-011-mcp-loop-claude-json-guard

## Why

The user ran `setup-windows.ps1` and `~/.claude/.claude.json` was truncated again, forcing re-authentication in every project. BUG-004 (PR #57) added snapshot+restore helpers (`Backup-AndRestoreClaudeJson` on Windows; `snapshot_claude_json` / `restore_claude_json_if_truncated` on Linux) around `claude plugin install` — but the same upstream truncation bug (`anthropics/claude-code#59870`) fires on **every** Claude CLI invocation that goes through the deserialize-modify-serialize cycle. The MCP registration loop runs `claude mcp get` + `claude mcp add` for ~9 servers per setup (≈18 unwrapped invocations), and `claude plugin list` is called once before the plugin install loop. None of these are wrapped. The recurrence is the natural consequence: BUG-004 fixed one call site, the others are still hot.

## What

Wrap **every** Claude CLI invocation in both `setup-linux.sh` and `setup-windows.ps1` with the existing snapshot/restore helper, per-call (not per-loop, to preserve legitimate state additions on success and only restore on >50% size drop). Specifically:

1. `setup-linux.sh`: relocate `snapshot_claude_json` / `restore_claude_json_if_truncated` ABOVE the MCP registration block (currently defined after it). Wrap each MCP loop iteration around `claude mcp get` + `mcp add`. Wrap the `claude plugin list` pre-loop call.
2. `setup-windows.ps1`: wrap each MCP `foreach` iteration around `claude mcp get` + `mcp add` with `Backup-AndRestoreClaudeJson`. Wrap the `claude plugin list` pre-loop call.
3. Update the BUG-004 comment block in both scripts to reflect the broader scope ("every Claude CLI call site is guarded", not just "plugin install").
4. Add bats parity assertions in `tests/setup-linux.bats` + `tests/setup-windows.bats` so future call sites added without the guard fail CI.

After this PR, no `claude <subcommand>` call in either setup script executes without a snapshot in place.

## Out of scope

- The upstream fix in `anthropics/claude-code#59870` itself — out of repo control (already filed via SDD-022 cross-issue commentary).
- Wrapping `claude` calls in **runtime** scripts (e.g. `claude-session-start.{sh,ps1}`) — those are read-only canaries (size check), not setup-time mutators. They detect the bug; they do not mutate `.claude.json`. Out of scope here.
- Refactoring `Backup-AndRestoreClaudeJson` / `snapshot_claude_json` themselves — the helpers are correct and unchanged. We only re-wrap more call sites.
- Adding new MCP servers or new plugins. The list is frozen for this PR; the only diff is structural (guard wrapping).

## Risks / open questions

- **Risk: per-call snapshot+restore adds up to ~18 file copy operations (one per `claude mcp get` / `mcp add` call across ~9 servers) to setup runtime.** Mitigation: the file is small (≤75 KB); even 18 snapshots cost ~1.5 MB of temp I/O and <1 s wall time. Acceptable.
- **Risk: a legitimate `mcp add` increases the file size, snapshot/restore could mask later truncation in the same run.** Mitigation: per-call wrap (chosen via user Q1) snapshots BEFORE each call and restores AFTER that single call, so each iteration is independently protected. A successful `mcp add` that adds 200 bytes is not >50% smaller than its own snapshot → no spurious restore.
- **Risk: the `mcp get` pre-check (read-only by API) might NOT trigger the upstream bug, making its wrapping redundant.** Mitigation: BUG-004's comment block explicitly says "every `claude plugin install` writes to `.claude.json`" — empirically, any CLI invocation goes through the same serializer. Cost of an extra snapshot per iteration is negligible; cost of skipping it and discovering a third recurrence path is much higher.
- **Risk: the bats assertions are pattern-based grep checks** (text-level), not behavioral tests. They lock the *presence* of the wrap, not its correctness. Mitigation: pattern matches the BUG-004 family — same level of enforcement, same caveats; combined with the upstream bug already empirically observed, this is sufficient defense-in-depth.

## Acceptance criteria

- [ ] `setup-linux.sh`: `snapshot_claude_json` is defined BEFORE the MCP registration block (lexical order, no forward reference).
- [ ] `setup-linux.sh`: every `claude mcp get` / `claude mcp add` / `claude plugin list` / `claude plugin install` call has a `snapshot_claude_json` within 5 lines above AND a `restore_claude_json_if_truncated` within 10 lines below.
- [ ] `setup-windows.ps1`: every `claude mcp get` / `claude mcp add` / `claude plugin list` / `claude plugin install` call is inside a `Backup-AndRestoreClaudeJson -Action { ... }` scriptblock.
- [ ] `tests/setup-linux.bats` and `tests/setup-windows.bats`: new parity assertions fail if the MCP loop or `plugin list` is added without the guard.
- [ ] `bats tests/setup-linux.bats tests/setup-windows.bats` green (no regressions in existing 670+ assertions).
- [ ] `shellcheck --severity=error` clean on `setup-linux.sh`.
- [ ] `pwsh -Command "Invoke-ScriptAnalyzer -Path setup-windows.ps1 -Severity Error"` clean (matches CI gate).
- [ ] verification.md ships with commit hashes + test output excerpts.

## References

- Vault: `10_projects/dotfiles/11-tasks.md` BUG-011 entry (this PR's "vault gate").
- Predecessor: BUG-004 (PR [#57](https://github.com/mlorentedev/dotfiles/pull/57)) — established the snapshot/restore pattern for `claude plugin install`.
- Sibling: SDD-021 (PR #56) — session-start canary that detects truncation; this PR prevents it at the source.
- Upstream: `anthropics/claude-code#59870` (filed via SDD-022 cross-issue commentary).
- Pattern: `00_meta/patterns/fix-small-debt.md` (audit all call sites of a vulnerable API when patching one).
- Vault lesson (post-merge): `90-lessons.md` "Incident → guard pattern" — extend with "when guarding one CLI call site, audit ALL call sites of the same vulnerable CLI in the same PR".
Loading
Loading