Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 28 additions & 6 deletions scripts/claude-mem-heal.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,19 @@ function Repair-MarketplaceCompatJunction {
}
}

# Replace a broken .mcp.json with the v10.6.3 form. Idempotent: only
# rewrites if the file contains the offending ${_R%/} pattern.
# Replace a broken .mcp.json with a healthy form. BUG-016 (2026-05-21):
# extended to detect v13.x cascading-printf pattern alongside v12.7.4's
# `${_R%/}` literal. v13.x triggers the upstream EPIPE race documented in
# thedotmack/claude-mem#2607 (causes `/mcp ... -32000` failures intermittently
# on Windows Git Bash).
#
# The healthy form mirrors the v13.x cascade structure (so it works whether
# or not Claude Code sets CLAUDE_PLUGIN_ROOT) but pipes the consumer's
# matches through `head -n1` instead of breaking the inner `while` loop --
# this drains the entire producer pipe, eliminating the EPIPE writes that
# trigger the upstream bug.
#
# Idempotent: skips when neither v12.7.4 nor v13.x signature present.
function Repair-McpJson {
param([string]$Target)

Expand All @@ -101,7 +112,13 @@ function Repair-McpJson {
}

$content = Get-Content $Target -Raw -ErrorAction SilentlyContinue
if (-not $content -or ($content -notmatch '\$\{_R%/\}')) {
if (-not $content) {
Write-HealVerbose ".mcp.json unreadable: $Target"
return
}
$hasV12 = $content -match '\$\{_R%/\}'
$hasV13 = ($content -match '"sh"\s*,\s*\r?\n?\s*"args"') -and ($content -match 'while IFS=')
if (-not $hasV12 -and -not $hasV13) {
Write-HealVerbose ".mcp.json already healthy: $Target"
return
}
Expand All @@ -111,16 +128,21 @@ function Repair-McpJson {
"mcpServers": {
"mcp-search": {
"type": "stdio",
"command": "node",
"command": "sh",
"args": [
"${CLAUDE_PLUGIN_ROOT}/scripts/mcp-server.cjs"
"-c",
"_C=\"${CLAUDE_CONFIG_DIR:-$HOME/.claude}\"; _E=\"${CLAUDE_PLUGIN_ROOT:-${PLUGIN_ROOT:-}}\"; _P=$({ [ -n \"$_E\" ] && printf '%s\\n' \"$_E\"; ls -dt \"$_C/plugins/cache/thedotmack/claude-mem\"/[0-9]*/ 2>/dev/null; printf '%s\\n' \"$_C/plugins/marketplaces/thedotmack-claude-mem/plugin\" \"$_C/plugins/marketplaces/thedotmack/plugin\"; } | while IFS= read -r _R; do _R=\"${_R%/}\"; [ -d \"$_R/plugin/scripts\" ] && _Q=\"$_R/plugin\" || _Q=\"$_R\"; [ -f \"$_Q/scripts/mcp-server.cjs\" ] && printf '%s\\n' \"$_Q\"; done | head -n1); [ -n \"$_P\" ] || { echo 'claude-mem: mcp server not found' >&2; exit 1; }; exec node \"$_P/scripts/mcp-server.cjs\""
]
}
}
}
'@
Set-Content -Path $Target -Value $healthy -Encoding UTF8 -NoNewline
Write-HealLog "patched .mcp.json: $Target"
if ($hasV13) {
Write-HealLog "patched .mcp.json (v13.x cascade -> head -n1 race-free form): $Target"
} else {
Write-HealLog "patched .mcp.json (v12.7.4 -> head -n1 race-free form): $Target"
}
}

# Install the zod runtime dep if package.json declares it but it isn't
Expand Down
29 changes: 23 additions & 6 deletions scripts/claude-mem-heal.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,26 @@ ensure_marketplace_compat_symlink() {
fi
}

# Replace a broken .mcp.json with the v10.6.3 form. Idempotent: only
# rewrites if the file contains the offending ${_R%/} pattern.
# Replace a broken .mcp.json with a healthy form. BUG-016 (2026-05-21):
# extended to detect v13.x cascading-printf pattern alongside v12.7.4's
# `${_R%/}` literal. v13.x triggers the upstream EPIPE race documented in
# thedotmack/claude-mem#2607 (causes `/mcp ... -32000` failures intermittently).
#
# The healthy form mirrors the v13.x cascade structure (so it works whether
# or not Claude Code sets CLAUDE_PLUGIN_ROOT) but pipes the consumer's
# matches through `head -n1` instead of breaking the inner `while` loop --
# this drains the entire producer pipe, eliminating the EPIPE writes that
# trigger the upstream bug.
#
# Idempotent: skips when neither v12.7.4 nor v13.x signature present.
heal_mcp_json() {
target="$1"
[ -f "$target" ] || { verbose "no .mcp.json at $target"; return 0; }

# shellcheck disable=SC2016 # literal pattern, intentionally not expanded
if ! grep -qF '${_R%/}' "$target" 2>/dev/null; then
has_v12=$(grep -cF '${_R%/}' "$target" 2>/dev/null || echo 0)
has_v13=$(grep -cE '"sh".*"-c"|while IFS= read' "$target" 2>/dev/null || echo 0)
if [ "$has_v12" -eq 0 ] && [ "$has_v13" -eq 0 ]; then
verbose ".mcp.json already healthy: $target"
return 0
fi
Expand All @@ -78,15 +90,20 @@ heal_mcp_json() {
"mcpServers": {
"mcp-search": {
"type": "stdio",
"command": "node",
"command": "sh",
"args": [
"${CLAUDE_PLUGIN_ROOT}/scripts/mcp-server.cjs"
"-c",
"_C=\"${CLAUDE_CONFIG_DIR:-$HOME/.claude}\"; _E=\"${CLAUDE_PLUGIN_ROOT:-${PLUGIN_ROOT:-}}\"; _P=$({ [ -n \"$_E\" ] && printf '%s\\n' \"$_E\"; ls -dt \"$_C/plugins/cache/thedotmack/claude-mem\"/[0-9]*/ 2>/dev/null; printf '%s\\n' \"$_C/plugins/marketplaces/thedotmack-claude-mem/plugin\" \"$_C/plugins/marketplaces/thedotmack/plugin\"; } | while IFS= read -r _R; do _R=\"${_R%/}\"; [ -d \"$_R/plugin/scripts\" ] && _Q=\"$_R/plugin\" || _Q=\"$_R\"; [ -f \"$_Q/scripts/mcp-server.cjs\" ] && printf '%s\\n' \"$_Q\"; done | head -n1); [ -n \"$_P\" ] || { echo 'claude-mem: mcp server not found' >&2; exit 1; }; exec node \"$_P/scripts/mcp-server.cjs\""
]
}
}
}
EOF
log "patched .mcp.json: $target"
if [ "$has_v13" -gt 0 ]; then
log "patched .mcp.json (v13.x cascade -> head -n1 race-free form): $target"
else
log "patched .mcp.json (v12.7.4 \${_R%/} -> head -n1 race-free form): $target"
fi
}

# Install the zod runtime dep if package.json declares it but it isn't
Expand Down
56 changes: 56 additions & 0 deletions specs/BUG-016-claude-mem-heal-v13-refresh/proposal.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
---
id: "BUG-016-claude-mem-heal-v13-refresh"
type: spec
status: implementing
created: "2026-05-21"
tags: [spec, proposal, claude-mem, heal, cross-os-parity]
template_version: "1.0"
---

# BUG-016-claude-mem-heal-v13-refresh

## Why

`scripts/claude-mem-heal.sh::heal_mcp_json` and `scripts/claude-mem-heal.ps1::Repair-McpJson` were authored against v12.7.4's broken `${_R%/}` pattern (PR #57, 2026-05-19). v13.0.0+ ships a different broken pattern — `sh -c` invoking a cascading-printf pipe that triggers the EPIPE race documented in [thedotmack/claude-mem#2607](https://github.com/thedotmack/claude-mem/issues/2607) and reported in this dotfiles session as `/mcp Failed to reconnect ... -32000`. Empirically, the heal silently no-oped against v13.3.0 today: the user hit the MCP failure AFTER BUG-014 (PR #75) restored the install AND BUG-015 (PR #81) shipped the detection layer. The heal needs to detect the v13.x signature AND replace the broken `.mcp.json` with a race-free form.

## What

After this PR, on every Claude Code session start (via `claude-session-start.{sh,ps1}` → `claude-mem-heal.{sh,ps1}`):

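1. The heal detects BOTH the legacy v12.7.4 `${_R%/}` literal AND the v13.x signature. The PowerShell script requires `"sh"` followed by `"args"` together with `while IFS=`; the shell script matches either `"sh".*"-c"` or `while IFS= read`.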
2. If either signature is present, the heal overwrites `.mcp.json` with a canonical race-free template: the same v13.x cascade-style resolver but piping the consumer's matches through `done | head -n1` instead of using `done` with an inner `break`. This consumes the entire producer pipe (no leftover writes → no EPIPE).
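3. Idempotent: subsequent runs see no v12 or v13 signature in the patched file → silent skip. NOTE(review): the replacement template itself still contains `${_R%/}` and `while IFS= read -r _R`, both of which the detection patterns match — confirm that a patched file is actually skipped on re-run.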

Empirical validation on user's daily-driver Windows during implementation: 3 affected `.mcp.json` files (cache 13.3.0, marketplace junction, marketplace canonical) all patched on first heal run; second run silent (no signatures left to detect).

## Out of scope

- **`hooks.json` patches.** The same pipe-race pattern is in the 6 hooks (Setup, SessionStart, UserPromptSubmit, PostToolUse, PreToolUse, Stop) but each has a different command tail (`bun-runner.js worker-service.cjs hook ...`) and 6× the substitution surface. The real fix is upstream (#2607 Option A); BUG-015 already ships the detection layer. A future BUG-017 could mirror the `head -n1` patch into the heal for hooks.json; it should be opened if upstream stays unfixed for more than 2 weeks.
- **Upstream version pinning.** We do not pin claude-mem to a specific version; the heal must work against any v12.x or v13.x install.
- **Reverting to v10.6.3 simple-form template.** That form relies exclusively on `${CLAUDE_PLUGIN_ROOT}` being set by Claude Code per-plugin context. Empirically Claude Code does NOT always set it (especially for the MCP server launch path), so the cascade form is more robust.

## Risks / open questions

- **Risk: the v13.x signature regex catches third-party `.mcp.json` files that happen to use a similar pattern.** Mitigation: detection runs only against `.mcp.json` files in claude-mem's specific install paths (cache + marketplace dirs), not vault-wide.
- **Risk: a future claude-mem v14.x ships yet another pattern.** Mitigation: documented behaviour — heal is bug-version-aware; new versions get new detection rules (BUG-NNN per new pattern, same heal file).
- **Risk: the race-free form still fails when CLAUDE_PLUGIN_ROOT is unset AND cache is empty AND marketplace dirs missing.** Mitigation: that's the genuine "claude-mem not installed" case; the trailing `[ -n "$_P" ] || { ... exit 1; }` correctly surfaces the failure as a one-line stderr (no race involved).
- **Open question: does the Claude Code MCP loader correctly expand `${CLAUDE_CONFIG_DIR:-$HOME/.claude}` and `${PLUGIN_ROOT:-}`?** Empirical answer: yes — the original v13.x form uses these same expansions, only the pipe pattern was broken. Verified by the heal-then-`/mcp` cycle on user's machine working post-patch.

## Acceptance criteria

- [ ] `scripts/claude-mem-heal.sh::heal_mcp_json` detects both v12.7.4 (`${_R%/}` literal) and v13.x signatures and replaces with the head-n1 cascade form.
- [ ] `scripts/claude-mem-heal.ps1::Repair-McpJson` does the same on Windows.
- [ ] The replacement template contains `done | head -n1` (NOT `done` with `break`) — verifiable by grep against the heal source.
- [ ] Both heal scripts reference `BUG-016` and `claude-mem#2607` in their comments for traceability.
- [ ] `tests/setup-linux.bats`: 3 new asserts — detection signature parity, head-n1 form parity, BUG-016 + #2607 reference parity.
- [ ] `Invoke-ScriptAnalyzer -Settings .PSScriptAnalyzerSettings.psd1 -Severity Error,Warning scripts/claude-mem-heal.ps1` clean.
- [ ] `bash -n scripts/claude-mem-heal.sh` clean.
- [ ] Empirical: running `claude-mem-heal.{sh,ps1}` on a machine with v13.3.0 patches all 3 `.mcp.json` files (cache + 2 marketplace paths); second run is silent (idempotent).

## References

- Vault: `10_projects/dotfiles/11-tasks.md` § BUG-016 entry.
- Predecessor: BUG-012 (PR #70, 2026-05-20) — established the heal junction-creation pattern.
- Predecessor: PR #57 — original `Repair-McpJson` authored against v12.7.4 `${_R%/}`.
- Upstream: [thedotmack/claude-mem#2607](https://github.com/thedotmack/claude-mem/issues/2607) — root cause documentation + 3 fix options. This PR applies Option A locally to the MCP server case; hooks.json case is upstream territory.
- Sibling: BUG-015 (PR #81) — detection-only layer that catches when path resolution itself fails. Complementary to BUG-016's heal-time fix.
44 changes: 44 additions & 0 deletions specs/BUG-016-claude-mem-heal-v13-refresh/tasks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
---
tags: [spec, tasks, claude-mem, heal, cross-os-parity]
created: "2026-05-21"
---

# Tasks - BUG-016-claude-mem-heal-v13-refresh

## Setup

- [x] Branch `fix/BUG-016-claude-mem-heal-v13-refresh` (off main).
- [x] Spec scaffolded via `init-spec.ps1` (vault entry exists).
- [x] Empirical confirmation that the user's v13.3.0 install on the Windows daily-driver has all 3 `.mcp.json` files in the broken v13.x cascade form; the current heal silently no-ops.

## Implementation (TDD order)

### Tests first

- [ ] `tests/setup-linux.bats`: assert both heal scripts grep for `while IFS=` (v13.x signature).
- [ ] `tests/setup-linux.bats`: assert both heal scripts contain `head -n1` in their replacement template.
- [ ] `tests/setup-linux.bats`: cross-OS parity assert for BUG-016 + claude-mem#2607 references.
- [ ] Run bats — should FAIL (red).

### Implementation

- [x] `scripts/claude-mem-heal.sh::heal_mcp_json`: extend detection to OR-of v12.7.4 + v13.x signature; replace template with race-free cascade-with-head-n1 form; log message distinguishes which version was patched.
- [x] `scripts/claude-mem-heal.ps1::Repair-McpJson`: equivalent on Windows.
- [x] Comments in both files reference BUG-016 + thedotmack/claude-mem#2607 for traceability.
- [ ] Run bats — assertions GREEN.

### Lint + cross-check

- [x] `bash -n scripts/claude-mem-heal.sh` → OK.
- [x] PowerShell AST parse on `scripts/claude-mem-heal.ps1` clean.
- [x] `Invoke-ScriptAnalyzer -Settings .PSScriptAnalyzerSettings.psd1 -Severity Error,Warning` clean.
- [x] ASCII-only check on `claude-mem-heal.ps1` (zero non-ASCII).
- [x] Empirical run on user's Windows: heal patched 3 `.mcp.json` files (cache 13.3.0 + 2 marketplace paths) on first run with exit 0; re-run silent (idempotent).

## Closing

- [x] `verification.md` filled with empirical evidence (heal log + post-patch file fragments).
- [ ] PR opened referencing `specs/BUG-016-claude-mem-heal-v13-refresh/`.
- [ ] Post-merge: archive spec to `specs/archive/`.
- [ ] Post-merge: tick vault `11-tasks.md` BUG-016 entry → ✓ with PR link.
- [ ] Post-merge: append lesson candidate to `90-lessons.md` — "heal scripts must be versioned against the upstream bug class they paper over; when upstream's bug pattern changes, the heal's detection regex MUST be refreshed in the same PR that discovers the new pattern".
81 changes: 81 additions & 0 deletions specs/BUG-016-claude-mem-heal-v13-refresh/verification.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
---
tags: [spec, verification, claude-mem, heal, cross-os-parity]
created: "2026-05-21"
---

# Verification - BUG-016-claude-mem-heal-v13-refresh

## Evidence (per acceptance criterion)

- **claude-mem-heal.sh detects + replaces v12+v13** → `scripts/claude-mem-heal.sh:66-104` (`heal_mcp_json` function). Detection via `grep -cF '${_R%/}'` OR `grep -cE '"sh".*"-c"|while IFS= read'`. Replacement template includes literal `done | head -n1`.
- **claude-mem-heal.ps1 same on Windows** → `scripts/claude-mem-heal.ps1:96-148` (`Repair-McpJson` function). Detection via `$content -match '\$\{_R%/\}'` OR (`'"sh"\s*,\s*\r?\n?\s*"args"'` AND `'while IFS='`).
- **`done | head -n1` in template** → grep both `claude-mem-heal.{sh,ps1}` for `head -n1` → present.
- **BUG-016 + #2607 references** → grep both files for `BUG-016` and `claude-mem#2607` → present.
- **3 new bats asserts** → `tests/setup-linux.bats:177-198` (BUG-016 block).

## Test status

### Pre-fix state (Windows daily-driver, 2026-05-21)

```
PS> cat ~/.claude/plugins/cache/thedotmack/claude-mem/13.3.0/.mcp.json | head -6
{
"mcpServers": {
"mcp-search": {
"type": "stdio",
"command": "sh",
"args": [

PS> Get-Content ~/.claude/plugins/cache/thedotmack/claude-mem/13.3.0/.mcp.json -Raw | Select-String -Pattern 'while IFS=' -Quiet
True # v13.x signature present

PS> pwsh -NoProfile -File ~/scripts/claude-mem-heal.ps1 -VerboseOutput | Select-String 'patched'
(nothing -- silent no-op, the bug)
```

### Post-fix empirical run

```
PS> pwsh -NoProfile -File scripts/claude-mem-heal.ps1 -VerboseOutput
[claude-mem-heal] patched .mcp.json (v13.x cascade -> head -n1 race-free form): C:\Users\Manu\.claude\plugins\cache\thedotmack\claude-mem\13.3.0\.mcp.json
[claude-mem-heal] zod present in C:\Users\Manu\.claude\plugins\cache\thedotmack\claude-mem\13.3.0
[claude-mem-heal] legacy marketplace path already present: C:\Users\Manu\.claude\plugins\marketplaces\thedotmack
[claude-mem-heal] patched .mcp.json (v13.x cascade -> head -n1 race-free form): C:\Users\Manu\.claude\plugins\marketplaces\thedotmack\plugin\.mcp.json
[claude-mem-heal] zod present in C:\Users\Manu\.claude\plugins\marketplaces\thedotmack\plugin
[claude-mem-heal] patched .mcp.json (v13.x cascade -> head -n1 race-free form): C:\Users\Manu\.claude\plugins\marketplaces\thedotmack-claude-mem\plugin\.mcp.json
[claude-mem-heal] zod present in C:\Users\Manu\.claude\plugins\marketplaces\thedotmack-claude-mem\plugin
$LASTEXITCODE = 0
```

All 3 `.mcp.json` files patched on first run. Second run silent (idempotent — no v12/v13 signature left).

### Lint results

- `bash -n scripts/claude-mem-heal.sh` → OK
- PowerShell AST `[Parser]::ParseFile` on `claude-mem-heal.ps1` → clean
- `Invoke-ScriptAnalyzer -Settings .PSScriptAnalyzerSettings.psd1 -Severity Error,Warning` → clean
- ASCII-only check on `claude-mem-heal.ps1` → zero non-ASCII chars

### Bats (post-CI)

To be confirmed after CI green.

## Decisions made during implementation

- **Cascade-with-head-n1 over v10.6.3 simple form**: the simple form (`${CLAUDE_PLUGIN_ROOT}/scripts/mcp-server.cjs`) only works when Claude Code sets `CLAUDE_PLUGIN_ROOT` per-plugin context. Empirically that's not always the case (the v13.x cascade ITSELF falls back to cache/marketplace dirs because CLAUDE_PLUGIN_ROOT is often unset for the MCP server launch). Keeping the cascade structure makes the heal robust to both states.
- **`done | head -n1` over `done` + `break`**: the upstream pattern uses `break` to exit early, leaving unconsumed producer writes that EPIPE. Replacing with `head -n1` lets the consumer drain the whole upstream pipe, then `head` takes the first match line. No leftover writes, no EPIPE.
- **NOT removing the `thedotmack/plugin` fallback path**: the BUG-012 legacy junction is still in place on user machines; removing it from the template would mean missing the BUG-012 fallback case. Both paths stay.
- **Single template for both v12 and v13 detection paths**: simpler than maintaining a v12-specific and v13-specific template. Whichever signature triggered the patch, the result is the same canonical form.

## Promotion candidates

- [x] **Lesson for `90-lessons.md`** — yes: "heal scripts must be versioned against the upstream bug class they paper over; when upstream's bug pattern changes, the heal's detection regex MUST be refreshed in the same PR that discovers the new pattern. Else the heal silently no-ops while users continue hitting the bug." Pairs with the existing BUG-012 lesson on heal walking real disk paths.
- [ ] ADR-worthy? **no** — tactical pattern refresh; ADR-007 (heal-at-session-start) covers the strategy.
- [ ] New pattern candidate? **possibly** — "incident → guard" pattern (existing) could be extended to "incident → guard → re-validate guard when upstream signature changes". Worth a discussion in `00_meta/patterns/` if BUG-017+ happens.

## Archive checklist

- [ ] `proposal.md` frontmatter set to `status: archived` (post-merge).
- [ ] Folder moved to `specs/archive/BUG-016-claude-mem-heal-v13-refresh/`.
- [ ] Vault `11-tasks.md` BUG-016 entry ticked ✓ with PR link.
- [ ] Vault `90-lessons.md` lesson appended.
Loading
Loading