diff --git a/examples/claudecode-skills-memanto/.claude-plugin/plugin.json b/examples/claudecode-skills-memanto/.claude-plugin/plugin.json new file mode 100644 index 00000000..f62200dd --- /dev/null +++ b/examples/claudecode-skills-memanto/.claude-plugin/plugin.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://code.claude.com/schemas/plugin-manifest.json", + "name": "memanto-skills", + "displayName": "Memanto Engineering Memory", + "description": "Cross-session engineering memory for Claude Code skills, powered by Memanto. Recalls past architectural decisions before a skill runs and distills new ones after — automatically, via lifecycle hooks.", + "version": "0.1.0", + "author": { + "name": "Moorcheh AI", + "url": "https://memanto.ai" + }, + "homepage": "https://memanto.ai", + "repository": "https://github.com/moorcheh-ai/memanto", + "license": "MIT", + "keywords": ["memanto", "memory", "skills", "hooks", "claude-code", "mattpocock"], + "skills": [ + "./skills/memanto-companion" + ] +} diff --git a/examples/claudecode-skills-memanto/.env.example b/examples/claudecode-skills-memanto/.env.example new file mode 100644 index 00000000..378737f6 --- /dev/null +++ b/examples/claudecode-skills-memanto/.env.example @@ -0,0 +1,16 @@ +# Moorcheh API key — create one at https://console.moorcheh.ai/api-keys +# Free tier: 100K ops/month. +MOORCHEH_API_KEY=mch_xxxxxxxxxxxxxxxxxxxxxxxx + +# Stable per-developer (or per-project) memory namespace. All skills in this +# repo share this agent, so context flows across every skill invocation. +MEMANTO_AGENT_ID=skills-dev-profile + +# How many memories to inject before a skill runs (default 8). +MEMANTO_RECALL_LIMIT=8 + +# Optional floor on Memanto's information-theoretic retrieval (ITS) score. +# Leave unset: Memanto already returns only relevant results, and ITS scores +# live on a small, non-cosine scale (top hits are often ~0.1-0.2), so a naive +# 0-1 floor would discard everything. Only set this if you know the scale. 
+# MEMANTO_MIN_SIMILARITY= diff --git a/examples/claudecode-skills-memanto/.gitignore b/examples/claudecode-skills-memanto/.gitignore new file mode 100644 index 00000000..7266bacc --- /dev/null +++ b/examples/claudecode-skills-memanto/.gitignore @@ -0,0 +1,11 @@ +.env +__pycache__/ +*.pyc +.pytest_cache/ +.ruff_cache/ +.mypy_cache/ +*.egg-info/ +dist/ +build/ +.venv/ +venv/ diff --git a/examples/claudecode-skills-memanto/README.md b/examples/claudecode-skills-memanto/README.md new file mode 100644 index 00000000..7f719189 --- /dev/null +++ b/examples/claudecode-skills-memanto/README.md @@ -0,0 +1,203 @@ +# Claude Code Skills × Memanto — Cross-Session Engineering Memory + +> Your AI coding agent finally remembers your architecture. `/grill-with-docs` +> decides "Cart ≠ Order, use CQRS" in one terminal — and `/tdd` honours it in +> the next, in a fresh session, with **zero repeated instructions**. + +This example makes [Memanto](https://memanto.ai) a **global, active memory +companion** across [`mattpocock/skills`](https://github.com/mattpocock/skills) +executions. It solves the *context fragmentation* problem: skills like `/tdd`, +`/diagnose`, `/grill-with-docs`, and `/handoff` each run cold, so architectural +choices, codebase quirks, and coding preferences vanish when a session ends. + +Closes [#508](https://github.com/moorcheh-ai/memanto/issues/508). + +--- + +## How it works — three real lifecycle hooks + +Memory is wired into the **Claude Code hook lifecycle**, not bolted onto forked +skills. The hooks fire on the *real, unmodified* mattpocock skills — nothing to +remember to invoke, nothing to copy-paste. 
+ +![How it works](./assets/how-it-works-three-real-lifecycle-hooks.png) + +### Component architecture + +![Component architecture](./assets/component-architecture.png) + +### Mapping to the bounty's implementation guidelines + +| Guideline | Where | What it does | +|---|---|---| +| **Global Memory Hook** | `install.py` → `.claude/settings.json` | Registers `SessionStart`, `UserPromptSubmit`, `Stop` against the Memanto-backed scripts in `hooks/`. One command, idempotent, backs up your settings, and preserves any hooks you already had — even inside shared entries. | +| **Active Extraction** | `hooks/on_stop.py` → `SkillMemory.distill_and_store` | Hands the session summary to **Memanto's backend LLM** (`answer()`), which distills durable decisions/rules/preferences into Memanto's typed memory categories and persists them. Guards against `stop_hook_active` re-fires so a session is never distilled twice. | +| **Dynamic Injection** | `hooks/on_prompt.py` → `SkillMemory.recall_for_skill` | Detects the invoked skill (path-safe: `/usr/local/bin` is not a skill), recalls the memories most relevant to it, and injects them as a concise `` system-constraint block. | + +> **LLM-powered, not regex.** Extraction leads with Memanto's backend LLM (the +> bounty's "backend LLM access to actively listen"), and falls back to a +> conservative heuristic only if the LLM path is unavailable — so a hook never +> silently no-ops. + +--- + +## Quick start + +```bash +cd examples/claudecode-skills-memanto +python -m venv .venv && source .venv/bin/activate # Windows: .venv\Scripts\activate + +pip install -e ".[dev]" # installs package + dev tools (pytest, ruff, mypy) + +cp .env.example .env # then add your key, or just export it: +export MOORCHEH_API_KEY=mch_xxxxxxxxxxxx # https://console.moorcheh.ai/api-keys + +python install.py # register the hooks in ./.claude/settings.json +# (use --global to install into ~/.claude/settings.json for every project) +``` + +That's it. 
Now use the real mattpocock skills as you always do — `/tdd`, +`/grill-with-docs`, `/diagnose`, `/handoff`. Memory accrues and replays +automatically. + +Verify the setup at any time: + +```bash +memanto-skills doctor +``` + +--- + +## See it work (no Claude Code required) + +Three scripts prove cross-session, cross-skill persistence across **completely +separate processes** with no shared in-memory state: + +| Script | Simulates | Proves | +|---|---|---| +| `demo_session_1.py` | `/grill-with-docs` architecture session | Active extraction: LLM distills 4 decisions into Memanto | +| `demo_session_2.py` | Fresh `/tdd` session on same codebase | Dynamic injection: all 4 decisions recalled, zero re-prompting | +| `demo_session_3.py` | `/handoff` + fresh `/grill-with-docs` | Multi-skill accrual: profile grows across unrelated skills | + +```bash +python demo_session_1.py # /grill-with-docs → CQRS, Cart≠Order, Postgres+Redis, Money VO +python demo_session_2.py # fresh /tdd → recalls all 4 decisions, zero re-prompting +python demo_session_3.py # /handoff → TypeScript migration, Result, domain isolation + # then /grill-with-docs sees ALL memories from all sessions +``` + +`demo_session_2.py` prints the exact context block the `UserPromptSubmit` hook +injects before `/tdd` runs: + +```text + +Relevant engineering memory for /tdd (carried over from previous skill +sessions — honour it, do not re-ask the user): + +Rules (always honour): + - Cart and Order are distinct domain concepts. A Cart is mutable and + pre-purchase; an Order is immutable once placed. … + - Money values must always be represented using a Money value object. … + +Decisions made: + - The Orders service uses CQRS: commands and queries are strictly separated… + - The Orders service write side is backed by Postgres; the read-model cache + is backed by Redis. 
+ +``` + +--- + +## Manual control — the `memanto-skills` CLI + +The hooks are automatic; the CLI (and the `/memanto-companion` skill) is the +manual surface. + +```bash +memanto-skills profile # show the accumulated engineering profile +memanto-skills recall tdd --hint "auth flow" # preview what /tdd would receive +memanto-skills store tdd "We standardised on Vitest + AAA structure." +memanto-skills install [--global] # (re)install hooks +memanto-skills uninstall [--global] # remove hooks (yours stay untouched) +memanto-skills doctor # config + connectivity + skill routes +``` + +--- + +## Configuration + +| Env var | Default | Meaning | +|---|---|---| +| `MOORCHEH_API_KEY` | *(required)* | Your Moorcheh key. Free tier: 100K ops/month. | +| `MEMANTO_AGENT_ID` | `skills-dev-profile` | The shared memory namespace. Use a stable per-developer or per-project id. | +| `MEMANTO_RECALL_LIMIT` | `8` | How many memories to inject before a skill. | +| `MEMANTO_MIN_SIMILARITY` | *(unset — no floor)* | Optional floor on Memanto's ITS retrieval score. Leave unset: ITS scores live on a small, non-cosine scale (top hits ≈ 0.1–0.2), and Memanto already returns relevant results only. 
| + +--- + +## Repository layout + +```text +claudecode-skills-memanto/ +├── install.py # one-command, idempotent hook installer (+ --uninstall) +├── .claude-plugin/plugin.json # Claude Code plugin manifest (ships /memanto-companion) +├── memanto_skills/ # the installable package +│ ├── client.py # SkillMemory: setup / recall_for_skill / distill_and_store +│ ├── extractor.py # backend-LLM distillation (+ heuristic fallback) +│ ├── profile.py # MemoryProfile -> injectable block +│ ├── skill_map.py # per-skill recall routing +│ ├── config.py # env-driven config +│ ├── installer.py # settings.json patching (preserves your hooks) +│ └── cli.py # `memanto-skills` +├── hooks/ # the three lifecycle hook entry points +│ ├── session_start.py # SessionStart -> profile briefing +│ ├── on_prompt.py # UserPromptSubmit -> recall + inject +│ ├── on_stop.py # Stop (async) -> distill + store +│ └── _common.py # exit-0 contract, skill detection, transcript reading +├── skills/memanto-companion/ # SKILL.md for manual inspect/recall/store +├── demo_session_1.py # /grill-with-docs → stores 4 architectural decisions +├── demo_session_2.py # fresh /tdd → recalls them all, zero re-prompting +├── demo_session_3.py # /handoff → adds more; /grill-with-docs sees all of it +└── tests/ # 56 unit tests, fully mocked (no network, no key) +``` + +### Design principles + +- **Zero-overhead & fail-safe.** Hooks never block the editor. The exit-0 + contract is enforced in exactly one place (`hooks/_common.py:run`), so any + internal failure — no key, network down, malformed transcript — degrades + silently instead of surfacing editor errors. +- **One network call per hook.** Session activation is attempted first and + agent creation only happens on the first ever run, keeping the prompt-path + latency minimal. +- **Skill-aware recall.** `/tdd` pulls testing conventions; `/grill-with-docs` + pulls architecture and domain terminology — see `skill_map.py`. 
Unknown or + custom skills fall back to a generic engineering-profile route. +- **Typed memory, sourced from the SDK.** Memory types and input limits are + imported from the `memanto` package itself, so this example can never drift + from the platform's schema. +- **Respectful install.** Re-running `install.py` replaces only our own hook + commands (matched by path), preserves your hooks even when they share an + entry with ours, and backs up your settings before any write. + +--- + +## Run the tests + +```bash +pytest # 56 tests, mocked SDK — no API key needed +ruff check . +``` + +--- + +## Built on + +- [Memanto](https://github.com/moorcheh-ai/memanto) — typed semantic memory with + information-theoretic retrieval (`remember` / `recall` / `answer`). +- [mattpocock/skills](https://github.com/mattpocock/skills) — sharp, + single-purpose Claude Code skills. +- [Claude Code hooks](https://code.claude.com/docs/en/hooks) — the lifecycle + events this layer plugs into. + +MIT licensed. diff --git a/examples/claudecode-skills-memanto/assets/component-architecture.png b/examples/claudecode-skills-memanto/assets/component-architecture.png new file mode 100644 index 00000000..4d8cea1a Binary files /dev/null and b/examples/claudecode-skills-memanto/assets/component-architecture.png differ diff --git a/examples/claudecode-skills-memanto/assets/how-it-works-three-real-lifecycle-hooks.png b/examples/claudecode-skills-memanto/assets/how-it-works-three-real-lifecycle-hooks.png new file mode 100644 index 00000000..2a9dab82 Binary files /dev/null and b/examples/claudecode-skills-memanto/assets/how-it-works-three-real-lifecycle-hooks.png differ diff --git a/examples/claudecode-skills-memanto/demo_session_1.py b/examples/claudecode-skills-memanto/demo_session_1.py new file mode 100644 index 00000000..5ab07afe --- /dev/null +++ b/examples/claudecode-skills-memanto/demo_session_1.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +"""Demo — Session 1: a developer makes engineering decisions 
def main() -> None:
    """Distill the simulated /grill-with-docs session and report the result.

    Builds a ``SkillMemory``, calls ``setup()``, then passes
    ``SESSION_1_TRANSCRIPT`` to ``distill_and_store`` under the
    ``"grill-with-docs"`` skill name. Every returned memory is printed as
    ``[type] content``; a falsy result prints a troubleshooting hint instead.
    """
    memory = SkillMemory()
    memory.setup()
    print("Session 1: distilling /grill-with-docs decisions via Memanto's LLM…\n")

    memories = memory.distill_and_store("grill-with-docs", SESSION_1_TRANSCRIPT)
    if memories:
        print(f"Stored {len(memories)} engineering memories:")
        for item in memories:
            print(f" - [{item['type']}] {item['content']}")
        print("\nNow run: python demo_session_2.py")
    else:
        # Nothing came back: point the user at the two usual suspects.
        print("No memories were extracted. Check MOORCHEH_API_KEY and connectivity.")
+ ) + + +if __name__ == "__main__": + try: + main() + except Exception as exc: + print(f"\n[error] {exc}") + print("Check that MOORCHEH_API_KEY is valid and your subscription is active.") + raise SystemExit(1) diff --git a/examples/claudecode-skills-memanto/demo_session_3.py b/examples/claudecode-skills-memanto/demo_session_3.py new file mode 100644 index 00000000..65480e16 --- /dev/null +++ b/examples/claudecode-skills-memanto/demo_session_3.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +"""Demo — Session 3: multi-skill memory accrual across three separate processes. + +Shows that memory is NOT limited to a single skill pair. This script: + + 1. Stores a new batch of decisions as if they came from a ``/handoff`` session + (TypeScript migration, error-handling conventions, team norms). + 2. In the same process, immediately shows what a fresh ``/grill-with-docs`` + session would receive — memories accumulated from ALL three sessions. + +Run AFTER demo_session_1.py and demo_session_2.py: + + python demo_session_3.py +""" + +from __future__ import annotations + +from memanto_skills import SkillMemory + +SESSION_3_TRANSCRIPT = """ +user: /handoff I'm handing the codebase to a new engineer. Here's what they must know. +assistant: I'll capture the key decisions and conventions. +user: We are migrating the entire frontend from JavaScript to TypeScript. All new files + must be .ts or .tsx. No new .js files should be created — ever. +assistant: TypeScript-only policy noted. +user: For error handling: we use a Result pattern, not try/catch at the + application layer. Only infrastructure code (DB, HTTP adapters) uses try/catch. +assistant: Result for application errors, noted. +user: The team uses conventional commits: feat:, fix:, chore:, docs:. Squash merges only. + No merge commits into main. +assistant: Commit and merge conventions noted. +user: One more: the domain layer must have zero runtime dependencies on frameworks. 
def main() -> None:
    """Store the simulated /handoff decisions, then preview /grill-with-docs recall.

    Step 1 distills ``SESSION_3_TRANSCRIPT`` under the ``"handoff"`` skill and
    prints each stored memory (content truncated to 90 characters). Step 2
    recalls memories for ``"grill-with-docs"`` and prints the formatted context
    block, or a hint when the block is empty. Returns early when step 1 stores
    nothing.
    """
    memory = SkillMemory()
    memory.setup()

    # --- Step 1: store /handoff decisions ---
    print("Session 3a: distilling /handoff decisions into Memanto…\n")
    memories = memory.distill_and_store("handoff", SESSION_3_TRANSCRIPT)
    if not memories:
        print("Nothing extracted. Check MOORCHEH_API_KEY and connectivity.")
        return
    print(f"Stored {len(memories)} engineering memories from /handoff:")
    for item in memories:
        print(f" - [{item['type']}] {item['content'][:90]}")

    # --- Step 2: show accumulated cross-skill recall for /grill-with-docs ---
    print(
        "\nSession 3b: fresh /grill-with-docs session — what it inherits "
        "from ALL previous sessions:\n"
    )
    recalled = memory.recall_for_skill(
        "grill-with-docs",
        task_hint="architecture review for the Orders and frontend services",
    )
    context = recalled.format_context_block(skill_name="grill-with-docs")

    if not context:
        print("No memories recalled. Run demo_session_1.py first.")
        return
    print(context)
    print(
        "\n✅ Multi-skill memory accrual works:\n"
        " /grill-with-docs sees CQRS/Postgres/Redis (from /grill-with-docs session 1)\n"
        " AND TypeScript migration, Result, domain isolation (from /handoff).\n"
        " Three separate processes, one growing Engineering Profile."
    )
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))


def run(main: Callable[[], int]) -> None:
    """Execute a hook entry point under the never-break-Claude contract.

    Calls ``main`` and exits the process with its return value. Any
    ``Exception`` raised by ``main`` is swallowed and converted to exit code 0,
    so an internal failure never surfaces as a hook error. Every hook's
    ``__main__`` block goes through here so no individual hook can forget the
    contract.

    Args:
        main: Zero-argument hook entry point returning the exit code.

    Raises:
        SystemExit: Always; carries ``main``'s return value, or 0 on failure.
    """
    try:
        code = main()
    except Exception:
        # Deliberate best-effort boundary: a crashing memory hook must not
        # be reported to the editor as a failed hook.
        code = 0
    raise SystemExit(code)


def read_hook_input() -> dict[str, Any]:
    """Parse the hook's stdin JSON.

    Returns:
        The decoded object when stdin holds a JSON object; ``{}`` when stdin
        is unreadable, blank, malformed, or holds a non-object JSON value.
    """
    try:
        raw = sys.stdin.read()
    except Exception:
        return {}
    if not raw or not raw.strip():
        return {}
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    return data if isinstance(data, dict) else {}


def emit_additional_context(event_name: str, context: str) -> None:
    """Print the JSON that injects ``context`` for Claude to read.

    Writes one line to stdout in the shape
    ``{"hookSpecificOutput": {"hookEventName": ..., "additionalContext": ...}}``.
    Nothing is written when ``context`` is empty.

    Args:
        event_name: Hook event the output belongs to.
        context: Text to inject; a falsy value makes this a no-op.
    """
    if not context:
        return
    payload = {
        "hookSpecificOutput": {
            "hookEventName": event_name,
            "additionalContext": context,
        }
    }
    json.dump(payload, sys.stdout)
    sys.stdout.write("\n")


# Matches a skill invocation like "/tdd" or "/grill-with-docs". The trailing
# negative lookahead rejects path-like tokens ("/usr/local/bin" is not a skill).
_SKILL_RE = re.compile(r"(?:^|\s)/([a-z][a-z0-9-]+)\b(?!/)", re.IGNORECASE)


def detect_skill(text: str | None) -> str | None:
    """Extract the first ``/skill`` token from ``text``.

    Args:
        text: Prompt or transcript text. ``None``, empty strings, and
            non-string values (possible when the value comes straight from
            decoded JSON) yield ``None`` instead of raising.

    Returns:
        The lower-cased skill name without the leading slash, or ``None``.
    """
    if not text or not isinstance(text, str):
        return None
    match = _SKILL_RE.search(text)
    return match.group(1).lower() if match else None


def memory_enabled() -> bool:
    """Cheap hot-path gate: is a non-blank ``MOORCHEH_API_KEY`` set?

    Only the environment is consulted, so hooks can no-op without importing
    ``memanto_skills``. ``get_memory`` remains the authoritative check: it
    returns ``None`` whenever construction fails.
    """
    return bool((os.environ.get("MOORCHEH_API_KEY") or "").strip())


def get_memory() -> Any:
    """Construct a ``SkillMemory``, or return ``None`` if import/config fails."""
    try:
        # Imported lazily so the no-key path never pays for this import.
        from memanto_skills import SkillMemory

        return SkillMemory()
    except Exception:
        # Best-effort by design: callers treat None as "memory unavailable".
        return None


# --------------------------------------------------------------------------- #
# Transcript reading (schema-tolerant)
# --------------------------------------------------------------------------- #


def read_transcript_text(
    transcript_path: str | None,
    max_messages: int = 40,
    max_chars: int = 8000,
) -> str:
    """Return a plain-text rendering of the most recent transcript messages.

    The transcript is read as JSONL (one JSON object per line). No fixed
    schema is assumed: text is taken from string content or from content
    blocks carrying a ``text`` field, prefixed with the role when one is
    found.

    Args:
        transcript_path: Path to the JSONL transcript; falsy yields ``""``.
        max_messages: Keep at most this many trailing messages.
        max_chars: Keep at most this many trailing characters.

    Returns:
        The rendered tail, or ``""`` when nothing could be read or either
        limit is not positive.
    """
    _, rendered = _read_transcript_full(
        transcript_path, max_messages=max_messages, max_chars=max_chars
    )
    return rendered


def read_transcript_for_distillation(
    transcript_path: str | None,
    max_messages: int = 40,
    max_chars: int = 8000,
) -> tuple[str | None, str]:
    """Return ``(skill, tail_text)`` from a single pass over the transcript.

    The skill is the first ``/skill`` token found anywhere in the transcript,
    while the text is truncated to the tail. Scanning the whole file matters
    because the invoking prompt usually sits at the start of the conversation
    and would otherwise fall outside the truncation window.

    Args:
        transcript_path: Path to the JSONL transcript; falsy yields
            ``(None, "")``.
        max_messages: Keep at most this many trailing messages.
        max_chars: Keep at most this many trailing characters.
    """
    return _read_transcript_full(
        transcript_path, max_messages=max_messages, max_chars=max_chars
    )


def _read_transcript_full(
    transcript_path: str | None,
    max_messages: int,
    max_chars: int,
) -> tuple[str | None, str]:
    """Read the transcript once and return (first-skill-seen, tail-text).

    Blank lines, lines that are not valid JSON, and entries without
    extractable text are skipped. An unreadable or missing file yields
    ``(None, "")``.
    """
    if not transcript_path:
        return None, ""

    # EAFP: open directly instead of exists()-then-open, which is racy and
    # costs an extra stat. ValueError covers undecodable bytes and embedded
    # NULs; TypeError covers a non-path value coming from the hook payload.
    try:
        with Path(transcript_path).open(encoding="utf-8") as fh:
            lines = fh.readlines()
    except (OSError, ValueError, TypeError):
        return None, ""

    pieces: list[str] = []
    skill: str | None = None
    for raw_line in lines:
        stripped = raw_line.strip()
        if not stripped:
            continue
        try:
            entry = json.loads(stripped)
        except json.JSONDecodeError:
            continue
        role, text = _extract_role_text(entry)
        if not text:
            continue
        # First skill mention wins — typically the opening user prompt.
        if skill is None:
            skill = detect_skill(text)
        pieces.append(f"{role}: {text}" if role else text)

    # A non-positive limit means "keep nothing". Slicing with -0 would keep
    # everything (x[-0:] == x[0:]) and a negative limit would drop the head
    # rather than the tail, so handle both explicitly.
    if max_messages <= 0 or max_chars <= 0:
        return skill, ""

    rendered = "\n".join(pieces[-max_messages:])
    return skill, rendered[-max_chars:]


def _extract_role_text(entry: Any) -> tuple[str | None, str]:
    """Best-effort (role, text) extraction from one transcript entry.

    Looks for a nested ``message`` object first and falls back to the entry
    itself. The role is taken from ``message["role"]``, then ``entry["role"]``,
    then ``entry["type"]``. Non-dict entries yield ``(None, "")``.
    """
    if not isinstance(entry, dict):
        return None, ""

    message = entry.get("message", entry)
    if isinstance(message, dict):
        role = message.get("role") or entry.get("role") or entry.get("type")
        content = message.get("content")
    else:
        # "message" exists but is not an object: read content off the entry.
        role = None
        content = entry.get("content")

    return role, _flatten_content(content)


def _flatten_content(content: Any) -> str:
    """Collapse message content into one stripped string.

    Strings are returned stripped. Lists contribute their string items plus
    the ``text`` of dict blocks whose ``type`` is missing or ``"text"``; other
    block types (e.g. tool blocks) are skipped to keep the summary focused on
    prose. Any other value yields ``""``.
    """
    if isinstance(content, str):
        return content.strip()
    if not isinstance(content, list):
        return ""

    parts: list[str] = []
    for block in content:
        if isinstance(block, str):
            parts.append(block)
        elif (
            isinstance(block, dict)
            and block.get("type") in (None, "text")
            and block.get("text")
        ):
            parts.append(str(block["text"]))
    return " ".join(s.strip() for s in parts if s.strip())
def main() -> int:
    """Inject skill-specific memories for a ``UserPromptSubmit`` event.

    Reads the hook payload from stdin and looks for a ``/skill`` token in its
    ``prompt`` field. When one is found and a memory client can be built, the
    memories recalled for that skill are emitted as ``additionalContext``.
    Prompts without a skill token are left untouched so ordinary turns are not
    padded with context.

    Returns:
        Always 0.
    """
    if not memory_enabled():
        return 0

    prompt = read_hook_input().get("prompt", "") or ""
    skill = detect_skill(prompt)

    # Only build the memory client when a skill was actually invoked.
    memory = get_memory() if skill else None
    if memory is not None:
        recalled = memory.recall_for_skill(skill, task_hint=prompt)
        context = recalled.format_context_block(skill_name=skill)
        emit_additional_context(EVENT, context)
    return 0
def main() -> int:
    """Distill the finished session's transcript into stored memories.

    Does nothing when no API key is configured, when the payload carries a
    truthy ``stop_hook_active`` flag (a re-fire of the Stop hook, which would
    otherwise store the same session twice), when the transcript renders to
    empty text, or when no memory client can be built.

    Returns:
        Always 0.
    """
    if not memory_enabled():
        return 0

    payload = read_hook_input()
    if not payload.get("stop_hook_active"):
        # One pass over the transcript: the skill is searched for across the
        # whole file, while only the tail text is handed on for distillation.
        skill, transcript = read_transcript_for_distillation(
            payload.get("transcript_path")
        )
        memory = get_memory() if transcript else None
        if memory is not None:
            memory.distill_and_store(skill, transcript)
    return 0
def main() -> int:
    """Emit the stored profile block as ``SessionStart`` context.

    Skips silently when ``memory_enabled()`` is false or ``get_memory()``
    returns ``None``; otherwise passes ``profile_block()`` to
    ``emit_additional_context`` under the ``EVENT`` name.

    Returns:
        Always 0.
    """
    # The cheap env gate runs first so the memory client is only built when
    # a key is present.
    memory = get_memory() if memory_enabled() else None
    if memory is not None:
        emit_additional_context(EVENT, memory.profile_block())
    return 0
def main() -> int:
    """Parse the command line and install (or remove) the lifecycle hooks.

    Returns the exit code produced by the installer function that ran.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--global",
        dest="global_scope",
        action="store_true",
        help="Use ~/.claude/settings.json instead of ./.claude/settings.json.",
    )
    cli.add_argument(
        "--uninstall",
        action="store_true",
        help="Remove the hooks instead of installing them.",
    )
    options = cli.parse_args()

    # Both installer entry points share the same keyword-only call shape.
    action = uninstall_hooks if options.uninstall else install_hooks
    return action(global_scope=options.global_scope)
def main(argv: list[str] | None = None) -> int:
    """Entry point for the ``memanto-skills`` command line.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The exit code of the selected subcommand. Configuration errors yield
        2; any other failure of a memory command yields 1 and is printed as a
        single ``error:`` line on stderr rather than a traceback.
    """
    parser = argparse.ArgumentParser(
        prog="memanto-skills",
        description="Cross-session engineering memory for Claude Code skills.",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    recall = commands.add_parser("recall", help="Print injectable context for a skill.")
    recall.add_argument("skill", help="Skill name, e.g. tdd, grill-with-docs.")
    recall.add_argument("--hint", default=None, help="Current task hint.")

    store = commands.add_parser("store", help="Distill a session summary into memory.")
    store.add_argument("skill", help="Skill name the summary came from.")
    store.add_argument("summary", help="Free-text session summary.")

    commands.add_parser("profile", help="Show the accumulated engineering profile.")
    commands.add_parser("doctor", help="Check configuration and connectivity.")

    # install / uninstall take the same --global switch; only help text differs.
    scoped_commands = (
        (
            "install",
            "Register Claude Code lifecycle hooks.",
            "Install into ~/.claude/settings.json instead of ./.claude/settings.json.",
        ),
        (
            "uninstall",
            "Remove the lifecycle hooks.",
            "Uninstall from ~/.claude/settings.json instead of ./.claude/settings.json.",
        ),
    )
    for command, summary, flag_help in scoped_commands:
        commands.add_parser(command, help=summary).add_argument(
            "--global",
            dest="global_scope",
            action="store_true",
            help=flag_help,
        )

    args = parser.parse_args(argv)
    selected = args.command

    # Hook management is imported lazily and needs no API key.
    hook_action = None
    if selected == "install":
        from .installer import install_hooks as hook_action
    elif selected == "uninstall":
        from .installer import uninstall_hooks as hook_action
    if hook_action is not None:
        return hook_action(global_scope=args.global_scope)

    if selected == "doctor":
        return _doctor()

    try:
        memory = SkillMemory()
    except ConfigError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 2

    try:
        return _run_memory_command(memory, args)
    except Exception as exc:  # top-level boundary: report cleanly, no traceback
        print(f"error: {exc}", file=sys.stderr)
        return 1


def _run_memory_command(mem: SkillMemory, args: argparse.Namespace) -> int:
    """Run ``recall``, ``store`` or ``profile`` against ``mem``.

    Returns 0 when the command ran and 1 for an unrecognised command.
    """
    command = args.command

    if command == "profile":
        print(mem.profile_block() or "(profile is empty)")
        return 0

    if command == "recall":
        recalled = mem.recall_for_skill(args.skill, task_hint=args.hint)
        rendered = recalled.format_context_block(skill_name=args.skill)
        if not rendered:
            # Keep stdout empty so callers piping the block get nothing.
            print("(no relevant memories yet)", file=sys.stderr)
        else:
            print(rendered)
        return 0

    if command == "store":
        saved = mem.distill_and_store(args.skill, args.summary)
        print(f"stored {len(saved)} memory(ies) from /{args.skill}")
        for item in saved:
            kind = item.get("type") or "memory"
            heading = item.get("title") or (item.get("content") or "")[:80] or "(no title)"
            print(f" - [{kind}] {heading}")
        return 0

    return 1


def _doctor() -> int:
    """Print a configuration report and probe connectivity.

    Returns 2 when configuration is invalid, 1 when the connectivity probe
    fails, and 0 when everything checks out.
    """
    try:
        cfg = SkillsConfig.from_env()
    except ConfigError as exc:
        print(f"✗ config: {exc}", file=sys.stderr)
        return 2

    report = (
        f"✓ MOORCHEH_API_KEY set (…{cfg.api_key[-4:]})",
        f"✓ agent_id: {cfg.agent_id}",
        f"✓ recall_limit: {cfg.recall_limit}, min_similarity: {cfg.min_similarity}",
        f"✓ curated skill routes: {', '.join(known_skills())} (others use a generic route)",
    )
    for line in report:
        print(line)

    try:
        SkillMemory(cfg).setup()
        print("✓ connected to Memanto and session active")
    except Exception as exc:
        print(f"✗ connectivity: {exc}", file=sys.stderr)
        return 1
    return 0
+ +Wraps Memanto's ``SdkClient`` with the three operations the bounty calls for: + +* ``recall_for_skill`` — dynamic injection: pull memories relevant to the skill + being invoked and render an injectable context block. +* ``distill_and_store`` — active extraction: use the backend LLM to distill a + finished session into typed memories, then persist them. +* ``profile_block`` — the accumulated Engineering Profile (for SessionStart). + +Agent + session lifecycle is idempotent and lazy: ``setup`` is safe to call on +every hook invocation, mirroring the pattern used by the official MCP and +LangGraph integrations in this repo. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from memanto.app.utils.errors import AgentAlreadyExistsError, AgentNotFoundError +from memanto.cli.client.sdk_client import SdkClient + +from . import extractor +from .config import SkillsConfig +from .profile import MemoryProfile +from .skill_map import normalize_skill, route_for + +logger = logging.getLogger(__name__) + +# Stamped onto every memory this layer writes, so skill memories are filterable +# and separable from memories written by other Memanto integrations. +SOURCE_TAG = "claudecode-skills-memanto" + +# Memanto session lifetime requested on activation. Sessions are JWT-backed and +# auto-renewed by the SDK when nearing expiry, so the exact value is not load-bearing. +_SESSION_HOURS = 6 + + +class SkillMemory: + """Drop-in cross-session memory companion for Claude Code skills.""" + + def __init__( + self, + config: SkillsConfig | None = None, + client: SdkClient | None = None, + ) -> None: + self.config = config or SkillsConfig.from_env() + # ``client`` injection keeps this unit-testable without a network. 
+ self._sdk = client or SdkClient(api_key=self.config.api_key) + self._ready = False + + # ------------------------------------------------------------------ # + # Lifecycle (idempotent, lazy) + # ------------------------------------------------------------------ # + + def setup(self) -> None: + """Ensure the agent exists and a session is active. Safe to repeat. + + Activation is attempted first: each hook runs in a fresh process, and + for the common case (the agent already exists) this costs one network + call instead of two. Creation only happens on AgentNotFoundError. + + Exceptions propagate to the caller. Hook entry points are wrapped in + ``_common.run()``, which catches all exceptions and exits 0, so a + failed setup never blocks Claude Code. Demo scripts and the CLI surface + the real error directly. + """ + if self._ready: + return + agent_id = self.config.agent_id + try: + self._sdk.activate_agent(agent_id, duration_hours=_SESSION_HOURS) + except AgentNotFoundError: + self._create_and_activate(agent_id) + self._ready = True + + def _create_and_activate(self, agent_id: str) -> None: + """First-run path: create the agent, then activate a session for it.""" + try: + self._sdk.create_agent(agent_id=agent_id, pattern="tool") + except AgentAlreadyExistsError: + pass # concurrent hook created it between our activate and create + self._sdk.activate_agent(agent_id, duration_hours=_SESSION_HOURS) + + # ------------------------------------------------------------------ # + # Dynamic injection (UserPromptSubmit / skill start) + # ------------------------------------------------------------------ # + + def recall_for_skill( + self, + skill_name: str | None, + task_hint: str | None = None, + ) -> MemoryProfile: + """Recall memories relevant to ``skill_name`` (+ optional task hint).""" + self.setup() + route = route_for(skill_name) + query = route.query + if task_hint: + query = f"{query}; current task: {task_hint.strip()}" + + # We deliberately do NOT pass a hard 
``type`` filter here. Live testing + # showed that combining a type filter with Memanto's semantic threshold + # over-constrains and can return nothing even when matching-typed + # memories exist. The skill-specific ``query`` already biases retrieval + # toward the right memories, and Memanto returns relevant results only. + result = self._sdk.recall( + agent_id=self.config.agent_id, + query=query, + limit=self.config.recall_limit, + ) + return MemoryProfile.from_recall(result, self.config.min_similarity) + + def profile_block(self, limit: int | None = None) -> str: + """Render the most recent slice of the Engineering Profile. + + Used by the SessionStart hook to brief Claude once per session. + """ + self.setup() + result = self._sdk.recall_recent( + agent_id=self.config.agent_id, + limit=limit or self.config.recall_limit, + ) + return MemoryProfile.from_recall(result).format_context_block() + + # ------------------------------------------------------------------ # + # Active extraction (Stop / skill complete) + # ------------------------------------------------------------------ # + + def distill_and_store( + self, + skill_name: str | None, + summary: str, + ) -> list[dict[str, Any]]: + """Distill a finished session into memories and persist them. + + Leads with the backend LLM (``answer``); falls back to a conservative + heuristic only if the LLM yields nothing parseable. Returns only the + memories that were actually persisted — items that failed to write are + omitted so the return value matches reality. 
+ """ + summary = (summary or "").strip() + if not summary: + return [] + self.setup() + + memories = self._llm_extract(skill_name, summary) + if not memories: + logger.debug( + "LLM extraction yielded nothing for skill=%s; using heuristic fallback", + skill_name, + ) + memories = extractor.heuristic_memories(summary) + if not memories: + return [] + + normalized = normalize_skill(skill_name) + route = route_for(skill_name) + skill_tag = f"skill:{normalized}" if normalized else "skill:unknown" + for mem in memories: + mem["tags"] = sorted({*route.tags, skill_tag, SOURCE_TAG}) + + return self._persist(memories) + + # ------------------------------------------------------------------ # + # Internals + # ------------------------------------------------------------------ # + + def _llm_extract( + self, skill_name: str | None, summary: str + ) -> list[dict[str, Any]]: + """Run backend-LLM distillation. Returns [] on any failure.""" + question = extractor.build_extraction_question(skill_name, summary) + try: + result = self._sdk.answer( + agent_id=self.config.agent_id, + question=question, + header_prompt=extractor.EXTRACTION_HEADER, + footer_prompt=extractor.EXTRACTION_FOOTER, + temperature=0.0, + ) + except Exception as exc: + logger.debug("LLM extraction call failed: %s", exc) + return [] + return extractor.parse_llm_memories(result.get("answer", "")) + + def _persist(self, memories: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Store memories and return only those that were actually persisted. + + Tries ``batch_remember`` first; on failure (or partial success), falls + back to per-memory ``remember`` calls. Any item whose write raised is + excluded from the returned list so callers can trust the count and the + ``stored 0 memory(ies)`` output is honest rather than optimistic. 
+ """ + payload = [ + { + "type": m["type"], + "title": m["title"], + "content": m["content"], + "confidence": m.get("confidence", 0.85), + "tags": m.get("tags", []), + "source": SOURCE_TAG, + "provenance": "inferred", + } + for m in memories + ] + try: + self._sdk.batch_remember( + agent_id=self.config.agent_id, memories=payload + ) + return list(memories) + except Exception as exc: + logger.debug("batch_remember failed, falling back to remember: %s", exc) + + persisted: list[dict[str, Any]] = [] + for original, m in zip(memories, payload, strict=True): + try: + self._sdk.remember( + agent_id=self.config.agent_id, + memory_type=m["type"], + title=m["title"], + content=m["content"], + confidence=m["confidence"], + tags=m["tags"], + source=m["source"], + provenance=m["provenance"], + ) + persisted.append(original) + except Exception as exc: + logger.debug("remember failed for %r: %s", m["title"], exc) + return persisted diff --git a/examples/claudecode-skills-memanto/memanto_skills/config.py b/examples/claudecode-skills-memanto/memanto_skills/config.py new file mode 100644 index 00000000..10fcc89a --- /dev/null +++ b/examples/claudecode-skills-memanto/memanto_skills/config.py @@ -0,0 +1,95 @@ +"""Environment-driven configuration for the Claude Code skills memory layer. + +Kept dependency-light on purpose: the hooks run on *every* prompt and must +start fast, so we avoid pulling pydantic into the hot path and read plain +environment variables (optionally seeded from a local ``.env``). +""" + +from __future__ import annotations + +import os +from dataclasses import dataclass + +try: # .env support is convenient but never required. 
DEFAULT_AGENT_ID = "skills-dev-profile"
DEFAULT_RECALL_LIMIT = 8
# No similarity floor by default; users opt in via MEMANTO_MIN_SIMILARITY.
DEFAULT_MIN_SIMILARITY: float | None = None


class ConfigError(RuntimeError):
    """Raised when required configuration (the API key) is missing."""


@dataclass(frozen=True)
class SkillsConfig:
    """Resolved configuration for a memory-backed skill session."""

    api_key: str
    agent_id: str = DEFAULT_AGENT_ID
    recall_limit: int = DEFAULT_RECALL_LIMIT
    min_similarity: float | None = DEFAULT_MIN_SIMILARITY

    @classmethod
    def from_env(cls) -> SkillsConfig:
        """Build config from environment variables.

        Reads ``MOORCHEH_API_KEY`` (required), ``MEMANTO_AGENT_ID``,
        ``MEMANTO_RECALL_LIMIT`` and ``MEMANTO_MIN_SIMILARITY``. Blank or
        unparsable optional values fall back to their defaults.

        Raises:
            ConfigError: if ``MOORCHEH_API_KEY`` is absent or blank.
        """
        api_key = (os.environ.get("MOORCHEH_API_KEY") or "").strip()
        if not api_key:
            raise ConfigError(
                "MOORCHEH_API_KEY is not set. Create a key at "
                "https://console.moorcheh.ai/api-keys and export it "
                "(or add it to a .env file in this directory)."
            )

        agent_id = (
            os.environ.get("MEMANTO_AGENT_ID") or DEFAULT_AGENT_ID
        ).strip() or DEFAULT_AGENT_ID

        recall_limit = _int_env("MEMANTO_RECALL_LIMIT", DEFAULT_RECALL_LIMIT)
        min_similarity = _float_env("MEMANTO_MIN_SIMILARITY", DEFAULT_MIN_SIMILARITY)

        return cls(
            api_key=api_key,
            agent_id=agent_id,
            recall_limit=recall_limit,
            min_similarity=min_similarity,
        )


def _int_env(name: str, default: int) -> int:
    """Read ``name`` as an integer of at least 1.

    Returns ``default`` when the variable is unset, blank, or not an integer.
    """
    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        return default
    try:
        return max(1, int(raw))
    except ValueError:
        return default


def _float_env(name: str, default: float | None) -> float | None:
    """Read ``name`` as an optional similarity floor.

    Returns ``default`` when the variable is unset, blank, unparsable, or NaN;
    ``None`` ("no floor") for values <= 0; otherwise the value capped at 1.0.
    """
    import math  # local import: the module otherwise only needs ``os``

    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        return default
    try:
        value = float(raw)
    except ValueError:
        return default
    # float("nan") parses fine but compares False against everything, so a NaN
    # floor would silently filter out every recalled memory.
    if math.isnan(value):
        return default
    if value <= 0:
        return None
    return min(value, 1.0)
We turn that LLM into an extraction engine: the header + frames it as a distiller, the question carries the session summary, and we + ask for a strict JSON array of typed memories. The agent's existing + memories are also retrieved as context, which helps the LLM avoid emitting + near-duplicates of what is already stored. +""" + +from __future__ import annotations + +import json +import re +from typing import Any + +from memanto.app.constants import VALID_MEMORY_TYPES +from memanto.app.utils.validation import InputLimits + +# Sourced from the SDK so this never drifts if Memanto adds a 14th type. +VALID_TYPES = set(VALID_MEMORY_TYPES) + +# Cap how much of a transcript we hand to the LLM. The bounty asks us to pass +# an "interaction summary"; we keep the most recent slice, which is where +# decisions usually land, and stay well under model context limits. +_MAX_SUMMARY_CHARS = 6000 + +EXTRACTION_HEADER = ( + "You are an engineering-memory distiller for a developer's coding agent. " + "You read a summary of a finished coding session and extract only the " + "DURABLE engineering signals worth remembering across future sessions: " + "architectural decisions, hard rules/conventions, coding preferences, " + "stable codebase facts, root-cause learnings, and explicit goals. " + "Ignore ephemeral chatter, greetings, and one-off task details. " + "Each item must stand alone without the surrounding conversation." +) + +# The footer deliberately steers the LLM toward the 8 types that durable +# engineering signals actually land in; ``_coerce_memory`` still accepts any +# of the SDK's valid types if the model picks one outside this list. +EXTRACTION_FOOTER = ( + "Respond with ONLY a JSON array (no prose, no code fences). Each element: " + '{"type": , "title": <<=80 chars>, "content": , "confidence": <0.0-1.0>}. ' + "Return [] if nothing durable was established." 
def build_extraction_question(skill_name: str | None, summary: str) -> str:
    """Compose the question handed to the backend LLM.

    Args:
        skill_name: Skill the session used; adds a leading context line when
            truthy.
        summary: Session text. Only the last ``_MAX_SUMMARY_CHARS`` characters
            are kept.

    Returns:
        The prompt text with the summary between explicit delimiters.
    """
    # Keep the TAIL, not the head: the most recent part of a session is the
    # slice this module intends to distill.
    summary = (summary or "").strip()[-_MAX_SUMMARY_CHARS:].lstrip()
    skill_line = (
        f"The session used the /{skill_name} skill.\n" if skill_name else ""
    )
    return (
        f"{skill_line}"
        "Extract the durable engineering memories from this session summary.\n\n"
        "=== SESSION SUMMARY ===\n"
        f"{summary}\n"
        "=== END SUMMARY ==="
    )


def parse_llm_memories(answer_text: str) -> list[dict[str, Any]]:
    """Parse the LLM's JSON-array answer into validated memory dicts.

    Tolerant of code fences and surrounding prose. Invalid items are dropped
    rather than raising, so a partially malformed answer still yields the
    usable memories. Returns [] when no JSON array can be found.
    """
    if not answer_text:
        return []

    payload = _extract_json_array(answer_text)
    if payload is None:
        return []

    try:
        raw = json.loads(payload)
    except (json.JSONDecodeError, ValueError):
        return []
    if not isinstance(raw, list):
        return []

    coerced = (_coerce_memory(item) for item in raw)
    return [mem for mem in coerced if mem is not None]


def heuristic_memories(summary: str) -> list[dict[str, Any]]:
    """Lightweight fallback used only when the LLM path yields nothing.

    Splits the summary into sentences, classifies each by keyword, and keeps
    at most 12 distinct matches (de-duplicated on the first 80 lowercased
    characters). Every result carries a fixed confidence of 0.6.
    """
    summary = (summary or "").strip()
    if not summary:
        return []

    out: list[dict[str, Any]] = []
    seen: set[str] = set()
    for sentence in _split_sentences(summary):
        s = _ROLE_PREFIX_RE.sub("", sentence).strip()
        if len(s) < 12:
            continue  # too short to carry a durable signal
        mtype = _classify(s)
        if mtype is None:
            continue
        key = s.lower()[:80]
        if key in seen:
            continue
        seen.add(key)
        out.append(
            {
                "type": mtype,
                "title": s[:80],
                "content": s,
                "confidence": 0.6,
            }
        )
        if len(out) >= 12:
            break
    return out


# --------------------------------------------------------------------------- #
# Internals
# --------------------------------------------------------------------------- #

_FENCE_RE = re.compile(r"```(?:json)?", re.IGNORECASE)


def _extract_json_array(text: str) -> str | None:
    """Return the first substring of ``text`` that is a valid JSON array.

    Code-fence markers are removed first. Each ``[`` is tried in turn as the
    start of a JSON value, so brackets in surrounding prose ("see [docs]")
    no longer corrupt the extracted span. Returns ``None`` when no array
    decodes.
    """
    cleaned = _FENCE_RE.sub("", text).strip()
    decoder = json.JSONDecoder()
    start = cleaned.find("[")
    while start != -1:
        try:
            value, end = decoder.raw_decode(cleaned, start)
        except ValueError:
            pass  # not JSON from this bracket; try the next one
        else:
            if isinstance(value, list):
                return cleaned[start:end]
        start = cleaned.find("[", start + 1)
    return None


def _coerce_memory(item: Any) -> dict[str, Any] | None:
    """Validate one decoded item; return a clean memory dict or ``None``.

    Non-dict items and items without content are rejected. An unknown type
    becomes ``"learning"``; a missing or unparsable confidence becomes 0.85
    and the result is clamped to [0.0, 1.0].
    """
    if not isinstance(item, dict):
        return None
    content = str(item.get("content") or "").strip()
    if not content:
        return None

    mtype = str(item.get("type") or "learning").strip().lower()
    if mtype not in VALID_TYPES:
        mtype = "learning"

    title = str(item.get("title") or content).strip()[:80]

    confidence = item.get("confidence", 0.85)
    try:
        confidence = float(confidence)
    except (TypeError, ValueError):
        confidence = 0.85
    confidence = min(max(confidence, 0.0), 1.0)

    return {
        "type": mtype,
        "title": title,
        "content": content[: InputLimits.MAX_TEXT_LENGTH],
        "confidence": confidence,
    }
# Strips a leading speaker label ("user:", "assistant:", ...) from a sentence.
_ROLE_PREFIX_RE = re.compile(
    r"^\s*(?:user|assistant|human|claude|system)\s*:\s*", re.IGNORECASE
)

# Sentence boundaries: whitespace after terminal punctuation, OR any line
# break. The line-break alternative is required so bullet items and transcript
# lines without end punctuation are split instead of merged into one sentence.
_SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?])\s+|\s*\n\s*")

# Leading bullet marker (-, *, •) at the start of any line.
_BULLET_RE = re.compile(r"^\s*[-*•]\s*", re.MULTILINE)

# Ordered most-specific first; first match wins.
_HEURISTIC_RULES: list[tuple[str, tuple[str, ...]]] = [
    ("instruction", ("always", "never", "must ", "should ", "do not", "don't", "enforce", "convention")),
    ("decision", ("decided", "chose", "will use", "going with", "we use", "picked", "selected", "switched to")),
    ("preference", ("prefer", "favour", "favor", "instead of", "rather than", "like to")),
    ("error", ("bug", "root cause", "regression", "failed because", "broke")),
    ("goal", ("goal is", "aim to", "objective", "we want to")),
]


def _classify(sentence: str) -> str | None:
    """Return the memory type of the first rule whose keyword occurs.

    Matching is a case-insensitive substring test; ``None`` means no rule
    matched.
    """
    lower = sentence.lower()
    for mtype, keywords in _HEURISTIC_RULES:
        if any(kw in lower for kw in keywords):
            return mtype
    return None


def _split_sentences(text: str) -> list[str]:
    """Split ``text`` into sentence-like pieces.

    Bullet markers are removed and every line break counts as a boundary, so
    each bullet item becomes its own piece. Empty pieces are dropped.
    """
    without_bullets = _BULLET_RE.sub("", text)
    return [
        piece
        for piece in _SENTENCE_SPLIT_RE.split(without_bullets)
        if piece.strip()
    ]
# Absolute path to the hooks directory shipped beside this package.
_HOOKS_DIR = Path(__file__).resolve().parent.parent / "hooks"

# Substring that identifies hook commands owned by this installer.
_MARKER = str(_HOOKS_DIR)


def _hook_command(script: str) -> str:
    """Build the quoted shell command that runs ``script`` from the hooks dir."""
    interpreter = sys.executable
    target = _HOOKS_DIR / script
    return f'"{interpreter}" "{target}"'


def _managed_hooks() -> dict[str, list[dict[str, Any]]]:
    """Return the hook entries this installer owns, keyed by event name."""

    def command(script: str, timeout: int, **flags: Any) -> dict[str, Any]:
        # Key order is part of the written JSON: type, command, flags, timeout.
        return {
            "type": "command",
            "command": _hook_command(script),
            **flags,
            "timeout": timeout,
        }

    return {
        "SessionStart": [
            {
                "matcher": "startup|resume|clear",
                "hooks": [command("session_start.py", 15)],
            }
        ],
        "UserPromptSubmit": [{"hooks": [command("on_prompt.py", 15)]}],
        # "async" is a Python keyword, hence the dict unpacking.
        "Stop": [{"hooks": [command("on_stop.py", 60, **{"async": True})]}],
    }


def _settings_path(global_scope: bool) -> Path:
    """Locate ``settings.json`` under ``~/.claude`` or ``./.claude``."""
    root = Path.home() if global_scope else Path.cwd()
    return root / ".claude" / "settings.json"


def _load(path: Path) -> dict[str, Any]:
    """Read settings from ``path``.

    Returns an empty dict when the file is missing, is not valid JSON, or
    does not hold a JSON object at the top level.
    """
    if not path.exists():
        return {}
    try:
        parsed = json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError:
        return {}
    return parsed if isinstance(parsed, dict) else {}


def _backup(path: Path) -> Path | None:
    """Copy ``path`` to a timestamped sibling; ``None`` if it does not exist."""
    if not path.exists():
        return None
    stamp = time.strftime("%Y%m%d-%H%M%S")
    copy = path.with_suffix(f".json.bak-{stamp}")
    original_text = path.read_text(encoding="utf-8")
    copy.write_text(original_text, encoding="utf-8")
    return copy


def _is_managed(hook: Any) -> bool:
    """True when ``hook`` is a dict whose command contains our hooks-dir path."""
    if not isinstance(hook, dict):
        return False
    return _MARKER in str(hook.get("command", ""))


def _strip_managed(entries: list[Any]) -> tuple[list[Any], int]:
    """Remove our hook commands from ``entries``.

    Works hook by hook: a user hook merged into one of our entries survives,
    an entry left without hooks is dropped, and entries without a ``hooks``
    list are passed through untouched.

    Returns:
        ``(kept_entries, removed_hook_count)``.
    """
    survivors: list[Any] = []
    removed = 0
    for entry in entries:
        hooks = entry.get("hooks") if isinstance(entry, dict) else None
        if not isinstance(hooks, list):
            survivors.append(entry)
            continue
        foreign = [hook for hook in hooks if not _is_managed(hook)]
        dropped = len(hooks) - len(foreign)
        removed += dropped
        if dropped == 0:
            survivors.append(entry)
        elif foreign:
            survivors.append({**entry, "hooks": foreign})
    return survivors, removed


def install_hooks(global_scope: bool = False) -> int:
    """Register the three lifecycle hooks in Claude Code's ``settings.json``.

    Args:
        global_scope: Write to ``~/.claude`` instead of ``./.claude``.

    Returns:
        0 on success. Re-running replaces only our own entries; foreign hooks
        are preserved and an existing settings file is backed up first.
    """
    target = _settings_path(global_scope)
    target.parent.mkdir(parents=True, exist_ok=True)

    settings = _load(target)
    saved_copy = _backup(target)

    # A missing or malformed "hooks" value is replaced by a fresh dict.
    section = settings.get("hooks")
    if not isinstance(section, dict):
        section = settings["hooks"] = {}

    for event, ours in _managed_hooks().items():
        current = section.get(event, [])
        foreign, _ = _strip_managed(current if isinstance(current, list) else [])
        section[event] = [*foreign, *ours]

    target.write_text(json.dumps(settings, indent=2) + "\n", encoding="utf-8")

    print(f"✓ Installed Memanto skill-memory hooks into {target}")
    if saved_copy:
        print(f" (backed up previous settings to {saved_copy.name})")
    print(" Hooks: SessionStart, UserPromptSubmit, Stop")
    print(" Ensure MOORCHEH_API_KEY is set in this shell's environment.")
    return 0


def uninstall_hooks(global_scope: bool = False) -> int:
    """Remove only the hooks this installer manages from ``settings.json``.

    Args:
        global_scope: Operate on ``~/.claude`` instead of ``./.claude``.

    Returns:
        0 in every path. The file is rewritten (and backed up) only when at
        least one of our hooks was actually removed.
    """
    target = _settings_path(global_scope)
    if not target.exists():
        print(f"Nothing to uninstall ({target} does not exist).")
        return 0

    settings = _load(target)
    section = settings.get("hooks")
    nothing_found = f"No Memanto hooks found in {target}; nothing to remove."
    if not isinstance(section, dict):
        print(nothing_found)
        return 0

    removed = 0
    for event in _managed_hooks():
        current = section.get(event)
        if not isinstance(current, list):
            continue
        remaining, count = _strip_managed(current)
        removed += count
        if remaining:
            section[event] = remaining
        else:
            section.pop(event, None)

    if not removed:
        print(nothing_found)
        return 0

    saved_copy = _backup(target)
    target.write_text(json.dumps(settings, indent=2) + "\n", encoding="utf-8")
    print(f"✓ Removed {removed} Memanto hook(s) from {target}")
    if saved_copy:
        print(f" (backed up previous settings to {saved_copy.name})")
    return 0
+_TYPE_ORDER = [ + "instruction", + "decision", + "commitment", + "preference", + "error", + "learning", + "fact", + "observation", + "relationship", + "artifact", + "event", + "context", + "goal", +] + +_TYPE_LABEL = { + "instruction": "Rules (always honour)", + "decision": "Decisions made", + "commitment": "Commitments", + "preference": "Preferences", + "error": "Known failure modes", + "learning": "Lessons learned", + "fact": "Codebase facts", + "observation": "Observed patterns", + "relationship": "Relationships", + "artifact": "Artifacts", + "event": "Events", + "context": "Background", + "goal": "Goals", +} + + +@dataclass +class MemoryProfile: + """A set of recalled memories, ready to format as an injectable block.""" + + memories: list[dict[str, Any]] + + def __bool__(self) -> bool: + return bool(self.memories) + + def __len__(self) -> int: + return len(self.memories) + + @classmethod + def from_recall( + cls, + result: dict[str, Any] | None, + min_similarity: float | None = None, + ) -> MemoryProfile: + """Build a profile from a Memanto ``recall`` response. + + Applies an optional similarity floor. Memanto exposes the score as + either ``score`` or ``similarity_score`` depending on the endpoint, so + we coalesce both. + """ + memories = list((result or {}).get("memories", []) or []) + if min_similarity is not None: + memories = [m for m in memories if _score(m) >= min_similarity] + return cls(memories=memories) + + def format_context_block(self, skill_name: str | None = None) -> str: + """Render the profile as a Markdown block for prompt injection. + + Returns an empty string when there is nothing to inject, so callers can + cheaply skip injection (``if block:``). + """ + if not self.memories: + return "" + + grouped: dict[str, list[dict[str, Any]]] = {} + for mem in self.memories: + grouped.setdefault((mem.get("type") or "context").lower(), []).append(mem) + + # Escape skill_name everywhere it appears in the injected block. 
The + # block is fed back into the model as system context, so a crafted + # value containing ``"`` or ``>`` would otherwise break attribute + # boundaries or close the wrapper early and reshape the prompt. + safe_skill = html.escape(skill_name, quote=True) if skill_name else "" + header_skill = f" for /{safe_skill}" if safe_skill else "" + lines = [ + f'', + f"Relevant engineering memory{header_skill} " + "(carried over from previous skill sessions — honour it, " + "do not re-ask the user):", + ] + + ordered_types = [t for t in _TYPE_ORDER if t in grouped] + ordered_types += [t for t in grouped if t not in _TYPE_ORDER] + + for mtype in ordered_types: + label = _TYPE_LABEL.get(mtype, mtype.capitalize()) + lines.append(f"\n{label}:") + for mem in grouped[mtype]: + lines.append(f" - {_render_memory(mem)}") + + lines.append("") + return "\n".join(lines) + + def to_plain_list(self) -> list[str]: + """Flat list of memory contents (for CLI/profile display).""" + return [_render_memory(m) for m in self.memories] + + +def _render_memory(mem: dict[str, Any]) -> str: + content = (mem.get("content") or mem.get("title") or "").strip() + confidence = mem.get("confidence") + if isinstance(confidence, (int, float)) and confidence < 0.6: + return f"{content} (tentative)" + return content + + +def _score(mem: dict[str, Any]) -> float: + raw = mem.get("score") + if raw is None: + raw = mem.get("similarity_score") + try: + return float(raw) if raw is not None else 1.0 + except (TypeError, ValueError): + return 1.0 + + +def _skill_attr(skill_name: str | None) -> str: + """Render the ``skill="..."`` attribute. 
Caller must pass an HTML-escaped value.""" + return f' skill="{skill_name}"' if skill_name else "" diff --git a/examples/claudecode-skills-memanto/memanto_skills/skill_map.py b/examples/claudecode-skills-memanto/memanto_skills/skill_map.py new file mode 100644 index 00000000..5187a7c8 --- /dev/null +++ b/examples/claudecode-skills-memanto/memanto_skills/skill_map.py @@ -0,0 +1,116 @@ +"""Skill-aware routing for the mattpocock/skills catalogue. + +Each skill cares about a different slice of the engineering profile. When the +user invokes ``/tdd`` we bias recall toward testing conventions; ``/grill-with-docs`` +biases toward architecture and domain terminology. This module maps the real +mattpocock skill names to: + + * a natural-language ``query`` (semantic bias for what to recall), and + * ``tags`` to stamp on memories captured from that skill. + +Recall is intentionally *not* hard-filtered by memory type — live testing +showed that combining a type filter with Memanto's semantic threshold can +return nothing even when matching-typed memories exist. The ``query`` provides +the skill-awareness instead, and Memanto returns relevant results only. + +Skill names are taken from the mattpocock/skills repository +(github.com/mattpocock/skills): engineering skills (tdd, diagnose, +grill-with-docs, triage, to-prd, to-issues, improve-codebase-architecture, +zoom-out, prototype) and productivity skills (grill-me, caveman, handoff, +write-a-skill). +""" + +from __future__ import annotations + +from dataclasses import dataclass, field + + +@dataclass(frozen=True) +class SkillRoute: + """How to recall context for, and tag memories from, a given skill.""" + + query: str + tags: list[str] = field(default_factory=list) + + +# Curated routes for the real mattpocock skills. The query is phrased as +# natural language because Memanto recall is semantic, not keyword-based. 
+_ROUTES: dict[str, SkillRoute] = { + "tdd": SkillRoute( + query="testing conventions, test framework, mocking strategy, and test structure preferences", + tags=["testing", "tdd"], + ), + "diagnose": SkillRoute( + query="known failure modes, past bugs, root causes, and debugging conventions", + tags=["debugging", "diagnose"], + ), + "grill-with-docs": SkillRoute( + query="architectural decisions, domain model, terminology, and documented design constraints", + tags=["architecture", "domain", "docs"], + ), + "grill-me": SkillRoute( + query="prior plans, resolved questions, decisions already made, and stated goals", + tags=["planning"], + ), + "improve-codebase-architecture": SkillRoute( + query="architecture decisions, module boundaries, and refactoring direction", + tags=["architecture", "refactoring"], + ), + "zoom-out": SkillRoute( + query="system-level architecture, component relationships, and high-level design", + tags=["architecture"], + ), + "prototype": SkillRoute( + query="prototyping preferences, throwaway-code conventions, and stack choices", + tags=["prototyping"], + ), + "triage": SkillRoute( + query="issue triage conventions, priority rules, and labelling decisions", + tags=["triage", "issues"], + ), + "to-prd": SkillRoute( + query="product requirements conventions and how issues should be written", + tags=["planning", "prd"], + ), + "to-issues": SkillRoute( + query="how work is sliced into issues and tracker conventions", + tags=["planning", "issues"], + ), + "handoff": SkillRoute( + query="recent decisions, open threads, and session context to carry forward", + tags=["handoff"], + ), +} + +# Generic fallback for any skill not explicitly routed (covers caveman, +# write-a-skill, custom skills, and bare prompts). 
+_DEFAULT_ROUTE = SkillRoute( + query="engineering decisions, coding preferences, conventions, and codebase facts", + tags=[], +) + + +def normalize_skill(skill_name: str | None) -> str | None: + """Canonicalise a skill name for routing and tagging. + + Strips whitespace, lowercases, and removes any leading ``/`` so that + ``"/TDD "``, ``"tdd"``, and ``" TDD"`` all map to the same skill identity. + Returns ``None`` for empty input. + """ + if not skill_name: + return None + cleaned = skill_name.strip().lower().lstrip("/") + return cleaned or None + + +def route_for(skill_name: str | None) -> SkillRoute: + """Return the recall route for a skill, falling back to a generic one.""" + normalized = normalize_skill(skill_name) + if normalized is None: + return _DEFAULT_ROUTE + return _ROUTES.get(normalized, _DEFAULT_ROUTE) + + +def known_skills() -> list[str]: + """Names of skills with curated routes (for docs/tests).""" + return sorted(_ROUTES) diff --git a/examples/claudecode-skills-memanto/pyproject.toml b/examples/claudecode-skills-memanto/pyproject.toml new file mode 100644 index 00000000..b12d2afa --- /dev/null +++ b/examples/claudecode-skills-memanto/pyproject.toml @@ -0,0 +1,95 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "claudecode-skills-memanto" +version = "0.1.0" +description = "Cross-session engineering memory for Claude Code + mattpocock/skills, powered by Memanto. Real lifecycle hooks + LLM distillation = zero repeated instructions." 
+readme = "README.md" +license = { text = "MIT" } +requires-python = ">=3.10,<4" +authors = [ + { name = "Memanto", email = "info@memanto.ai" } +] +keywords = [ + "memanto", + "moorcheh", + "claude-code", + "claude", + "anthropic", + "skills", + "mattpocock", + "agent memory", + "semantic memory", + "hooks", +] +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [ + "memanto>=0.1.0", + "python-dotenv>=1.0.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.2.0,<9", + "pytest-mock>=3.12.0,<4", + "ruff>=0.14.6,<0.15", + "mypy>=1.10.0,<2", +] + +[project.scripts] +memanto-skills = "memanto_skills.cli:main" + +[project.urls] +Homepage = "https://www.memanto.ai" +Documentation = "https://docs.memanto.ai" +Repository = "https://github.com/moorcheh-ai/memanto" +"Bug Tracker" = "https://github.com/moorcheh-ai/memanto/issues" + +[tool.hatch.build.targets.wheel] +packages = ["memanto_skills"] + +[tool.ruff] +target-version = "py310" +line-length = 88 + +[tool.ruff.lint] +select = ["E", "W", "F", "I", "B", "C4", "UP"] +ignore = ["B904", "E501", "B008", "C901"] + +[tool.pytest.ini_options] +testpaths = ["tests"] + +# Docstring-coverage configuration. Tests, dunder methods, private helpers, and +# the `main()` entry points of one-shot scripts are intentionally excluded — +# their behaviour is documented by name, by test cases, or by the module +# docstring directly above them. 
+[tool.interrogate] +ignore-init-method = true +ignore-init-module = true +ignore-magic = true +ignore-private = true +ignore-semiprivate = true +ignore-property-decorators = true +ignore-module = false +ignore-nested-functions = true +ignore-nested-classes = true +ignore-setters = true +fail-under = 80 +exclude = ["tests", "demo_session_1.py", "demo_session_2.py", "demo_session_3.py", "install.py"] +verbose = 0 +quiet = false +color = true diff --git a/examples/claudecode-skills-memanto/skills/memanto-companion/SKILL.md b/examples/claudecode-skills-memanto/skills/memanto-companion/SKILL.md new file mode 100644 index 00000000..170a1b03 --- /dev/null +++ b/examples/claudecode-skills-memanto/skills/memanto-companion/SKILL.md @@ -0,0 +1,68 @@ +--- +name: memanto-companion +description: Inspect and manage the cross-session engineering memory that Memanto maintains for your Claude Code skills. Use when the user asks what Memanto remembers, wants to see their engineering profile, manually recall context for a skill, or store a decision. The automatic lifecycle hooks handle capture/injection on their own — this skill is the manual control surface. +--- + +# Memanto Companion + +Cross-session engineering memory for Claude Code skills runs automatically via +lifecycle hooks (`SessionStart`, `UserPromptSubmit`, `Stop`). This skill is the +**manual control surface** for when the user wants to inspect or steer it. + +All operations go through the `memanto-skills` CLI. Requires `MOORCHEH_API_KEY` +in the environment. + +## When the user wants to SEE what is remembered + +Run: + +```bash +memanto-skills profile +``` + +Then summarise the returned engineering profile for the user in plain language, +grouped by decisions, rules, and preferences. + +## When the user wants context for a specific skill + +If they ask "what do you remember about testing / TDD?" 
or want to preview what +would be injected before a skill, run: + +```bash +memanto-skills recall <skill> --hint "<what the user is working on>" +``` + +`<skill>` is a mattpocock skill name such as `tdd`, `grill-with-docs`, +`diagnose`, or `handoff`. Read the returned `<engineering-profile>` block and +honour it — these are decisions from past sessions. + +## When the user states a durable decision to remember + +If the user explicitly says "remember that we …" or makes an architectural +decision they want persisted immediately (rather than waiting for the automatic +`Stop` hook), distill and store it: + +```bash +memanto-skills store "<the decision, in the user's words>" +``` + +Memanto's backend LLM extracts the typed memories and persists them. Report +back which memories were stored. + +## When the user wants to verify the setup + +```bash +memanto-skills doctor +``` + +This checks the API key, agent id, and live connectivity. If it fails, the +likely cause is a missing `MOORCHEH_API_KEY` — point them to +https://console.moorcheh.ai/api-keys. + +## Important + +- Never invent memories. Only report what the CLI returns. +- Treat `instruction` memories as hard rules and `decision` memories as settled + choices; do not re-litigate them unless the user asks. +- The hooks already inject context automatically — only run `recall` manually + when the user explicitly wants to inspect or preview it. 
diff --git a/examples/claudecode-skills-memanto/tests/__init__.py b/examples/claudecode-skills-memanto/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/examples/claudecode-skills-memanto/tests/conftest.py b/examples/claudecode-skills-memanto/tests/conftest.py new file mode 100644 index 00000000..3e080357 --- /dev/null +++ b/examples/claudecode-skills-memanto/tests/conftest.py @@ -0,0 +1,126 @@ +"""Shared test fixtures: a fake SdkClient so tests need no network or API key.""" + +from __future__ import annotations + +import json +from typing import Any + +import pytest +from memanto.app.utils.errors import AgentNotFoundError + +from memanto_skills.config import SkillsConfig + + +class FakeSdkClient: + """Records calls and returns canned responses, mimicking SdkClient. + + ``agent_exists=False`` simulates a first run: ``activate_agent`` raises + ``AgentNotFoundError`` until ``create_agent`` is called, matching the real + SDK's behaviour (verified against the live API). 
+ """ + + def __init__( + self, + recall_memories: list[dict[str, Any]] | None = None, + answer_text: str = "[]", + agent_exists: bool = True, + ) -> None: + self.recall_memories = recall_memories or [] + self.answer_text = answer_text + self.agent_exists = agent_exists + self.created: list[str] = [] + self.activated: list[str] = [] + self.remembered: list[dict[str, Any]] = [] + self.batch_calls: list[list[dict[str, Any]]] = [] + self.answer_calls: list[dict[str, Any]] = [] + self.recall_calls: list[dict[str, Any]] = [] + + # lifecycle + def create_agent(self, agent_id: str, pattern: str = "tool", **_: Any) -> dict: + self.created.append(agent_id) + self.agent_exists = True + return {"agent_id": agent_id} + + def activate_agent(self, agent_id: str, duration_hours: int | None = None) -> dict: + if not self.agent_exists: + raise AgentNotFoundError(f"Agent '{agent_id}' not found") + self.activated.append(agent_id) + return {"agent_id": agent_id} + + # recall family + def recall(self, agent_id: str, query: str, **kwargs: Any) -> dict: + self.recall_calls.append({"query": query, **kwargs}) + return {"memories": list(self.recall_memories)} + + def recall_recent(self, agent_id: str, **kwargs: Any) -> dict: + return {"memories": list(self.recall_memories)} + + # extraction + persistence + def answer(self, agent_id: str, question: str, **kwargs: Any) -> dict: + self.answer_calls.append({"question": question, **kwargs}) + return {"answer": self.answer_text, "sources": []} + + def batch_remember(self, agent_id: str, memories: list[dict[str, Any]]) -> dict: + self.batch_calls.append(memories) + return {"successful": len(memories), "failed": 0, "results": []} + + def remember(self, agent_id: str, **kwargs: Any) -> dict: + self.remembered.append(kwargs) + return {"memory_id": f"mem-{len(self.remembered)}"} + + +@pytest.fixture +def config() -> SkillsConfig: + return SkillsConfig( + api_key="mch_test_key", + agent_id="test-agent", + recall_limit=5, + min_similarity=None, + ) + + 
+@pytest.fixture +def sample_memories() -> list[dict[str, Any]]: + return [ + { + "type": "instruction", + "title": "Always Vitest", + "content": "Always use Vitest for tests, never Jest.", + "confidence": 0.95, + "score": 0.9, + }, + { + "type": "decision", + "title": "CQRS for orders", + "content": "Use CQRS for the Order domain.", + "confidence": 0.9, + "score": 0.8, + }, + { + "type": "preference", + "title": "AAA tests", + "content": "Structure tests with Arrange-Act-Assert.", + "confidence": 0.4, + "score": 0.5, + }, + ] + + +@pytest.fixture +def llm_answer_json() -> str: + return json.dumps( + [ + { + "type": "decision", + "title": "Use CQRS", + "content": "Use CQRS for the Order domain.", + "confidence": 0.9, + }, + { + "type": "instruction", + "title": "Cart != Order", + "content": "Never conflate Cart and Order terminology.", + "confidence": 0.95, + }, + ] + ) diff --git a/examples/claudecode-skills-memanto/tests/test_client.py b/examples/claudecode-skills-memanto/tests/test_client.py new file mode 100644 index 00000000..5bd54ebc --- /dev/null +++ b/examples/claudecode-skills-memanto/tests/test_client.py @@ -0,0 +1,161 @@ +"""Tests for SkillMemory orchestration (recall, distill, persist) with a fake SDK.""" + +from __future__ import annotations + +from typing import Any + +from memanto_skills.client import SOURCE_TAG, SkillMemory +from memanto_skills.config import SkillsConfig + +from .conftest import FakeSdkClient + + +def _mem(config: SkillsConfig, client: FakeSdkClient) -> SkillMemory: + return SkillMemory(config=config, client=client) + + +class TestSetup: + def test_existing_agent_activates_without_create(self, config: SkillsConfig) -> None: + # Common case: agent already exists — exactly one network call. 
+ fake = FakeSdkClient() + mem = _mem(config, fake) + mem.setup() + assert fake.created == [] + assert fake.activated == ["test-agent"] + + def test_missing_agent_is_created_then_activated(self, config: SkillsConfig) -> None: + fake = FakeSdkClient(agent_exists=False) + mem = _mem(config, fake) + mem.setup() + assert fake.created == ["test-agent"] + assert fake.activated == ["test-agent"] + + def test_setup_is_idempotent(self, config: SkillsConfig) -> None: + fake = FakeSdkClient() + mem = _mem(config, fake) + mem.setup() + mem.setup() + # Activated at most once despite two setup calls. + assert fake.activated == ["test-agent"] + + +class TestRecallForSkill: + def test_recall_uses_skill_route( + self, config: SkillsConfig, sample_memories: list[dict[str, Any]] + ) -> None: + fake = FakeSdkClient(recall_memories=sample_memories) + mem = _mem(config, fake) + profile = mem.recall_for_skill("tdd", task_hint="auth module") + assert len(profile) == 3 + # Query is skill-routed and includes the task hint. + call = fake.recall_calls[0] + assert "testing" in call["query"] + assert "auth module" in call["query"] + # No hard type filter is passed (live-verified to over-constrain). + assert "type" not in call or call["type"] is None + + +class TestDistillAndStore: + def test_llm_path_stores_extracted( + self, config: SkillsConfig, llm_answer_json: str + ) -> None: + fake = FakeSdkClient(answer_text=llm_answer_json) + mem = _mem(config, fake) + stored = mem.distill_and_store("grill-with-docs", "long transcript ...") + assert len(stored) == 2 + # Persisted via batch with proper tags + source. 
+ assert len(fake.batch_calls) == 1 + persisted = fake.batch_calls[0] + assert all(m["source"] == SOURCE_TAG for m in persisted) + assert all(SOURCE_TAG in m["tags"] for m in persisted) + assert all("skill:grill-with-docs" in m["tags"] for m in persisted) + assert all(m["provenance"] == "inferred" for m in persisted) + + def test_falls_back_to_heuristic_when_llm_empty(self, config: SkillsConfig) -> None: + fake = FakeSdkClient(answer_text="[]") # LLM yields nothing + mem = _mem(config, fake) + stored = mem.distill_and_store( + "tdd", "We decided to use pytest for all tests." + ) + assert len(stored) >= 1 + assert any(m["type"] == "decision" for m in stored) + + def test_empty_summary_stores_nothing(self, config: SkillsConfig) -> None: + fake = FakeSdkClient() + mem = _mem(config, fake) + assert mem.distill_and_store("tdd", " ") == [] + assert fake.batch_calls == [] + + def test_batch_failure_falls_back_to_individual( + self, config: SkillsConfig, llm_answer_json: str + ) -> None: + fake = FakeSdkClient(answer_text=llm_answer_json) + + def boom(*_: Any, **__: Any) -> dict: + raise RuntimeError("batch down") + + fake.batch_remember = boom # type: ignore[assignment] + mem = _mem(config, fake) + stored = mem.distill_and_store("tdd", "transcript") + assert len(stored) == 2 + assert len(fake.remembered) == 2 # individual fallback used + + def test_llm_failure_falls_back_to_heuristic(self, config: SkillsConfig) -> None: + fake = FakeSdkClient() + + def boom(*_: Any, **__: Any) -> dict: + raise RuntimeError("llm down") + + fake.answer = boom # type: ignore[assignment] + mem = _mem(config, fake) + stored = mem.distill_and_store("tdd", "We chose Redis for caching.") + assert any(m["type"] == "decision" for m in stored) + + def test_returns_only_successfully_persisted_when_individuals_fail( + self, config: SkillsConfig, llm_answer_json: str + ) -> None: + # batch_remember fails; per-memory remember fails for the FIRST item + # only. 
The returned list must reflect reality: 1 stored, not 2. + fake = FakeSdkClient(answer_text=llm_answer_json) + + def batch_boom(*_: Any, **__: Any) -> dict: + raise RuntimeError("batch down") + + calls = {"n": 0} + + def remember_partial(*_: Any, **__: Any) -> dict: + calls["n"] += 1 + if calls["n"] == 1: + raise RuntimeError("first write failed") + return {"memory_id": "ok"} + + fake.batch_remember = batch_boom # type: ignore[assignment] + fake.remember = remember_partial # type: ignore[assignment] + mem = _mem(config, fake) + stored = mem.distill_and_store("tdd", "transcript") + # 2 attempted, 1 succeeded — the return value must agree. + assert calls["n"] == 2 + assert len(stored) == 1 + + def test_tags_use_normalized_skill_name( + self, config: SkillsConfig, llm_answer_json: str + ) -> None: + # "/TDD " must be canonicalised to "skill:tdd" so the same skill never + # splits into two tag identities. + fake = FakeSdkClient(answer_text=llm_answer_json) + mem = _mem(config, fake) + mem.distill_and_store("/TDD ", "transcript") + for persisted in fake.batch_calls[0]: + assert "skill:tdd" in persisted["tags"] + assert "skill:/TDD " not in persisted["tags"] + assert "skill:/tdd " not in persisted["tags"] + + +class TestProfileBlock: + def test_profile_block_renders( + self, config: SkillsConfig, sample_memories: list[dict[str, Any]] + ) -> None: + fake = FakeSdkClient(recall_memories=sample_memories) + mem = _mem(config, fake) + block = mem.profile_block() + assert "engineering-profile" in block diff --git a/examples/claudecode-skills-memanto/tests/test_extractor.py b/examples/claudecode-skills-memanto/tests/test_extractor.py new file mode 100644 index 00000000..d542fc8f --- /dev/null +++ b/examples/claudecode-skills-memanto/tests/test_extractor.py @@ -0,0 +1,98 @@ +"""Tests for LLM-answer parsing and the heuristic fallback.""" + +from __future__ import annotations + +from memanto_skills import extractor + + +class TestParseLlmMemories: + def 
test_parses_clean_json_array(self, llm_answer_json: str) -> None: + mems = extractor.parse_llm_memories(llm_answer_json) + assert len(mems) == 2 + assert mems[0]["type"] == "decision" + assert mems[1]["title"] == "Cart != Order" + + def test_strips_code_fences_and_prose(self) -> None: + text = ( + "Here are the memories:\n```json\n" + '[{"type":"fact","title":"Py","content":"Project uses Python 3.12."}]' + "\n```\nHope that helps!" + ) + mems = extractor.parse_llm_memories(text) + assert len(mems) == 1 + assert mems[0]["type"] == "fact" + + def test_invalid_type_coerced_to_learning(self) -> None: + mems = extractor.parse_llm_memories( + '[{"type":"nonsense","content":"something durable"}]' + ) + assert mems[0]["type"] == "learning" + + def test_drops_items_without_content(self) -> None: + mems = extractor.parse_llm_memories( + '[{"type":"fact","title":"x"},{"type":"fact","content":"keep me"}]' + ) + assert len(mems) == 1 + assert mems[0]["content"] == "keep me" + + def test_confidence_clamped_and_defaulted(self) -> None: + mems = extractor.parse_llm_memories( + '[{"content":"a","confidence":5},{"content":"b","confidence":"bad"}]' + ) + assert mems[0]["confidence"] == 1.0 + assert mems[1]["confidence"] == 0.85 + + def test_empty_or_garbage_returns_empty(self) -> None: + assert extractor.parse_llm_memories("") == [] + assert extractor.parse_llm_memories("no json here") == [] + assert extractor.parse_llm_memories("[]") == [] + + def test_title_truncated_to_80(self) -> None: + long = "x" * 200 + mems = extractor.parse_llm_memories(f'[{{"content":"{long}"}}]') + assert len(mems[0]["title"]) <= 80 + + +class TestHeuristicFallback: + def test_classifies_instruction(self) -> None: + mems = extractor.heuristic_memories("Always use type hints in this repo.") + assert any(m["type"] == "instruction" for m in mems) + + def test_classifies_decision(self) -> None: + mems = extractor.heuristic_memories("We decided to use Postgres for storage.") + assert any(m["type"] == "decision" 
for m in mems) + + def test_ignores_short_and_neutral_sentences(self) -> None: + assert extractor.heuristic_memories("ok. hi. done.") == [] + + def test_dedupes(self) -> None: + text = "We chose React. We chose React. We chose React." + mems = extractor.heuristic_memories(text) + assert len(mems) == 1 + + def test_strips_dialogue_role_prefixes(self) -> None: + # Role-label lines with no durable signal should produce nothing. + garbage = extractor.heuristic_memories( + "assistant: Understood, let me summarise that for you." + ) + assert garbage == [] + + # Real signal after a role prefix must still be extracted, and the + # stored content must not include the "user:" prefix itself. + signal = extractor.heuristic_memories( + "user: We always use TypeScript, never plain JavaScript." + ) + assert any(m["type"] == "instruction" for m in signal) + for m in signal: + assert not m["content"].lower().startswith("user:") + + +class TestExtractionQuestion: + def test_includes_skill_and_summary(self) -> None: + q = extractor.build_extraction_question("tdd", "Decided to use Vitest.") + assert "/tdd" in q + assert "Vitest" in q + + def test_truncates_long_summary(self) -> None: + q = extractor.build_extraction_question(None, "x" * 99999) + assert len(q) < 99999 diff --git a/examples/claudecode-skills-memanto/tests/test_hooks.py b/examples/claudecode-skills-memanto/tests/test_hooks.py new file mode 100644 index 00000000..8e78145a --- /dev/null +++ b/examples/claudecode-skills-memanto/tests/test_hooks.py @@ -0,0 +1,177 @@ +"""Tests for the shared hook plumbing (schema-tolerant transcript reading).""" + +from __future__ import annotations + +import importlib.util +import json +import sys +from pathlib import Path + +_HOOKS_DIR = Path(__file__).resolve().parent.parent / "hooks" + + +def _load_common(): + spec = importlib.util.spec_from_file_location( + "_memanto_hook_common", _HOOKS_DIR / "_common.py" + ) + assert spec and spec.loader + module = importlib.util.module_from_spec(spec) 
+ sys.modules["_memanto_hook_common"] = module + spec.loader.exec_module(module) + return module + + +common = _load_common() + + +class TestDetectSkill: + def test_detects_leading_slash_skill(self) -> None: + assert common.detect_skill("/tdd write tests for auth") == "tdd" + + def test_detects_hyphenated_skill(self) -> None: + assert common.detect_skill("please run /grill-with-docs now") == "grill-with-docs" + + def test_returns_none_without_skill(self) -> None: + assert common.detect_skill("just a normal prompt") is None + + def test_file_paths_are_not_skills(self) -> None: + # Path-like tokens must not be mistaken for skill invocations. + assert common.detect_skill("/usr/local/bin has the binary") is None + assert common.detect_skill("look at /tmp/foo.txt please") is None + + def test_skill_followed_by_path_argument(self) -> None: + assert common.detect_skill("/tdd write tests for src/auth.py") == "tdd" + + def test_handles_empty(self) -> None: + assert common.detect_skill("") is None + assert common.detect_skill(None) is None + + +class TestReadHookInput: + def test_parses_valid_json(self, monkeypatch) -> None: + import io + + monkeypatch.setattr("sys.stdin", io.StringIO('{"prompt":"hi"}')) + assert common.read_hook_input() == {"prompt": "hi"} + + def test_malformed_returns_empty(self, monkeypatch) -> None: + import io + + monkeypatch.setattr("sys.stdin", io.StringIO("not json")) + assert common.read_hook_input() == {} + + +class TestReadTranscriptText: + def test_missing_path_returns_empty(self) -> None: + assert common.read_transcript_text(None) == "" + assert common.read_transcript_text("/no/such/file.jsonl") == "" + + def test_reads_string_content(self, tmp_path: Path) -> None: + f = tmp_path / "t.jsonl" + lines = [ + {"message": {"role": "user", "content": "/tdd add tests"}}, + {"message": {"role": "assistant", "content": "Using Vitest."}}, + ] + f.write_text("\n".join(json.dumps(x) for x in lines), encoding="utf-8") + text = 
common.read_transcript_text(str(f)) + assert "Vitest" in text + assert "tdd" in text + + def test_reads_block_content(self, tmp_path: Path) -> None: + f = tmp_path / "t.jsonl" + entry = { + "message": { + "role": "assistant", + "content": [ + {"type": "text", "text": "We use CQRS."}, + {"type": "tool_use", "name": "Bash"}, + ], + } + } + f.write_text(json.dumps(entry), encoding="utf-8") + text = common.read_transcript_text(str(f)) + assert "CQRS" in text + assert "Bash" not in text # tool blocks are skipped + + def test_skips_malformed_lines(self, tmp_path: Path) -> None: + f = tmp_path / "t.jsonl" + f.write_text( + 'garbage\n{"message":{"role":"user","content":"real line"}}\n', + encoding="utf-8", + ) + assert "real line" in common.read_transcript_text(str(f)) + + def test_truncates_to_max_chars(self, tmp_path: Path) -> None: + f = tmp_path / "t.jsonl" + big = {"message": {"role": "user", "content": "x" * 50000}} + f.write_text(json.dumps(big), encoding="utf-8") + assert len(common.read_transcript_text(str(f), max_chars=1000)) <= 1000 + + +class TestReadTranscriptForDistillation: + def test_returns_skill_and_text_on_short_transcript(self, tmp_path: Path) -> None: + f = tmp_path / "t.jsonl" + lines = [ + {"message": {"role": "user", "content": "/tdd add tests for auth"}}, + {"message": {"role": "assistant", "content": "Using Vitest."}}, + ] + f.write_text("\n".join(json.dumps(x) for x in lines), encoding="utf-8") + skill, text = common.read_transcript_for_distillation(str(f)) + assert skill == "tdd" + assert "Vitest" in text + + def test_recovers_skill_when_opener_is_outside_tail(self, tmp_path: Path) -> None: + """Regression: long sessions must still tag with the opening skill. + + The user invokes ``/grill-with-docs`` at the very start. After many + intermediate messages the opening prompt falls outside ``max_chars``, + but the persisted memories must still be tagged ``skill:grill-with-docs``, + not ``skill:unknown``. 
+ """ + f = tmp_path / "t.jsonl" + opener = { + "message": { + "role": "user", + "content": "/grill-with-docs let's nail down the orders service", + } + } + # Filler dominates the tail and pushes the opener outside max_chars. + filler = [ + {"message": {"role": "assistant", "content": "x" * 500}} + for _ in range(40) + ] + decision = { + "message": { + "role": "user", + "content": "We decided on CQRS for the Order domain.", + } + } + all_lines = [opener, *filler, decision] + f.write_text( + "\n".join(json.dumps(x) for x in all_lines), encoding="utf-8" + ) + + skill, text = common.read_transcript_for_distillation( + str(f), max_chars=2000 + ) + + # Skill is recovered from BEFORE the truncation window. + assert skill == "grill-with-docs" + # The recent decision is in the truncated tail. + assert "CQRS" in text + # The opener has been truncated out — proving skill detection had to + # scan beyond the returned text. + assert "/grill-with-docs" not in text + assert len(text) <= 2000 + + def test_missing_path_returns_none_and_empty(self) -> None: + assert common.read_transcript_for_distillation(None) == (None, "") + assert common.read_transcript_for_distillation("/no/such/file") == (None, "") + + def test_no_skill_in_transcript_returns_none_skill(self, tmp_path: Path) -> None: + f = tmp_path / "t.jsonl" + entry = {"message": {"role": "user", "content": "just a chat, no skill"}} + f.write_text(json.dumps(entry), encoding="utf-8") + skill, text = common.read_transcript_for_distillation(str(f)) + assert skill is None + assert "just a chat" in text diff --git a/examples/claudecode-skills-memanto/tests/test_installer.py b/examples/claudecode-skills-memanto/tests/test_installer.py new file mode 100644 index 00000000..ae15964e --- /dev/null +++ b/examples/claudecode-skills-memanto/tests/test_installer.py @@ -0,0 +1,131 @@ +"""Tests for the settings.json hook installer (idempotency + hook preservation).""" + +from __future__ import annotations + +import json +from pathlib import 
Path + +import pytest + +from memanto_skills import installer + +_EVENTS = ("SessionStart", "UserPromptSubmit", "Stop") + + +@pytest.fixture +def settings_path(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + """Run the installer against an isolated project directory.""" + monkeypatch.chdir(tmp_path) + return tmp_path / ".claude" / "settings.json" + + +def _load(path: Path) -> dict: + return json.loads(path.read_text(encoding="utf-8")) + + +def _managed_count(settings: dict) -> int: + return sum( + 1 + for entries in settings.get("hooks", {}).values() + for entry in entries + for hook in entry.get("hooks", []) + if installer._MARKER in str(hook.get("command", "")) + ) + + +def test_install_registers_all_three_events(settings_path: Path) -> None: + assert installer.install_hooks() == 0 + settings = _load(settings_path) + for event in _EVENTS: + assert event in settings["hooks"], f"missing {event}" + assert _managed_count(settings) == 3 + + +def test_reinstall_is_idempotent(settings_path: Path) -> None: + installer.install_hooks() + installer.install_hooks() + assert _managed_count(_load(settings_path)) == 3 # no duplicates + + +def test_install_preserves_existing_user_hooks(settings_path: Path) -> None: + settings_path.parent.mkdir(parents=True) + user_hook = {"type": "command", "command": "echo user-hook"} + settings_path.write_text( + json.dumps({"hooks": {"UserPromptSubmit": [{"hooks": [user_hook]}]}}), + encoding="utf-8", + ) + + installer.install_hooks() + + entries = _load(settings_path)["hooks"]["UserPromptSubmit"] + commands = [h["command"] for e in entries for h in e["hooks"]] + assert "echo user-hook" in commands + assert any(installer._MARKER in c for c in commands) + + +def test_strip_preserves_user_hook_inside_mixed_entry(settings_path: Path) -> None: + """A user hook merged into one of our entries must survive a re-install.""" + installer.install_hooks() + settings = _load(settings_path) + user_hook = {"type": "command", "command": "echo 
merged-by-user"} + settings["hooks"]["Stop"][0]["hooks"].append(user_hook) + settings_path.write_text(json.dumps(settings), encoding="utf-8") + + installer.install_hooks() + + entries = _load(settings_path)["hooks"]["Stop"] + commands = [h["command"] for e in entries for h in e["hooks"]] + assert "echo merged-by-user" in commands + assert sum(installer._MARKER in c for c in commands) == 1 # ours, once + + +def test_uninstall_removes_only_managed_hooks(settings_path: Path) -> None: + settings_path.parent.mkdir(parents=True) + user_hook = {"type": "command", "command": "echo keep-me"} + settings_path.write_text( + json.dumps({"hooks": {"Stop": [{"hooks": [user_hook]}]}}), + encoding="utf-8", + ) + installer.install_hooks() + + assert installer.uninstall_hooks() == 0 + + settings = _load(settings_path) + assert _managed_count(settings) == 0 + commands = [ + h["command"] + for e in settings["hooks"]["Stop"] + for h in e["hooks"] + ] + assert commands == ["echo keep-me"] + + +def test_uninstall_when_nothing_installed_is_a_noop(settings_path: Path) -> None: + settings_path.parent.mkdir(parents=True) + original = json.dumps({"hooks": {}}) + settings_path.write_text(original, encoding="utf-8") + + assert installer.uninstall_hooks() == 0 + # File untouched (no rewrite, no backup churn) when there was nothing to remove. 
+ assert settings_path.read_text(encoding="utf-8") == original + + +def test_install_recovers_from_malformed_hooks_field(settings_path: Path) -> None: + """A hand-edited settings.json with ``hooks`` as a list must not crash.""" + settings_path.parent.mkdir(parents=True) + settings_path.write_text( + json.dumps({"hooks": ["not", "a", "dict"]}), encoding="utf-8" + ) + + assert installer.install_hooks() == 0 + settings = _load(settings_path) + assert isinstance(settings["hooks"], dict) + assert _managed_count(settings) == 3 + + +def test_uninstall_recovers_from_malformed_hooks_field(settings_path: Path) -> None: + settings_path.parent.mkdir(parents=True) + settings_path.write_text( + json.dumps({"hooks": "totally wrong"}), encoding="utf-8" + ) + assert installer.uninstall_hooks() == 0 diff --git a/examples/claudecode-skills-memanto/tests/test_profile.py b/examples/claudecode-skills-memanto/tests/test_profile.py new file mode 100644 index 00000000..3d575830 --- /dev/null +++ b/examples/claudecode-skills-memanto/tests/test_profile.py @@ -0,0 +1,71 @@ +"""Tests for the MemoryProfile context-block renderer.""" + +from __future__ import annotations + +from typing import Any + +from memanto_skills.profile import MemoryProfile + + +class TestFromRecall: + def test_empty_result(self) -> None: + assert not MemoryProfile.from_recall(None) + assert not MemoryProfile.from_recall({}) + assert len(MemoryProfile.from_recall({"memories": []})) == 0 + + def test_similarity_floor_filters(self, sample_memories: list[dict[str, Any]]) -> None: + profile = MemoryProfile.from_recall( + {"memories": sample_memories}, min_similarity=0.85 + ) + # Only the 0.9-scored instruction clears a 0.85 floor. 
+ assert len(profile) == 1 + assert profile.memories[0]["type"] == "instruction" + + def test_no_floor_keeps_all(self, sample_memories: list[dict[str, Any]]) -> None: + profile = MemoryProfile.from_recall({"memories": sample_memories}) + assert len(profile) == 3 + + +class TestFormatContextBlock: + def test_empty_profile_renders_empty_string(self) -> None: + assert MemoryProfile([]).format_context_block() == "" + + def test_block_wraps_in_engineering_profile_tag( + self, sample_memories: list[dict[str, Any]] + ) -> None: + block = MemoryProfile(sample_memories).format_context_block(skill_name="tdd") + assert block.startswith('") + + def test_instructions_render_before_preferences( + self, sample_memories: list[dict[str, Any]] + ) -> None: + block = MemoryProfile(sample_memories).format_context_block() + assert block.index("Rules") < block.index("Preferences") + + def test_low_confidence_marked_tentative(self) -> None: + block = MemoryProfile( + [{"type": "fact", "content": "maybe true", "confidence": 0.3}] + ).format_context_block() + assert "(tentative)" in block + + def test_content_present(self, sample_memories: list[dict[str, Any]]) -> None: + block = MemoryProfile(sample_memories).format_context_block() + assert "Always use Vitest" in block + assert "CQRS" in block + + def test_skill_name_is_html_escaped_in_attribute( + self, sample_memories: list[dict[str, Any]] + ) -> None: + # A crafted skill name must NOT break the attribute boundary or close + # the wrapper early. The raw injected payload must not appear verbatim. + evil = 'tdd">" not in block + assert "" in block + # Wrapper must still close exactly once at the end. + assert block.count("") == 1 + # The escaped form must be what was rendered. 
+ assert "&quot;" in block or "&gt;" in block diff --git a/examples/claudecode-skills-memanto/tests/test_skill_map.py b/examples/claudecode-skills-memanto/tests/test_skill_map.py new file mode 100644 index 00000000..2aead474 --- /dev/null +++ b/examples/claudecode-skills-memanto/tests/test_skill_map.py @@ -0,0 +1,49 @@ +"""Tests for skill-aware recall routing.""" + +from __future__ import annotations + +from memanto_skills import skill_map + + +def test_known_skill_routes() -> None: + route = skill_map.route_for("tdd") + assert "testing" in route.query + assert "testing" in route.tags + + +def test_skill_name_is_normalised() -> None: + assert skill_map.route_for("/TDD").tags == skill_map.route_for("tdd").tags + + +def test_unknown_skill_falls_back_to_default() -> None: + route = skill_map.route_for("some-custom-skill") + assert route is skill_map._DEFAULT_ROUTE + + +def test_none_falls_back_to_default() -> None: + assert skill_map.route_for(None) is skill_map._DEFAULT_ROUTE + + +def test_grill_with_docs_targets_architecture() -> None: + route = skill_map.route_for("grill-with-docs") + assert "architecture" in route.tags + assert "architectural" in route.query + + +def test_known_skills_listed() -> None: + skills = skill_map.known_skills() + assert "tdd" in skills + assert "diagnose" in skills + assert "handoff" in skills + + +class TestNormalizeSkill: + def test_strips_slash_lower_and_whitespace(self) -> None: + assert skill_map.normalize_skill("/TDD ") == "tdd" + assert skill_map.normalize_skill(" Grill-With-Docs") == "grill-with-docs" + + def test_empty_inputs_return_none(self) -> None: + assert skill_map.normalize_skill(None) is None + assert skill_map.normalize_skill("") is None + assert skill_map.normalize_skill(" ") is None + assert skill_map.normalize_skill("/") is None