plexara · cjimti · May 6, 2026 · May 6, 2026 · May 6, 2026 · May 6, 2026
diff --git a/Makefile b/Makefile
@@ -255,12 +255,17 @@ tools-check: tools-install
 verify: tools-check fmt-check vet embed-clean test lint security coverage-gate coverage-report
 	@echo ""
 	@echo "=== verify: all checks passed ==="
+	@# Pre-commit gate sentinel: record the current diff hash so the
+	@# review-gate hook knows verify is green for this exact tree state.
+	@mkdir -p .claude
+	@{ git diff --cached HEAD 2>/dev/null; git diff 2>/dev/null; } \
+		| shasum -a 256 | cut -c1-16 > .claude/.last-verify-passed
 
 ## dev: One-command full local stack; postgres + keycloak in docker, binary in foreground.
 ##      Builds the SPA into the embed dir if dist/index.html is missing so the
 ##      portal renders on first run. Generates .env.dev with random secrets on
 ##      first run (gitignored); subsequent runs reuse those so sessions persist.
-dev: dev-up dev-wait dev-ui-if-needed dev-secrets
+dev: dev-secrets dev-up dev-wait dev-ui-if-needed
 	@. ./.env.dev && \
 	echo "" && \
 	echo "Starting mcp-test (config: configs/mcp-test.live.yaml)..." && \
@@ -273,7 +278,7 @@ dev: dev-up dev-wait dev-ui-if-needed dev-secrets
 
 ## dev-anon: Run anonymous-mode dev binary (no Keycloak, no auth); fastest iteration
 dev-anon: dev-secrets
-	docker compose -f docker-compose.dev.yml up -d postgres
+	@. ./.env.dev && docker compose -f docker-compose.dev.yml up -d postgres
 	@. ./.env.dev && $(GO) run $(CMD_DIR) --config configs/mcp-test.dev.yaml
 
 ## dev-secrets: Generate .env.dev with random cookie secret + dev API key on first run.
@@ -288,14 +293,20 @@ dev-secrets:
 		chmod 600 .env.dev; \
 	fi
 
-## dev-up: Start the dev stack (postgres + keycloak) without the binary
-dev-up:
-	docker compose -f docker-compose.dev.yml up -d postgres keycloak
-
-## dev-wait: Block until postgres and keycloak are reachable
-dev-wait:
+## dev-up: Start the dev stack (postgres + keycloak) without the binary.
+##         Depends on dev-secrets because docker compose interpolates the
+##         MCPTEST_COOKIE_SECRET reference at parse time even when the
+##         mcp-test service isn't being started.
+dev-up: dev-secrets
+	@. ./.env.dev && docker compose -f docker-compose.dev.yml up -d postgres keycloak
+
+## dev-wait: Block until postgres and keycloak are reachable.
+##           Sources .env.dev because `docker compose exec` re-parses the
+##           compose file and its interpolated MCPTEST_COOKIE_SECRET
+##           reference must resolve.
+dev-wait: dev-secrets
 	@echo "Waiting for Postgres..."
-	@until docker compose -f docker-compose.dev.yml exec -T postgres pg_isready -U mcp >/dev/null 2>&1; do sleep 1; done
+	@. ./.env.dev && until docker compose -f docker-compose.dev.yml exec -T postgres pg_isready -U mcp >/dev/null 2>&1; do sleep 1; done
 	@echo "Waiting for Keycloak realm..."
 	@until curl -fs http://localhost:8081/realms/mcp-test/.well-known/openid-configuration >/dev/null 2>&1; do sleep 2; done
 	@echo "Stack ready."
@@ -307,12 +318,12 @@ dev-ui-if-needed:
 	fi
 
 ## dev-down: Stop the dev stack
-dev-down:
-	docker compose -f docker-compose.dev.yml down
+dev-down: dev-secrets
+	@. ./.env.dev && docker compose -f docker-compose.dev.yml down
 
 ## dev-logs: Tail compose logs
-dev-logs:
-	docker compose -f docker-compose.dev.yml logs -f --tail=100
+dev-logs: dev-secrets
+	@. ./.env.dev && docker compose -f docker-compose.dev.yml logs -f --tail=100
 
 ## docker: Build the docker image (matches the goreleaser pipeline).
 ##         Builds the binary first, copies it where the goreleaser-style

diff --git a/docs/operations/inspection.md b/docs/operations/inspection.md
@@ -0,0 +1,123 @@
+---
+title: Inspection workflow
+description: End-to-end walkthrough of the audit inspection utility — capture a call, open the drawer, replay it, compare to a baseline, filter via JSONB paths, and export.
+---
+
+# Inspection workflow
+
+The audit pipeline records every tool call. The inspection utility is the operator-facing toolset for working with those records: a click-to-expand drawer, a per-event replay, side-by-side comparison, server-side JSONB-path filters, and an NDJSON export. This page is the workflow that ties them together.
+
+## What you need
+
+- A running mcp-test instance (`make dev` or a deployment).
+- An API key or portal session for the user account that's allowed to read the audit log.
+- (Optional, for replay) The MCP server registered in this deployment must still know about the tool you're replaying. Replays of removed tools are refused with `400`.
+
+## 1. Capture a call
+
+The pipeline captures every `tools/call` automatically when `audit.enabled: true` (default). Two tables are written in one transaction:
+
+- `audit_events` — indexed summary (timestamp, tool, user, success, duration). Used for browsing and filtering.
+- `audit_payloads` — full request / response envelope (parameters, headers, response result, response error, notifications, replay linkage). Optional; `capture_payloads: false` keeps the summary only.
+
+To produce a fresh row to inspect, fire any tool. The portal's Try-It page (`/portal/tools/<name>`) is the easiest way; any MCP client works too.
+
+## 2. Open the drawer
+
+In the portal, navigate to `Audit`. Each row in the events table is clickable; the click opens a side drawer with four tabs:
+
+### Overview tab
+Timing, identity, request id, session id, source (`mcp` for real client calls, `portal-tryit` for /admin/tryit invocations, `portal-replay` for replays), and the replay linkage (`Replayed from`) when present.
+
+### Request tab
+Shows the captured `request_params` (sanitized via `audit.redact_keys`, with redacted values shown as `"[redacted]"`). Captured request headers are included when `audit.capture_headers: true` — credential-bearing names (`Authorization`, `Cookie`, `Set-Cookie`, `Proxy-Authorization`, `X-API-Key`) are stored as `"[redacted]"` regardless of the redact-keys config; the names remain visible so an operator can confirm "this request carried an Authorization header" without seeing the token. A truncation warning appears when the request body exceeded `audit.max_payload_bytes`.
+
+### Response tab
+The full `CallToolResult` content blocks (text, image, audio, structured) plus `response_error` when the call errored. The shape matches what the SDK serializes to the wire so you can see what the client saw. A truncation warning fires when the response body was too large.
+
+### Notifications tab
+Chronological list of every `notifications/*` (progress, log message) the tool dispatched during the call window. Each entry is `{ts, method, params}` with `params` rendered as JSON. A trim warning fires when the notification list exceeded `max_payload_bytes` (the trailing entries are missing; the prefix is what's stored).
+
+Drawer interactions:
+
+- The browser URL gets `?id=<event-id>` appended so the drawer is deep-linkable; share the URL and the recipient lands on the same row.
+- The **Compare** button stashes the open event id in `localStorage`. Open another row's drawer and you'll see "Compare with selected" — clicking opens the comparison page with both events.
+- The **Replay** button is the next step.
+- `Esc` and the backdrop close the drawer.
+
+## 3. Replay a captured call
+
+The drawer's **Replay** button calls `POST /api/v1/portal/audit/events/{id}/replay`. The server re-invokes the tool through an in-process MCP client with the same `request_params` the original call had. A new audit row lands tagged `source=portal-replay` with `replayed_from` pointing at the original event; the new event is fired with **your** identity, not the original caller's, so the audit row reflects who triggered the replay.
+
+The replay banner inside the drawer shows the new event id; clicking it deep-links to that row. Refused replays show a banner explaining why (most common: redacted parameter values, no captured payload, or a tool that's no longer registered).
+
+**Replay re-runs side effects.** If the original call wrote to a database, sent a notification, or charged a card, the replay does it again. There is no dry-run mode and no per-tool allow list. Treat replay like Try-It: a developer affordance for debugging, not a production self-service feature. The portal asks for confirmation before firing the request; the disabled state of the Replay button indicates whether the row is replayable at all (it isn't when the original payload wasn't captured or any param was redacted).
+
+Per-identity rate limit (scoped by API key id or OIDC subject): 5 burst, one token refilled every 12 seconds (sustained 5/min). `429` with `Retry-After` when exhausted. Tokens are only consumed after validation passes, so clicks on non-replayable rows return `400` without burning the operator's budget.
+
+## 4. Compare to a baseline
+
+Two events you stashed via the drawer's Compare button can be opened side-by-side at `/portal/audit/compare?a=<id>&b=<id>`. The page renders:
+
+- A summary block (tool, source, result, duration, user, auth type) with diffs highlighted.
+- Per-payload diff trees for `request_params`, `response_result`, `response_error`, plus a count comparison for notifications.
+- Each leaf in the tree is annotated: same (muted), differ (warning color, `before → after`), only-in-A (red `-`), only-in-B (green `+`).
+
+The diff is JSON-path-aware: it walks objects and arrays by key/index instead of doing a text diff, so reordered keys (a Postgres read returning fields in any order) don't show as changes, and a string-vs-object swap appears as one diff at the path it happened — not as a wall of red lines.
+
+Common compare workflows:
+
+- A successful call and a failed call with the "same" arguments. The summary highlights `Result`; the response trees show what differed in the tool's output.
+- Two captures of the same tool name spanning a deploy. Use the comparison to sanity-check that a refactor didn't change the response shape.
+- A replay against its original. The drawer has a quick path: open the replay row, the drawer's Overview tab shows `Replayed from: <id>`; navigate to that row, stash, then back to the replay row, stash, then Compare.
+
+## 5. Filter via JSONB paths
+
+The Audit page has a **JSONB filters** toggle that opens an editor for the path-aware filters the server compiles to JSONB containment queries. Filters that operators commonly keep set:
+
+- `param.user.id=alice` — every call where the request param at the dotted path `user.id` equals `alice`.
+- `response.isError=true` — every call whose response had `IsError=true` (matches the JSON literal `true`, not the string `"true"`; values are type-detected).
+- `header.User-Agent=curl/8.0` — every call from a specific User-Agent. Header names are canonicalized (`user-agent` matches `User-Agent`).
+- `has=response_error` — every call that recorded a transport-level error.
+- `has=notifications` — every call that fired any notification.
+
+Filters are AND-combined with each other and with the indexed-column filters (tool, user, success, etc.). They run against `audit_payloads` via `EXISTS` subqueries that hit the existing GIN indexes on `request_params` and `response_result`; `request_headers` is unindexed today so pair `header.*` with a time-range filter on busy deployments.
+
+**Quoting forces strings.** `?param.code=200` matches the JSON number `200`; `?param.code="200"` matches the JSON string `"200"`. Header values are always strings; type-detection does not apply there.
+
+## 6. Live tail
+
+The **Live tail** toggle on the Audit page opens an SSE connection to `/api/v1/portal/audit/stream`. New audit events appear in a fixed-cap most-recent-first list (cap 20) above the table as they're written; clicking one opens the drawer. The table itself stays a historical-filter view so the live tail doesn't blow away your filtered context.
+
+The stream sends an opening `: connected` comment on connect, an `event: audit\ndata: <json>` per write, and a `: keepalive` comment every 30 seconds. Slow consumers see per-subscriber drops; the producer never blocks.
+
+## 7. Export
+
+`GET /api/v1/portal/audit/export?format=jsonl` streams the filtered set as newline-delimited summary rows for offline analysis, ad-hoc ETL, or backups.
+
+```bash
+# Every error from the last 24h, piped through jq. (`date -v-24H` is BSD/macOS syntax; with GNU date use `date -u -d '24 hours ago' +%FT%TZ`.)
+curl -H "X-API-Key: $KEY" \
+  "$BASE/api/v1/portal/audit/export?success=false&from=$(date -u -v-24H +%FT%TZ)" \
+  | jq -r '.tool_name + "\t" + .error_message'
+```
+
+The same JSONB filters work; combine `?success=false&has=notifications&from=...` to scope a backfill.
+
+The export omits the captured payload from each line; if you need the full envelope, follow up with `/audit/events/{id}` per event. The endpoint is currently capped at 100,000 rows per request and truncates at the cap with no in-band marker; verify the row count against your filter window and tighten if you hit the ceiling. (Future versions may emit a sentinel line or trailer; do not rely on the current silent-truncation behavior.)
+
+## End-to-end example
+
+The shortest path from "a call broke" to a written-up bug report:
+
+1. **Find the failure.** Audit page, set Status=`error`, glance at the table.
+2. **Understand it.** Click the row. Overview shows the tool + duration; Response shows the `response_error.category` + message; Notifications shows how far the tool got before failing.
+3. **Reproduce it.** Click Replay. New row in the table tagged `portal-replay`. Open it; if it failed the same way, you have a deterministic repro.
+4. **Compare.** Stash the current failed event via the drawer's Compare button. Open a healthy past call of the same tool, stash that. Compare opens both side-by-side; the Response tree highlights what changed.
+5. **Hand it off.** Copy the event id (from the URL `?id=` or the drawer's id field) into the bug report. The recipient navigates to `/portal/audit?id=<id>` and lands on the same drawer.
+
+## Reference
+
+- HTTP endpoints: `docs/reference/http-api.md`
+- Audit schema and retention: `docs/operations/audit.md`
+- v1.1.0 baseline + v1.1.1 schema follow-up: see the audit.md "Two-table layout" section.
diff --git a/docs/reference/http-api.md b/docs/reference/http-api.md
@@ -49,9 +49,10 @@ Behind the cookie or `X-API-Key` / `Authorization: Bearer`.
 | `GET` | `/api/v1/portal/instructions` | The `server.instructions` text the MCP server hands to clients at initialize time. |
 | `GET` | `/api/v1/portal/tools` | List of `{name, group, description, input_schema}` for every registered tool. |
 | `GET` | `/api/v1/portal/tools/{name}` | Same shape, single tool. |
+| `GET` | `/api/v1/portal/audit/meta` | Filter contract surface: `{has_keys, json_sources, replay: {burst, refill_secs, sustained_min}, export: {max_rows}}`. Lets a UI build its filter editor against the server's source of truth without duplicating allow-lists. |
 | `GET` | `/api/v1/portal/audit/events` | Paginated audit events. Query: `from`, `to` (RFC 3339), `tool`, `user`, `session`, `success`, `q`, `limit`, `offset`, plus the JSONB filters described below. |
 | `GET` | `/api/v1/portal/audit/events/{id}` | Single event by id (UUID); includes the captured payload row when present. 400 on a non-UUID id, 404 when the event isn't recorded. |
-| `POST` | `/api/v1/portal/audit/events/{id}/replay` | Re-invokes the captured tool call through an in-process MCP client. Writes a new audit event tagged `source=portal-replay` with `replayed_from` pointing at `{id}`. Per-identity rate limited (5 burst, 1 token / 12s); returns `429 Too Many Requests` with `Retry-After` when exhausted. Refuses (`400`) if the original event has no captured payload, has redacted parameter values, or names a tool no longer registered. CSRF-gated via `X-Requested-With`. |
+| `POST` | `/api/v1/portal/audit/events/{id}/replay` | Re-invokes the captured tool call through an in-process MCP client. Writes a new audit event tagged `source=portal-replay` with `replayed_from` pointing at `{id}`. Per-identity rate limited (5 burst, 1 token / 12s); returns `429 Too Many Requests` with `Retry-After` when exhausted. Tokens are consumed *after* validation passes, so a click on a non-replayable row (no payload, redacted params, missing tool) returns `400` without burning the operator's budget. CSRF-gated via `X-Requested-With`. |
 | `GET` | `/api/v1/portal/audit/export` | NDJSON stream of summary rows for a filter. `format=jsonl` (default) is the only supported format. Same filter surface as `/events`. Capped at 100,000 rows per request. |
 | `GET` | `/api/v1/portal/audit/stream` | SSE live tail of new audit events. One `event: audit\ndata: <event JSON>` per write; opening comment `: connected` confirms the connection; `: keepalive` every 30 seconds. Sets `X-Accel-Buffering: no` for nginx-fronted deployments. |
 | `GET` | `/api/v1/portal/audit/timeseries` | Bucketed counts. Query: `from`, `to`, `bucket` (Go duration). |

diff --git a/pkg/audit/jsonfilter.go b/pkg/audit/jsonfilter.go
@@ -134,12 +134,27 @@ func numericEq(a float64, b any) bool {
 	return false
 }
 
+// AllowedHasKeysList returns a fresh clone of AllowedHasKeys for callers
+// that need to surface the list (e.g. a portal /audit/meta endpoint).
+// Because the result is a copy, mutating the returned slice cannot
+// alter the package-level var. The gate at parse time remains the
+// closed-switch IsAllowedHasKey; this helper, the exported var, and the
+// switch must stay in sync (TestAllowList_FunctionAndSliceAgree checks it).
+func AllowedHasKeysList() []string {
+	return append([]string(nil), AllowedHasKeys...)
+}
+
+// AllowedJSONSourcesList returns a fresh clone of AllowedJSONSources, so
+// mutating the returned slice cannot alter the package-level var.
+func AllowedJSONSourcesList() []string {
+	return append([]string(nil), AllowedJSONSources...)
+}
+
 // IsAllowedHasKey reports whether key is an allowlisted has= column.
 // Implemented as a closed switch (not a slice iteration) so the
-// AllowedHasKeys exported var cannot be mutated by an importing package
-// to widen what gets spliced into the verbatim SQL column reference in
-// buildSelect. The slice stays exported for documentation generators
-// and reflection callers; the gate is the function.
+// internal allowlist cannot be mutated by an importing package to widen
+// what gets spliced into the verbatim SQL column reference in
+// buildSelect.
 func IsAllowedHasKey(key string) bool {
 	switch key {
 	case "request_params",

diff --git a/pkg/audit/jsonfilter_test.go b/pkg/audit/jsonfilter_test.go
@@ -226,4 +226,42 @@ func TestAllowList_FunctionAndSliceAgree(t *testing.T) {
 			t.Errorf("IsAllowedJSONSource(%q) = true, want false", s)
 		}
 	}
+
+	// AllowedHasKeysList / AllowedJSONSourcesList must mirror the
+	// underlying slices exactly and must return a fresh clone each call
+	// (a downstream caller mutating the returned slice must not affect
+	// the next caller).
+	if got := AllowedHasKeysList(); len(got) != len(AllowedHasKeys) {
+		t.Fatalf("AllowedHasKeysList len = %d, want %d", len(got), len(AllowedHasKeys))
+	}
+	for i, k := range AllowedHasKeysList() {
+		if k != AllowedHasKeys[i] {
+			t.Errorf("AllowedHasKeysList[%d] = %q, want %q (drift vs AllowedHasKeys)", i, k, AllowedHasKeys[i])
+		}
+	}
+	first := AllowedHasKeysList()
+	first[0] = "MUTATED"
+	if AllowedHasKeysList()[0] == "MUTATED" {
+		t.Error("AllowedHasKeysList() returned a shared slice; mutation leaked across callers")
+	}
+	if AllowedHasKeys[0] == "MUTATED" {
+		t.Error("AllowedHasKeysList() returned the package var directly; mutation leaked into AllowedHasKeys")
+	}
+
+	if got := AllowedJSONSourcesList(); len(got) != len(AllowedJSONSources) {
+		t.Fatalf("AllowedJSONSourcesList len = %d, want %d", len(got), len(AllowedJSONSources))
+	}
+	for i, s := range AllowedJSONSourcesList() {
+		if s != AllowedJSONSources[i] {
+			t.Errorf("AllowedJSONSourcesList[%d] = %q, want %q", i, s, AllowedJSONSources[i])
+		}
+	}
+	firstSrc := AllowedJSONSourcesList()
+	firstSrc[0] = "MUTATED"
+	if AllowedJSONSourcesList()[0] == "MUTATED" {
+		t.Error("AllowedJSONSourcesList() returned a shared slice; mutation leaked across callers")
+	}
+	if AllowedJSONSources[0] == "MUTATED" {
+		t.Error("AllowedJSONSourcesList() returned the package var directly; mutation leaked into AllowedJSONSources")
+	}
 }