diff --git a/.github/codeql/codeql-config.yml b/.github/codeql/codeql-config.yml
new file mode 100644
index 0000000..5df4396
--- /dev/null
+++ b/.github/codeql/codeql-config.yml
@@ -0,0 +1,27 @@
+# mcp-test CodeQL configuration.
+#
+# Referenced from .github/workflows/codeql.yml via config-file. Without
+# this, CodeQL uses the default suite plus no project-specific tuning.
+
+name: "mcp-test CodeQL"
+
+queries:
+  - uses: security-and-quality
+
+# Repository-wide query filters. Each entry must justify why a query is
+# excluded; "looks scary" is not a reason. The audit logger is the only
+# legitimate "Log function with potentially sensitive data" sink in this
+# project, and that's by design (forensics over discretion). Adding
+# any new Log-named function in this codebase MUST be reviewed against
+# this exception, since the rule no longer fires globally.
+query-filters:
+  - exclude:
+      id: go/clear-text-logging
+      # Justification: audit.Logger.Log captures full audit_events
+      # rows (sanitized via redact_keys) by design. CodeQL traces
+      # err.Error() -> Event.ErrorMessage -> *ev -> Log() and flags
+      # the whole chain. The error message is what an operator NEEDS
+      # to see during incident review; suppressing it would defeat
+      # the audit pipeline. gosec and semgrep still cover other
+      # cleartext-credential-in-log patterns at the function-call
+      # level (e.g. fmt.Println, log.Print*).
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 9c7b9c0..7fdda7c 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -42,8 +42,9 @@ jobs:
         with:
           languages: go
           # security-and-quality bundles the security pack with style /
-          # correctness rules. Findings post to the repo's Security tab.
-          queries: security-and-quality
+          # correctness rules. Project-specific query exclusions live
+          # in the config-file; findings post to the repo's Security tab.
+          config-file: ./.github/codeql/codeql-config.yml
 
       - name: Autobuild
         uses: github/codeql-action/autobuild@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
diff --git a/Makefile b/Makefile
index 0b28ff3..4f5f8ae 100644
--- a/Makefile
+++ b/Makefile
@@ -147,6 +147,40 @@ govulncheck: tools-check
 ## security: gosec + govulncheck
 security: gosec govulncheck
 
+## codeql: Run the same CodeQL security-and-quality suite CI runs.
+##         Requires the codeql CLI on PATH (brew install codeql or
+##         download from https://github.com/github/codeql-cli-binaries).
+##         Heavy (~3 min on first run, ~1 min cached). Not part of
+##         `make verify` by default; run before opening a PR.
+##
+##         The config file at .github/codeql/codeql-config.yml is
+##         the single source of truth for query exclusions; this
+##         target uses the same file CI does so local results match.
+CODEQL_DB     ?= $(BUILD_DIR)/codeql-db
+CODEQL_RESULT ?= $(BUILD_DIR)/codeql-results.sarif
+codeql:
+	@command -v codeql >/dev/null 2>&1 || { \
+		echo "FAIL: codeql CLI not on PATH."; \
+		echo "  brew install codeql"; \
+		echo "  (or fetch from https://github.com/github/codeql-cli-binaries/releases)"; \
+		exit 1; \
+	}
+	@echo "Building CodeQL database (Go) at $(CODEQL_DB)..."
+	@rm -rf $(CODEQL_DB)
+	@mkdir -p $(BUILD_DIR)
+	codeql database create $(CODEQL_DB) --language=go --source-root=. --overwrite
+	@echo "Analyzing with security-and-quality + project config..."
+	codeql database analyze $(CODEQL_DB) \
+		codeql/go-queries:codeql-suites/go-security-and-quality.qls \
+		--format=sarif-latest \
+		--output=$(CODEQL_RESULT) \
+		--threads=0 \
+		--sarif-category=/language:go
+	@echo ""
+	@echo "Filtering against .github/codeql/codeql-config.yml exclusions..."
+	@./scripts/codeql-gate.sh $(CODEQL_RESULT) .github/codeql/codeql-config.yml
+	@echo "CodeQL: clean."
+
 COVERAGE_MIN ?= 80
 
 ## coverage: Run tests and produce a per-package coverage profile.
diff --git a/docs/operations/audit.md b/docs/operations/audit.md
index d58444d..85ea9ff 100644
--- a/docs/operations/audit.md
+++ b/docs/operations/audit.md
@@ -101,6 +101,48 @@ JSON string, `?param.code=200` matches the number.
 Allowed `has=` columns: `request_params`, `request_headers`,
 `response_result`, `response_error`, `notifications`, `replayed_from`.
 
+### Replay a captured call
+
+`POST /api/v1/portal/audit/events/{id}/replay` re-invokes the tool with the same arguments captured on the original event, through an in-process MCP client. The replay produces a new audit row tagged `source=portal-replay` with `replayed_from = {id}`; that row is fired with the portal-authenticated identity, not the original caller's, so an operator can see who triggered the replay.
+
+```bash
+# Find a tool error from the last hour that you want to reproduce.
+curl -H "X-API-Key: $KEY" \
+  "$BASE/api/v1/portal/audit/events?response.isError=true&from=$(date -u -v-1H +%FT%TZ)&limit=5" \
+  | jq -r '.events[].id'
+
+# Replay one. The response includes the new event's id so you can
+# follow up with /events/{id}.
+curl -X POST -H "X-API-Key: $KEY" -H "X-Requested-With: x" \
+  "$BASE/api/v1/portal/audit/events/<id>/replay" | jq
+```
+
+The replay refuses (`400`) when:
+
+- the original event has no captured payload (capture was disabled when it was written),
+- any captured parameter value is the literal `[redacted]` (replaying with a placeholder would mislead about what the call did; re-stage manually via Try-It with the real value),
+- the named tool is no longer registered.
+
+A per-identity token bucket (5 burst, ~5/min sustained) protects against runaway replay loops; exhausted callers get `429 Too Many Requests` with a `Retry-After` header.
+
+Replay re-runs the tool's side effects. If the original call wrote to a database, sent a notification, or charged a card, the replay does it again. There is no dry-run mode and no per-tool allow list; if the operator can hit `/replay`, every registered tool is replayable. Treat this like Try-It: a developer affordance for debugging, not a production self-service.
+
+### Live tail
+
+`GET /api/v1/portal/audit/stream` is an SSE endpoint that emits one `event: audit\ndata: <event JSON>` per newly-written audit event. Open the connection, fire calls, watch them flow:
+
+```bash
+# In one terminal:
+curl -N -H "X-API-Key: $KEY" "$BASE/api/v1/portal/audit/stream"
+
+# In another, fire some tool calls; the first terminal sees them
+# arrive within ~200ms of each write.
+```
+
+The endpoint emits an opening `: connected` comment so the consumer can detect the connection is live before the first audit row arrives, and a `: keepalive` comment every 30 seconds to keep idle proxies from killing the connection. Subscribers see only events written AFTER they subscribe; for history use `/events` or `/export`.
+
+Slow consumers drop events silently per-subscriber (the producer never blocks). The buffered channel default is 64 events; SSE clients should drain promptly to avoid drops during bursts.
+
 ### NDJSON export
 
 `/api/v1/portal/audit/export?format=jsonl` streams summary rows as
diff --git a/docs/reference/http-api.md b/docs/reference/http-api.md
index afedc57..a0caa71 100644
--- a/docs/reference/http-api.md
+++ b/docs/reference/http-api.md
@@ -51,7 +51,9 @@ Behind the cookie or `X-API-Key` / `Authorization: Bearer`.
 | `GET` | `/api/v1/portal/tools/{name}` | Same shape, single tool. |
 | `GET` | `/api/v1/portal/audit/events` | Paginated audit events. Query: `from`, `to` (RFC 3339), `tool`, `user`, `session`, `success`, `q`, `limit`, `offset`, plus the JSONB filters described below. |
 | `GET` | `/api/v1/portal/audit/events/{id}` | Single event by id (UUID); includes the captured payload row when present. 400 on a non-UUID id, 404 when the event isn't recorded. |
+| `POST` | `/api/v1/portal/audit/events/{id}/replay` | Re-invokes the captured tool call through an in-process MCP client. Writes a new audit event tagged `source=portal-replay` with `replayed_from` pointing at `{id}`. Per-identity rate limited (5 burst, 1 token / 12s); returns `429 Too Many Requests` with `Retry-After` when exhausted. Refuses (`400`) if the original event has no captured payload, has redacted parameter values, or names a tool no longer registered. CSRF-gated via `X-Requested-With`. |
 | `GET` | `/api/v1/portal/audit/export` | NDJSON stream of summary rows for a filter. `format=jsonl` (default) is the only supported format. Same filter surface as `/events`. Capped at 100,000 rows per request. |
+| `GET` | `/api/v1/portal/audit/stream` | SSE live tail of new audit events. One `event: audit\ndata: <event JSON>` per write; opening comment `: connected` confirms the connection; `: keepalive` every 30 seconds. Sets `X-Accel-Buffering: no` for nginx-fronted deployments. |
 | `GET` | `/api/v1/portal/audit/timeseries` | Bucketed counts. Query: `from`, `to`, `bucket` (Go duration). |
 | `GET` | `/api/v1/portal/audit/breakdown` | Group-by aggregations. Query: `by` (`tool`/`user`/`success`/`auth_type`). |
 | `GET` | `/api/v1/portal/dashboard` | 1-hour stats + recent activity. |
diff --git a/internal/server/server.go b/internal/server/server.go
index 216a2f3..b56cd0a 100644
--- a/internal/server/server.go
+++ b/internal/server/server.go
@@ -122,7 +122,7 @@ func Build(ctx context.Context, cfg *config.Config, logger *slog.Logger) (*Appli
 			app.browser = ba
 		}
 		// Rebuild the mux with portal handlers attached.
-		portalAPI := httpsrv.NewPortalAPI(cfg, app.registry, auditLog)
+		portalAPI := httpsrv.NewPortalAPI(cfg, app.registry, auditLog, app.mcpServer, cfg.Audit.RedactKeys)
 		adminAPI := httpsrv.NewAdminAPI(dbStore, app.mcpServer, auditLog, app.registry, cfg.Audit.RedactKeys)
 		portalAuth := httpsrv.NewPortalAuth(sessions, chain)
 		app.mux = buildMuxWithPortal(cfg, app.mcpServer, app.readiness, app.browser, portalAPI, adminAPI, portalAuth)
@@ -143,7 +143,7 @@ func BuildWithDeps(cfg *config.Config, logger *slog.Logger, chain *auth.Chain, a
 		if cfg.Portal.CookieSecret != "" {
 			sessions, _ = httpsrv.NewSessionStore(cfg.Portal.CookieName, cfg.Portal.CookieSecret, false, time.Hour)
 		}
-		portalAPI := httpsrv.NewPortalAPI(cfg, app.registry, auditLog)
+		portalAPI := httpsrv.NewPortalAPI(cfg, app.registry, auditLog, app.mcpServer, cfg.Audit.RedactKeys)
 		adminAPI := httpsrv.NewAdminAPI(nil, app.mcpServer, auditLog, app.registry, cfg.Audit.RedactKeys)
 		portalAuth := httpsrv.NewPortalAuth(sessions, chain)
 		app.sessions = sessions
diff --git a/pkg/audit/async.go b/pkg/audit/async.go
index e2e15d6..f6cdb50 100644
--- a/pkg/audit/async.go
+++ b/pkg/audit/async.go
@@ -26,6 +26,50 @@ type AsyncLogger struct {
 
 	mu      sync.Mutex
 	dropped uint64
+
+	// Live-tail subscribers. Mutex-protected for the registry
+	// itself; sends to the channels are non-blocking so a slow
+	// consumer can't stall the drain goroutine. Drop counts per
+	// subscriber are intentionally NOT tracked individually; the
+	// global Dropped() count covers the buffered-channel-input
+	// drop, and sse-tail consumers are expected to handle gaps.
+	subsMu sync.Mutex
+	subs   []*subscriber
+}
+
+// subscriber holds a per-consumer channel + a closed flag, both
+// protected by mu so a concurrent broadcast and cancel cannot race
+// on s.ch (send on closed channel panic / data race detector).
+type subscriber struct {
+	mu     sync.Mutex
+	ch     chan Event
+	closed bool
+}
+
+// send attempts a non-blocking send. Caller must NOT hold s.mu.
+// Returns silently when the buffer is full (drop) or the subscriber
+// has been cancelled (drop).
+func (s *subscriber) send(ev Event) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.closed {
+		return
+	}
+	select {
+	case s.ch <- ev:
+	default:
+	}
+}
+
+// closeOnce closes the channel exactly once. Idempotent.
+func (s *subscriber) closeOnce() {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.closed {
+		return
+	}
+	s.closed = true
+	close(s.ch)
 }
 
 // NewAsyncLogger returns a buffered async wrapper around inner. bufferSize
@@ -181,6 +225,52 @@ func (a *AsyncLogger) write(ev Event) {
 	defer cancel()
 	if err := a.inner.Log(ctx, ev); err != nil {
 		a.logger.Warn("audit write failed", "tool", ev.ToolName, "err", err)
+		return
+	}
+	// Broadcast successful writes to live-tail subscribers. Done
+	// after inner.Log() so subscribers only see persisted events;
+	// a write that errored out doesn't surface to the live tail.
+	a.broadcast(ev)
+}
+
+// Subscribe registers a live-tail consumer and returns the channel
+// plus a cancel func. See SubscribingLogger doc for semantics.
+//
+// buf <= 0 falls back to a sane default (64). Slow consumers cause
+// per-subscriber event drops, not producer-side blocking.
+func (a *AsyncLogger) Subscribe(buf int) (<-chan Event, func()) {
+	if buf <= 0 {
+		buf = 64
+	}
+	s := &subscriber{ch: make(chan Event, buf)}
+	a.subsMu.Lock()
+	a.subs = append(a.subs, s)
+	a.subsMu.Unlock()
+
+	cancel := func() {
+		a.subsMu.Lock()
+		for i, x := range a.subs {
+			if x == s {
+				a.subs = append(a.subs[:i], a.subs[i+1:]...)
+				break
+			}
+		}
+		a.subsMu.Unlock()
+		s.closeOnce()
+	}
+	return s.ch, cancel
+}
+
+// broadcast sends ev to every active subscriber, non-blocking. A
+// subscriber whose buffer is full silently drops this event. Each
+// subscriber's send is gated by its own mutex so a concurrent cancel
+// can't close the channel mid-send.
+func (a *AsyncLogger) broadcast(ev Event) {
+	a.subsMu.Lock()
+	subs := append([]*subscriber{}, a.subs...)
+	a.subsMu.Unlock()
+	for _, s := range subs {
+		s.send(ev)
 	}
 }
 
diff --git a/pkg/audit/logger.go b/pkg/audit/logger.go
index a51df13..c3895b9 100644
--- a/pkg/audit/logger.go
+++ b/pkg/audit/logger.go
@@ -55,6 +55,27 @@ type StreamingLogger interface {
 // and the underlying backend delivers fewer.
 const MaxQueryLimit = 1000
 
+// SubscribingLogger is the optional capability for fan-out of newly
+// written audit events to live consumers (the SSE live-tail endpoint
+// is the primary use). Stores or wrappers that broadcast events on
+// Log() implement it; the consumer type-asserts before subscribing.
+//
+// Semantics:
+//   - Subscribe returns a receive-only channel of events plus a
+//     cancel func. The caller MUST call cancel() on disconnect to
+//     release the slot; otherwise the registry leaks.
+//   - The channel is buffered with `buf` slots. When a producer
+//     writes faster than the consumer drains, events are dropped
+//     for that subscriber (the producer never blocks on a slow
+//     consumer). Picking buf is a tradeoff between memory and the
+//     drop rate; 64 is a reasonable starting point for SSE.
+//   - Subscribers see events that succeeded at the underlying
+//     backend (in AsyncLogger, the broadcast happens after
+//     inner.Log() returns nil). Failed writes are not surfaced.
+type SubscribingLogger interface {
+	Subscribe(buf int) (<-chan Event, func())
+}
+
 // TimePoint is one bucket of an audit time series.
 type TimePoint struct {
 	Time     time.Time `json:"time"`
diff --git a/pkg/audit/memory.go b/pkg/audit/memory.go
index 0c68274..af573f7 100644
--- a/pkg/audit/memory.go
+++ b/pkg/audit/memory.go
@@ -5,6 +5,8 @@ import (
 	"sort"
 	"sync"
 	"time"
+
+	"github.com/google/uuid"
 )
 
 // breakdownKeyFn picks the per-event key used by Breakdown.
@@ -16,19 +18,99 @@ import (
 type MemoryLogger struct {
 	mu     sync.Mutex
 	events []Event
+
+	subsMu sync.Mutex
+	subs   []*memSubscriber
+}
+
+// memSubscriber mirrors AsyncLogger's subscriber: per-subscriber mutex
+// gates send and close so a cancel doesn't race with an in-flight
+// broadcast.
+type memSubscriber struct {
+	mu     sync.Mutex
+	ch     chan Event
+	closed bool
+}
+
+func (s *memSubscriber) send(ev Event) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.closed {
+		return
+	}
+	select {
+	case s.ch <- ev:
+	default:
+	}
+}
+
+func (s *memSubscriber) closeOnce() {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.closed {
+		return
+	}
+	s.closed = true
+	close(s.ch)
 }
 
 // NewMemoryLogger returns an empty logger.
 func NewMemoryLogger() *MemoryLogger { return &MemoryLogger{} }
 
-// Log appends the event.
+// Log appends the event and broadcasts to live-tail subscribers.
+// Auto-assigns ev.ID when empty, matching the Postgres store's
+// behavior so test fixtures see a stable id without setting one
+// explicitly.
 func (m *MemoryLogger) Log(_ context.Context, ev Event) error {
+	if ev.ID == "" {
+		ev.ID = uuid.NewString()
+	}
 	m.mu.Lock()
-	defer m.mu.Unlock()
 	m.events = append(m.events, ev)
+	m.mu.Unlock()
+	m.broadcast(ev)
 	return nil
 }
 
+// Subscribe registers a live-tail consumer. See SubscribingLogger doc.
+//
+// Used by /audit/stream tests that bypass AsyncLogger and use
+// MemoryLogger directly. Same buffered-channel + non-blocking-send
+// semantics as AsyncLogger.Subscribe; same per-subscriber mutex
+// pattern to keep cancel from racing with broadcast.
+func (m *MemoryLogger) Subscribe(buf int) (<-chan Event, func()) {
+	if buf <= 0 {
+		buf = 64
+	}
+	s := &memSubscriber{ch: make(chan Event, buf)}
+	m.subsMu.Lock()
+	m.subs = append(m.subs, s)
+	m.subsMu.Unlock()
+
+	cancel := func() {
+		m.subsMu.Lock()
+		for i, x := range m.subs {
+			if x == s {
+				m.subs = append(m.subs[:i], m.subs[i+1:]...)
+				break
+			}
+		}
+		m.subsMu.Unlock()
+		s.closeOnce()
+	}
+	return s.ch, cancel
+}
+
+// broadcast sends ev to every active subscriber, non-blocking.
+func (m *MemoryLogger) broadcast(ev Event) {
+	m.subsMu.Lock()
+	subs := append([]*memSubscriber{}, m.subs...)
+	m.subsMu.Unlock()
+	for _, s := range subs {
+		s.send(ev)
+	}
+}
+
 // Query returns matching events ordered by timestamp DESC. Only ToolName,
 // UserID, From, To, Success, and Limit are honored; other filter fields are
 // ignored. Sufficient for tests; the Postgres store covers the full filter
@@ -122,6 +204,21 @@ func (m *MemoryLogger) Stream(ctx context.Context, f QueryFilter, fn func(Event)
 	return nil
 }
 
+// GetPayload returns the in-memory event's Payload pointer, matching
+// the PayloadLogger contract used by the portal detail and replay
+// endpoints. Returns (nil, nil) when no event with the given id is
+// stored, or when the event was logged without a Payload.
+func (m *MemoryLogger) GetPayload(_ context.Context, eventID string) (*Payload, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	for _, ev := range m.events {
+		if ev.ID == eventID {
+			return ev.Payload, nil
+		}
+	}
+	return nil, nil
+}
+
 // Snapshot returns a copy of all events in insertion order, for assertions.
 func (m *MemoryLogger) Snapshot() []Event {
 	m.mu.Lock()
diff --git a/pkg/audit/subscribe_test.go b/pkg/audit/subscribe_test.go
new file mode 100644
index 0000000..b4af499
--- /dev/null
+++ b/pkg/audit/subscribe_test.go
@@ -0,0 +1,164 @@
+package audit
+
+import (
+	"context"
+	"io"
+	"log/slog"
+	"sync"
+	"testing"
+	"time"
+)
+
+func TestAsyncLogger_Subscribe_DeliversAfterSuccessfulInnerLog(t *testing.T) {
+	inner := &fakeLogger{}
+	a := NewAsyncLogger(inner, 16, time.Second,
+		slog.New(slog.NewTextHandler(io.Discard, nil)))
+	defer a.Close()
+
+	ch, cancel := a.Subscribe(8)
+	defer cancel()
+
+	_ = a.Log(context.Background(), Event{ToolName: "echo"})
+
+	select {
+	case ev := <-ch:
+		if ev.ToolName != "echo" {
+			t.Errorf("ToolName = %q, want echo", ev.ToolName)
+		}
+	case <-time.After(2 * time.Second):
+		t.Fatal("subscriber did not receive event within 2s")
+	}
+}
+
+func TestAsyncLogger_Subscribe_FanOutToMultiple(t *testing.T) {
+	inner := &fakeLogger{}
+	a := NewAsyncLogger(inner, 16, time.Second,
+		slog.New(slog.NewTextHandler(io.Discard, nil)))
+	defer a.Close()
+
+	const n = 3
+	chs := make([]<-chan Event, n)
+	cancels := make([]func(), n)
+	for i := 0; i < n; i++ {
+		chs[i], cancels[i] = a.Subscribe(8)
+	}
+	defer func() {
+		for _, c := range cancels {
+			c()
+		}
+	}()
+
+	_ = a.Log(context.Background(), Event{ToolName: "fan"})
+
+	var wg sync.WaitGroup
+	wg.Add(n)
+	for i := 0; i < n; i++ {
+		go func() {
+			defer wg.Done()
+			select {
+			case ev := <-chs[i]:
+				if ev.ToolName != "fan" {
+					t.Errorf("subscriber %d: ToolName = %q", i, ev.ToolName)
+				}
+			case <-time.After(2 * time.Second):
+				t.Errorf("subscriber %d: timed out", i)
+			}
+		}()
+	}
+	wg.Wait()
+}
+
+func TestAsyncLogger_Subscribe_CancelStopsDelivery(t *testing.T) {
+	inner := &fakeLogger{}
+	a := NewAsyncLogger(inner, 16, time.Second,
+		slog.New(slog.NewTextHandler(io.Discard, nil)))
+	defer a.Close()
+
+	ch, cancel := a.Subscribe(8)
+
+	// Receive the first event.
+	_ = a.Log(context.Background(), Event{ToolName: "first"})
+	select {
+	case <-ch:
+	case <-time.After(2 * time.Second):
+		t.Fatal("first event not received")
+	}
+
+	// Cancel and verify the channel is closed.
+	cancel()
+	select {
+	case _, ok := <-ch:
+		if ok {
+			t.Error("channel should be closed after cancel")
+		}
+	case <-time.After(time.Second):
+		t.Error("channel was not closed after cancel")
+	}
+
+	// Further Log calls should not panic / not deliver to the cancelled subscriber.
+	_ = a.Log(context.Background(), Event{ToolName: "after-cancel"})
+	time.Sleep(50 * time.Millisecond) // let the drain run
+	// Cancel again is a no-op (sync.Once).
+	cancel()
+}
+
+func TestAsyncLogger_Subscribe_SlowConsumerDropsEvents(t *testing.T) {
+	// A buffer of 2 with 100 events posted: producer must not block,
+	// subscriber sees at most 2 events before the rest are dropped.
+	inner := &fakeLogger{}
+	a := NewAsyncLogger(inner, 1024, time.Second,
+		slog.New(slog.NewTextHandler(io.Discard, nil)))
+	defer a.Close()
+
+	ch, cancel := a.Subscribe(2)
+	defer cancel()
+
+	for i := 0; i < 100; i++ {
+		_ = a.Log(context.Background(), Event{ToolName: "spam"})
+	}
+	// Give the drain a moment to flush.
+	time.Sleep(100 * time.Millisecond)
+
+	received := 0
+drain:
+	for {
+		select {
+		case <-ch:
+			received++
+		default:
+			break drain
+		}
+	}
+	if received < 1 || received > 2 {
+		t.Errorf("received %d events, want 1 or 2 (buffer=2)", received)
+	}
+}
+
+func TestAsyncLogger_Subscribe_FailedInnerLogDoesNotBroadcast(t *testing.T) {
+	// Subscribers see only events that succeeded at the underlying
+	// backend: a failed write must not appear on the live tail.
+	inner := &fakeLogger{err: errLogFailed}
+	a := NewAsyncLogger(inner, 16, time.Second,
+		slog.New(slog.NewTextHandler(io.Discard, nil)))
+	defer a.Close()
+
+	ch, cancel := a.Subscribe(8)
+	defer cancel()
+
+	_ = a.Log(context.Background(), Event{ToolName: "should-not-appear"})
+
+	select {
+	case ev := <-ch:
+		t.Errorf("subscriber received event from failed write: %+v", ev)
+	case <-time.After(200 * time.Millisecond):
+		// expected: no delivery
+	}
+}
+
+// errLogFailed is a sentinel for the failed-inner test above. Assigned
+// here to avoid needing a stdlib import in the test loop.
+var errLogFailed = &mockErr{"inner log failed"}
+
+type mockErr struct{ msg string }
+
+func (e *mockErr) Error() string { return e.msg }
diff --git a/pkg/httpsrv/portal_api.go b/pkg/httpsrv/portal_api.go
index 0ec1390..49807c3 100644
--- a/pkg/httpsrv/portal_api.go
+++ b/pkg/httpsrv/portal_api.go
@@ -1,17 +1,21 @@
 package httpsrv
 
 import (
+	"bytes"
 	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
+	"io"
 	"log/slog"
 	"net/http"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/google/uuid"
+	"github.com/modelcontextprotocol/go-sdk/mcp"
 
 	"github.com/plexara/mcp-test/pkg/audit"
 	"github.com/plexara/mcp-test/pkg/auth"
@@ -20,20 +24,52 @@ import (
 	"github.com/plexara/mcp-test/pkg/tools"
 )
 
-// PortalAPI bundles the read-only handlers under /api/v1/portal/*.
+// PortalAPI bundles the portal handlers under /api/v1/portal/*.
+//
+// Most are read-only (events, dashboard, etc.); replay and the live
+// stream are mutating / long-lived. The mcpServer + redactKeys fields
+// are needed by replay to invoke a tool through an in-process MCP
+// client and sanitize the captured args; both are nil-safe (a nil
+// mcpServer makes /replay return 503).
 type PortalAPI struct {
-	cfg      *config.Config
-	registry *tools.Registry
-	audit    audit.Logger
+	cfg        *config.Config
+	registry   *tools.Registry
+	audit      audit.Logger
+	mcpServer  *mcp.Server
+	redactKeys []string
+
+	// replayLimiter rate-limits the per-identity replay calls to
+	// keep a misconfigured UI or runaway script from re-firing the
+	// same captured tool unboundedly. Created lazily on first use.
+	replayLimiterOnce sync.Once
+	replayLimiter     *identityRateLimiter
 }
 
-// NewPortalAPI returns the API.
-func NewPortalAPI(cfg *config.Config, registry *tools.Registry, auditLog audit.Logger) *PortalAPI {
-	return &PortalAPI{cfg: cfg, registry: registry, audit: auditLog}
+// NewPortalAPI returns the API. mcpServer / redactKeys are optional;
+// /replay returns 503 when mcpServer is nil (test paths or audit-only
+// deployments without a registered MCP server).
+func NewPortalAPI(
+	cfg *config.Config,
+	registry *tools.Registry,
+	auditLog audit.Logger,
+	mcpServer *mcp.Server,
+	redactKeys []string,
+) *PortalAPI {
+	return &PortalAPI{
+		cfg:        cfg,
+		registry:   registry,
+		audit:      auditLog,
+		mcpServer:  mcpServer,
+		redactKeys: redactKeys,
+	}
 }
 
-// Mount adds every endpoint behind the supplied auth middleware.
+// Mount adds every endpoint behind the supplied auth middleware. The
+// state-changing replay endpoint additionally requires the X-Requested-With
+// header (CSRF defense; the SPA sets it on every request, a forged
+// <form> POST cannot).
 func (p *PortalAPI) Mount(mux *http.ServeMux, mw func(http.Handler) http.Handler) {
+	wrap := func(h http.Handler) http.Handler { return mw(requireCSRFHeader(h)) }
 	mux.Handle("GET /api/v1/portal/me", mw(http.HandlerFunc(p.me)))
 	mux.Handle("GET /api/v1/portal/server", mw(http.HandlerFunc(p.server)))
 	mux.Handle("GET /api/v1/portal/instructions", mw(http.HandlerFunc(p.instructions)))
@@ -42,12 +78,519 @@ func (p *PortalAPI) Mount(mux *http.ServeMux, mw func(http.Handler) http.Handler
 	mux.Handle("GET /api/v1/portal/audit/events", mw(http.HandlerFunc(p.auditEvents)))
 	mux.Handle("GET /api/v1/portal/audit/events/{id}", mw(http.HandlerFunc(p.auditEventDetail)))
 	mux.Handle("GET /api/v1/portal/audit/export", mw(http.HandlerFunc(p.auditExport)))
+	mux.Handle("POST /api/v1/portal/audit/events/{id}/replay", wrap(http.HandlerFunc(p.auditReplay)))
+	mux.Handle("GET /api/v1/portal/audit/stream", mw(http.HandlerFunc(p.auditStream)))
 	mux.Handle("GET /api/v1/portal/audit/timeseries", mw(http.HandlerFunc(p.auditTimeseries)))
 	mux.Handle("GET /api/v1/portal/audit/breakdown", mw(http.HandlerFunc(p.auditBreakdown)))
 	mux.Handle("GET /api/v1/portal/dashboard", mw(http.HandlerFunc(p.dashboard)))
 	mux.Handle("GET /api/v1/portal/wellknown", mw(http.HandlerFunc(p.wellknown)))
 }
 
+// replayBurst, replayRefill: 5 burst, one token every 12s == 5 per
+// minute sustained per identity. Tunable later if operators ask; not
+// currently config-exposed because the rate is coupled to the
+// in-process MCP client cost, not user-visible behavior.
+const (
+	replayBurst  = 5
+	replayRefill = 12 * time.Second
+)
+
+func (p *PortalAPI) limiterForReplay() *identityRateLimiter {
+	p.replayLimiterOnce.Do(func() {
+		p.replayLimiter = newIdentityRateLimiter(replayBurst, replayRefill, nil)
+	})
+	return p.replayLimiter
+}
+
+// auditReplay re-invokes a captured tool call through the in-process
+// MCP client and writes a new audit row tagged source=portal-replay
+// with replayed_from pointing at the original event id. The replay
+// runs as the portal-authenticated identity (NOT the original
+// caller's), so the new audit row reflects who fired the replay.
+//
+// Refused (4xx, no tool call made):
+//   - {id} is not a UUID
+//   - the original event is not found (404)
+//   - the original event has no captured payload (replay needs the
+//     captured request_params; without them we'd be replaying with
+//     []any{} which would just exercise tool defaults)
+//   - the original event's params contain "[redacted]" values (a
+//     replay would call the tool with the literal "[redacted]" string
+//     which is unlikely to match the original semantics; refuse and
+//     ask the operator to re-stage manually)
+//   - the named tool is no longer registered
+//   - the per-identity rate limit is exhausted (429 with Retry-After)
+//
+// The replay does NOT skip a deliberately disabled tool group (config
+// gate); the assumption is that if the operator can hit the replay
+// endpoint at all, they have authority to invoke any registered tool.
+func (p *PortalAPI) auditReplay(w http.ResponseWriter, r *http.Request) {
+	if p.mcpServer == nil {
+		writeError(w, http.StatusServiceUnavailable, errors.New("mcp server not available"))
+		return
+	}
+	rawID := r.PathValue("id")
+	parsed, err := uuid.Parse(rawID)
+	if err != nil {
+		writeError(w, http.StatusBadRequest, errors.New("event id is not a valid uuid"))
+		return
+	}
+	eventID := parsed.String()
+
+	// PortalAuth is required to mount this handler, so a nil or
+	// empty identity here means a misconfigured route mount — fail
+	// closed rather than fail-open via the rate limiter's empty-key
+	// path. Also rejects a non-nil but unpopulated &Identity{} that
+	// a buggy authenticator might return.
+	id := auth.GetIdentity(r.Context())
+	idKey := identityKey(id)
+	if id == nil || idKey == "" {
+		writeError(w, http.StatusUnauthorized, errors.New("authenticated identity required"))
+		return
+	}
+	if !p.limiterForReplay().Allow(idKey) {
+		retry := p.limiterForReplay().RetryAfter(idKey)
+		w.Header().Set("Retry-After", strconv.Itoa(int(retry.Round(time.Second).Seconds())))
+		writeError(w, http.StatusTooManyRequests,
+			fmt.Errorf("replay rate limit exceeded; retry in %s", retry.Round(time.Second)))
+		return
+	}
+
+	// Fetch original event + payload.
+	events, err := p.audit.Query(r.Context(), audit.QueryFilter{EventID: eventID, Limit: 1})
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err)
+		return
+	}
+	if len(events) == 0 {
+		writeError(w, http.StatusNotFound, fmt.Errorf("event not found"))
+		return
+	}
+	original := events[0]
+
+	pl, ok := p.audit.(audit.PayloadLogger)
+	if !ok {
+		writeError(w, http.StatusServiceUnavailable,
+			errors.New("replay requires a payload-capable audit backend"))
+		return
+	}
+	payload, err := pl.GetPayload(r.Context(), eventID)
+	if err != nil {
+		slog.Warn("audit: replay payload fetch failed", "event_id", eventID, "err", err) // #nosec G706 -- eventID is uuid.UUID.String(); cannot carry log-injection bytes.
+		writeError(w, http.StatusInternalServerError, errors.New("failed to fetch original payload"))
+		return
+	}
+	if payload == nil || payload.RequestParams == nil {
+		writeError(w, http.StatusBadRequest,
+			errors.New("original event has no captured request params; cannot replay"))
+		return
+	}
+	if hasRedactedParam(payload.RequestParams) {
+		writeError(w, http.StatusBadRequest,
+			errors.New("original event has redacted parameter values; replay would not exercise the same call. Re-stage manually via Try-It"))
+		return
+	}
+	if p.registry != nil {
+		found := false
+		for _, m := range p.registry.All() {
+			if m.Name == original.ToolName {
+				found = true
+				break
+			}
+		}
+		if !found {
+			writeError(w, http.StatusBadRequest,
+				fmt.Errorf("tool %q is no longer registered", original.ToolName))
+			return
+		}
+	}
+
+	// Deep-copy the captured params before passing to CallTool so
+	// the SDK / tool handlers can't mutate the original-event audit
+	// row's RequestParams via the shared map pointer (the
+	// SanitizeParameters fast path returns the input map AS-IS when
+	// redactKeys is empty).
+	args := deepCopyMap(payload.RequestParams)
+
+	// Connect an in-process client through in-memory transport, with
+	// the portal identity stamped on ctx so the audit middleware
+	// sees it (the in-memory transport carries no HTTP headers, so
+	// the middleware would otherwise treat the call as anonymous).
+	ctx := auth.WithIdentity(r.Context(), id)
+	clientT, serverT := mcp.NewInMemoryTransports()
+	serverSession, err := p.mcpServer.Connect(ctx, serverT, nil)
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err)
+		return
+	}
+	defer func() { _ = serverSession.Close() }()
+	client := mcp.NewClient(&mcp.Implementation{Name: "portal-replay"}, nil)
+	clientSession, err := client.Connect(ctx, clientT, nil)
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err)
+		return
+	}
+	defer func() { _ = clientSession.Close() }()
+
+	start := time.Now()
+	res, callErr := clientSession.CallTool(ctx, &mcp.CallToolParams{
+		Name:      original.ToolName,
+		Arguments: args,
+	})
+	elapsed := time.Since(start).Milliseconds()
+
+	newID := p.recordReplayAudit(r, original, args, id, res, callErr, elapsed)
+
+	// Body: the replay response includes both the new audit row's id
+	// (so the UI can link to it) and the call result. We surface a
+	// top-level success boolean so callers don't have to introspect
+	// the SDK-shaped result to detect tool-side IsError. HTTP 502 on
+	// transport-level callErr OR tool-side IsError, mirroring
+	// /admin/tryit semantics.
+	success := callErr == nil && (res == nil || !res.IsError)
+	out := map[string]any{
+		"replay_event_id": newID,
+		"replayed_from":   eventID,
+		"result":          res,
+		"success":         success,
+	}
+	if callErr != nil {
+		out["error"] = callErr.Error()
+	}
+	if !success {
+		writeJSON(w, http.StatusBadGateway, out)
+		return
+	}
+	writeJSON(w, http.StatusOK, out)
+}
+
+// deepCopyMap returns a structural deep copy of m. JSON-friendly
+// types only (map[string]any, []any, scalars). Used by the replay
+// path to prevent the in-process MCP client from mutating the audit
+// row's stored RequestParams via shared pointers (the audit logger
+// keeps the same map[string]any reference the caller supplied).
+func deepCopyMap(m map[string]any) map[string]any {
+	if m == nil {
+		return nil
+	}
+	out := make(map[string]any, len(m))
+	for k, v := range m {
+		out[k] = deepCopyAny(v)
+	}
+	return out
+}
+
+func deepCopyAny(v any) any {
+	switch t := v.(type) {
+	case map[string]any:
+		return deepCopyMap(t)
+	case []any:
+		out := make([]any, len(t))
+		for i, x := range t {
+			out[i] = deepCopyAny(x)
+		}
+		return out
+	default:
+		return v // scalars and unknown types pass through (immutable for our purposes)
+	}
+}
+
+// sseHeartbeat is sent every sseHeartbeatInterval to keep idle
+// connections from being closed by intermediate proxies. SSE
+// comments (lines starting with `:`) are silently skipped by the
+// EventSource browser API; perfect for keepalives.
+const (
+	sseHeartbeatInterval = 30 * time.Second
+	sseSubscriberBuffer  = 64
+)
+
+// auditStream is the SSE live-tail endpoint. Subscribes to the audit
+// logger's event broadcast (via SubscribingLogger) and emits one SSE
+// `audit` event per newly-written audit row, plus a keepalive
+// comment every sseHeartbeatInterval.
+//
+// Per the StreamingLogger / SubscribingLogger split: the export
+// endpoint (`/audit/export`) iterates the existing log, while this
+// endpoint only sees events written AFTER the subscription opens.
+// History + tail are intentionally separate APIs.
+//
+// Operators behind reverse proxies should ensure SSE-aware
+// configuration: response buffering off (X-Accel-Buffering: no for
+// nginx), HTTP/1.1 keep-alive long enough for the heartbeat, and
+// proxy_read_timeout exceeding the heartbeat interval.
+func (p *PortalAPI) auditStream(w http.ResponseWriter, r *http.Request) {
+	sub, ok := p.audit.(audit.SubscribingLogger)
+	if !ok {
+		writeError(w, http.StatusServiceUnavailable,
+			errors.New("live tail not supported by configured audit backend"))
+		return
+	}
+	flusher, ok := w.(http.Flusher)
+	if !ok {
+		writeError(w, http.StatusInternalServerError,
+			errors.New("response writer does not support streaming"))
+		return
+	}
+
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-store")
+	w.Header().Set("Connection", "keep-alive")
+	// X-Accel-Buffering off is an nginx-specific hint that disables
+	// proxy-side buffering for this response. Harmless on other
+	// servers; keeps the stream moving when nginx is the front door.
+	w.Header().Set("X-Accel-Buffering", "no")
+	w.WriteHeader(http.StatusOK)
+
+	events, cancel := sub.Subscribe(sseSubscriberBuffer)
+	defer cancel()
+
+	// Initial comment so the client can confirm the connection is
+	// live before the first audit event arrives. EventSource
+	// dispatches `open` on the first byte, not on connection.
+	if _, err := io.WriteString(w, ": connected\n\n"); err != nil {
+		return
+	}
+	flusher.Flush()
+
+	heartbeat := time.NewTicker(sseHeartbeatInterval)
+	defer heartbeat.Stop()
+
+	for {
+		select {
+		case <-r.Context().Done():
+			return
+		case <-heartbeat.C:
+			if _, err := io.WriteString(w, ": keepalive\n\n"); err != nil {
+				return
+			}
+			flusher.Flush()
+		case ev, ok := <-events:
+			if !ok {
+				// Subscriber channel closed: another caller forced
+				// cancellation, or the logger is shutting down.
+				return
+			}
+			// Strip the in-memory Payload pointer; live tail is
+			// summary-only matching /events. Operators who need the
+			// payload follow up with /events/{id}.
+			ev.Payload = nil
+			// Encode-then-write so a partial failure on the write
+			// can't ship a half-formed SSE frame ("event: audit\n"
+			// without a data line).
+			var buf bytes.Buffer
+			buf.WriteString("event: audit\ndata: ")
+			frameEnc := json.NewEncoder(&buf)
+			frameEnc.SetEscapeHTML(false)
+			if err := frameEnc.Encode(&ev); err != nil {
+				return
+			}
+			// Encode writes a trailing newline; SSE needs a blank
+			// line after the data: line.
+			buf.WriteByte('\n')
+			if _, err := w.Write(buf.Bytes()); err != nil {
+				return
+			}
+			flusher.Flush()
+		}
+	}
+}
+
+// hasRedactedParam returns true when any value at any depth of the
+// params tree is the literal string "[redacted]" (the sanitizer's
+// substitution). Replaying with redacted values would call the tool
+// with a placeholder string; the audit row would mislead about what
+// happened, so refuse.
+func hasRedactedParam(params map[string]any) bool {
+	for _, v := range params {
+		if redactedAny(v) {
+			return true
+		}
+	}
+	return false
+}
+
+func redactedAny(v any) bool {
+	switch t := v.(type) {
+	case string:
+		return t == "[redacted]"
+	case map[string]any:
+		for _, sub := range t {
+			if redactedAny(sub) {
+				return true
+			}
+		}
+	case []any:
+		for _, sub := range t {
+			if redactedAny(sub) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// identityKey returns a stable string for rate-limiting. Falls back to
+// the zero value (which the limiter fails open on) when id is nil.
+func identityKey(id *auth.Identity) string {
+	if id == nil {
+		return ""
+	}
+	if id.Subject != "" {
+		return id.AuthType + ":" + id.Subject
+	}
+	return id.AuthType
+}
+
+// recordReplayAudit writes the new audit_events row tagged
+// source=portal-replay with replayed_from set. Returns the new id so
+// the handler can include it in the response body.
+//
+// The audit Log call uses a derived background context (NOT the
+// request ctx) so a client disconnect at the moment we're persisting
+// the replay event doesn't drop the row; the response body promised
+// `replay_event_id` and that id needs to lead to a real /events row.
+func (p *PortalAPI) recordReplayAudit(
+	r *http.Request,
+	original audit.Event,
+	args map[string]any,
+	id *auth.Identity,
+	res *mcp.CallToolResult,
+	callErr error,
+	durMS int64,
+) string {
+	if p.audit == nil {
+		return ""
+	}
+	// Assign the new event id locally so the handler can return it in
+	// the response body. audit.Log auto-assigns when ID is unset, but
+	// because the Log method takes the event by value the assignment
+	// doesn't propagate back here. Setting it explicitly avoids that.
+	ev := audit.NewEvent(original.ToolName)
+	ev.ID = uuid.NewString()
+	ev = ev.
+		WithRequestID(uuid.NewString()).
+		WithSource("portal-replay").
+		WithTransport("http").
+		WithRemoteAddr(r.RemoteAddr).
+		WithUserAgent(r.UserAgent()).
+		WithUser(id).
+		WithToolGroup(original.ToolGroup).
+		WithParameters(audit.SanitizeParameters(args, p.redactKeys))
+
+	// errCategory mirrors pkg/mcpmw/audit.go's precedence so the
+	// replay row's error_category bucket matches what a native tool
+	// call would land in. The middleware logic, in plain English:
+	//   - cr.IsError && err == nil  -> "tool"
+	//   - err != nil                -> "handler" (overwrites tool)
+	//   - both succeed              -> "" (success)
+	// Mirror it exactly so an operator filtering ?error_category=tool
+	// over /events sees both native-tool-errors AND replays-of-them
+	// in the same bucket.
+	success := callErr == nil && (res == nil || !res.IsError)
+	errMsg := ""
+	errCategory := ""
+	if res != nil && res.IsError && callErr == nil {
+		errMsg = "tool returned IsError"
+		errCategory = "tool"
+	}
+	if callErr != nil {
+		errMsg = callErr.Error()
+		errCategory = "handler"
+	}
+	ev.ErrorCategory = errCategory
+	ev.WithResult(success, errMsg, durMS)
+	if res != nil {
+		chars, blocks := measureResultBlocks(res)
+		ev.WithResponseSize(chars, blocks)
+	}
+
+	// Build a payload row that carries the same fields a normal call
+	// would (request params + sized response), plus the replay
+	// linkage. Operators landing on /events/{replay_event_id} expect
+	// to inspect what came back without re-reading the HTTP response.
+	pl := &audit.Payload{
+		JSONRPCMethod:     "tools/call",
+		RequestRemoteAddr: r.RemoteAddr,
+		RequestParams:     audit.SanitizeParameters(args, p.redactKeys),
+		ReplayedFrom:      original.ID,
+	}
+	if res != nil {
+		pl.ResponseResult = callToolResultToMap(res)
+	}
+	if callErr != nil {
+		pl.ResponseError = map[string]any{
+			"message":  callErr.Error(),
+			"category": errCategory,
+		}
+	}
+	ev.Payload = pl
+
+	// Use a fresh background ctx with a generous deadline; the request
+	// ctx may be cancelled by the time we get here (long replays,
+	// client disconnects), and dropping the audit row would mislead
+	// the caller about the replay_event_id we already returned.
+	logCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+	if err := p.audit.Log(logCtx, *ev); err != nil {
+		slog.Warn("audit: replay event log failed", "id", ev.ID, "err", err) // #nosec G706 -- ev.ID is uuid.NewString(); not user input.
+	}
+	return ev.ID
+}
+
+// callToolResultToMap renders the SDK CallToolResult in a JSON-friendly
+// shape suitable for storage in audit_payloads.response_result. Mirrors
+// the equivalent helper in pkg/mcpmw/audit.go, kept local here to
+// avoid a cross-package import for two helpers.
+func callToolResultToMap(cr *mcp.CallToolResult) map[string]any {
+	out := map[string]any{"isError": cr.IsError}
+	blocks := make([]any, 0, len(cr.Content))
+	for _, c := range cr.Content {
+		switch v := c.(type) {
+		case *mcp.TextContent:
+			blocks = append(blocks, map[string]any{"type": "text", "text": v.Text})
+		case *mcp.ImageContent:
+			blocks = append(blocks, map[string]any{
+				"type": "image", "mimeType": v.MIMEType, "data": v.Data,
+			})
+		case *mcp.AudioContent:
+			blocks = append(blocks, map[string]any{
+				"type": "audio", "mimeType": v.MIMEType, "data": v.Data,
+			})
+		default:
+			// Mirrors pkg/mcpmw/audit.go's contentToGenericMap: the
+			// detail keys (marshal_error / unmarshal_error / raw) help
+			// operators triage when a future SDK content type marshals
+			// without a wire-shape "type" tag.
+			b, err := json.Marshal(c)
+			if err != nil {
+				blocks = append(blocks, map[string]any{
+					"type":          "_marshal_error",
+					"marshal_error": err.Error(),
+				})
+				continue
+			}
+			var m map[string]any
+			if err := json.Unmarshal(b, &m); err != nil {
+				blocks = append(blocks, map[string]any{
+					"type":            "_unmarshal_error",
+					"unmarshal_error": err.Error(),
+					"raw":             string(b),
+				})
+				continue
+			}
+			if _, ok := m["type"]; !ok {
+				m["type"] = "_no_type"
+			}
+			blocks = append(blocks, m)
+		}
+	}
+	out["content"] = blocks
+	if cr.StructuredContent != nil {
+		out["structuredContent"] = cr.StructuredContent
+	}
+	return out
+}
+
 // instructions returns the server-level instructions that this server hands
 // to MCP clients via ServerOptions.Instructions at initialize time. Most
 // clients surface that string to the LLM as system context, so showing it in
diff --git a/pkg/httpsrv/portal_api_detail_test.go b/pkg/httpsrv/portal_api_detail_test.go
index 9c12600..890535f 100644
--- a/pkg/httpsrv/portal_api_detail_test.go
+++ b/pkg/httpsrv/portal_api_detail_test.go
@@ -10,10 +10,11 @@ import (
 	"github.com/plexara/mcp-test/pkg/audit"
 )
 
-// Real audit IDs are UUIDs (audit.NewEvent stamps uuid.NewString()), and
-// the detail endpoint validates the path param to block the gosec G706
-// log-injection flow. These tests use literal UUIDs so they exercise the
-// same path operators hit.
+// Audit event IDs are UUIDs assigned by the storage layer
+// (Postgres on Log, MemoryLogger on Log) when the caller leaves
+// Event.ID empty. The detail endpoint validates the path param as a
+// UUID to block the gosec G706 log-injection flow. These tests set
+// the ID explicitly so they exercise the same path operators hit.
 const (
 	testEventIDFound  = "11111111-1111-1111-1111-111111111111"
 	testEventIDOther  = "22222222-2222-2222-2222-222222222222"
@@ -67,16 +68,17 @@ func TestPortalAPI_AuditEventDetail_RejectsNonUUID(t *testing.T) {
 	}
 }
 
-func TestPortalAPI_AuditEventDetail_PayloadAbsentForMemoryLogger(t *testing.T) {
+func TestPortalAPI_AuditEventDetail_PayloadIncludedForMemoryLogger(t *testing.T) {
+	// MemoryLogger now implements PayloadLogger (added when the
+	// replay endpoint needed in-memory payload retrieval), so the
+	// detail endpoint returns the payload alongside the summary.
 	mem := audit.NewMemoryLogger()
 	ev := audit.Event{
 		ID:        testEventIDOther,
 		ToolName:  "echo",
 		Transport: "http",
 		Source:    "mcp",
-		// MemoryLogger doesn't persist payloads even if attached, so the
-		// detail endpoint should return summary only.
-		Payload: &audit.Payload{JSONRPCMethod: "tools/call"},
+		Payload:   &audit.Payload{JSONRPCMethod: "tools/call"},
 	}
 	_ = mem.Log(context.Background(), ev)
 
@@ -88,9 +90,11 @@ func TestPortalAPI_AuditEventDetail_PayloadAbsentForMemoryLogger(t *testing.T) {
 	}
 	var got map[string]any
 	_ = json.NewDecoder(w.Body).Decode(&got)
-	// MemoryLogger isn't a PayloadLogger, so the detail handler clears
-	// the field to nil; serialized JSON should omit it.
-	if _, ok := got["payload"]; ok {
-		t.Errorf("payload should be omitted for non-payload logger; body=%v", got)
+	payload, ok := got["payload"].(map[string]any)
+	if !ok {
+		t.Fatalf("payload missing or wrong type; body=%v", got)
+	}
+	if payload["jsonrpc_method"] != "tools/call" {
+		t.Errorf("payload.jsonrpc_method = %v, want tools/call", payload["jsonrpc_method"])
 	}
 }
diff --git a/pkg/httpsrv/portal_api_more_test.go b/pkg/httpsrv/portal_api_more_test.go
index 338e551..0465932 100644
--- a/pkg/httpsrv/portal_api_more_test.go
+++ b/pkg/httpsrv/portal_api_more_test.go
@@ -65,7 +65,7 @@ func TestPortalAPI_Instructions(t *testing.T) {
 		APIKeys: config.APIKeysConfig{},
 		Portal:  config.PortalConfig{Enabled: true, CookieSecret: "secret-secret-padded"},
 	}
-	api := NewPortalAPI(cfg, nil, audit.NewMemoryLogger())
+	api := NewPortalAPI(cfg, nil, audit.NewMemoryLogger(), nil, nil)
 	mux := http.NewServeMux()
 	api.Mount(mux, func(h http.Handler) http.Handler { return h })
 
diff --git a/pkg/httpsrv/portal_api_replay_test.go b/pkg/httpsrv/portal_api_replay_test.go
new file mode 100644
index 0000000..185f11d
--- /dev/null
+++ b/pkg/httpsrv/portal_api_replay_test.go
@@ -0,0 +1,455 @@
+package httpsrv
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/modelcontextprotocol/go-sdk/mcp"
+
+	"github.com/plexara/mcp-test/pkg/audit"
+	"github.com/plexara/mcp-test/pkg/auth"
+	"github.com/plexara/mcp-test/pkg/config"
+	"github.com/plexara/mcp-test/pkg/tools"
+	"github.com/plexara/mcp-test/pkg/tools/identity"
+)
+
+// portalReplayMux is a richer test fixture than portalTestMux: it
+// wires a real mcp.Server with the identity toolkit registered so the
+// replay endpoint can exercise the in-process MCP client path. Returns
+// the mux + the in-memory audit logger so assertions can read the new
+// audit row.
+func portalReplayMux(t *testing.T, redactKeys []string) (*http.ServeMux, *audit.MemoryLogger) {
+	t.Helper()
+	cfg := &config.Config{
+		Server: config.ServerConfig{BaseURL: "http://localhost"},
+		Portal: config.PortalConfig{Enabled: true, CookieSecret: "secret-secret"},
+	}
+	reg := tools.NewRegistry()
+	reg.Add(identity.New(nil))
+
+	mcpServer := mcp.NewServer(&mcp.Implementation{Name: "test", Version: "test"}, nil)
+	for _, tk := range reg.Toolkits() {
+		tk.RegisterTools(mcpServer)
+	}
+
+	mem := audit.NewMemoryLogger()
+	api := NewPortalAPI(cfg, reg, mem, mcpServer, redactKeys)
+
+	mw := func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			ctx := auth.WithIdentity(r.Context(),
+				&auth.Identity{Subject: "alice", AuthType: "oidc"})
+			next.ServeHTTP(w, r.WithContext(ctx))
+		})
+	}
+	mux := http.NewServeMux()
+	api.Mount(mux, mw)
+	return mux, mem
+}
+
+// stagedEvent helper: pre-stages an audit event with the given payload
+// in mem so /replay can find and operate on it. Returns the event id.
+func stagedEvent(t *testing.T, mem *audit.MemoryLogger, params map[string]any) string {
+	t.Helper()
+	ev := audit.Event{
+		ToolName:  "echo",
+		Timestamp: time.Now().UTC(),
+		Source:    "mcp",
+		Transport: "http",
+		Success:   true,
+		Payload:   &audit.Payload{JSONRPCMethod: "tools/call", RequestParams: params},
+	}
+	if err := mem.Log(context.Background(), ev); err != nil {
+		t.Fatalf("seed: %v", err)
+	}
+	for _, e := range mem.Snapshot() {
+		if e.ToolName == "echo" {
+			return e.ID
+		}
+	}
+	t.Fatal("seeded event not found")
+	return ""
+}
+
+func TestPortalAPI_AuditReplay_503WhenMCPServerNil(t *testing.T) {
+	// portalTestMux constructs a PortalAPI with mcpServer=nil. The
+	// replay endpoint must return 503 (not 500) so the operator sees
+	// "feature not available" rather than a generic crash.
+	mux := portalTestMux(t, audit.NewMemoryLogger())
+	body := bytes.NewReader([]byte(`{}`))
+	req := httptest.NewRequest(http.MethodPost,
+		"/api/v1/portal/audit/events/00000000-0000-0000-0000-000000000000/replay", body)
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+	w := httptest.NewRecorder()
+	mux.ServeHTTP(w, req)
+	if w.Code != http.StatusServiceUnavailable {
+		t.Errorf("status = %d, want 503", w.Code)
+	}
+}
+
+func TestPortalAPI_AuditReplay_400OnInvalidUUID(t *testing.T) {
+	mux := portalTestMux(t, audit.NewMemoryLogger())
+	body := bytes.NewReader([]byte(`{}`))
+	req := httptest.NewRequest(http.MethodPost,
+		"/api/v1/portal/audit/events/not-a-uuid/replay", body)
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+	w := httptest.NewRecorder()
+	mux.ServeHTTP(w, req)
+	// portalTestMux has nil mcpServer so we hit the 503 path before
+	// the UUID check; that's correct for this fixture. The actual
+	// 400-on-invalid-UUID path is exercised in
+	// tests/audit_replay_test.go::TestHTTP_AuditReplay_RejectsInvalidUUID
+	// which uses a portal-enabled fixture with a real mcpServer.
+	if w.Code != http.StatusServiceUnavailable {
+		t.Errorf("status = %d, want 503 (mcpServer nil)", w.Code)
+	}
+}
+
+// auditStream's 503-on-non-subscribing-logger path: covered by the
+// integration test setup that swaps in a NoopLogger via config; not
+// repeated here because portalTestMux's signature accepts only
+// *audit.MemoryLogger which is itself a SubscribingLogger.
+
+func TestHasRedactedParam(t *testing.T) {
+	cases := []struct {
+		name string
+		in   map[string]any
+		want bool
+	}{
+		{"empty", nil, false},
+		{"clean scalar", map[string]any{"x": 1}, false},
+		{"clean nested", map[string]any{"x": map[string]any{"y": "z"}}, false},
+		{"top-level redacted", map[string]any{"k": "[redacted]"}, true},
+		{"nested redacted", map[string]any{"a": map[string]any{"b": "[redacted]"}}, true},
+		{"slice contains redacted",
+			map[string]any{"xs": []any{"ok", "[redacted]"}}, true},
+		{"slice clean",
+			map[string]any{"xs": []any{"ok", 1, true}}, false},
+		{"deeply nested",
+			map[string]any{"a": map[string]any{
+				"b": []any{map[string]any{"c": "[redacted]"}},
+			}}, true},
+	}
+	for _, c := range cases {
+		got := hasRedactedParam(c.in)
+		if got != c.want {
+			t.Errorf("%s: hasRedactedParam = %v, want %v", c.name, got, c.want)
+		}
+	}
+}
+
+func TestIdentityKey(t *testing.T) {
+	cases := []struct {
+		id   *auth.Identity
+		want string
+	}{
+		{nil, ""},
+		{&auth.Identity{}, ""}, // empty AuthType + empty Subject -> empty
+		{&auth.Identity{AuthType: "oidc", Subject: "alice"}, "oidc:alice"},
+		{&auth.Identity{AuthType: "apikey", Subject: "key-1"}, "apikey:key-1"},
+		{&auth.Identity{AuthType: "anonymous"}, "anonymous"}, // no subject
+	}
+	for _, c := range cases {
+		got := identityKey(c.id)
+		if got != c.want {
+			t.Errorf("identityKey(%+v) = %q, want %q", c.id, got, c.want)
+		}
+	}
+}
+
+func TestPortalAPI_AuditReplay_HappyPath(t *testing.T) {
+	mux, mem := portalReplayMux(t, nil)
+	id := stagedEvent(t, mem, map[string]any{"message": "hello"})
+
+	body := bytes.NewReader([]byte(`{}`))
+	req := httptest.NewRequest(http.MethodPost,
+		"/api/v1/portal/audit/events/"+id+"/replay", body)
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+	w := httptest.NewRecorder()
+	mux.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d body=%s", w.Code, w.Body.String())
+	}
+	var resp struct {
+		ReplayEventID string `json:"replay_event_id"`
+		ReplayedFrom  string `json:"replayed_from"`
+	}
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	if resp.ReplayedFrom != id {
+		t.Errorf("replayed_from = %q, want %q", resp.ReplayedFrom, id)
+	}
+	if resp.ReplayEventID == "" {
+		t.Error("replay_event_id empty")
+	}
+
+	// Verify the new audit row exists with source=portal-replay.
+	var found bool
+	for _, e := range mem.Snapshot() {
+		if e.ID == resp.ReplayEventID && e.Source == "portal-replay" {
+			found = true
+			if e.Payload == nil || e.Payload.ReplayedFrom != id {
+				t.Errorf("replayed_from linkage missing: %+v", e.Payload)
+			}
+		}
+	}
+	if !found {
+		t.Errorf("did not find new audit event id=%s source=portal-replay", resp.ReplayEventID)
+	}
+}
+
+func TestPortalAPI_AuditReplay_400OnEventNotFound(t *testing.T) {
+	mux, _ := portalReplayMux(t, nil)
+	body := bytes.NewReader([]byte(`{}`))
+	req := httptest.NewRequest(http.MethodPost,
+		"/api/v1/portal/audit/events/00000000-0000-0000-0000-000000000000/replay", body)
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+	w := httptest.NewRecorder()
+	mux.ServeHTTP(w, req)
+	if w.Code != http.StatusNotFound {
+		t.Errorf("status = %d, want 404", w.Code)
+	}
+}
+
+func TestPortalAPI_AuditReplay_400OnRedactedParams(t *testing.T) {
+	mux, mem := portalReplayMux(t, []string{"password"})
+	id := stagedEvent(t, mem, map[string]any{
+		"message":  "hi",
+		"password": "[redacted]", // simulating a sanitized stored row
+	})
+	body := bytes.NewReader([]byte(`{}`))
+	req := httptest.NewRequest(http.MethodPost,
+		"/api/v1/portal/audit/events/"+id+"/replay", body)
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+	w := httptest.NewRecorder()
+	mux.ServeHTTP(w, req)
+	if w.Code != http.StatusBadRequest {
+		t.Errorf("status = %d, want 400 (redacted)", w.Code)
+	}
+}
+
+func TestPortalAPI_AuditReplay_400OnNoPayload(t *testing.T) {
+	mux, mem := portalReplayMux(t, nil)
+	// Stage an event with NO payload (capture-disabled simulation).
+	ev := audit.Event{
+		ToolName:  "echo",
+		Timestamp: time.Now().UTC(),
+		Source:    "mcp",
+		Transport: "http",
+	}
+	_ = mem.Log(context.Background(), ev)
+	id := mem.Snapshot()[0].ID
+
+	body := bytes.NewReader([]byte(`{}`))
+	req := httptest.NewRequest(http.MethodPost,
+		"/api/v1/portal/audit/events/"+id+"/replay", body)
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+	w := httptest.NewRecorder()
+	mux.ServeHTTP(w, req)
+	if w.Code != http.StatusBadRequest {
+		t.Errorf("status = %d, want 400", w.Code)
+	}
+}
+
+func TestPortalAPI_AuditReplay_RateLimit(t *testing.T) {
+	mux, mem := portalReplayMux(t, nil)
+	id := stagedEvent(t, mem, map[string]any{"message": "rl"})
+
+	// Burst capacity is 5; the 6th call must be 429.
+	for i := 0; i < replayBurst; i++ {
+		body := bytes.NewReader([]byte(`{}`))
+		req := httptest.NewRequest(http.MethodPost,
+			"/api/v1/portal/audit/events/"+id+"/replay", body)
+		req.Header.Set("X-Requested-With", "XMLHttpRequest")
+		w := httptest.NewRecorder()
+		mux.ServeHTTP(w, req)
+		if w.Code != http.StatusOK {
+			t.Fatalf("burst call %d: status = %d", i+1, w.Code)
+		}
+	}
+	body := bytes.NewReader([]byte(`{}`))
+	req := httptest.NewRequest(http.MethodPost,
+		"/api/v1/portal/audit/events/"+id+"/replay", body)
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+	w := httptest.NewRecorder()
+	mux.ServeHTTP(w, req)
+	if w.Code != http.StatusTooManyRequests {
+		t.Errorf("over-burst status = %d, want 429", w.Code)
+	}
+	if w.Header().Get("Retry-After") == "" {
+		t.Error("Retry-After header missing on 429")
+	}
+}
+
+func TestCallToolResultToMap_ContentTypes(t *testing.T) {
+	cases := []struct {
+		name string
+		cr   *mcp.CallToolResult
+		want []string // expected "type" values in content blocks
+	}{
+		{
+			"text",
+			&mcp.CallToolResult{Content: []mcp.Content{&mcp.TextContent{Text: "hi"}}},
+			[]string{"text"},
+		},
+		{
+			"image",
+			&mcp.CallToolResult{Content: []mcp.Content{&mcp.ImageContent{MIMEType: "image/png", Data: []byte("x")}}},
+			[]string{"image"},
+		},
+		{
+			"audio",
+			&mcp.CallToolResult{Content: []mcp.Content{&mcp.AudioContent{MIMEType: "audio/wav", Data: []byte("x")}}},
+			[]string{"audio"},
+		},
+		{
+			"isError + structured",
+			&mcp.CallToolResult{
+				IsError:           true,
+				StructuredContent: map[string]any{"k": "v"},
+				Content:           []mcp.Content{&mcp.TextContent{Text: "err"}},
+			},
+			[]string{"text"},
+		},
+	}
+	for _, c := range cases {
+		out := callToolResultToMap(c.cr)
+		blocks, _ := out["content"].([]any)
+		if len(blocks) != len(c.want) {
+			t.Errorf("%s: blocks len = %d, want %d", c.name, len(blocks), len(c.want))
+			continue
+		}
+		for i, b := range blocks {
+			m, _ := b.(map[string]any)
+			if m["type"] != c.want[i] {
+				t.Errorf("%s: block[%d].type = %v, want %v", c.name, i, m["type"], c.want[i])
+			}
+		}
+	}
+	// Verify isError flag round-trips.
+	out := callToolResultToMap(&mcp.CallToolResult{IsError: true})
+	if out["isError"] != true {
+		t.Errorf("isError flag missing: %+v", out)
+	}
+}
+
+func TestDeepCopyMap(t *testing.T) {
+	src := map[string]any{
+		"a": "x",
+		"b": map[string]any{"c": []any{1, "y", true}},
+	}
+	dst := deepCopyMap(src)
+
+	// Mutate src; dst must not change.
+	src["a"] = "MUTATED"
+	src["b"].(map[string]any)["c"].([]any)[1] = "MUTATED"
+
+	if dst["a"] != "x" {
+		t.Errorf("top-level aliasing: dst[a] = %v", dst["a"])
+	}
+	if v := dst["b"].(map[string]any)["c"].([]any)[1]; v != "y" {
+		t.Errorf("nested slice aliasing: dst.b.c[1] = %v", v)
+	}
+	if deepCopyMap(nil) != nil {
+		t.Error("deepCopyMap(nil) should return nil")
+	}
+}
+
+// IsError -> 502 path: covered by tests/audit_replay_test.go through
+// the full HTTP stack. Locking it as a unit here would require
+// registering a custom always-error tool, which isn't worth the
+// setup overhead.
+
+func TestPortalAPI_AuditStream_DeliversEvent(t *testing.T) {
+	// httptest.ResponseRecorder doesn't implement http.Flusher, so we
+	// need a real httptest.Server to exercise the streaming path.
+	_, mem := portalReplayMux(t, nil)
+	cfg := &config.Config{
+		Server: config.ServerConfig{BaseURL: "http://localhost"},
+		Portal: config.PortalConfig{Enabled: true, CookieSecret: "secret-secret"},
+	}
+	api := NewPortalAPI(cfg, nil, mem, nil, nil)
+	mw := func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			ctx := auth.WithIdentity(r.Context(),
+				&auth.Identity{Subject: "alice", AuthType: "oidc"})
+			next.ServeHTTP(w, r.WithContext(ctx))
+		})
+	}
+	mux := http.NewServeMux()
+	api.Mount(mux, mw)
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+	req, _ := http.NewRequestWithContext(ctx, http.MethodGet,
+		srv.URL+"/api/v1/portal/audit/stream", nil)
+	resp, err := srv.Client().Do(req)
+	if err != nil {
+		t.Fatalf("stream connect: %v", err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		t.Fatalf("status = %d", resp.StatusCode)
+	}
+	if ct := resp.Header.Get("Content-Type"); ct != "text/event-stream" {
+		t.Errorf("Content-Type = %q, want text/event-stream", ct)
+	}
+
+	// Fire an event via direct mem.Log, simulating an audit write.
+	go func() {
+		time.Sleep(50 * time.Millisecond)
+		_ = mem.Log(context.Background(), audit.Event{
+			ToolName:  "stream-test",
+			Timestamp: time.Now().UTC(),
+			Source:    "mcp",
+			Transport: "http",
+		})
+	}()
+
+	// Drain the response in chunks until either the event arrives or
+	// the test deadline fires. This exercises both the connect-
+	// comment write AND the event-frame write paths in auditStream.
+	// Read response body in a goroutine so the outer test can enforce
+	// a deadline via the ctx cancel; the http.Body Read blocks until
+	// data arrives, with no per-read deadline available.
+	combined := make(chan string, 1)
+	go func() {
+		var b strings.Builder
+		buf := make([]byte, 4096)
+		for {
+			n, err := resp.Body.Read(buf)
+			if n > 0 {
+				b.Write(buf[:n])
+			}
+			if strings.Contains(b.String(), "stream-test") || err != nil {
+				combined <- b.String()
+				return
+			}
+		}
+	}()
+	var out string
+	select {
+	case out = <-combined:
+	case <-time.After(3 * time.Second):
+		cancel() // force ctx done so the read goroutine unwinds
+		out = <-combined
+	}
+	if !strings.Contains(out, ": connected") {
+		t.Errorf("missing connect comment in: %q", out)
+	}
+	if !strings.Contains(out, "stream-test") {
+		t.Errorf("expected event for stream-test in body, got: %q", out)
+	}
+}
diff --git a/pkg/httpsrv/portal_api_test.go b/pkg/httpsrv/portal_api_test.go
index 2d6c785..b9ed674 100644
--- a/pkg/httpsrv/portal_api_test.go
+++ b/pkg/httpsrv/portal_api_test.go
@@ -30,7 +30,7 @@ func portalTestMux(t *testing.T, mem *audit.MemoryLogger) *http.ServeMux {
 	}
 	reg := tools.NewRegistry()
 	reg.Add(identity.New([]string{"cookie"}))
-	api := NewPortalAPI(cfg, reg, mem)
+	api := NewPortalAPI(cfg, reg, mem, nil, nil)
 
 	mw := func(next http.Handler) http.Handler {
 		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
diff --git a/pkg/httpsrv/ratelimit.go b/pkg/httpsrv/ratelimit.go
new file mode 100644
index 0000000..7c81317
--- /dev/null
+++ b/pkg/httpsrv/ratelimit.go
@@ -0,0 +1,115 @@
+package httpsrv
+
+import (
+	"sync"
+	"time"
+)
+
+// identityRateLimiter is a per-identity token bucket. Used to bound how
+// often a single portal user can fire mutating endpoints (initially:
+// audit replay). Implementation is intentionally simple: one bucket per
+// identity key, refilled at a steady rate, capped at a burst.
+//
+// It's NOT distributed: multi-instance deployments need a shared
+// rate-limit store. That's a known limitation; for the audit replay
+// case the failure mode of a hot single user is contained per replica
+// and the cost of a missed limit is one extra captured tool call,
+// not data corruption.
+type identityRateLimiter struct {
+	burst    int           // bucket capacity
+	refill   time.Duration // one token per refill duration
+	mu       sync.Mutex
+	buckets  map[string]*tokenBucket
+	clock    func() time.Time // injectable for tests
+	maxIdle  time.Duration    // GC unused buckets after this idle period
+	lastSwep time.Time
+}
+
+type tokenBucket struct {
+	tokens   float64
+	lastSeen time.Time
+}
+
+// newIdentityRateLimiter returns a limiter with `burst` capacity and a
+// new token every `refill`. clock can be nil to use time.Now.
+func newIdentityRateLimiter(burst int, refill time.Duration, clock func() time.Time) *identityRateLimiter {
+	if clock == nil {
+		clock = time.Now
+	}
+	return &identityRateLimiter{
+		burst:   burst,
+		refill:  refill,
+		buckets: make(map[string]*tokenBucket),
+		clock:   clock,
+		maxIdle: 10 * time.Minute,
+	}
+}
+
+// Allow consumes one token for the given identity key and returns true
+// if the call is permitted. False means rate-limited; the caller should
+// return 429 with a Retry-After header derived from RetryAfter.
+func (l *identityRateLimiter) Allow(key string) bool {
+	if key == "" {
+		// Fail open for unauthenticated callers: the auth middleware
+		// should have rejected them; if they reached here, we'd
+		// rather permit than risk a deadlock by keying on "".
+		return true
+	}
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	now := l.clock()
+	l.gcLocked(now)
+
+	b := l.buckets[key]
+	if b == nil {
+		b = &tokenBucket{tokens: float64(l.burst), lastSeen: now}
+		l.buckets[key] = b
+	}
+	// Refill: tokens accrue at 1 per refill duration since the last
+	// observation, capped at burst.
+	elapsed := now.Sub(b.lastSeen)
+	if elapsed > 0 && l.refill > 0 {
+		b.tokens += float64(elapsed) / float64(l.refill)
+		if b.tokens > float64(l.burst) {
+			b.tokens = float64(l.burst)
+		}
+	}
+	b.lastSeen = now
+	if b.tokens < 1 {
+		return false
+	}
+	b.tokens--
+	return true
+}
+
+// RetryAfter returns the duration until at least one token will be
+// available for the given key. Used to populate the Retry-After
+// response header when Allow returned false.
+func (l *identityRateLimiter) RetryAfter(key string) time.Duration {
+	if key == "" {
+		return 0
+	}
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	b := l.buckets[key]
+	if b == nil || b.tokens >= 1 {
+		return 0
+	}
+	missing := 1 - b.tokens
+	return time.Duration(missing * float64(l.refill))
+}
+
+// gcLocked drops buckets that haven't been touched in maxIdle. Called
+// at the head of each Allow so the map doesn't grow unbounded over a
+// long-running process. Caller must hold l.mu.
+func (l *identityRateLimiter) gcLocked(now time.Time) {
+	if now.Sub(l.lastSwep) < l.maxIdle {
+		return
+	}
+	for k, b := range l.buckets {
+		if now.Sub(b.lastSeen) > l.maxIdle {
+			delete(l.buckets, k)
+		}
+	}
+	l.lastSwep = now
+}
diff --git a/pkg/httpsrv/ratelimit_test.go b/pkg/httpsrv/ratelimit_test.go
new file mode 100644
index 0000000..329786b
--- /dev/null
+++ b/pkg/httpsrv/ratelimit_test.go
@@ -0,0 +1,92 @@
+package httpsrv
+
+import (
+	"testing"
+	"time"
+)
+
+func TestIdentityRateLimiter_BurstThenRefill(t *testing.T) {
+	var now time.Time
+	clock := func() time.Time { return now }
+	now = time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC)
+
+	l := newIdentityRateLimiter(3, time.Second, clock)
+
+	// Three calls in quick succession: all allowed (burst).
+	for i := 0; i < 3; i++ {
+		if !l.Allow("alice") {
+			t.Fatalf("call %d: should be allowed", i+1)
+		}
+	}
+	// Fourth call: bucket empty, rate-limited.
+	if l.Allow("alice") {
+		t.Fatal("4th call should be rate-limited")
+	}
+	if r := l.RetryAfter("alice"); r <= 0 || r > time.Second {
+		t.Errorf("RetryAfter = %v, want (0, 1s]", r)
+	}
+
+	// Advance 1 second: one token refilled.
+	now = now.Add(time.Second)
+	if !l.Allow("alice") {
+		t.Error("after 1s refill, 5th call should be allowed")
+	}
+	if l.Allow("alice") {
+		t.Error("6th call back-to-back should be rate-limited")
+	}
+}
+
+func TestIdentityRateLimiter_PerKeyIndependent(t *testing.T) {
+	var now time.Time
+	clock := func() time.Time { return now }
+	now = time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC)
+
+	l := newIdentityRateLimiter(2, time.Second, clock)
+
+	// alice burns her bucket.
+	if !l.Allow("alice") || !l.Allow("alice") {
+		t.Fatal("alice's first two calls should be allowed")
+	}
+	if l.Allow("alice") {
+		t.Fatal("alice's third call should be blocked")
+	}
+	// bob is unaffected.
+	if !l.Allow("bob") {
+		t.Error("bob's first call should be allowed")
+	}
+}
+
+func TestIdentityRateLimiter_EmptyKeyAllows(t *testing.T) {
+	l := newIdentityRateLimiter(1, time.Second, nil)
+	// Empty key fail-opens to avoid keying everyone-as-anonymous on
+	// the same bucket (DoS vector).
+	for i := 0; i < 100; i++ {
+		if !l.Allow("") {
+			t.Fatalf("empty key allow %d should always pass", i)
+		}
+	}
+}
+
+func TestIdentityRateLimiter_GCDropsIdleBuckets(t *testing.T) {
+	var now time.Time
+	clock := func() time.Time { return now }
+	now = time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC)
+
+	l := newIdentityRateLimiter(1, time.Second, clock)
+	l.maxIdle = 5 * time.Minute
+
+	l.Allow("alice")
+	if len(l.buckets) != 1 {
+		t.Fatalf("expected 1 bucket after Allow")
+	}
+	// Advance past maxIdle and trigger another Allow on a different
+	// key; alice's bucket should be GC'd.
+	now = now.Add(10 * time.Minute)
+	l.Allow("bob")
+	l.mu.Lock()
+	_, aliceStill := l.buckets["alice"]
+	l.mu.Unlock()
+	if aliceStill {
+		t.Error("alice's bucket should have been GC'd after 10 minutes idle")
+	}
+}
diff --git a/scripts/codeql-gate.sh b/scripts/codeql-gate.sh
new file mode 100755
index 0000000..c9d0104
--- /dev/null
+++ b/scripts/codeql-gate.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+#
+# codeql-gate.sh — fail if a CodeQL SARIF result has any findings that
+# aren't excluded by the project config.
+#
+# Args:
+#   $1  path to SARIF file
+#   $2  path to codeql-config.yml (used to read query-filters.exclude.id)
+#
+# Exit 0 = clean. Exit 1 = at least one finding survives the filters.
+
+set -euo pipefail
+
+SARIF="${1:-}"
+CONFIG="${2:-}"
+
+if [[ -z "$SARIF" || ! -f "$SARIF" ]]; then
+  echo "codeql-gate: missing SARIF input" >&2
+  exit 1
+fi
+
+# Build the exclude list from codeql-config.yml.
+EXCLUDES=()
+if [[ -n "$CONFIG" && -f "$CONFIG" ]]; then
+  while IFS= read -r line; do
+    [[ -n "$line" ]] && EXCLUDES+=("$line")
+  done < <(awk '
+    /^query-filters:/ { in_qf=1; next }
+    in_qf && /^[^ ]/  { in_qf=0 }
+    in_qf && /^[[:space:]]*-[[:space:]]*exclude:/ { in_excl=1; next }
+    in_qf && in_excl && /^[[:space:]]*id:/ {
+      sub(/^[[:space:]]*id:[[:space:]]*/, "")
+      sub(/[[:space:]]+#.*$/, "")
+      gsub(/[\047"]/, "")
+      print
+      in_excl=0
+    }
+  ' "$CONFIG")
+fi
+
+# Pull every result's ruleId from SARIF. Use a while-read loop instead
+# of `mapfile`/`readarray` so we work on macOS bash 3.2 too.
+RULES=()
+while IFS= read -r line; do
+  [[ -n "$line" ]] && RULES+=("$line")
+done < <(jq -r '.runs[]?.results[]?.ruleId // empty' "$SARIF")
+
+KEPT=()
+for r in "${RULES[@]+"${RULES[@]}"}"; do
+  excluded=0
+  for e in "${EXCLUDES[@]+"${EXCLUDES[@]}"}"; do
+    if [[ "$r" == "$e" ]]; then
+      excluded=1
+      break
+    fi
+  done
+  [[ $excluded -eq 0 ]] && KEPT+=("$r")
+done
+
+if [[ ${#KEPT[@]} -eq 0 ]]; then
+  exit 0
+fi
+
+echo "codeql-gate: ${#KEPT[@]} findings after exclusions:" >&2
+for r in "${KEPT[@]+"${KEPT[@]}"}"; do
+  echo "  - $r" >&2
+done
+echo "" >&2
+echo "Inspect details with:" >&2
+echo "  jq '.runs[].results[] | select(.ruleId == \"<id>\") | {ruleId, locations}' $SARIF" >&2
+exit 1
diff --git a/tests/audit_replay_test.go b/tests/audit_replay_test.go
new file mode 100644
index 0000000..d9acb3b
--- /dev/null
+++ b/tests/audit_replay_test.go
@@ -0,0 +1,188 @@
+package tests
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"net/http"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/modelcontextprotocol/go-sdk/mcp"
+)
+
+// TestHTTP_AuditReplay_Roundtrip locks the replay endpoint contract:
+// fire a tool call, find the captured audit event, POST to replay,
+// receive a new audit event with replayed_from pointing at the
+// original and source=portal-replay.
+func TestHTTP_AuditReplay_Roundtrip(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
+	defer cancel()
+	ts, mem := portalApp(t)
+
+	// 1. Fire the original tool call via the portal-authed MCP path.
+	httpClient := &http.Client{
+		Transport: &headerInjector{rt: http.DefaultTransport, headers: http.Header{"X-API-Key": []string{portalAPIKey}}},
+	}
+	transport := &mcp.StreamableClientTransport{
+		Endpoint:   ts.URL,
+		HTTPClient: httpClient,
+	}
+	mcpClient := mcp.NewClient(&mcp.Implementation{Name: "replay-test"}, nil)
+	session, err := mcpClient.Connect(ctx, transport, nil)
+	if err != nil {
+		t.Fatalf("mcp connect: %v", err)
+	}
+	defer func() { _ = session.Close() }()
+
+	res, err := session.CallTool(ctx, &mcp.CallToolParams{
+		Name:      "echo",
+		Arguments: map[string]any{"message": "hello-replay"},
+	})
+	if err != nil {
+		t.Fatalf("original echo: %v", err)
+	}
+	if res.IsError {
+		t.Fatalf("original echo IsError")
+	}
+	original := waitForEvent(t, mem, "echo", 2*time.Second)
+	if original.Source != "mcp" {
+		t.Fatalf("original.Source = %q, want mcp", original.Source)
+	}
+
+	// 2. POST to replay endpoint with the same API key.
+	body := bytes.NewReader([]byte(`{}`))
+	req, _ := http.NewRequestWithContext(ctx, http.MethodPost,
+		ts.URL+"/api/v1/portal/audit/events/"+original.ID+"/replay", body)
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("X-API-Key", portalAPIKey)
+	req.Header.Set("X-Requested-With", "XMLHttpRequest") // CSRF gate
+	resp, err := ts.Client().Do(req)
+	if err != nil {
+		t.Fatalf("replay request: %v", err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		var b bytes.Buffer
+		_, _ = b.ReadFrom(resp.Body)
+		t.Fatalf("replay status = %d body=%s", resp.StatusCode, b.String())
+	}
+	var replayResp struct {
+		ReplayEventID string `json:"replay_event_id"`
+		ReplayedFrom  string `json:"replayed_from"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&replayResp); err != nil {
+		t.Fatalf("decode replay response: %v", err)
+	}
+	if replayResp.ReplayedFrom != original.ID {
+		t.Errorf("replay.replayed_from = %q, want %q", replayResp.ReplayedFrom, original.ID)
+	}
+	if replayResp.ReplayEventID == "" {
+		t.Error("replay.replay_event_id is empty")
+	}
+
+	// 3. The replay must have produced a new audit event.
+	deadline := time.Now().Add(2 * time.Second)
+	var found bool
+	for time.Now().Before(deadline) {
+		for _, e := range mem.Snapshot() {
+			if e.ID == replayResp.ReplayEventID && e.Source == "portal-replay" {
+				found = true
+				if e.Payload == nil || e.Payload.ReplayedFrom != original.ID {
+					t.Errorf("replayed event missing replayed_from linkage: %+v", e.Payload)
+				}
+				break
+			}
+		}
+		if found {
+			break
+		}
+		time.Sleep(20 * time.Millisecond)
+	}
+	if !found {
+		t.Errorf("did not see new audit row id=%s source=portal-replay within 2s",
+			replayResp.ReplayEventID)
+	}
+}
+
+// TestHTTP_AuditReplay_RejectsRedacted refuses to replay an event whose
+// captured params contain "[redacted]" sentinels: re-running a tool
+// with placeholder values would mislead about what the call actually
+// did.
+func TestHTTP_AuditReplay_RejectsRedacted(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
+	defer cancel()
+	ts, mem := portalApp(t)
+
+	// Connect MCP via the portal-keyed path.
+	httpClient := &http.Client{
+		Transport: &headerInjector{rt: http.DefaultTransport, headers: http.Header{"X-API-Key": []string{portalAPIKey}}},
+	}
+	transport := &mcp.StreamableClientTransport{
+		Endpoint:   ts.URL,
+		HTTPClient: httpClient,
+	}
+	mcpClient := mcp.NewClient(&mcp.Implementation{Name: "redact-test"}, nil)
+	session, err := mcpClient.Connect(ctx, transport, nil)
+	if err != nil {
+		t.Fatalf("mcp connect: %v", err)
+	}
+	defer func() { _ = session.Close() }()
+
+	// Echo with a key the test config redacts. portalApp's redact_keys
+	// list includes "password".
+	_, err = session.CallTool(ctx, &mcp.CallToolParams{
+		Name: "echo",
+		Arguments: map[string]any{
+			"message":  "hi",
+			"password": "should-be-redacted",
+		},
+	})
+	if err != nil {
+		t.Fatalf("seed echo: %v", err)
+	}
+	original := waitForEvent(t, mem, "echo", 2*time.Second)
+
+	body := bytes.NewReader([]byte(`{}`))
+	req, _ := http.NewRequestWithContext(ctx, http.MethodPost,
+		ts.URL+"/api/v1/portal/audit/events/"+original.ID+"/replay", body)
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("X-API-Key", portalAPIKey)
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+	resp, err := ts.Client().Do(req)
+	if err != nil {
+		t.Fatalf("replay request: %v", err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusBadRequest {
+		t.Fatalf("status = %d, want 400 (redacted refusal)", resp.StatusCode)
+	}
+	var bodyBytes bytes.Buffer
+	_, _ = bodyBytes.ReadFrom(resp.Body)
+	if !strings.Contains(bodyBytes.String(), "redacted") {
+		t.Errorf("400 body should mention redacted: %s", bodyBytes.String())
+	}
+}
+
+// TestHTTP_AuditReplay_RejectsInvalidUUID covers the boundary uuid
+// validation that's also on /events/{id}.
+func TestHTTP_AuditReplay_RejectsInvalidUUID(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+	ts, _ := portalApp(t)
+	body := bytes.NewReader([]byte(`{}`))
+	req, _ := http.NewRequestWithContext(ctx, http.MethodPost,
+		ts.URL+"/api/v1/portal/audit/events/not-a-uuid/replay", body)
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("X-API-Key", portalAPIKey)
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+	resp, err := ts.Client().Do(req)
+	if err != nil {
+		t.Fatalf("request: %v", err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusBadRequest {
+		t.Errorf("status = %d, want 400", resp.StatusCode)
+	}
+}
diff --git a/tests/audit_stream_test.go b/tests/audit_stream_test.go
new file mode 100644
index 0000000..a6cdc1a
--- /dev/null
+++ b/tests/audit_stream_test.go
@@ -0,0 +1,148 @@
+package tests
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"net/http"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/modelcontextprotocol/go-sdk/mcp"
+
+	"github.com/plexara/mcp-test/pkg/audit"
+)
+
+// TestHTTP_AuditStream_DeliversNewEvents subscribes to the SSE live
+// tail, fires a tool call, and verifies the tool's audit event lands
+// on the stream within 2 seconds (the spec target is 200ms; CI
+// schedulers under -race make 200ms flaky for assertions, so we
+// assert "soon" rather than "exactly N ms").
+func TestHTTP_AuditStream_DeliversNewEvents(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
+	defer cancel()
+	ts, _ := portalApp(t)
+
+	// Open the SSE stream BEFORE firing the call; the contract is
+	// "events written after subscribe arrive on the stream."
+	streamReq, _ := http.NewRequestWithContext(ctx, http.MethodGet,
+		ts.URL+"/api/v1/portal/audit/stream", nil)
+	streamReq.Header.Set("Accept", "text/event-stream")
+	streamReq.Header.Set("X-API-Key", portalAPIKey)
+	streamResp, err := ts.Client().Do(streamReq)
+	if err != nil {
+		t.Fatalf("stream connect: %v", err)
+	}
+	defer streamResp.Body.Close()
+	if streamResp.StatusCode != http.StatusOK {
+		t.Fatalf("stream status = %d", streamResp.StatusCode)
+	}
+	if ct := streamResp.Header.Get("Content-Type"); !strings.HasPrefix(ct, "text/event-stream") {
+		t.Errorf("Content-Type = %q, want text/event-stream", ct)
+	}
+
+	// Read SSE events in a goroutine.
+	type sseMsg struct {
+		event string
+		data  string
+	}
+	msgs := make(chan sseMsg, 16)
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		scanner := bufio.NewScanner(streamResp.Body)
+		var event, data string
+		for scanner.Scan() {
+			line := scanner.Text()
+			switch {
+			case strings.HasPrefix(line, "event:"):
+				event = strings.TrimSpace(strings.TrimPrefix(line, "event:"))
+			case strings.HasPrefix(line, "data:"):
+				data = strings.TrimSpace(strings.TrimPrefix(line, "data:"))
+			case line == "":
+				if event != "" {
+					select {
+					case msgs <- sseMsg{event, data}:
+					default:
+					}
+					event, data = "", ""
+				}
+			}
+		}
+	}()
+
+	// Fire a tool call via the portal-keyed MCP path.
+	httpClient := &http.Client{
+		Transport: &headerInjector{rt: http.DefaultTransport, headers: http.Header{"X-API-Key": []string{portalAPIKey}}},
+	}
+	transport := &mcp.StreamableClientTransport{
+		Endpoint:   ts.URL,
+		HTTPClient: httpClient,
+	}
+	mcpClient := mcp.NewClient(&mcp.Implementation{Name: "stream-test"}, nil)
+	session, err := mcpClient.Connect(ctx, transport, nil)
+	if err != nil {
+		t.Fatalf("mcp connect: %v", err)
+	}
+	defer func() { _ = session.Close() }()
+	_, err = session.CallTool(ctx, &mcp.CallToolParams{
+		Name:      "echo",
+		Arguments: map[string]any{"message": "tail-me"},
+	})
+	if err != nil {
+		t.Fatalf("echo: %v", err)
+	}
+
+	// Wait for an audit event matching our tool call.
+	deadline := time.Now().Add(3 * time.Second)
+	for time.Now().Before(deadline) {
+		select {
+		case msg := <-msgs:
+			if msg.event != "audit" {
+				continue
+			}
+			var ev audit.Event
+			if err := json.Unmarshal([]byte(msg.data), &ev); err != nil {
+				t.Errorf("data not JSON: %v\n%s", err, msg.data)
+				continue
+			}
+			if ev.ToolName == "echo" {
+				return // success
+			}
+		case <-time.After(100 * time.Millisecond):
+			// Loop until deadline.
+		}
+	}
+	t.Fatal("did not see an audit SSE event for tool=echo within 3s")
+}
+
+// TestHTTP_AuditStream_EmitsKeepalive verifies the connection-open
+// comment fires immediately. Heartbeat verification at the 30s
+// interval is intentionally omitted (would slow CI); the keepalive
+// interval is documented on the handler.
+func TestHTTP_AuditStream_EmitsConnectComment(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+	ts, _ := portalApp(t)
+
+	streamReq, _ := http.NewRequestWithContext(ctx, http.MethodGet,
+		ts.URL+"/api/v1/portal/audit/stream", nil)
+	streamReq.Header.Set("X-API-Key", portalAPIKey)
+	streamResp, err := ts.Client().Do(streamReq)
+	if err != nil {
+		t.Fatalf("stream connect: %v", err)
+	}
+	defer streamResp.Body.Close()
+
+	scanner := bufio.NewScanner(streamResp.Body)
+	if !scanner.Scan() {
+		t.Fatal("stream produced no bytes")
+	}
+	first := scanner.Text()
+	if !strings.HasPrefix(first, ": connected") {
+		t.Errorf("first line = %q, want ': connected' comment", first)
+	}
+}