From 68f34b12ff1267766b947d8422cf65996768c986 Mon Sep 17 00:00:00 2001 From: Shawn Hartsock Date: Fri, 12 Jun 2026 21:41:00 -0400 Subject: [PATCH 1/2] =?UTF-8?q?feat(store/mcp):=20#7=20=E2=80=94=20mcp=5Fs?= =?UTF-8?q?ervers=20proxy=20substrate=20(mcp=5Fregister=20+=20mcp-query)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The credential-proxy piece of the agent-state-store epic (#7, PR D): present registered downstream MCP servers as a single modulex surface so a hotseat agent points its harness at modulex and never touches enterprise credentials. Store (modulex-core/store.rs): - McpServer struct + mcp_register/mcp_servers/mcp_server/mcp_unregister over the already-reserved mcp_servers table (name PK, command, args_json, note, created_gen — generation-stamped, never wall-clock). Upsert by name. - Wired into StoreDump export/import (sovereignty); the export carries the invocation shape only — never a credential. Tool (modulex-mcp/tools.rs): - mcp_register MCP tool (action add|list|remove) — a mutation, so a real tool. Lands in a new opt-in `mcp` facet: NOT on the default ≤12 tools/list budget (DEFAULT_TOOL_BUDGET stays 12, CI-pinned), discoverable + callable via the tool_search/tool_describe/tool_invoke trio, exactly like the board facet. Step (modulex-core/steps/mcp_query.rs): - mcp-query step: spawn a registered downstream as a stdio MCP client (initialize → notifications/initialized → tools/call over the existing single-shot Spawner seam — no new deps, lean build unaffected), embed ONLY the tool result as a report section. Security (this is the credential-proxy surface): - Leashed spawn: the downstream command goes through ExecGate::spawn → check_exec BEFORE any process; an ungranted command is DENIED, not run. The store cannot silently widen the leash — a registered server's command must be granted (inline `command` joins the declared-default grant via required_programs, or via [caveats] exec). - Credentials by reference: the downstream's secrets are resolved at spawn time into unserializable Secrets injected only into the child env; they never reach the store, an export, the report data/markdown, or error text. stderr is never surfaced (only parsed JSON-RPC error messages are). - Regression test asserts a resolved {cmd} credential value never appears in the serialized step result. Coverage: cargo llvm-cov on the new modules — mcp_query.rs 96.46% lines, store.rs 96.73%, tools.rs 93.79%, server.rs 96.89%; library total 95.03% lines (binary/pyo3 entrypoints excluded). Adds `just cov`/`cov-ci` and a CI `coverage` job enforcing an 80% floor, mirroring the test gate (hook parity note updated — coverage is CI-only, too slow pre-push). 3-tier tests: pure wire-shape (build_stdin/parse_call_response/render), mocked-spawner logic (happy path, denial, timeout, missing store/credential, downstream error, credential-leak regression), and server-level round-trip via tool_invoke. Golden schema pinned for the new step. Disclosure tiers: capability = mcp-query STEP (zero tool surface) + mcp_register MUTATION tool in the opt-in `mcp` facet (discovered, not listed). Refs #7. Co-Authored-By: Claude Fable 5 --- .githooks/pre-push | 9 +- .github/workflows/ci.yml | 19 + crates/modulex-core/src/lib.rs | 2 +- crates/modulex-core/src/steps/mcp_query.rs | 698 ++++++++++++++++++ crates/modulex-core/src/steps/mod.rs | 4 + crates/modulex-core/src/store.rs | 176 +++++ .../modulex-core/tests/golden/mcp-query.json | 32 + crates/modulex-mcp/src/server.rs | 104 +++ crates/modulex-mcp/src/tools.rs | 115 +++ justfile | 15 + modulex.toml.example | 22 +- 11 files changed, 1190 insertions(+), 6 deletions(-) create mode 100644 crates/modulex-core/src/steps/mcp_query.rs create mode 100644 crates/modulex-core/tests/golden/mcp-query.json diff --git a/.githooks/pre-push b/.githooks/pre-push index bbe3b40..73d2799 100755 --- a/.githooks/pre-push +++ b/.githooks/pre-push @@ -1,7 +1,10 @@ #!/usr/bin/env bash -# PIPELINE PARITY: this hook mirrors .github/workflows/ci.yml. -# When editing this file, verify it still matches the pipeline (and vice -# versa — pipeline edits REQUIRE a hook audit; see CLAUDE.md). +# PIPELINE PARITY: this hook mirrors the `check` job of +# .github/workflows/ci.yml. When editing this file, verify it still matches +# the pipeline (and vice versa — pipeline edits REQUIRE a hook audit; see +# CLAUDE.md). The CI `coverage` job (just cov-ci) is intentionally NOT run +# pre-push: full llvm-cov instrumentation is too slow for every push, so the +# coverage floor is enforced in CI only. Run `just cov-ci` locally before a PR. set -euo pipefail echo "pre-push: cargo fmt --check" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9f16f2c..b9130a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,3 +26,22 @@ jobs: run: | cargo check -p modulex-cli --no-default-features --locked cargo check -p modulex-mcp --no-default-features --locked + + # COVERAGE GATE: mirrors `just cov-ci`. The floor (80% lines) is enforced + # here; keep it in sync with the justfile. Binary/pyo3 entrypoints are + # excluded (integration-tested, not unit-tested). + coverage: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + components: llvm-tools-preview + - uses: Swatinem/rust-cache@v2 + - name: Install cargo-llvm-cov + uses: taiki-e/install-action@cargo-llvm-cov + - name: Coverage (fail under the floor) + run: | + cargo llvm-cov --all-features --workspace \ + --ignore-filename-regex '(main\.rs|modulex-py/src/lib\.rs)' \ + --fail-under-lines 80 diff --git a/crates/modulex-core/src/lib.rs b/crates/modulex-core/src/lib.rs index 5c7b914..9d2347e 100644 --- a/crates/modulex-core/src/lib.rs +++ b/crates/modulex-core/src/lib.rs @@ -44,7 +44,7 @@ pub use exec::{ExecGate, ExecOutput, ExecRequest, Spawner, TokioSpawner}; pub use registry::StepRegistry; pub use report::{RepoResult, Report, StepResult}; pub use step::{RunContext, StepHandler}; -pub use store::Store; +pub use store::{McpServer, Store}; // Re-export the leash vocabulary so embedders don't need a direct // agent-bridle-core dependency to construct grants. diff --git a/crates/modulex-core/src/steps/mcp_query.rs b/crates/modulex-core/src/steps/mcp_query.rs new file mode 100644 index 0000000..5349e60 --- /dev/null +++ b/crates/modulex-core/src/steps/mcp_query.rs @@ -0,0 +1,698 @@ +//! The `mcp-query` step — call a tool on a registered downstream MCP server +//! and embed only its RESULT in the report (issue #7, PR D: "hide other MCPs +//! behind modulex"). +//! +//! This is the credential-proxy surface. A hotseat agent points its harness +//! at modulex and never touches enterprise credentials: it asks modulex to +//! run `server.tool(args)`; modulex spawns the downstream server, injects the +//! server's credential *references* (resolved at spawn time), calls the tool, +//! and returns the tool result. The calling agent sees the result — never the +//! command line, never the credentials. +//! +//! ## Security model (adversarial-review surface) +//! +//! 1. **Leashed spawn.** The downstream command runs through +//! [`crate::exec::ExecGate::spawn`], which calls agent-bridle's `check_exec` +//! BEFORE any process exists. A server whose command is not in the run's +//! exec grant is **denied, not run** — the store cannot silently widen the +//! leash. To authorize a registered server, its command must be granted: +//! either by a config `mcp-query` step that declares `command` inline (so +//! it joins the declared-default grant via [`StepHandler::required_programs`]) +//! or by an explicit `[caveats] exec`. +//! 2. **Credentials by reference, never by value.** The downstream server's +//! secrets are supplied through the step's `env = { NAME = {env|file|cmd} }` +//! references — the same [`crate::credentials::Secret`] model as every other +//! step. Secrets are unserializable by construction, injected only into the +//! child environment, and scrubbed from captured output. They never reach +//! the store, an export, the report `data`, the report markdown, or an error +//! string. +//! 3. **Result only.** Only the JSON-RPC tool *result* is embedded. The +//! spawned command line and resolved env are never serialized into the +//! report. +//! +//! ## Transport +//! +//! MCP stdio is newline-delimited JSON-RPC 2.0. modulex writes the +//! handshake + call as a batch to the child's stdin +//! (`initialize` → `notifications/initialized` → `tools/call`), closes stdin, +//! and parses the responses from stdout, matching by request `id`. This rides +//! the existing single-shot [`crate::exec::Spawner`] seam, so it is mockable +//! and leashed exactly like every other subprocess. + +use std::time::Duration; + +use async_trait::async_trait; +use serde_json::{json, Value}; + +use crate::config::{expand_tilde, StepSpec}; +use crate::exec::ExecRequest; +use crate::report::StepResult; +use crate::step::{resolve_step_env, RunContext, StepHandler}; +use crate::store::McpServer; + +/// JSON-RPC id of the `initialize` request. +const ID_INIT: i64 = 1; +/// JSON-RPC id of the `tools/call` request. +const ID_CALL: i64 = 2; + +/// The MCP protocol version modulex speaks to downstreams (same as the +/// modulex server's own). +const PROTOCOL_VERSION: &str = "2024-11-05"; + +/// Build the newline-delimited JSON-RPC batch sent to a downstream server's +/// stdin: `initialize`, `notifications/initialized`, then `tools/call`. +/// Factored out (pure) so the wire shape is unit-tested without a process. +#[must_use] +pub fn build_stdin(tool: &str, arguments: &Value) -> String { + let initialize = json!({ + "jsonrpc": "2.0", + "id": ID_INIT, + "method": "initialize", + "params": { + "protocolVersion": PROTOCOL_VERSION, + "capabilities": {}, + "clientInfo": { "name": "modulex-mcp-proxy", "version": "0.1.0" } + } + }); + let initialized = json!({ + "jsonrpc": "2.0", + "method": "notifications/initialized" + }); + let call = json!({ + "jsonrpc": "2.0", + "id": ID_CALL, + "method": "tools/call", + "params": { "name": tool, "arguments": arguments } + }); + format!("{initialize}\n{initialized}\n{call}\n") +} + +/// Find the JSON-RPC response object with the given `id` in newline-delimited +/// stdout. Non-JSON lines and notifications (no `id`) are skipped, so a chatty +/// server that logs to stdout does not break the parse. +fn find_response(stdout: &str, id: i64) -> Option { + stdout + .lines() + .filter_map(|line| { + let line = line.trim(); + if line.is_empty() { + return None; + } + serde_json::from_str::(line).ok() + }) + .find(|v| v.get("id").and_then(Value::as_i64) == Some(id)) +} + +/// Outcome of parsing a downstream `tools/call` response: either the tool +/// result payload, or a human-readable error. +#[derive(Debug, PartialEq, Eq)] +pub enum CallOutcome { + /// The `result` object from the `tools/call` response. + Result(Value), + /// A JSON-RPC error or a malformed/absent response. + Error(String), +} + +/// Extract the `tools/call` result (id [`ID_CALL`]) from a downstream's +/// stdout. Pure, so the response handling is unit-tested without a process. +#[must_use] +pub fn parse_call_response(stdout: &str) -> CallOutcome { + let Some(response) = find_response(stdout, ID_CALL) else { + return CallOutcome::Error( + "downstream MCP returned no response to tools/call (handshake may have failed)".into(), + ); + }; + if let Some(error) = response.get("error") { + let message = error + .get("message") + .and_then(Value::as_str) + .unwrap_or("downstream tool error"); + return CallOutcome::Error(format!("downstream MCP error: {message}")); + } + match response.get("result") { + Some(result) => CallOutcome::Result(result.clone()), + None => { + CallOutcome::Error("downstream MCP response carried neither result nor error".into()) + } + } +} + +/// Render a tool result as a human-readable report body. MCP `tools/call` +/// results carry a `content` array of typed blocks; we surface text blocks and +/// fall back to compact JSON for anything else. +fn render_result(result: &Value) -> String { + if let Some(content) = result.get("content").and_then(Value::as_array) { + let mut chunks = Vec::new(); + for block in content { + match block.get("type").and_then(Value::as_str) { + Some("text") => { + if let Some(text) = block.get("text").and_then(Value::as_str) { + chunks.push(text.to_string()); + } + } + _ => chunks.push(block.to_string()), + } + } + if !chunks.is_empty() { + return chunks.join("\n"); + } + } + result.to_string() +} + +/// The (tilde-expanded) command a registered server will spawn — used for BOTH +/// the declared grant and the spawn request, so the leash compares like with +/// like. Server-name → command requires the store, so this only resolves the +/// `command` param a config step may declare inline (see the security model). +fn declared_command(spec: &StepSpec) -> Option { + spec.param_str("command") + .map(|c| expand_tilde(c).to_string_lossy().into_owned()) +} + +/// `mcp-query`: call `server.tool(args)` on a registered downstream MCP. +pub struct McpQuery; + +#[async_trait] +impl StepHandler for McpQuery { + fn type_name(&self) -> &'static str { + "mcp-query" + } + + fn description(&self) -> &'static str { + "Call a tool on a registered downstream MCP server (leashed stdio \ + client) and embed only its result" + } + + fn data_schema(&self) -> serde_json::Value { + json!({ + "type": "object", + "required": ["server", "tool", "state"], + "properties": { + "server": { "type": "string" }, + "tool": { "type": "string" }, + "state": { "type": "string", + "enum": ["ok", "denied", "error", "skipped"] }, + "result": { "description": "the downstream tool result (state=ok); \ + never contains credentials" }, + "detail": { "type": "string", "description": "error/denial text" } + } + }) + } + + fn required_programs(&self, spec: &StepSpec) -> Vec { + // A config step may declare `command` inline so the downstream server's + // program joins the declared-default exec grant. A server selected only + // by `server` name (resolved from the store at run time) is NOT + // auto-granted here — the store must not silently widen the leash; such + // a server needs an explicit `[caveats] exec`. See the security model. + declared_command(spec).into_iter().collect() + } + + async fn run(&self, spec: &StepSpec, cx: &RunContext) -> StepResult { + let Some(server_name) = spec.param_str("server") else { + return StepResult::fail( + &spec.name, + &spec.step_type, + "missing required param `server` (a registered MCP server name)", + ); + }; + let Some(tool) = spec.param_str("tool") else { + return StepResult::fail(&spec.name, &spec.step_type, "missing required param `tool`"); + }; + let arguments = spec + .params + .get("arguments") + .and_then(|v| serde_json::to_value(v).ok()) + .unwrap_or_else(|| json!({})); + + let Some(store) = &cx.store else { + return StepResult::skip( + &spec.name, + &spec.step_type, + "agent state store unavailable — no MCP registry", + ); + }; + let server = match store.mcp_server(server_name) { + Ok(Some(server)) => server, + Ok(None) => { + return StepResult::fail( + &spec.name, + &spec.step_type, + format!("no registered MCP server named {server_name:?}"), + ) + } + Err(e) => return StepResult::fail(&spec.name, &spec.step_type, e.to_string()), + }; + + if cx.dry_run { + return StepResult::ok( + &spec.name, + &spec.step_type, + format!( + "[dry-run] would call {tool:?} on registered MCP {:?} (command {:?}, leashed)", + server.name, server.command + ), + ); + } + + self.query(spec, cx, &server, tool, &arguments).await + } +} + +impl McpQuery { + /// The leashed call against an already-resolved server record. + async fn query( + &self, + spec: &StepSpec, + cx: &RunContext, + server: &McpServer, + tool: &str, + arguments: &Value, + ) -> StepResult { + // Credentials are resolved by REFERENCE here, immediately before spawn, + // into unserializable Secrets — never read from or written to the store. + let env = match resolve_step_env(spec, &cx.exec).await { + Ok(env) => env, + Err((name, error)) => { + // The error names the env var, never the value. + return self.soft( + spec, + server, + tool, + "skipped", + format!("credential {name} unavailable: {error}"), + true, + ); + } + }; + + let stdin = build_stdin(tool, arguments); + // The leash check (`check_exec(server.command)`) happens inside spawn, + // BEFORE any process exists. An ungranted command is Denied here. + let request = + ExecRequest::new(expand_tilde(&server.command).to_string_lossy().into_owned()) + .args(server.args.clone()) + .env(env) + .stdin(stdin) + .timeout(Duration::from_secs(spec.timeout)); + + let out = match cx.exec.spawn(request).await { + Ok(out) => out, + Err(crate::exec::ExecError::Denied(reason)) => { + // Out-of-grant downstream command: denied, not run. This is the + // proxy's exec-leash enforcement point. + return self.soft(spec, server, tool, "denied", reason, false); + } + Err(e) => return self.soft(spec, server, tool, "error", e.to_string(), false), + }; + + if out.timed_out { + return self.soft( + spec, + server, + tool, + "error", + format!("downstream MCP {:?} timed out", server.name), + false, + ); + } + + match parse_call_response(&out.stdout) { + CallOutcome::Result(result) => { + let body = render_result(&result); + let mut step = StepResult::ok(&spec.name, &spec.step_type, body); + step.data = Some(json!({ + "server": server.name, + "tool": tool, + "state": "ok", + "result": result, + })); + step + } + CallOutcome::Error(detail) => { + // A downstream error is data, not a dead routine. stderr is NOT + // surfaced (it could carry credential echoes); only the parsed + // JSON-RPC error message is reported. + self.soft(spec, server, tool, "error", detail, false) + } + } + } + + /// A soft outcome (skip or fail) carrying typed `data` but no credentials. + fn soft( + &self, + spec: &StepSpec, + server: &McpServer, + tool: &str, + state: &str, + detail: String, + skipped: bool, + ) -> StepResult { + let mut step = if skipped { + StepResult::skip(&spec.name, &spec.step_type, detail.clone()) + } else { + StepResult::fail(&spec.name, &spec.step_type, detail.clone()) + }; + step.data = Some(json!({ + "server": server.name, + "tool": tool, + "state": state, + "detail": detail, + })); + step + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use agent_bridle_core::{Caveats, Scope}; + + use super::*; + use crate::config::Config; + use crate::exec::test_support::{gate_with, MockSpawner}; + use crate::exec::ExecOutput; + use crate::store::Store; + + /// A `tools/call` success response a well-behaved server emits. + fn ok_response() -> String { + let init = + json!({"jsonrpc":"2.0","id":ID_INIT,"result":{"protocolVersion":PROTOCOL_VERSION}}); + let call = json!({ + "jsonrpc":"2.0","id":ID_CALL, + "result":{"content":[{"type":"text","text":"42 open issues"}]} + }); + format!("{init}\n{call}\n") + } + + fn cx_with( + store: Option>, + outputs: Vec, + allow: &[&str], + ) -> (RunContext, Arc) { + let spawner = Arc::new(MockSpawner::with_outputs(outputs)); + let granted = Caveats { + exec: Scope::only(allow.iter().map(ToString::to_string)), + ..Caveats::top() + }; + ( + RunContext { + config: Arc::new(Config::default()), + dry_run: false, + generation: 9, + exec: gate_with(&granted, spawner.clone()), + prior: Vec::new(), + store, + }, + spawner, + ) + } + + fn store_with_server(command: &str) -> Arc { + let store = Arc::new(Store::in_memory().unwrap()); + store + .mcp_register("gh", command, &["serve".into()], "github", 1) + .unwrap(); + store + } + + fn spec(toml_text: &str) -> StepSpec { + toml::from_str(toml_text).unwrap() + } + + // ── pure wire-shape tests (no process) ───────────────────────────── + + #[test] + fn build_stdin_carries_handshake_then_call() { + let stdin = build_stdin("issues_list", &json!({"repo": "x"})); + let lines: Vec<&str> = stdin.lines().collect(); + assert_eq!(lines.len(), 3); + let init: Value = serde_json::from_str(lines[0]).unwrap(); + assert_eq!(init["method"], "initialize"); + assert_eq!(init["id"], ID_INIT); + let note: Value = serde_json::from_str(lines[1]).unwrap(); + assert_eq!(note["method"], "notifications/initialized"); + assert!(note.get("id").is_none(), "notifications carry no id"); + let call: Value = serde_json::from_str(lines[2]).unwrap(); + assert_eq!(call["method"], "tools/call"); + assert_eq!(call["params"]["name"], "issues_list"); + assert_eq!(call["params"]["arguments"]["repo"], "x"); + } + + #[test] + fn parse_call_response_extracts_result() { + match parse_call_response(&ok_response()) { + CallOutcome::Result(r) => { + assert_eq!(r["content"][0]["text"], "42 open issues"); + } + other => panic!("expected result, got {other:?}"), + } + } + + #[test] + fn parse_call_response_surfaces_jsonrpc_error() { + let stdout = format!( + "{}\n", + json!({"jsonrpc":"2.0","id":ID_CALL,"error":{"code":-32602,"message":"bad args"}}) + ); + match parse_call_response(&stdout) { + CallOutcome::Error(e) => assert!(e.contains("bad args")), + other => panic!("expected error, got {other:?}"), + } + } + + #[test] + fn parse_call_response_handles_missing_and_chatty_lines() { + // No tools/call response at all (handshake failed). + assert!(matches!( + parse_call_response("not json\n{\"jsonrpc\":\"2.0\"}\n"), + CallOutcome::Error(_) + )); + // Chatty log lines before a valid response are ignored. + let stdout = format!("starting up...\n{}", ok_response()); + assert!(matches!( + parse_call_response(&stdout), + CallOutcome::Result(_) + )); + } + + #[test] + fn render_result_prefers_text_blocks() { + let r = json!({"content":[{"type":"text","text":"hello"},{"type":"text","text":"world"}]}); + assert_eq!(render_result(&r), "hello\nworld"); + // Non-content results fall back to JSON. + assert_eq!(render_result(&json!({"k":1})), "{\"k\":1}"); + } + + // ── leashed run tests (mocked spawner) ───────────────────────────── + + #[tokio::test] + async fn happy_path_embeds_only_the_tool_result() { + let store = store_with_server("gh-mcp"); + let (cx, spawner) = cx_with( + Some(store), + vec![MockSpawner::ok(&ok_response())], + &["gh-mcp"], + ); + let result = McpQuery + .run( + &spec("name=\"q\"\ntype=\"mcp-query\"\nserver=\"gh\"\ntool=\"issues_list\""), + &cx, + ) + .await; + assert!(result.success, "{:?}", result.error); + assert_eq!(result.output, "42 open issues"); + assert_eq!(result.data.as_ref().unwrap()["state"], "ok"); + // The downstream command WAS the one spawned, with its static argv. + let calls = spawner.calls.lock().unwrap(); + assert_eq!(calls[0].0, "gh-mcp"); + assert_eq!(calls[0].1, vec!["serve".to_string()]); + } + + #[tokio::test] + async fn ungranted_downstream_command_is_denied_not_run() { + // The server's command is NOT in the exec grant. + let store = store_with_server("forbidden-mcp"); + let (cx, spawner) = cx_with( + Some(store), + vec![MockSpawner::ok(&ok_response())], + &["something-else"], + ); + let result = McpQuery + .run( + &spec("name=\"q\"\ntype=\"mcp-query\"\nserver=\"gh\"\ntool=\"x\""), + &cx, + ) + .await; + assert!(!result.success); + assert_eq!(result.data.as_ref().unwrap()["state"], "denied"); + assert!( + spawner.calls.lock().unwrap().is_empty(), + "denial happens BEFORE any spawn" + ); + } + + #[tokio::test] + async fn unknown_server_fails_cleanly() { + let store = Arc::new(Store::in_memory().unwrap()); + let (cx, _) = cx_with(Some(store), vec![], &["whatever"]); + let result = McpQuery + .run( + &spec("name=\"q\"\ntype=\"mcp-query\"\nserver=\"ghost\"\ntool=\"x\""), + &cx, + ) + .await; + assert!(!result.success); + assert!(result.error.as_deref().unwrap().contains("ghost")); + } + + #[tokio::test] + async fn timed_out_downstream_is_soft_error_not_a_hung_routine() { + let store = store_with_server("gh-mcp"); + let timeout = ExecOutput { + timed_out: true, + ..Default::default() + }; + let (cx, _) = cx_with(Some(store), vec![timeout], &["gh-mcp"]); + let result = McpQuery + .run( + &spec("name=\"q\"\ntype=\"mcp-query\"\nserver=\"gh\"\ntool=\"x\""), + &cx, + ) + .await; + assert!(!result.success); + assert_eq!(result.data.as_ref().unwrap()["state"], "error"); + assert!(result.error.as_deref().unwrap().contains("timed out")); + } + + #[tokio::test] + async fn missing_store_soft_skips() { + let (cx, _) = cx_with(None, vec![], &["gh-mcp"]); + let result = McpQuery + .run( + &spec("name=\"q\"\ntype=\"mcp-query\"\nserver=\"gh\"\ntool=\"x\""), + &cx, + ) + .await; + assert!(result.skipped); + } + + #[tokio::test] + async fn missing_credential_soft_skips_naming_var_not_value() { + let store = store_with_server("gh-mcp"); + let (cx, spawner) = cx_with(Some(store), vec![], &["gh-mcp"]); + let result = McpQuery + .run( + &spec( + "name=\"q\"\ntype=\"mcp-query\"\nserver=\"gh\"\ntool=\"x\"\n\ + env = { GITHUB_TOKEN = { env = \"MODULEX_TEST_UNSET_VAR_XYZZY\" } }", + ), + &cx, + ) + .await; + assert!(result.skipped); + assert!(result.output.contains("GITHUB_TOKEN")); + assert!( + !result.output.contains("MODULEX_TEST_UNSET_VAR_XYZZY") + || result.output.contains("not set"), + "names the var, not a value" + ); + assert!(spawner.calls.lock().unwrap().is_empty(), "never spawned"); + } + + #[tokio::test] + async fn downstream_error_is_soft_data_not_dead_routine() { + let store = store_with_server("gh-mcp"); + let err = format!( + "{}\n", + json!({"jsonrpc":"2.0","id":ID_CALL,"error":{"message":"rate limited"}}) + ); + let (cx, _) = cx_with(Some(store), vec![MockSpawner::ok(&err)], &["gh-mcp"]); + let result = McpQuery + .run( + &spec("name=\"q\"\ntype=\"mcp-query\"\nserver=\"gh\"\ntool=\"x\""), + &cx, + ) + .await; + assert!(!result.success); + assert_eq!(result.data.as_ref().unwrap()["state"], "error"); + assert!(result.error.as_deref().unwrap().contains("rate limited")); + } + + #[tokio::test] + async fn resolved_credential_never_leaks_into_the_report() { + // The credential-proxy guarantee: a server's secret, resolved by + // reference at spawn time, must never appear in the report data, + // markdown, or error text — only the tool RESULT is embedded. + const SECRET: &str = "ghp_supersecrettoken"; + let store = store_with_server("gh-mcp"); + // First mock output answers the {cmd} credential resolution (stdout = + // the secret); the second answers the downstream tools/call. + let outputs = vec![MockSpawner::ok(SECRET), MockSpawner::ok(&ok_response())]; + // The credential command (`secret-tool`) and the server command must + // both be granted. + let (cx, _) = cx_with(Some(store), outputs, &["gh-mcp", "secret-tool"]); + let result = McpQuery + .run( + &spec( + "name=\"q\"\ntype=\"mcp-query\"\nserver=\"gh\"\ntool=\"x\"\n\ + env = { GITHUB_TOKEN = { cmd = \"secret-tool lookup gh\" } }", + ), + &cx, + ) + .await; + assert!(result.success, "{:?}", result.error); + // Serialize the whole step result — the only thing an agent sees. + let serialized = serde_json::to_string(&result).unwrap(); + assert!( + !serialized.contains(SECRET), + "credential value leaked into the report: {serialized}" + ); + assert!(!result.output.contains(SECRET)); + } + + #[tokio::test] + async fn dry_run_describes_without_spawning() { + let store = store_with_server("gh-mcp"); + let (mut cx, spawner) = cx_with(Some(store), vec![], &["gh-mcp"]); + cx.dry_run = true; + let result = McpQuery + .run( + &spec("name=\"q\"\ntype=\"mcp-query\"\nserver=\"gh\"\ntool=\"issues_list\""), + &cx, + ) + .await; + assert!(result.output.contains("[dry-run]")); + assert!(result.output.contains("issues_list")); + assert!(spawner.calls.lock().unwrap().is_empty()); + } + + #[test] + fn required_programs_declares_inline_command_only() { + // Inline `command` joins the declared grant. + let with_cmd = + spec("name=\"q\"\ntype=\"mcp-query\"\nserver=\"gh\"\ntool=\"x\"\ncommand=\"gh-mcp\""); + assert_eq!( + McpQuery.required_programs(&with_cmd), + vec!["gh-mcp".to_string()] + ); + // Server-only: the store can't silently widen the grant. + let server_only = spec("name=\"q\"\ntype=\"mcp-query\"\nserver=\"gh\"\ntool=\"x\""); + assert!(McpQuery.required_programs(&server_only).is_empty()); + } + + #[tokio::test] + async fn missing_params_fail_cleanly() { + let store = store_with_server("gh-mcp"); + let (cx, _) = cx_with(Some(store), vec![], &["gh-mcp"]); + let no_server = McpQuery + .run(&spec("name=\"q\"\ntype=\"mcp-query\"\ntool=\"x\""), &cx) + .await; + assert!(no_server.error.as_deref().unwrap().contains("server")); + let no_tool = McpQuery + .run(&spec("name=\"q\"\ntype=\"mcp-query\"\nserver=\"gh\""), &cx) + .await; + assert!(no_tool.error.as_deref().unwrap().contains("tool")); + } +} diff --git a/crates/modulex-core/src/steps/mod.rs b/crates/modulex-core/src/steps/mod.rs index 6d788ce..cccc02d 100644 --- a/crates/modulex-core/src/steps/mod.rs +++ b/crates/modulex-core/src/steps/mod.rs @@ -22,6 +22,7 @@ //! | `board-ingest` | [`board_ingest`] | gh (issues → store cards) | //! | `python` | [`python`] | the configured interpreter (plugin protocol) | //! | `reminders` | [`reminders`] | — (agent state store) | +//! | `mcp-query` | [`mcp_query`] | a registered downstream MCP (leashed stdio client) | //! | `url-watch` | [`web`] | — (leashed in-proc fetch; feature `web`) | use std::sync::Arc; @@ -34,6 +35,7 @@ pub mod dates; pub mod git; pub mod github; pub mod gitlab; +pub mod mcp_query; pub mod project; pub mod python; pub mod reminders; @@ -65,6 +67,7 @@ pub fn builtin_registry() -> StepRegistry { registry.register(Arc::new(board_ingest::BoardIngest)); registry.register(Arc::new(python::PythonPlugin)); registry.register(Arc::new(reminders::Reminders)); + registry.register(Arc::new(mcp_query::McpQuery)); #[cfg(feature = "web")] registry.register(Arc::new(web::UrlWatch::new())); registry @@ -98,6 +101,7 @@ mod tests { "board-ingest", "python", "reminders", + "mcp-query", ] { assert!(names.iter().any(|n| n == expected), "missing {expected}"); } diff --git a/crates/modulex-core/src/store.rs b/crates/modulex-core/src/store.rs index 4da1be3..e3f714a 100644 --- a/crates/modulex-core/src/store.rs +++ b/crates/modulex-core/src/store.rs @@ -176,6 +176,28 @@ pub struct StoredCountdown { pub retired_gen: Option, } +/// A downstream MCP server registered behind modulex (issue #7, PR D). +/// +/// The registry holds only the *invocation shape* — the program to spawn and +/// its static argv. Credentials are **never** stored here: the spawning step +/// resolves credential references at run time (the [`crate::credentials::Secret`] +/// model), so a server's secrets never touch this table, an export, or a +/// report. +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct McpServer { + /// Stable registry name (the primary key; how a step selects this server). + pub name: String, + /// The program to spawn as a stdio MCP server (e.g. `npx`, `uvx`, a path). + /// This is what the exec leash checks before the process exists. + pub command: String, + /// Static arguments passed to `command`, in order. + pub args: Vec, + /// Free-text note (what this server is for). + pub note: String, + /// Engine generation when registered (a counter, never wall-clock). + pub created_gen: u64, +} + /// A URL registered for periodic change tracking. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Watch { @@ -318,6 +340,11 @@ pub struct StoreDump { pub countdowns: Vec, /// All watches. pub watches: Vec, + /// All registered downstream MCP servers (invocation shapes only — no + /// credentials). `#[serde(default)]` keeps dumps that predate the proxy + /// substrate importable. + #[serde(default)] + pub mcp_servers: Vec, /// All board cards (open and closed). `#[serde(default)]` keeps v1 dumps /// (which predate cards) importable. #[serde(default)] @@ -583,6 +610,76 @@ impl Store { Ok(()) } + // ── mcp servers (downstream MCPs behind modulex; issue #7 PR D) ───── + + /// Register (or replace, by `name`) a downstream MCP server. Stores ONLY + /// the invocation shape — command + static argv. Credential references are + /// resolved by the calling step at spawn time and never persisted here. + /// + /// # Errors + /// [`StoreError`] on SQLite failure. + pub fn mcp_register( + &self, + name: &str, + command: &str, + args: &[String], + note: &str, + generation: u64, + ) -> Result<(), StoreError> { + let args_json = serde_json::to_string(args).unwrap_or_else(|_| "[]".to_string()); + let conn = self.conn.lock().expect("store lock poisoned"); + conn.execute( + "INSERT INTO mcp_servers (name, command, args_json, note, created_gen) + VALUES (?1, ?2, ?3, ?4, ?5) + ON CONFLICT(name) DO UPDATE SET + command = excluded.command, args_json = excluded.args_json, + note = excluded.note, created_gen = excluded.created_gen", + params![name, command, args_json, note, generation], + )?; + Ok(()) + } + + /// All registered downstream MCP servers, by name. + /// + /// # Errors + /// [`StoreError`] on SQLite failure. + pub fn mcp_servers(&self) -> Result, StoreError> { + let conn = self.conn.lock().expect("store lock poisoned"); + let mut stmt = conn.prepare( + "SELECT name, command, args_json, note, created_gen FROM mcp_servers ORDER BY name", + )?; + let rows = stmt + .query_map([], row_to_mcp_server)? + .collect::, _>>()?; + Ok(rows) + } + + /// One registered server by name, or `None`. + /// + /// # Errors + /// [`StoreError`] on SQLite failure. + pub fn mcp_server(&self, name: &str) -> Result, StoreError> { + let conn = self.conn.lock().expect("store lock poisoned"); + let server = conn + .query_row( + "SELECT name, command, args_json, note, created_gen + FROM mcp_servers WHERE name = ?1", + params![name], + row_to_mcp_server, + ) + .optional()?; + Ok(server) + } + + /// Remove a registered server by name. Returns false when not found. + /// + /// # Errors + /// [`StoreError`] on SQLite failure. + pub fn mcp_unregister(&self, name: &str) -> Result { + let conn = self.conn.lock().expect("store lock poisoned"); + Ok(conn.execute("DELETE FROM mcp_servers WHERE name = ?1", params![name])? > 0) + } + // ── cards (knowledge board) ──────────────────────────────────────── /// Insert or update a card (upsert on `card_id`); returns its rowid. @@ -902,6 +999,7 @@ impl Store { .collect::, _>>()?; rows }, + mcp_servers: self.mcp_servers()?, cards: self.cards_query(None, None, None)?, }; Ok(serde_json::to_string_pretty(&dump).unwrap_or_else(|_| "{}".to_string())) @@ -944,6 +1042,9 @@ impl Store { self.watch_seen(id, hash, gen)?; } } + for s in &dump.mcp_servers { + self.mcp_register(&s.name, &s.command, &s.args, &s.note, s.created_gen)?; + } for c in &dump.cards { self.card_add(&card_input_from(c), c.created_gen)?; } @@ -1164,6 +1265,19 @@ fn row_to_watch(row: &rusqlite::Row<'_>) -> rusqlite::Result { /// Map a `cards` row to a [`Card`]. Column order must match [`CARD_COLS`]. /// `refs` is left empty — callers fill it via [`load_refs`]. +fn row_to_mcp_server(row: &rusqlite::Row<'_>) -> rusqlite::Result { + let args_json: String = row.get(2)?; + Ok(McpServer { + name: row.get(0)?, + command: row.get(1)?, + // A malformed args_json degrades to an empty argv rather than failing + // the read — the registry stays usable. + args: serde_json::from_str(&args_json).unwrap_or_default(), + note: row.get(3)?, + created_gen: row.get(4)?, + }) +} + fn row_to_card(row: &rusqlite::Row<'_>) -> rusqlite::Result { Ok(Card { id: row.get(0)?, @@ -1242,6 +1356,68 @@ mod tests { assert!(store.watches().unwrap().is_empty()); } + #[test] + fn mcp_server_lifecycle_register_list_get_unregister() { + let store = Store::in_memory().unwrap(); + store + .mcp_register( + "gh", + "npx", + &["-y".into(), "@modelcontextprotocol/server-github".into()], + "enterprise github", + 4, + ) + .unwrap(); + let servers = store.mcp_servers().unwrap(); + assert_eq!(servers.len(), 1); + assert_eq!(servers[0].name, "gh"); + assert_eq!(servers[0].command, "npx"); + assert_eq!( + servers[0].args, + vec!["-y", "@modelcontextprotocol/server-github"] + ); + assert_eq!(servers[0].created_gen, 4); + + let one = store.mcp_server("gh").unwrap().unwrap(); + assert_eq!(one.command, "npx"); + assert!(store.mcp_server("nope").unwrap().is_none()); + + // Re-register by the same name replaces (upsert), never duplicates. + store + .mcp_register("gh", "uvx", &["mcp-github".into()], "swapped", 5) + .unwrap(); + let servers = store.mcp_servers().unwrap(); + assert_eq!(servers.len(), 1, "same name upserts to one row"); + assert_eq!(servers[0].command, "uvx"); + assert_eq!(servers[0].created_gen, 5); + + assert!(store.mcp_unregister("gh").unwrap()); + assert!(!store.mcp_unregister("gh").unwrap(), "already gone"); + assert!(store.mcp_servers().unwrap().is_empty()); + } + + #[test] + fn mcp_servers_export_import_round_trips() { + let a = Store::in_memory().unwrap(); + a.mcp_register("gh", "npx", &["-y".into(), "server-github".into()], "gh", 1) + .unwrap(); + a.mcp_register("jira", "uvx", &["mcp-jira".into()], "", 2) + .unwrap(); + + let json = a.export_json().unwrap(); + assert!(json.contains("server-github"), "export carries the argv"); + // The export carries invocation shape only — never a credential. + assert!(!json.contains("token"), "no credential material in export"); + + let b = Store::in_memory().unwrap(); + b.import_json(&json).unwrap(); + let servers = b.mcp_servers().unwrap(); + assert_eq!(servers.len(), 2); + assert_eq!(servers[0].name, "gh"); + assert_eq!(servers[0].args, vec!["-y", "server-github"]); + assert_eq!(servers[1].name, "jira"); + } + #[test] fn generation_persists() { let store = Store::in_memory().unwrap(); diff --git a/crates/modulex-core/tests/golden/mcp-query.json b/crates/modulex-core/tests/golden/mcp-query.json new file mode 100644 index 0000000..7db7195 --- /dev/null +++ b/crates/modulex-core/tests/golden/mcp-query.json @@ -0,0 +1,32 @@ +{ + "properties": { + "detail": { + "description": "error/denial text", + "type": "string" + }, + "result": { + "description": "the downstream tool result (state=ok); never contains credentials" + }, + "server": { + "type": "string" + }, + "state": { + "enum": [ + "ok", + "denied", + "error", + "skipped" + ], + "type": "string" + }, + "tool": { + "type": "string" + } + }, + "required": [ + "server", + "tool", + "state" + ], + "type": "object" +} diff --git a/crates/modulex-mcp/src/server.rs b/crates/modulex-mcp/src/server.rs index 4a660f5..6f57b40 100644 --- a/crates/modulex-mcp/src/server.rs +++ b/crates/modulex-mcp/src/server.rs @@ -431,6 +431,110 @@ type = "reminders" } } + #[tokio::test] + async fn mcp_register_round_trips_via_invoke() { + // The mcp (credential-proxy) facet is opt-in (unlisted by default) but + // callable through tool_invoke — register -> list -> remove a + // downstream server. The registry stores the INVOCATION SHAPE only. + let s = server(vec![]); + + let add = s + .handle(&json!({ + "jsonrpc": "2.0", "id": 110, "method": "tools/call", + "params": { "name": "tool_invoke", "arguments": { + "name": "mcp_register", + "arguments": { + "action": "add", "name": "gh", + "command": "gh-mcp", "args": ["serve"], + "note": "enterprise github" + } + } } + })) + .await + .unwrap(); + assert!(add["result"].get("isError").is_none(), "{add}"); + let body: Value = + serde_json::from_str(add["result"]["content"][0]["text"].as_str().unwrap()).unwrap(); + assert_eq!(body["name"], "gh"); + assert_eq!(body["created_gen"], 0); + + // list returns the registered server (and no credential material). + let list = s + .handle(&json!({ + "jsonrpc": "2.0", "id": 111, "method": "tools/call", + "params": { "name": "tool_invoke", "arguments": { + "name": "mcp_register", "arguments": { "action": "list" } + } } + })) + .await + .unwrap(); + let listing = list["result"]["content"][0]["text"].as_str().unwrap(); + let servers: Value = serde_json::from_str(listing).unwrap(); + assert_eq!(servers.as_array().unwrap().len(), 1); + assert_eq!(servers[0]["command"], "gh-mcp"); + assert_eq!(servers[0]["args"][0], "serve"); + assert!(!listing.contains("token"), "no credential material listed"); + + // remove unregisters it. + let remove = s + .handle(&json!({ + "jsonrpc": "2.0", "id": 112, "method": "tools/call", + "params": { "name": "tool_invoke", "arguments": { + "name": "mcp_register", + "arguments": { "action": "remove", "name": "gh" } + } } + })) + .await + .unwrap(); + assert!(remove["result"].get("isError").is_none(), "{remove}"); + + // add without required fields is a tool error. + let bad = s + .handle(&json!({ + "jsonrpc": "2.0", "id": 113, "method": "tools/call", + "params": { "name": "tool_invoke", "arguments": { + "name": "mcp_register", "arguments": { "action": "add", "name": "x" } + } } + })) + .await + .unwrap(); + assert_eq!(bad["result"]["isError"], true); + } + + #[tokio::test] + async fn mcp_register_is_unlisted_but_discoverable() { + // Listing is context cost, not capability: mcp_register is in the + // non-default `mcp` facet — absent from tools/list, but tool_search + // finds it and tool_describe discloses its schema. + let s = server(vec![]); + let resp = s + .handle(&json!({ + "jsonrpc": "2.0", "id": 120, "method": "tools/call", + "params": { "name": "tool_search", "arguments": { "query": "downstream mcp" } } + })) + .await + .unwrap(); + let found: Value = + serde_json::from_str(resp["result"]["content"][0]["text"].as_str().unwrap()).unwrap(); + assert!(found["tools"] + .as_array() + .unwrap() + .iter() + .any(|t| t["name"] == "mcp_register" && t["facet"] == "mcp")); + + let resp = s + .handle(&json!({ + "jsonrpc": "2.0", "id": 121, "method": "tools/call", + "params": { "name": "tool_describe", "arguments": { "name": "mcp_register" } } + })) + .await + .unwrap(); + let spec: Value = + serde_json::from_str(resp["result"]["content"][0]["text"].as_str().unwrap()).unwrap(); + assert_eq!(spec["facet"], "mcp"); + assert_eq!(spec["mutates"], true); + } + #[tokio::test] async fn routine_eval_runs_inline_steps_under_the_same_leash() { let s = server(vec![ok_out("inline says hi\n")]); diff --git a/crates/modulex-mcp/src/tools.rs b/crates/modulex-mcp/src/tools.rs index f9d125e..310ac18 100644 --- a/crates/modulex-mcp/src/tools.rs +++ b/crates/modulex-mcp/src/tools.rs @@ -621,6 +621,51 @@ fn h_board_close<'a>(cx: &'a CallCtx<'a>, args: &'a Value) -> ToolFuture<'a> { }) } +// ── mcp registry (downstream MCPs behind modulex; opt-in `mcp` facet) ── + +fn h_mcp_register<'a>(cx: &'a CallCtx<'a>, args: &'a Value) -> ToolFuture<'a> { + Box::pin(async move { + let store = match store_of(cx.engine) { + Ok(store) => store, + Err(fault) => return fault, + }; + let action = args.get("action").and_then(Value::as_str).unwrap_or("add"); + match action { + "list" => store_outcome(store.mcp_servers()), + "remove" => { + let Some(name) = args.get("name").and_then(Value::as_str) else { + return ToolOutcome::err("mcp_register remove requires `name`"); + }; + match store.mcp_unregister(name) { + Ok(true) => ToolOutcome::ok(format!("mcp server {name:?} unregistered")), + Ok(false) => ToolOutcome::err(format!("no registered mcp server {name:?}")), + Err(e) => ToolOutcome::err(e.to_string()), + } + } + "add" => { + let (Some(name), Some(command)) = ( + args.get("name").and_then(Value::as_str), + args.get("command").and_then(Value::as_str), + ) else { + return ToolOutcome::err("mcp_register add requires `name` and `command`"); + }; + let server_args = str_list(args, "args"); + let note = args.get("note").and_then(Value::as_str).unwrap_or(""); + // Mutation stamp: the generation current at call time — a counter. + let generation = cx.engine.current_generation(); + store_outcome( + store + .mcp_register(name, command, &server_args, note, generation) + .map(|()| json!({ "name": name, "created_gen": generation })), + ) + } + other => ToolOutcome::err(format!( + "mcp_register: unknown action {other:?} (add | list | remove)" + )), + } + }) +} + // ── discovery trio (the constant-size long tail) ─────────────────────── fn h_tool_search<'a>(cx: &'a CallCtx<'a>, args: &'a Value) -> ToolFuture<'a> { @@ -1199,6 +1244,41 @@ fn build_registry() -> ToolRegistry { }, handler: h_board_close, }, + // ── mcp facet: opt-in, NOT in the default index (budget stays 12). + // Reachable by default via tool_search/tool_invoke and the `mcp-query` + // step — at zero tools/list cost. This is the credential-proxy + // registry: a hotseat agent points at modulex, never at the + // downstream's credentials. + ToolEntry { + spec: ToolSpec { + name: "mcp_register", + description: "Manage the registry of downstream MCP servers hidden \ + behind modulex (issue #7 proxy). action=add {name, command, \ + args?, note?} registers a server (stores the INVOCATION SHAPE \ + only — never credentials; the mcp-query step injects credential \ + references at spawn time and the command is exec-leashed) | \ + list returns all servers | remove {name} unregisters. Returns \ + {name, created_gen} on add.", + input_schema: json!({ + "type": "object", + "properties": { + "action": { "type": "string", + "enum": ["add", "list", "remove"], "default": "add" }, + "name": { "type": "string", + "description": "registry name (add/remove)" }, + "command": { "type": "string", + "description": "program to spawn as a stdio MCP \ + server; must be in the exec grant" }, + "args": { "type": "array", "items": { "type": "string" }, + "description": "static argv for the command" }, + "note": { "type": "string" } + } + }), + mutates: true, + facet: "mcp", + }, + handler: h_mcp_register, + }, ]; ToolRegistry { entries } } @@ -1238,6 +1318,7 @@ mod tests { ("board_query", false, "board"), ("board_move", true, "board"), ("board_close", true, "board"), + ("mcp_register", true, "mcp"), ]; let actual: Vec<(&str, bool, &str)> = registry() .specs() @@ -1322,6 +1403,40 @@ mod tests { ); } + /// The mcp (credential-proxy) facet is OPT-IN: absent from the default + /// index (the budget stays 12), but listed when exposed and always + /// discoverable via tool_search/tool_invoke. + #[test] + fn mcp_facet_is_opt_in_but_discoverable() { + use modulex_core::config::McpConfig; + + let default = crate::facets::FacetPolicy::resolve(None, &McpConfig::default()); + assert!( + !registry() + .specs() + .filter(|s| default.exposes(s.facet)) + .any(|s| s.name == "mcp_register"), + "mcp_register must not appear in the default index" + ); + + let opted = + crate::facets::FacetPolicy::resolve(Some("core,store,mcp"), &McpConfig::default()); + assert!( + registry() + .specs() + .any(|s| s.name == "mcp_register" && opted.exposes(s.facet)), + "opting into `mcp` lists mcp_register" + ); + + // Discoverable (callable) by default even though it is not listed. + assert!( + registry() + .specs() + .any(|s| s.name == "mcp_register" && !default.denies(s.facet)), + "mcp_register is reachable via tool_search/tool_invoke by default" + ); + } + #[test] fn tool_names_are_unique() { let mut names: Vec<&str> = registry().specs().map(|s| s.name).collect(); diff --git a/justfile b/justfile index 21c91b6..24b90a6 100644 --- a/justfile +++ b/justfile @@ -28,6 +28,21 @@ fmt-check: live-test: MODULEX_LIVE_TESTS=1 cargo test -p modulex-core --test live_contract -- --nocapture +# Local coverage with an HTML report (opens target/llvm-cov/html/index.html). +# Excludes binary/pyo3 entrypoints (integration-tested, not unit-tested) so +# the number reflects the library logic the tests actually cover. +cov: + cargo llvm-cov --all-features --workspace \ + --ignore-filename-regex '(main\.rs|modulex-py/src/lib\.rs)' --html + @echo "report: target/llvm-cov/html/index.html" + +# CI-mode coverage gate: same scope as `cov`, fails under the floor. Mirrors +# the `coverage` job in .github/workflows/ci.yml — keep the floor in sync. +cov-ci: + cargo llvm-cov --all-features --workspace \ + --ignore-filename-regex '(main\.rs|modulex-py/src/lib\.rs)' \ + --fail-under-lines 80 + # Run the CLI against the example config (dry run; no side effects). demo: MODULEX_CONFIG=modulex.toml.example cargo run -p modulex-cli -- run morning --dry-run diff --git a/modulex.toml.example b/modulex.toml.example index fb2ad60..7ab7568 100644 --- a/modulex.toml.example +++ b/modulex.toml.example @@ -50,9 +50,11 @@ display = "{label}: work day {n} of {total}" # (reminder_add, watch_add, ...) live in the "store-classic" facet: # unlisted but discoverable via tool_search and callable via tool_invoke. # $MODULEX_TOOLS="core,store,store-classic" overrides; `deny` kills a facet -# entirely (unlisted, undiscoverable, uncallable). +# entirely (unlisted, undiscoverable, uncallable). The "board" and "mcp" +# (downstream-MCP registry, mcp_register) facets are also opt-in: unlisted by +# default, discoverable via tool_search and callable via tool_invoke. # [mcp] -# expose = ["core", "store", "store-classic"] +# expose = ["core", "store", "store-classic", "mcp"] # deny = [] # Optional explicit leash (tier 2; $MODULEX_CAVEATS JSON is tier 1): @@ -97,6 +99,22 @@ type = "reminders" name = "watched pages" type = "url-watch" # leashed in-proc fetch: net Caveats axis + SSRF screening +# --- Phase: downstream MCP proxy (issue #7) --- +# Register a downstream MCP server once (mcp_register tool or `store import`): +# mcp_register{action:"add", name:"gh", command:"gh-mcp", args:["serve"]} +# then call its tools through modulex — the agent never sees the credentials. +# The downstream command must be in the exec grant (declare `command` inline +# below so it joins the declared-default grant, or list it in [caveats] exec). +# Credentials are REFERENCES resolved at spawn time, never inlined or stored. +# [[routines.morning.steps]] +# name = "open issues" +# type = "mcp-query" +# server = "gh" # a name registered via mcp_register +# tool = "list_issues" # the downstream tool to call +# command = "gh-mcp" # declare for the grant (store can't widen it) +# arguments = { repo = "owner/name", state = "open" } +# env = { GITHUB_TOKEN = { cmd = "secret-tool lookup gh-token" } } + # --- Phase: deadlines & countdowns (config + store, merged) --- [[routines.morning.steps]] From d2bb58cb2748475ac7b2ddb226c6c05b39c1c5d9 Mon Sep 17 00:00:00 2001 From: Shawn Hartsock Date: Fri, 12 Jun 2026 22:00:25 -0400 Subject: [PATCH 2/2] =?UTF-8?q?fix(security):=20#58=20=E2=80=94=20failing?= =?UTF-8?q?=20{cmd}=20secret=20helper=20must=20not=20surface=20its=20stder?= =?UTF-8?q?r/stdout=20body?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adversarial review (DO-NOT-MERGE-AS-IS) found a real credential leak: a {cmd} credential helper (gh auth token, vault, pass, custom wrappers) that exits non-zero routinely echoes the very token it tried — on stdout AND stderr. credentials.rs surfaced the trimmed stderr verbatim into CredentialError::CommandFailed, which mcp_query embeds into the agent-visible step result. The exec-gate scrub can't catch it: during a credential's own resolution the secret is the command's OUTPUT, not in the child env, so the scrub loop is empty. Fix is in the SHARED resolver, so it closes the leak for all seven call sites at once (project/gitlab/script/python/github/board_ingest + the new mcp-query). Surface only the exit status, never the stream body. Adds the adversarial regression (failing gh-auth helper echoing a token → token absent from the error, exit status still reported). Everything else in #58 held under review: the exec leash (argv exec, no shell — injection-proof), store-can't-widen-the-grant, the result-echo scrub, Secret unserializability, the ≤12 tool budget, generation stamps. Co-Authored-By: Claude Fable 5 --- crates/modulex-core/src/credentials.rs | 58 ++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/crates/modulex-core/src/credentials.rs b/crates/modulex-core/src/credentials.rs index d48ce0a..ef4c295 100644 --- a/crates/modulex-core/src/credentials.rs +++ b/crates/modulex-core/src/credentials.rs @@ -143,9 +143,17 @@ impl CredentialRef { if !out.success() { return Err(CredentialError::CommandFailed( cmd.clone(), - // stderr may carry diagnostics; stdout may carry the - // secret — only stderr is safe to surface. - out.stderr.trim().to_string(), + // NEITHER stream is safe to surface: a failing secret + // helper (`gh auth token`, `vault`, `pass`, custom + // wrappers) routinely echoes the very token it tried on + // BOTH stdout and stderr. This error is embedded into + // agent-visible step results (the credential-proxy + // premise is "the agent never sees a credential"), so + // we surface only the exit status, never the body. + match out.status { + Some(code) => format!("exited with status {code}"), + None => "did not exit normally".to_string(), + }, )); } Ok(Secret::new(out.stdout.trim().to_string())) @@ -219,6 +227,50 @@ mod tests { assert!(denied.to_string().contains("vault")); } + /// Adversarial regression (review #58): a failing `{cmd}` secret helper + /// routinely echoes the token it tried on its OWN stderr/stdout. That body + /// must NEVER reach the `CredentialError` — it is embedded into + /// agent-visible step results, the surface the credential proxy exists to + /// keep credential-free. Only the exit status is surfaced. + #[tokio::test] + async fn failing_cmd_helper_never_surfaces_its_stderr_body() { + use std::sync::Arc; + + use agent_bridle_core::{Caveats, Scope}; + + use crate::exec::test_support::{gate_with, MockSpawner}; + + // The helper exits non-zero and prints the token it was resolving. + let leaked = "ghp_SECRET_ON_FAILURE_STDERR"; + let spawner = Arc::new(MockSpawner::with_outputs(vec![MockSpawner::fail( + &format!("error: token {leaked} is expired, re-auth required"), + 1, + )])); + let granted = Caveats { + exec: Scope::only(["gh".to_string()]), + ..Caveats::top() + }; + let gate = gate_with(&granted, spawner); + + let err = CredentialRef::Cmd { + cmd: "gh auth token".into(), + } + .resolve(&gate) + .await + .unwrap_err(); + + let rendered = err.to_string(); + assert!( + !rendered.contains(leaked), + "credential helper stderr leaked into the error: {rendered}" + ); + // It still tells the operator what happened — just the status, no body. + assert!( + rendered.contains("status 1"), + "the failure should surface the exit status: {rendered}" + ); + } + // The "Secret is not serializable" guarantee is enforced by the // `compile_fail` doctest on the `Secret` type itself. }