diff --git a/crates/octos-agent/src/agent/execution.rs b/crates/octos-agent/src/agent/execution.rs index c640f50d9c..da54cea8ea 100644 --- a/crates/octos-agent/src/agent/execution.rs +++ b/crates/octos-agent/src/agent/execution.rs @@ -23,11 +23,14 @@ //! tools still serializes the whole batch. An optimised "run Safe in parallel, //! then Exclusive in order" pipeline is explicitly deferred (see the M8.8 spec). +use std::io::Read; +use std::path::{Path, PathBuf}; +use std::sync::Arc; use std::time::{Duration, Instant}; use eyre::Result; use octos_core::{Message, MessageRole, TokenUsage}; -use octos_llm::ChatResponse; +use octos_llm::{ChatConfig, ChatResponse, LlmProvider}; use tokio::task::JoinHandle; use tracing::{debug, info, warn}; @@ -105,6 +108,263 @@ fn build_spawn_only_produced_files_message( Some(out) } +const SPAWN_ONLY_SUMMARY_MIN_BYTES: u64 = 4 * 1024; +const SPAWN_ONLY_SUMMARY_MAX_INPUT_BYTES: u64 = 24 * 1024; +const SPAWN_ONLY_SUMMARY_MAX_OUTPUT_TOKENS: u32 = 300; + +struct SpawnOnlySummarySource { + prompt_body: String, + display_paths: Vec, +} + +fn is_text_summary_candidate(path: &Path) -> bool { + matches!( + path.extension() + .and_then(|ext| ext.to_str()) + .map(|ext| ext.to_ascii_lowercase()), + Some(ext) + if matches!( + ext.as_str(), + "md" | "markdown" + | "txt" + | "json" + | "jsonl" + | "csv" + | "tsv" + | "html" + | "xml" + | "toml" + | "yaml" + | "yml" + | "log" + ) + ) +} + +fn resolve_spawn_only_summary_path(file: &str, workspace_root: &Path) -> Option { + use octos_bus::file_handle::{ToolPathScope, resolve_tool_path}; + + match resolve_tool_path(workspace_root, None, file) { + Ok(resolved) if resolved.scope == ToolPathScope::Workspace => Some(resolved.absolute), + Ok(resolved) => { + warn!( + file = %file, + scope = ?resolved.scope, + "skipping spawn_only auto-summary file outside workspace" + ); + None + } + Err(error) => { + warn!( + file = %file, + error = ?error, + "skipping spawn_only auto-summary file that failed workspace resolution" + ); + None + } + } +} + +fn reject_spawn_only_summary_symlinked_ancestors( + workspace_root: &Path, + resolved: &Path, +) -> std::io::Result<()> { + let canonical_root = + std::fs::canonicalize(workspace_root).unwrap_or_else(|_| workspace_root.to_path_buf()); + let lexical_root = workspace_root.to_path_buf(); + let (mut current, suffix) = if let Ok(suffix) = resolved.strip_prefix(&lexical_root) { + (lexical_root, suffix.to_path_buf()) + } else if let Ok(suffix) = resolved.strip_prefix(&canonical_root) { + (canonical_root, suffix.to_path_buf()) + } else { + return Err(std::io::Error::new( + std::io::ErrorKind::PermissionDenied, + format!( + "path {} is not under workspace {}", + resolved.display(), + workspace_root.display() + ), + )); + }; + + let components: Vec<_> = suffix.components().collect(); + for component in &components[..components.len().saturating_sub(1)] { + current.push(component.as_os_str()); + match std::fs::symlink_metadata(¤t) { + Ok(metadata) if metadata.file_type().is_symlink() => { + return Err(std::io::Error::new( + std::io::ErrorKind::PermissionDenied, + format!("symlink ancestor rejected: {}", current.display()), + )); + } + Ok(_) => {} + Err(error) if error.kind() == std::io::ErrorKind::NotFound => return Ok(()), + Err(error) => return Err(error), + } + } + Ok(()) +} + +fn open_spawn_only_summary_file(path: &Path) -> std::io::Result { + let mut options = std::fs::OpenOptions::new(); + options.read(true); + #[cfg(unix)] + { + use std::os::unix::fs::OpenOptionsExt; + options.custom_flags(libc::O_NOFOLLOW); + } + #[cfg(not(unix))] + { + if path.symlink_metadata().is_ok_and(|m| m.is_symlink()) { + return Err(std::io::Error::new( + std::io::ErrorKind::PermissionDenied, + "symlink leaf rejected", + )); + } + } + options.open(path) +} + +fn collect_spawn_only_summary_source( + files: &[String], + workspace_root: Option<&Path>, +) -> Option { + let workspace_root = workspace_root?; + let mut total_text_bytes = 0_u64; + let mut remaining = SPAWN_ONLY_SUMMARY_MAX_INPUT_BYTES; + let mut prompt_body = String::new(); + let mut display_paths = Vec::new(); + + for file in files { + if remaining == 0 { + break; + } + let Some(resolved) = resolve_spawn_only_summary_path(file, workspace_root) else { + continue; + }; + if !is_text_summary_candidate(&resolved) { + continue; + } + if let Err(error) = reject_spawn_only_summary_symlinked_ancestors(workspace_root, &resolved) + { + warn!( + file = %file, + resolved = %resolved.display(), + error = %error, + "skipping spawn_only auto-summary file with unsafe path" + ); + continue; + } + let mut file_handle = match open_spawn_only_summary_file(&resolved) { + Ok(handle) => handle, + Err(error) => { + warn!( + file = %file, + resolved = %resolved.display(), + error = %error, + "skipping spawn_only auto-summary file that could not be opened safely" + ); + continue; + } + }; + let Ok(metadata) = file_handle.metadata() else { + continue; + }; + if !metadata.is_file() { + continue; + } + total_text_bytes = total_text_bytes.saturating_add(metadata.len()); + let display_path = + relativize_workspace_path(&resolved.to_string_lossy(), Some(workspace_root)); + let mut bytes = Vec::new(); + let read_limit = remaining.min(metadata.len()); + if file_handle + .by_ref() + .take(read_limit) + .read_to_end(&mut bytes) + .is_err() + { + continue; + } + remaining = remaining.saturating_sub(bytes.len() as u64); + display_paths.push(display_path.clone()); + let text = String::from_utf8_lossy(&bytes); + prompt_body.push_str("\n\n--- "); + prompt_body.push_str(&display_path); + prompt_body.push_str(" ---\n"); + prompt_body.push_str(&text); + } + + if total_text_bytes <= SPAWN_ONLY_SUMMARY_MIN_BYTES || prompt_body.trim().is_empty() { + return None; + } + + Some(SpawnOnlySummarySource { + prompt_body, + display_paths, + }) +} + +async fn build_spawn_only_auto_summary_message( + llm: Arc, + tool_name: &str, + files: &[String], + workspace_root: Option<&Path>, +) -> Option { + let source = collect_spawn_only_summary_source(files, workspace_root)?; + let messages = vec![ + Message::system( + "Summarize generated tool output for future assistant context. \ + Write 200-300 words when the content supports it, stay under \ + 300 words, be factual, and do not include a preamble.", + ), + Message::user(format!( + "Tool `{tool_name}` produced these output files. Summarize the useful \ + findings, decisions, and artifact contents so a later turn can refer \ + to them without rereading the full file.\n{}", + source.prompt_body + )), + ]; + let response = match llm + .chat( + &messages, + &[], + &ChatConfig { + max_tokens: Some(SPAWN_ONLY_SUMMARY_MAX_OUTPUT_TOKENS), + temperature: Some(0.0), + ..Default::default() + }, + ) + .await + { + Ok(response) => response, + Err(error) => { + warn!( + tool = %tool_name, + error = %error, + "spawn_only auto-summary generation failed" + ); + return None; + } + }; + let summary = response.content.unwrap_or_default(); + let summary = summary.trim(); + if summary.is_empty() { + return None; + } + + let path_hint = match source.display_paths.as_slice() { + [path] => format!("Full output at {path}; call `read_file` to inspect."), + paths => format!( + "Full outputs at {}; call `read_file` to inspect.", + paths.join(", ") + ), + }; + Some(format!( + "`{tool_name}` summary:\n{summary}\n\n({path_hint})" + )) +} + /// Strip the workspace prefix from an absolute path, returning a /// workspace-relative path string. Falls back to the original path if it /// cannot be relativised (already-relative input, different root, or no @@ -119,7 +379,14 @@ fn relativize_workspace_path(path: &str, workspace_root: Option<&std::path::Path let abs = std::path::Path::new(path); match abs.strip_prefix(root) { Ok(rel) => rel.to_string_lossy().into_owned(), - Err(_) => path.to_string(), + Err(_) => { + if let (Ok(abs), Ok(root)) = (abs.canonicalize(), root.canonicalize()) { + if let Ok(rel) = abs.strip_prefix(root) { + return rel.to_string_lossy().into_owned(); + } + } + path.to_string() + } } } @@ -196,6 +463,7 @@ impl Agent { turn_attachment_ctx: &crate::tools::TurnAttachmentContext, ) -> JoinHandle { // Clone Arc-wrapped fields so the spawned task is 'static + let llm = self.llm.clone(); let tools = self.tools.clone(); let reporter = self.reporter(); let hooks = self.hooks.clone(); @@ -401,6 +669,7 @@ impl Agent { "running spawn_only tool in background" ); let bg_tools = tools.clone(); + let bg_llm = llm.clone(); let bg_name = tc_name.clone(); let bg_args = effective_args.clone(); let bg_sender = tools.background_result_sender(); @@ -751,34 +1020,46 @@ impl Agent { // otherwise we'd suppress the file-list // bubble and the user would see no // signal that the task completed. - let already_sent_summary = !r.output.trim().is_empty(); - if !already_sent_summary { - if let Some(ref sender) = bg_sender { - if let Some(produced_msg) = + if let Some(ref sender) = bg_sender { + let auto_summary = + if bg_tools.spawn_only_auto_summarize(&bg_name) { + build_spawn_only_auto_summary_message( + bg_llm.clone(), + &bg_name, + &output_files, + bg_tools.workspace_root(), + ) + .await + } else { + None + }; + let already_sent_summary = !r.output.trim().is_empty(); + let followup_msg = auto_summary.or_else(|| { + if already_sent_summary { + None + } else { build_spawn_only_produced_files_message( &bg_name, &output_files, bg_tools.workspace_root(), ) - { - let _ = - sender(BackgroundResultPayload { - task_label: bg_name.clone(), - content: produced_msg, - kind: - BackgroundResultKind::Notification, - media: vec![], - envelope_media: vec![], - originating_thread_id: - bg_originating_thread_id.clone(), - task_id: Some(task_id.clone()), - originating_client_message_id: - bg_originating_client_message_id - .clone(), - tool_call_id: Some(bg_tc_id.clone()), - }) - .await; } + }); + if let Some(produced_msg) = followup_msg { + let _ = sender(BackgroundResultPayload { + task_label: bg_name.clone(), + content: produced_msg, + kind: BackgroundResultKind::Notification, + media: vec![], + envelope_media: vec![], + originating_thread_id: bg_originating_thread_id + .clone(), + task_id: Some(task_id.clone()), + originating_client_message_id: + bg_originating_client_message_id.clone(), + tool_call_id: Some(bg_tc_id.clone()), + }) + .await; } } } else { @@ -1240,17 +1521,35 @@ impl Agent { // LLM loses its stable // filename reference for the // next turn. + let auto_summary = if bg_tools + .spawn_only_auto_summarize(&bg_name) + { + build_spawn_only_auto_summary_message( + bg_llm.clone(), + &bg_name, + &sent_files, + bg_tools.workspace_root(), + ) + .await + } else { + None + }; let already_sent_summary = !r.output.trim().is_empty(); - if !already_sent_summary { - if let Some(produced_msg) = + let followup_msg = auto_summary.or_else(|| { + if already_sent_summary { + None + } else { build_spawn_only_produced_files_message( &bg_name, &sent_files, bg_tools.workspace_root(), ) - { - let _ = sender(BackgroundResultPayload { + } + }); + if let Some(produced_msg) = followup_msg { + let _ = + sender(BackgroundResultPayload { task_label: bg_name.clone(), content: produced_msg, kind: @@ -1266,7 +1565,6 @@ impl Agent { tool_call_id: Some(bg_tc_id.clone()), }) .await; - } } } } @@ -1973,9 +2271,63 @@ fn panic_result(tool_call: &octos_core::ToolCall, reason: &str) -> ToolCallResul #[cfg(test)] mod tests { use super::{ - build_spawn_only_produced_files_message, relativize_workspace_path, - satisfied_completion_content, should_auto_send_tool_files, + SPAWN_ONLY_SUMMARY_MAX_OUTPUT_TOKENS, build_spawn_only_auto_summary_message, + build_spawn_only_produced_files_message, collect_spawn_only_summary_source, + relativize_workspace_path, satisfied_completion_content, should_auto_send_tool_files, }; + use async_trait::async_trait; + use octos_core::Message; + use octos_llm::{ChatConfig, ChatResponse, LlmProvider, StopReason, TokenUsage, ToolSpec}; + use std::sync::{Arc, Mutex}; + + struct SummaryProvider { + reply: String, + calls: Mutex, ChatConfig)>>, + } + + impl SummaryProvider { + fn new(reply: &str) -> Self { + Self { + reply: reply.to_string(), + calls: Mutex::new(Vec::new()), + } + } + } + + #[async_trait] + impl LlmProvider for SummaryProvider { + async fn chat( + &self, + messages: &[Message], + _tools: &[ToolSpec], + config: &ChatConfig, + ) -> eyre::Result { + self.calls + .lock() + .unwrap() + .push((messages.to_vec(), config.clone())); + Ok(ChatResponse { + content: Some(self.reply.clone()), + reasoning_content: None, + tool_calls: Vec::new(), + stop_reason: StopReason::EndTurn, + usage: TokenUsage::default(), + provider_index: None, + }) + } + + fn context_window(&self) -> u32 { + 128_000 + } + + fn model_id(&self) -> &str { + "summary-provider-test" + } + + fn provider_name(&self) -> &str { + "mock" + } + } #[test] fn explicit_send_file_turn_suppresses_plugin_auto_send_for_other_tools() { @@ -2065,6 +2417,110 @@ mod tests { assert!(!msg.contains("# Deep Research:")); } + #[tokio::test] + async fn auto_summary_message_reads_large_workspace_text_with_bounded_llm_call() { + let temp = tempfile::tempdir().unwrap(); + let workspace = temp.path().join("workspace"); + let report_dir = workspace.join("reports"); + std::fs::create_dir_all(&report_dir).unwrap(); + let report = report_dir.join("deep_search.md"); + std::fs::write( + &report, + format!( + "# Deep Search\n\nALPHA_SENTINEL {}\n", + "body ".repeat(1_200) + ), + ) + .unwrap(); + + let provider = Arc::new(SummaryProvider::new( + "Alpha finding, beta decision, and gamma artifact are ready.", + )); + let llm: Arc = provider.clone(); + let msg = build_spawn_only_auto_summary_message( + llm, + "deep_search", + &[report.to_string_lossy().into_owned()], + Some(&workspace), + ) + .await + .expect("large text output should produce a summary follow-up"); + + assert!(msg.starts_with("`deep_search` summary:\n"), "got: {msg}"); + assert!(msg.contains("Alpha finding, beta decision"), "got: {msg}"); + assert!( + msg.contains("(Full output at reports/deep_search.md; call `read_file` to inspect.)"), + "got: {msg}" + ); + + let calls = provider.calls.lock().unwrap(); + assert_eq!(calls.len(), 1); + assert_eq!( + calls[0].1.max_tokens, + Some(SPAWN_ONLY_SUMMARY_MAX_OUTPUT_TOKENS) + ); + assert_eq!(calls[0].1.temperature, Some(0.0)); + assert!( + calls[0] + .0 + .iter() + .any(|m| m.content.contains("ALPHA_SENTINEL")), + "summary prompt must include bounded file content" + ); + } + + #[test] + fn auto_summary_source_requires_more_than_four_kb_of_text_files() { + let temp = tempfile::tempdir().unwrap(); + let workspace = temp.path().join("workspace"); + std::fs::create_dir_all(&workspace).unwrap(); + let small = workspace.join("small.md"); + std::fs::write(&small, "too small").unwrap(); + let binary = workspace.join("voice.mp3"); + std::fs::write(&binary, "X".repeat(8_000)).unwrap(); + + assert!( + collect_spawn_only_summary_source( + &[small.to_string_lossy().into_owned()], + Some(&workspace) + ) + .is_none(), + "small text output must not spend a summary LLM call" + ); + assert!( + collect_spawn_only_summary_source( + &[binary.to_string_lossy().into_owned()], + Some(&workspace) + ) + .is_none(), + "binary/non-text extensions must be skipped" + ); + } + + #[cfg(unix)] + #[test] + fn auto_summary_source_rejects_symlinked_parent_paths() { + use std::os::unix::fs::symlink; + + let temp = tempfile::tempdir().unwrap(); + let workspace = temp.path().join("workspace"); + let outside = temp.path().join("outside"); + std::fs::create_dir_all(&workspace).unwrap(); + std::fs::create_dir_all(&outside).unwrap(); + std::fs::write(outside.join("report.md"), "outside ".repeat(1_000)).unwrap(); + symlink(&outside, workspace.join("linked")).unwrap(); + let linked_report = workspace.join("linked/report.md"); + + assert!( + collect_spawn_only_summary_source( + &[linked_report.to_string_lossy().into_owned()], + Some(&workspace) + ) + .is_none(), + "auto-summary must not read through a symlinked workspace ancestor" + ); + } + #[test] fn relativize_strips_workspace_prefix() { let root = std::path::PathBuf::from("/u/me/ws"); diff --git a/crates/octos-agent/src/agent/loop_runner.rs b/crates/octos-agent/src/agent/loop_runner.rs index 22158ff802..d24538b1d5 100644 --- a/crates/octos-agent/src/agent/loop_runner.rs +++ b/crates/octos-agent/src/agent/loop_runner.rs @@ -3487,6 +3487,7 @@ printf '{"output":"voice saved","success":true}\n' env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let plugin = PluginTool::new("mofa-fm".into(), def, script_path).with_extra_env(vec![( diff --git a/crates/octos-agent/src/plugins/extras.rs b/crates/octos-agent/src/plugins/extras.rs index b29d734335..a411be374e 100644 --- a/crates/octos-agent/src/plugins/extras.rs +++ b/crates/octos-agent/src/plugins/extras.rs @@ -39,6 +39,8 @@ pub struct SkillExtras { pub spawn_only_tools: Vec, /// Custom messages per spawn_only tool. pub spawn_only_messages: std::collections::HashMap, + /// spawn_only tools opted into bounded file summary follow-ups. + pub spawn_only_auto_summarize: Vec, } /// Resolve manifest extras against the skill directory. @@ -438,6 +440,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }) .collect(), diff --git a/crates/octos-agent/src/plugins/loader.rs b/crates/octos-agent/src/plugins/loader.rs index a323bc7cb5..e1297270a1 100644 --- a/crates/octos-agent/src/plugins/loader.rs +++ b/crates/octos-agent/src/plugins/loader.rs @@ -214,6 +214,7 @@ impl PluginLoader { Ok((tools, extras)) => { let n = tools.len(); let spawn_only = extras.spawn_only_tools.clone(); + let auto_summarize = extras.spawn_only_auto_summarize.clone(); for loaded in tools { let tool = loaded.tool; let name = tool.name().to_string(); @@ -230,6 +231,9 @@ impl PluginLoader { for name in &spawn_only { let msg = extras.spawn_only_messages.get(name).cloned(); registry.mark_spawn_only(name, msg); + if auto_summarize.iter().any(|tool| tool == name) { + registry.mark_spawn_only_auto_summarize(name); + } } // Don't defer — tool stays visible to LLM. // The execution loop auto-redirects calls to background spawn. @@ -565,6 +569,12 @@ impl PluginLoader { ) }) .collect(); + let spawn_only_auto_summarize: Vec = manifest + .tools + .iter() + .filter(|t| t.spawn_only && t.auto_summarize && t.validate_for_registration().is_ok()) + .map(|t| t.name.clone()) + .collect(); let plugin_name = manifest.name.clone(); let tools: Vec = manifest @@ -664,6 +674,7 @@ impl PluginLoader { // Return extras with spawn_only info extras.spawn_only_tools = spawn_only_names; extras.spawn_only_messages = spawn_only_msgs; + extras.spawn_only_auto_summarize = spawn_only_auto_summarize; Ok((tools, extras)) } @@ -1919,6 +1930,42 @@ mod tests { assert!(plugin_dir.join("main").exists()); } + #[cfg(unix)] + #[test] + fn test_loader_marks_spawn_only_auto_summarize_from_manifest() { + let dir = tempfile::tempdir().unwrap(); + let plugin_dir = dir.path().join("mofa-publish"); + std::fs::create_dir_all(plugin_dir.join("scripts")).unwrap(); + + std::fs::write( + plugin_dir.join("manifest.json"), + r#"{ + "name": "mofa-publish", + "version": "0.1.0", + "tools": [{ + "name": "mofa_publish", + "description": "deploy", + "input_schema": {"type": "object", "properties": {}}, + "spawn_only": true, + "auto_summarize": true + }] +}"#, + ) + .unwrap(); + std::fs::write( + plugin_dir.join("scripts/publish_site.sh"), + "#!/usr/bin/env bash\nset -euo pipefail\necho \"publish:$*\"\n", + ) + .unwrap(); + + let mut registry = ToolRegistry::new(); + let result = + PluginLoader::load_into(&mut registry, &[dir.path().to_path_buf()], &[]).unwrap(); + assert_eq!(result.tool_count, 1); + assert!(registry.is_spawn_only("mofa_publish")); + assert!(registry.spawn_only_auto_summarize("mofa_publish")); + } + #[test] fn test_builtin_env_allowlist_augments_first_party_mofa_tools_only() { let def = PluginToolDef { @@ -1929,6 +1976,7 @@ mod tests { env: vec!["EXISTING_ENV".to_string(), "GEMINI_API_KEY".to_string()], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; @@ -1953,6 +2001,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let untrusted = apply_builtin_env_allowlist("custom-plugin", untrusted); @@ -2466,6 +2515,7 @@ edition = "2021" ], spawn_only_tools: vec![], spawn_only_messages: Default::default(), + spawn_only_auto_summarize: vec![], }; result.merge_extras(e1); @@ -2479,6 +2529,7 @@ edition = "2021" ], spawn_only_tools: vec![], spawn_only_messages: Default::default(), + spawn_only_auto_summarize: vec![], }; result.merge_extras(e2); @@ -2492,6 +2543,7 @@ edition = "2021" ], spawn_only_tools: vec![], spawn_only_messages: Default::default(), + spawn_only_auto_summarize: vec![], }; result.merge_extras(e3); diff --git a/crates/octos-agent/src/plugins/manifest.rs b/crates/octos-agent/src/plugins/manifest.rs index 124a897e7c..688104f40e 100644 --- a/crates/octos-agent/src/plugins/manifest.rs +++ b/crates/octos-agent/src/plugins/manifest.rs @@ -193,6 +193,10 @@ pub struct PluginToolDef { /// Default: "SUCCESS: Task is now running in background..." #[serde(default)] pub spawn_only_message: Option, + /// If true, successful spawn_only completions with large text file outputs + /// append a bounded LLM-generated summary to chat history. + #[serde(default)] + pub auto_summarize: bool, /// Item 6 of OCTOS_M8_FIX_FIRST_CHECKLIST_2026-04-24: /// optional concurrency class. When `"exclusive"` the M8.8 /// scheduler serialises this tool against any sibling in the same @@ -715,10 +719,39 @@ mod tests { env: env.into_iter().map(str::to_string).collect(), risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, } } + #[test] + fn tool_auto_summarize_defaults_false_and_parses_true() { + let manifest: PluginManifest = serde_json::from_str( + r#"{ + "name": "summary-default", + "version": "0.1.0", + "tools": [{"name": "default_tool", "description": "d"}] + }"#, + ) + .unwrap(); + assert!(!manifest.tools[0].auto_summarize); + + let manifest: PluginManifest = serde_json::from_str( + r#"{ + "name": "summary-opt-in", + "version": "0.1.0", + "tools": [{ + "name": "summary_tool", + "description": "d", + "spawn_only": true, + "auto_summarize": true + }] + }"#, + ) + .unwrap(); + assert!(manifest.tools[0].auto_summarize); + } + #[test] fn validate_for_registration_accepts_clean_allowlist() { let def = def_with_env(vec!["OPENAI_API_KEY", "SMTP_HOST", "_FOO_BAR_"]); @@ -843,6 +876,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: class.map(str::to_string), } } diff --git a/crates/octos-agent/src/plugins/tool.rs b/crates/octos-agent/src/plugins/tool.rs index 0f299a0f11..2f759a8861 100644 --- a/crates/octos-agent/src/plugins/tool.rs +++ b/crates/octos-agent/src/plugins/tool.rs @@ -2611,6 +2611,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, } } @@ -2695,6 +2696,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("plug".into(), def, PathBuf::from("/bin/true")) @@ -2742,6 +2744,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("plug".into(), def, PathBuf::from("/bin/true")) @@ -2802,6 +2805,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("plug".into(), def, PathBuf::from("/bin/true")) @@ -2850,6 +2854,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; // Plugin's work_dir mirrors the modern `runtime/session.rs` @@ -2896,6 +2901,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("mofa-podcast".into(), def, PathBuf::from("/bin/true")) @@ -3028,6 +3034,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("mofa-podcast".into(), def, PathBuf::from("/bin/true")) @@ -3087,6 +3094,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("mofa-slides".into(), def, PathBuf::from("/bin/true")) @@ -3214,6 +3222,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("mofa-podcast".into(), def, PathBuf::from("/bin/true")) @@ -3425,6 +3434,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("mofa-frame".into(), def, PathBuf::from("/bin/true")) @@ -3470,6 +3480,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("plug".into(), def, PathBuf::from("/bin/true")) @@ -3505,6 +3516,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("plug".into(), def, PathBuf::from("/bin/true")) @@ -3536,6 +3548,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("plug".into(), def, PathBuf::from("/bin/true")) @@ -3566,6 +3579,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("plug".into(), def, PathBuf::from("/bin/true")); @@ -3604,6 +3618,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, } } @@ -3941,6 +3956,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("p".into(), def, script_path) @@ -3983,6 +3999,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("p".into(), def, script_path) @@ -4028,6 +4045,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("p".into(), def, script_path) @@ -4869,6 +4887,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, } } @@ -5287,6 +5306,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("plug".into(), def, PathBuf::from("/bin/true")); @@ -5375,6 +5395,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("plug".into(), def, PathBuf::from("/bin/true")); @@ -5501,6 +5522,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool_out = PluginTool::new("plug".into(), def, PathBuf::from("/bin/true")); @@ -5680,6 +5702,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("mofa-podcast".into(), def, PathBuf::from("/bin/true")) @@ -5828,6 +5851,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("mofa-podcast".into(), def, executable); @@ -5849,6 +5873,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let mofa_executable = skill_dir.path().join("mofa-slides"); @@ -6074,6 +6099,7 @@ mod tests { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let tool = PluginTool::new("plug".into(), def, PathBuf::from("/bin/true")); diff --git a/crates/octos-agent/src/task_supervisor.rs b/crates/octos-agent/src/task_supervisor.rs index 9403d4cb85..4a88e8db8b 100644 --- a/crates/octos-agent/src/task_supervisor.rs +++ b/crates/octos-agent/src/task_supervisor.rs @@ -3981,6 +3981,7 @@ mod tests { /// 1. notify_failure observes ack=false. /// 2. mark_synth_ack_emitted records ack + drains empty pending. /// 3. notify_failure inserts pending — nothing will drain it. + /// /// Post-fix the combined mutex makes step 2 atomic with the /// check-and-insert pair in step 1+3, so the pending entry is /// either drained in step 2 OR observed in step 1 and dispatched diff --git a/crates/octos-agent/src/tools/registry.rs b/crates/octos-agent/src/tools/registry.rs index 91bcf8065e..4d15f1e0c5 100644 --- a/crates/octos-agent/src/tools/registry.rs +++ b/crates/octos-agent/src/tools/registry.rs @@ -153,6 +153,8 @@ pub struct ToolRegistry { spawn_only: HashSet, /// Custom messages for spawn_only tools returned to the LLM after auto-backgrounding. spawn_only_messages: HashMap, + /// spawn_only tools whose file outputs should be summarized into chat history. + spawn_only_auto_summarize: HashSet, /// Callback to notify session actor when background (spawn_only) tasks complete or fail. background_result_sender: Option, /// Supervisor for tracking background task lifecycle. @@ -192,6 +194,7 @@ impl ToolRegistry { plugin_tools: HashSet::new(), spawn_only: HashSet::new(), spawn_only_messages: HashMap::new(), + spawn_only_auto_summarize: HashSet::new(), background_result_sender: None, supervisor: Arc::new(TaskSupervisor::new()), spawn_only_invoked: Arc::new(std::sync::atomic::AtomicBool::new(false)), @@ -219,17 +222,30 @@ impl ToolRegistry { } } + /// Mark a spawn_only tool as opted in to file-summary follow-ups. + pub fn mark_spawn_only_auto_summarize(&mut self, name: &str) { + if self.spawn_only.contains(name) { + self.spawn_only_auto_summarize.insert(name.to_string()); + } + } + /// Check if a tool is marked spawn_only. pub fn is_spawn_only(&self, name: &str) -> bool { self.spawn_only.contains(name) } + /// Check if a spawn_only tool requested file-summary follow-ups. + pub fn spawn_only_auto_summarize(&self, name: &str) -> bool { + self.spawn_only_auto_summarize.contains(name) + } + /// Clear all spawn_only markers so tools appear as regular tools. /// Used in subagent registries where spawn_only tools should be /// callable directly (the subagent IS the background context). pub fn clear_spawn_only(&mut self) { self.spawn_only.clear(); self.spawn_only_messages.clear(); + self.spawn_only_auto_summarize.clear(); self.invalidate_cache(); } @@ -673,6 +689,8 @@ impl ToolRegistry { self.spawn_only.retain(|name| self.tools.contains_key(name)); self.spawn_only_messages .retain(|name, _| self.tools.contains_key(name)); + self.spawn_only_auto_summarize + .retain(|name| self.tools.contains_key(name)); // Stale `deferred` entries are interior-mutable; lock and prune // here so a subsequent `activate(...)` cannot resurrect a tool // that policy has already removed. @@ -825,6 +843,7 @@ impl ToolRegistry { plugin_tools: self.plugin_tools.clone(), spawn_only: self.spawn_only.clone(), spawn_only_messages: self.spawn_only_messages.clone(), + spawn_only_auto_summarize: self.spawn_only_auto_summarize.clone(), background_result_sender: None, supervisor: Arc::new(TaskSupervisor::new()), spawn_only_invoked: Arc::new(std::sync::atomic::AtomicBool::new(false)), diff --git a/crates/octos-agent/tests/m8_integration_concurrency.rs b/crates/octos-agent/tests/m8_integration_concurrency.rs index 372136b966..95ff96fff7 100644 --- a/crates/octos-agent/tests/m8_integration_concurrency.rs +++ b/crates/octos-agent/tests/m8_integration_concurrency.rs @@ -82,6 +82,7 @@ fn plugin_with_exclusive_manifest_serializes_with_other_exclusive_tools_in_batch env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: Some("exclusive".into()), }; let exclusive_tool = PluginTool::new( @@ -109,6 +110,7 @@ fn plugin_with_no_concurrency_declaration_defaults_to_safe() { env: vec![], risk: None, spawn_only_message: None, + auto_summarize: false, concurrency_class: None, }; let safe_tool = PluginTool::new( diff --git a/crates/octos-agent/tests/spawn_only_handle_envelope.rs b/crates/octos-agent/tests/spawn_only_handle_envelope.rs index f91a4530fc..ded7f8a9b2 100644 --- a/crates/octos-agent/tests/spawn_only_handle_envelope.rs +++ b/crates/octos-agent/tests/spawn_only_handle_envelope.rs @@ -4,11 +4,14 @@ //! message returned to the LLM must be the JSON `task_handle` envelope, not //! the full tool output. This pins the contract. -use std::sync::Arc; +use std::path::PathBuf; use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::{Arc, Mutex}; use std::time::Duration; use async_trait::async_trait; +use futures::FutureExt; +use octos_agent::tools::spawn::{BackgroundResultPayload, BackgroundResultSender}; use octos_agent::{Agent, AgentConfig, ReadTaskOutputTool, Tool, ToolRegistry, ToolResult}; use octos_core::{AgentId, Message, ToolCall}; use octos_llm::{ChatConfig, ChatResponse, LlmProvider, StopReason, TokenUsage, ToolSpec}; @@ -16,25 +19,35 @@ use octos_memory::EpisodeStore; use tempfile::TempDir; struct ScriptedLlm { - responses: std::sync::Mutex>, + responses: Mutex>, + calls: Mutex, ChatConfig)>>, } impl ScriptedLlm { fn new(responses: Vec) -> Self { Self { - responses: std::sync::Mutex::new(responses), + responses: Mutex::new(responses), + calls: Mutex::new(Vec::new()), } } + + fn calls(&self) -> Vec<(Vec, ChatConfig)> { + self.calls.lock().unwrap().clone() + } } #[async_trait] impl LlmProvider for ScriptedLlm { async fn chat( &self, - _messages: &[Message], + messages: &[Message], _tools: &[ToolSpec], - _config: &ChatConfig, + config: &ChatConfig, ) -> eyre::Result { + self.calls + .lock() + .unwrap() + .push((messages.to_vec(), config.clone())); let mut r = self.responses.lock().unwrap(); if r.is_empty() { eyre::bail!("ScriptedLlm: no more responses"); @@ -83,6 +96,72 @@ impl Tool for HugeOutputTool { } } +struct FileOutputTool { + name: &'static str, + invocations: Arc, + output_path: PathBuf, +} + +#[async_trait] +impl Tool for FileOutputTool { + fn name(&self) -> &str { + self.name + } + + fn description(&self) -> &str { + "spawn_only probe that writes a large text artifact" + } + + fn input_schema(&self) -> serde_json::Value { + serde_json::json!({"type": "object"}) + } + + async fn execute(&self, _args: &serde_json::Value) -> eyre::Result { + self.invocations.fetch_add(1, Ordering::SeqCst); + if let Some(parent) = self.output_path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write( + &self.output_path, + format!( + "# Summary Probe\n\nALPHA_SENTINEL {}\n", + "body ".repeat(1_200) + ), + )?; + Ok(ToolResult { + output: String::new(), + success: true, + files_to_send: vec![self.output_path.clone()], + ..Default::default() + }) + } +} + +struct SendFileOkTool; + +#[async_trait] +impl Tool for SendFileOkTool { + fn name(&self) -> &str { + "send_file" + } + + fn description(&self) -> &str { + "test send_file stub" + } + + fn input_schema(&self) -> serde_json::Value { + serde_json::json!({"type": "object"}) + } + + async fn execute(&self, _args: &serde_json::Value) -> eyre::Result { + Ok(ToolResult { + output: "sent".to_string(), + success: true, + ..Default::default() + }) + } +} + fn tool_use(calls: Vec) -> ChatResponse { ChatResponse { content: None, @@ -225,6 +304,131 @@ async fn spawn_only_intercept_returns_task_handle_envelope_not_full_output() { tokio::time::sleep(Duration::from_millis(50)).await; } +#[tokio::test] +async fn spawn_only_auto_summarize_appends_followup_for_large_file_output() { + let memory_dir = TempDir::new().unwrap(); + let workspace = memory_dir.path().join("ws"); + std::fs::create_dir_all(&workspace).unwrap(); + let output_path = workspace.join("reports/summary_probe.md"); + + let invocations = Arc::new(AtomicU32::new(0)); + let probe = FileOutputTool { + name: "summary_probe", + invocations: invocations.clone(), + output_path: output_path.clone(), + }; + + let captured: Arc>> = Arc::new(Mutex::new(Vec::new())); + let captured_for_sender = captured.clone(); + let sender: BackgroundResultSender = Arc::new(move |payload| { + let captured = captured_for_sender.clone(); + async move { + captured.lock().unwrap().push(payload); + true + } + .boxed() + }); + + let mut tools = ToolRegistry::new(); + tools.set_workspace_root(workspace.clone()); + tools.register(probe); + tools.register(SendFileOkTool); + tools.mark_spawn_only("summary_probe", None); + tools.mark_spawn_only_auto_summarize("summary_probe"); + tools.set_background_result_sender(sender); + + let supervisor = tools.supervisor(); + tools.register(ReadTaskOutputTool::new( + supervisor, + "test-session", + None, + workspace.clone(), + )); + + let memory = Arc::new( + EpisodeStore::open(memory_dir.path().join(".octos")) + .await + .unwrap(), + ); + + let scripted = Arc::new(ScriptedLlm::new(vec![ + tool_use(vec![tc("call-summary-1", "summary_probe")]), + end_turn("Alpha summary remembers the generated report."), + ])); + let llm: Arc = scripted.clone(); + + let agent = + Agent::new(AgentId::new("auto-summary"), llm, tools, memory).with_config(AgentConfig { + save_episodes: false, + suppress_auto_send_files: true, + ..Default::default() + }); + + let response = agent + .process_message("kick summary probe", &[], vec![]) + .await + .expect("agent loop must not error"); + assert!( + response.messages.iter().any(|m| { + matches!(m.role, octos_core::MessageRole::Tool) + && m.tool_call_id + .as_deref() + .is_some_and(|id| id.contains("call-summary-1")) + }), + "foreground turn should return the spawn_only handle" + ); + + let mut summary_payload = None; + for _ in 0..100 { + summary_payload = captured.lock().unwrap().iter().find_map(|payload| { + payload + .content + .starts_with("`summary_probe` summary:") + .then(|| payload.clone()) + }); + if summary_payload.is_some() { + break; + } + tokio::time::sleep(Duration::from_millis(20)).await; + } + let summary_payload = summary_payload.unwrap_or_else(|| { + panic!( + "expected auto-summary background payload, got: {:#?}", + captured.lock().unwrap() + ) + }); + + assert_eq!(invocations.load(Ordering::SeqCst), 1); + assert!( + summary_payload + .content + .contains("Alpha summary remembers the generated report."), + "got: {}", + summary_payload.content + ); + assert!( + summary_payload + .content + .contains("Full output at reports/summary_probe.md; call `read_file` to inspect."), + "got: {}", + summary_payload.content + ); + assert!(summary_payload.media.is_empty()); + assert!(summary_payload.envelope_media.is_empty()); + + let calls = scripted.calls(); + assert!( + calls.iter().any(|(messages, config)| { + config.max_tokens == Some(300) + && config.temperature == Some(0.0) + && messages + .iter() + .any(|m| m.content.contains("ALPHA_SENTINEL")) + }), + "summary LLM call must be bounded and include file content; calls: {calls:#?}" + ); +} + // Codex P2 (round 1) regression guard: when `read_task_output` is NOT // registered, the spawn_only intercept must fall back to the legacy // free-text message instead of advertising a tool the LLM cannot call. diff --git a/crates/octos-cli/src/cli_agent_adapter.rs b/crates/octos-cli/src/cli_agent_adapter.rs index acee1c640f..d72e02f107 100644 --- a/crates/octos-cli/src/cli_agent_adapter.rs +++ b/crates/octos-cli/src/cli_agent_adapter.rs @@ -329,23 +329,23 @@ mod tests { use super::*; #[cfg(unix)] - fn write_executable(dir: &tempfile::TempDir, name: &str, body: &str) -> PathBuf { - use std::os::unix::fs::PermissionsExt; - + fn write_script(dir: &tempfile::TempDir, name: &str, body: &str) -> PathBuf { let path = dir.path().join(name); std::fs::write(&path, body).unwrap(); - let mut perms = std::fs::metadata(&path).unwrap().permissions(); - perms.set_mode(0o755); - std::fs::set_permissions(&path, perms).unwrap(); path } + #[cfg(unix)] + fn script_command(script: PathBuf) -> CliAgentCommandConfig { + CliAgentCommandConfig::new("/bin/sh").arg(script.to_string_lossy()) + } + #[cfg(unix)] #[tokio::test] async fn captures_stdout_stderr_and_declared_artifacts() { let dir = tempfile::tempdir().unwrap(); let artifact = dir.path().join("report.md"); - let script = write_executable( + let script = write_script( &dir, "agent-fixture", r#"#!/bin/sh @@ -356,7 +356,7 @@ printf '# report\n' > "$1" ); let result = run_cli_agent_command( - CliAgentCommandConfig::new(script) + script_command(script) .arg(artifact.to_string_lossy()) .declared_artifact(&artifact), ) @@ -382,7 +382,7 @@ printf '# report\n' > "$1" #[tokio::test] async fn relative_declared_artifacts_resolve_against_child_cwd() { let dir = tempfile::tempdir().unwrap(); - let script = write_executable( + let script = write_script( &dir, "relative-artifact-agent", r#"#!/bin/sh @@ -391,7 +391,7 @@ printf '# report\n' > report.md ); let result = run_cli_agent_command( - CliAgentCommandConfig::new(script) + script_command(script) .cwd(dir.path()) .declared_artifact("report.md"), ) @@ -411,7 +411,7 @@ printf '# report\n' > report.md #[tokio::test] async fn transcript_capture_is_bounded_and_marks_truncation() { let dir = tempfile::tempdir().unwrap(); - let script = write_executable( + let script = write_script( &dir, "large-output-agent", r#"#!/bin/sh @@ -419,9 +419,7 @@ perl -e 'print "x" x (1024 * 1024 + 64)' "#, ); - let result = run_cli_agent_command(CliAgentCommandConfig::new(script)) - .await - .unwrap(); + let result = run_cli_agent_command(script_command(script)).await.unwrap(); assert_eq!( result.transcript.stdout.len(), @@ -436,7 +434,7 @@ perl -e 'print "x" x (1024 * 1024 + 64)' async fn times_out_and_kills_child() { let dir = tempfile::tempdir().unwrap(); let marker = dir.path().join("finished"); - let script = write_executable( + let script = write_script( &dir, "slow-agent", r#"#!/bin/sh @@ -446,7 +444,7 @@ printf done > "$1" ); let result = run_cli_agent_command( - CliAgentCommandConfig::new(script) + script_command(script) .arg(marker.to_string_lossy()) .timeout(Duration::from_millis(100)), ) @@ -463,7 +461,7 @@ printf done > "$1" async fn cancel_reports_cancelled_and_stops_process() { let dir = tempfile::tempdir().unwrap(); let marker = dir.path().join("finished"); - let script = write_executable( + let script = write_script( &dir, "cancel-agent", r#"#!/bin/sh @@ -473,7 +471,7 @@ printf done > "$1" ); let mut process = CliAgentProcess::spawn( - CliAgentCommandConfig::new(script) + script_command(script) .arg(marker.to_string_lossy()) .timeout(Duration::from_secs(5)), ) @@ -490,7 +488,7 @@ printf done > "$1" async fn close_reports_closed_and_stops_process() { let dir = tempfile::tempdir().unwrap(); let marker = dir.path().join("finished"); - let script = write_executable( + let script = write_script( &dir, "close-agent", r#"#!/bin/sh @@ -500,7 +498,7 @@ printf done > "$1" ); let mut process = CliAgentProcess::spawn( - CliAgentCommandConfig::new(script) + script_command(script) .arg(marker.to_string_lossy()) .timeout(Duration::from_secs(5)), ) @@ -517,7 +515,7 @@ printf done > "$1" async fn argv_config_does_not_invoke_shell_metacharacters() { let dir = tempfile::tempdir().unwrap(); let injected = dir.path().join("pwned"); - let script = write_executable( + let script = write_script( &dir, "echo-argv", r#"#!/bin/sh @@ -526,7 +524,7 @@ printf '%s\n' "$1" ); let payload = format!("literal; touch {}", injected.display()); - let result = run_cli_agent_command(CliAgentCommandConfig::new(script).arg(payload.clone())) + let result = run_cli_agent_command(script_command(script).arg(payload.clone())) .await .unwrap();