From 00d3d5589163c117dabae9ac1478bf880eea69ba Mon Sep 17 00:00:00 2001 From: AdityaShome Date: Mon, 23 Mar 2026 19:41:10 +0530 Subject: [PATCH 01/12] tests: add real agent runner harness --- tests/Cargo.toml | 3 + tests/src/agent_runner.rs | 286 ++++++++++++++++++++++++++++++ tests/src/lib.rs | 4 + tests/tests/agent_runner_tests.rs | 66 +++++++ 4 files changed, 359 insertions(+) create mode 100644 tests/src/agent_runner.rs create mode 100644 tests/tests/agent_runner_tests.rs diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 8f8b03ab1..4def38ca1 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -9,9 +9,12 @@ description = "Testing utilities for the MoFA agent framework" [dependencies] mofa-kernel = { path = "../crates/mofa-kernel" } mofa-foundation = { path = "../crates/mofa-foundation" } +mofa-runtime = { path = "../crates/mofa-runtime" } tokio = { workspace = true } async-trait = { workspace = true } anyhow = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } chrono = { workspace = true } +thiserror = { workspace = true } +uuid = { workspace = true } diff --git a/tests/src/agent_runner.rs b/tests/src/agent_runner.rs new file mode 100644 index 000000000..becf8eca7 --- /dev/null +++ b/tests/src/agent_runner.rs @@ -0,0 +1,286 @@ +//! Real agent runner harness for integration-style tests. +//! +//! Provides a lightweight wrapper around the MoFA runtime `AgentRunner` +//! with an isolated workspace and deterministic mock LLM. + +use async_trait::async_trait; +use chrono::{DateTime, Utc}; +use mofa_foundation::agent::executor::{AgentExecutor, AgentExecutorConfig}; +use mofa_kernel::agent::context::AgentContext; +use mofa_kernel::agent::core::MoFAAgent; +use mofa_kernel::agent::error::{AgentError, AgentResult}; +use mofa_kernel::agent::types::{AgentInput, AgentOutput, ChatCompletionRequest}; +use mofa_kernel::agent::types::{ChatCompletionResponse, ToolCall}; +use mofa_kernel::agent::AgentCapabilities; +use mofa_runtime::runner::{AgentRunner, RunnerState, RunnerStats}; +use std::collections::VecDeque; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use thiserror::Error; +use tokio::sync::RwLock; +use uuid::Uuid; + +/// Errors returned by the agent runner harness itself. +#[derive(Debug, Error)] +#[non_exhaustive] +pub enum AgentRunnerError { + #[error("failed to create test workspace: {0}")] + WorkspaceIo(#[from] std::io::Error), + + #[error("agent runner failure: {0}")] + Agent(#[from] AgentError), +} + +/// Metadata captured for each run. +#[derive(Debug, Clone)] +pub struct AgentRunMetadata { + pub agent_id: String, + pub agent_name: String, + pub execution_id: String, + pub session_id: Option, + pub workspace_root: PathBuf, + pub runner_state: RunnerState, + pub runner_stats: RunnerStats, + pub started_at: DateTime, +} + +/// Result of a single agent run. +#[derive(Debug)] +pub struct AgentRunResult { + pub output: Option, + pub error: Option, + pub duration: Duration, + pub metadata: AgentRunMetadata, +} + +impl AgentRunResult { + pub fn is_success(&self) -> bool { + self.error.is_none() + } + + pub fn output_text(&self) -> Option { + self.output.as_ref().map(AgentOutput::to_text) + } +} + +/// Simple deterministic LLM provider for tests. +#[derive(Debug)] +pub struct MockAgentLLMProvider { + name: String, + responses: RwLock>, + default_response: RwLock, +} + +impl MockAgentLLMProvider { + pub fn new(name: impl Into) -> Self { + Self { + name: name.into(), + responses: RwLock::new(VecDeque::new()), + default_response: RwLock::new("This is a mock response.".to_string()), + } + } + + pub async fn add_response(&self, response: impl Into) { + self.responses.write().await.push_back(response.into()); + } + + pub async fn set_default_response(&self, response: impl Into) { + *self.default_response.write().await = response.into(); + } + + pub async fn pending_responses(&self) -> usize { + self.responses.read().await.len() + } +} + +#[async_trait] +impl mofa_kernel::agent::types::LLMProvider for MockAgentLLMProvider { + fn name(&self) -> &str { + &self.name + } + + async fn chat( + &self, + _request: ChatCompletionRequest, + ) -> AgentResult { + let response = { + let mut responses = self.responses.write().await; + if let Some(next) = responses.pop_front() { + next + } else { + self.default_response.read().await.clone() + } + }; + + Ok(ChatCompletionResponse { + content: Some(response), + tool_calls: Some(Vec::::new()), + usage: None, + }) + } +} + +struct SessionAwareExecutor { + executor: AgentExecutor, +} + +impl SessionAwareExecutor { + fn new(executor: AgentExecutor) -> Self { + Self { executor } + } +} + +#[async_trait] +impl MoFAAgent for SessionAwareExecutor { + fn id(&self) -> &str { + self.executor.id() + } + + fn name(&self) -> &str { + self.executor.name() + } + + fn capabilities(&self) -> &AgentCapabilities { + self.executor.capabilities() + } + + fn state(&self) -> mofa_kernel::agent::AgentState { + self.executor.state() + } + + async fn initialize(&mut self, ctx: &AgentContext) -> AgentResult<()> { + self.executor.initialize(ctx).await + } + + async fn execute( + &mut self, + input: AgentInput, + ctx: &AgentContext, + ) -> AgentResult { + let message = input.as_text().unwrap_or(""); + let session_key = ctx.session_id.as_deref().unwrap_or("default"); + let response = self.executor.process_message(session_key, message).await?; + Ok(AgentOutput::text(response)) + } + + async fn shutdown(&mut self) -> AgentResult<()> { + self.executor.shutdown().await + } +} + +struct TempWorkspace { + root: PathBuf, +} + +impl TempWorkspace { + fn new(prefix: &str) -> Result { + let root = std::env::temp_dir().join(format!("{}-{}", prefix, Uuid::now_v7())); + std::fs::create_dir_all(&root)?; + Ok(Self { root }) + } + + fn path(&self) -> &Path { + &self.root + } +} + +impl Drop for TempWorkspace { + fn drop(&mut self) { + let _ = std::fs::remove_dir_all(&self.root); + } +} + +/// Test harness for running real agent execution paths. +pub struct AgentTestRunner { + workspace: TempWorkspace, + session_id: String, + execution_id: String, + llm: Arc, + runner: AgentRunner, +} + +impl AgentTestRunner { + pub async fn new() -> Result { + Self::with_config(AgentExecutorConfig::default()).await + } + + pub async fn with_config(config: AgentExecutorConfig) -> Result { + let workspace = TempWorkspace::new("mofa-agent-test")?; + let llm = Arc::new(MockAgentLLMProvider::new("mock-llm")); + let executor = AgentExecutor::with_config(llm.clone(), workspace.path(), config).await?; + let agent = SessionAwareExecutor::new(executor); + + let execution_id = Uuid::now_v7().to_string(); + let session_id = Uuid::now_v7().to_string(); + let context = AgentContext::with_session(&execution_id, &session_id); + + let runner = AgentRunner::with_context(agent, context).await?; + + Ok(Self { + workspace, + session_id, + execution_id, + llm, + runner, + }) + } + + pub fn workspace(&self) -> &Path { + self.workspace.path() + } + + pub fn session_id(&self) -> &str { + &self.session_id + } + + pub fn execution_id(&self) -> &str { + &self.execution_id + } + + pub fn mock_llm(&self) -> Arc { + Arc::clone(&self.llm) + } + + pub async fn run_text(&mut self, input: &str) -> Result { + self.run_input(AgentInput::text(input)).await + } + + pub async fn run_input( + &mut self, + input: AgentInput, + ) -> Result { + let started_at = Utc::now(); + let timer = Instant::now(); + let result = self.runner.execute(input).await; + let duration = timer.elapsed(); + + let (output, error) = match result { + Ok(output) => (Some(output), None), + Err(err) => (None, Some(err)), + }; + + let metadata = AgentRunMetadata { + agent_id: self.runner.agent().id().to_string(), + agent_name: self.runner.agent().name().to_string(), + execution_id: self.runner.context().execution_id.clone(), + session_id: self.runner.context().session_id.clone(), + workspace_root: self.workspace.path().to_path_buf(), + runner_state: self.runner.state().await, + runner_stats: self.runner.stats().await, + started_at, + }; + + Ok(AgentRunResult { + output, + error, + duration, + metadata, + }) + } + + pub async fn shutdown(self) -> Result<(), AgentRunnerError> { + self.runner.shutdown().await?; + Ok(()) + } +} diff --git a/tests/src/lib.rs b/tests/src/lib.rs index 3368dc49b..80325edde 100644 --- a/tests/src/lib.rs +++ b/tests/src/lib.rs @@ -4,6 +4,7 @@ //! control for testing MoFA agents. pub mod adversarial; +pub mod agent_runner; pub mod assertions; pub mod backend; pub mod bus; @@ -14,6 +15,9 @@ pub mod tools; pub use backend::MockLLMBackend; pub use bus::MockAgentBus; pub use clock::{Clock, MockClock, SystemClock}; +pub use agent_runner::{ + AgentRunMetadata, AgentRunResult, AgentRunnerError, AgentTestRunner, MockAgentLLMProvider, +}; pub use report::{ JsonFormatter, ReportFormatter, TestCaseResult, TestReport, TestReportBuilder, TestStatus, TextFormatter, diff --git a/tests/tests/agent_runner_tests.rs b/tests/tests/agent_runner_tests.rs new file mode 100644 index 000000000..22c1f2cc0 --- /dev/null +++ b/tests/tests/agent_runner_tests.rs @@ -0,0 +1,66 @@ +use mofa_testing::agent_runner::AgentTestRunner; + +#[tokio::test] +async fn agent_runner_executes_and_captures_output() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner + .mock_llm() + .add_response("Mocked response") + .await; + + let result = runner + .run_text("hello") + .await + .expect("run should succeed"); + + assert!(result.is_success()); + assert_eq!(result.output_text().as_deref(), Some("Mocked response")); + assert_eq!( + result.metadata.session_id.as_deref(), + Some(runner.session_id()) + ); + assert_eq!(result.metadata.execution_id, runner.execution_id()); + + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn agent_runner_creates_isolated_workspaces() { + let mut runner_a = AgentTestRunner::new().await.expect("runner A initializes"); + let mut runner_b = AgentTestRunner::new().await.expect("runner B initializes"); + + assert_ne!(runner_a.workspace(), runner_b.workspace()); + + runner_a + .mock_llm() + .add_response("Response A") + .await; + runner_b + .mock_llm() + .add_response("Response B") + .await; + + let _ = runner_a + .run_text("hi") + .await + .expect("runner A executes"); + let _ = runner_b + .run_text("hi") + .await + .expect("runner B executes"); + + let session_a = runner_a + .workspace() + .join("sessions") + .join(format!("{}.jsonl", runner_a.session_id())); + let session_b = runner_b + .workspace() + .join("sessions") + .join(format!("{}.jsonl", runner_b.session_id())); + + assert!(session_a.exists()); + assert!(session_b.exists()); + + runner_a.shutdown().await.expect("shutdown A"); + runner_b.shutdown().await.expect("shutdown B"); +} From aed5b971ded3a193bbe03d1d814fed121411ae00 Mon Sep 17 00:00:00 2001 From: AdityaShome Date: Mon, 23 Mar 2026 22:18:28 +0530 Subject: [PATCH 02/12] Add agent runner test harness with workspace isolation and tool/bootstraps support --- tests/src/agent_runner.rs | 150 +++++++++++++++++++++++++++--- tests/tests/agent_runner_tests.rs | 105 +++++++++++++++++++++ 2 files changed, 242 insertions(+), 13 deletions(-) diff --git a/tests/src/agent_runner.rs b/tests/src/agent_runner.rs index becf8eca7..307d1b307 100644 --- a/tests/src/agent_runner.rs +++ b/tests/src/agent_runner.rs @@ -9,9 +9,12 @@ use mofa_foundation::agent::executor::{AgentExecutor, AgentExecutorConfig}; use mofa_kernel::agent::context::AgentContext; use mofa_kernel::agent::core::MoFAAgent; use mofa_kernel::agent::error::{AgentError, AgentResult}; +use mofa_foundation::agent::components::tool::as_tool; +use mofa_foundation::agent::session::{JsonlSessionStorage, Session, SessionStorage}; use mofa_kernel::agent::types::{AgentInput, AgentOutput, ChatCompletionRequest}; use mofa_kernel::agent::types::{ChatCompletionResponse, ToolCall}; use mofa_kernel::agent::AgentCapabilities; +use mofa_kernel::agent::AgentState; use mofa_runtime::runner::{AgentRunner, RunnerState, RunnerStats}; use std::collections::VecDeque; use std::path::{Path, PathBuf}; @@ -34,19 +37,26 @@ pub enum AgentRunnerError { /// Metadata captured for each run. #[derive(Debug, Clone)] +#[non_exhaustive] pub struct AgentRunMetadata { pub agent_id: String, pub agent_name: String, pub execution_id: String, pub session_id: Option, pub workspace_root: PathBuf, - pub runner_state: RunnerState, - pub runner_stats: RunnerStats, + pub runner_state_before: RunnerState, + pub runner_state_after: RunnerState, + pub runner_stats_before: RunnerStats, + pub runner_stats_after: RunnerStats, + pub agent_state_before: AgentState, + pub agent_state_after: AgentState, pub started_at: DateTime, + pub session_snapshot: Option, } /// Result of a single agent run. #[derive(Debug)] +#[non_exhaustive] pub struct AgentRunResult { pub output: Option, pub error: Option, @@ -68,8 +78,19 @@ impl AgentRunResult { #[derive(Debug)] pub struct MockAgentLLMProvider { name: String, - responses: RwLock>, + responses: RwLock>, default_response: RwLock, + last_request: RwLock>, +} + +#[derive(Debug, Clone)] +enum MockLlmResponse { + Text(String), + ToolCall { + content: Option, + tool_calls: Vec, + }, + Error(String), } impl MockAgentLLMProvider { @@ -78,11 +99,39 @@ impl MockAgentLLMProvider { name: name.into(), responses: RwLock::new(VecDeque::new()), default_response: RwLock::new("This is a mock response.".to_string()), + last_request: RwLock::new(None), } } pub async fn add_response(&self, response: impl Into) { - self.responses.write().await.push_back(response.into()); + self.responses + .write() + .await + .push_back(MockLlmResponse::Text(response.into())); + } + + pub async fn add_tool_call_response( + &self, + tool_name: &str, + arguments: serde_json::Value, + content: Option, + ) { + let tool_call = ToolCall { + id: Uuid::now_v7().to_string(), + name: tool_name.to_string(), + arguments, + }; + self.responses.write().await.push_back(MockLlmResponse::ToolCall { + content, + tool_calls: vec![tool_call], + }); + } + + pub async fn add_error_response(&self, message: impl Into) { + self.responses + .write() + .await + .push_back(MockLlmResponse::Error(message.into())); } pub async fn set_default_response(&self, response: impl Into) { @@ -92,6 +141,10 @@ impl MockAgentLLMProvider { pub async fn pending_responses(&self) -> usize { self.responses.read().await.len() } + + pub async fn last_request(&self) -> Option { + self.last_request.read().await.clone() + } } #[async_trait] @@ -102,22 +155,31 @@ impl mofa_kernel::agent::types::LLMProvider for MockAgentLLMProvider { async fn chat( &self, - _request: ChatCompletionRequest, + request: ChatCompletionRequest, ) -> AgentResult { + *self.last_request.write().await = Some(request); let response = { let mut responses = self.responses.write().await; if let Some(next) = responses.pop_front() { next } else { - self.default_response.read().await.clone() + MockLlmResponse::Text(self.default_response.read().await.clone()) } }; - Ok(ChatCompletionResponse { - content: Some(response), - tool_calls: Some(Vec::::new()), - usage: None, - }) + match response { + MockLlmResponse::Text(content) => Ok(ChatCompletionResponse { + content: Some(content), + tool_calls: Some(Vec::::new()), + usage: None, + }), + MockLlmResponse::ToolCall { content, tool_calls } => Ok(ChatCompletionResponse { + content, + tool_calls: Some(tool_calls), + usage: None, + }), + MockLlmResponse::Error(message) => Err(AgentError::ExecutionFailed(message)), + } } } @@ -129,6 +191,13 @@ impl SessionAwareExecutor { fn new(executor: AgentExecutor) -> Self { Self { executor } } + + async fn register_tool( + &self, + tool: Arc, + ) -> AgentResult<()> { + self.executor.register_tool(tool).await + } } #[async_trait] @@ -183,6 +252,15 @@ impl TempWorkspace { fn path(&self) -> &Path { &self.root } + + fn write_file(&self, relative_path: &Path, content: &str) -> Result { + let path = self.root.join(relative_path); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(&path, content)?; + Ok(path) + } } impl Drop for TempWorkspace { @@ -242,6 +320,34 @@ impl AgentTestRunner { Arc::clone(&self.llm) } + pub fn write_bootstrap_file( + &self, + filename: &str, + content: &str, + ) -> Result { + self.workspace.write_file(Path::new(filename), content) + } + + pub fn write_workspace_file( + &self, + relative_path: impl AsRef, + content: &str, + ) -> Result { + self.workspace.write_file(relative_path.as_ref(), content) + } + + pub async fn register_simple_tool(&self, tool: T) -> Result<(), AgentRunnerError> + where + T: mofa_foundation::agent::components::tool::SimpleTool + Send + Sync + 'static, + { + let tool_ref = as_tool(tool); + self.runner + .agent() + .register_tool(tool_ref) + .await + .map_err(AgentRunnerError::from) + } + pub async fn run_text(&mut self, input: &str) -> Result { self.run_input(AgentInput::text(input)).await } @@ -251,9 +357,16 @@ impl AgentTestRunner { input: AgentInput, ) -> Result { let started_at = Utc::now(); + let runner_state_before = self.runner.state().await; + let runner_stats_before = self.runner.stats().await; + let agent_state_before = self.runner.agent_state(); let timer = Instant::now(); let result = self.runner.execute(input).await; let duration = timer.elapsed(); + let runner_state_after = self.runner.state().await; + let runner_stats_after = self.runner.stats().await; + let agent_state_after = self.runner.agent_state(); + let session_snapshot = self.load_session_snapshot().await; let (output, error) = match result { Ok(output) => (Some(output), None), @@ -266,9 +379,14 @@ impl AgentTestRunner { execution_id: self.runner.context().execution_id.clone(), session_id: self.runner.context().session_id.clone(), workspace_root: self.workspace.path().to_path_buf(), - runner_state: self.runner.state().await, - runner_stats: self.runner.stats().await, + runner_state_before, + runner_state_after, + runner_stats_before, + runner_stats_after, + agent_state_before, + agent_state_after, started_at, + session_snapshot, }; Ok(AgentRunResult { @@ -283,4 +401,10 @@ impl AgentTestRunner { self.runner.shutdown().await?; Ok(()) } + + async fn load_session_snapshot(&self) -> Option { + let session_id = self.runner.context().session_id.as_deref()?; + let storage = JsonlSessionStorage::new(self.workspace.path()).await.ok()?; + storage.load(session_id).await.ok()? + } } diff --git a/tests/tests/agent_runner_tests.rs b/tests/tests/agent_runner_tests.rs index 22c1f2cc0..3b9ae1848 100644 --- a/tests/tests/agent_runner_tests.rs +++ b/tests/tests/agent_runner_tests.rs @@ -1,4 +1,6 @@ use mofa_testing::agent_runner::AgentTestRunner; +use mofa_testing::tools::MockTool; +use serde_json::json; #[tokio::test] async fn agent_runner_executes_and_captures_output() { @@ -20,6 +22,11 @@ async fn agent_runner_executes_and_captures_output() { Some(runner.session_id()) ); assert_eq!(result.metadata.execution_id, runner.execution_id()); + assert_eq!(result.metadata.runner_stats_before.total_executions, 0); + assert_eq!(result.metadata.runner_stats_after.total_executions, 1); + assert!(result.metadata.session_snapshot.is_some()); + let snapshot = result.metadata.session_snapshot.as_ref().unwrap(); + assert_eq!(snapshot.len(), 2); runner.shutdown().await.expect("shutdown succeeds"); } @@ -64,3 +71,101 @@ async fn agent_runner_creates_isolated_workspaces() { runner_a.shutdown().await.expect("shutdown A"); runner_b.shutdown().await.expect("shutdown B"); } + +#[tokio::test] +async fn agent_runner_executes_tool_calls() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + + let tool = MockTool::new( + "echo_tool", + "Echo the provided input", + json!({ + "type": "object", + "properties": { + "input": { "type": "string" } + }, + "required": ["input"] + }), + ); + + runner + .register_simple_tool(tool.clone()) + .await + .expect("tool registered"); + + runner + .mock_llm() + .add_tool_call_response("echo_tool", json!({ "input": "ping" }), None) + .await; + runner + .mock_llm() + .add_response("Final response") + .await; + + let result = runner + .run_text("use tool") + .await + .expect("run should succeed"); + + assert_eq!(result.output_text().as_deref(), Some("Final response")); + assert_eq!(tool.call_count().await, 1); + let last_call = tool.last_call().await.expect("tool call captured"); + assert_eq!(last_call.arguments, json!({ "input": "ping" })); + + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn agent_runner_loads_bootstrap_files() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner + .write_bootstrap_file("AGENTS.md", "Bootstrap content for agent test.") + .expect("bootstrap file written"); + + runner + .mock_llm() + .add_response("Bootstrapped response") + .await; + + let _ = runner + .run_text("check prompt") + .await + .expect("run should succeed"); + + let request = runner + .mock_llm() + .last_request() + .await + .expect("request captured"); + let system_message = request + .messages + .first() + .and_then(|msg| msg.content.as_deref()) + .expect("system message content"); + + assert!(system_message.contains("AGENTS.md")); + assert!(system_message.contains("Bootstrap content for agent test.")); + + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn agent_runner_captures_llm_failure() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner + .mock_llm() + .add_error_response("mock failure") + .await; + + let result = runner + .run_text("trigger failure") + .await + .expect("run should return result"); + + assert!(!result.is_success()); + let error = result.error.expect("error captured"); + assert!(error.to_string().contains("mock failure")); + assert_eq!(result.metadata.runner_stats_after.failed_executions, 1); + + runner.shutdown().await.expect("shutdown succeeds"); +} From 4543c4d2cceb508a1e95a0a2826d721cc182f3ea Mon Sep 17 00:00:00 2001 From: AdityaShome Date: Mon, 23 Mar 2026 22:33:27 +0530 Subject: [PATCH 03/12] Add agent runner metadata, tool capture, and prompt customization --- .../src/agent/context/prompt.rs | 11 +++ crates/mofa-foundation/src/agent/executor.rs | 14 ++- tests/src/agent_runner.rs | 99 ++++++++++++++++++- tests/src/lib.rs | 1 + tests/src/tools.rs | 25 ++++- tests/tests/agent_runner_tests.rs | 77 ++++++++++++++- 6 files changed, 220 insertions(+), 7 deletions(-) diff --git a/crates/mofa-foundation/src/agent/context/prompt.rs b/crates/mofa-foundation/src/agent/context/prompt.rs index 04ec30bd6..87cc8a127 100644 --- a/crates/mofa-foundation/src/agent/context/prompt.rs +++ b/crates/mofa-foundation/src/agent/context/prompt.rs @@ -142,6 +142,17 @@ impl PromptContext { self } + /// Replace the agent identity. + pub fn set_identity(&mut self, identity: AgentIdentity) { + self.agent_name = identity.name.clone(); + self.identity = identity; + } + + /// Replace the bootstrap file list. + pub fn set_bootstrap_files(&mut self, files: Vec) { + self.bootstrap_files = files; + } + /// Set skills that should always be loaded pub fn with_always_load(mut self, skills: Vec) -> Self { self.always_load = skills; diff --git a/crates/mofa-foundation/src/agent/executor.rs b/crates/mofa-foundation/src/agent/executor.rs index 605ab8fe0..3219657bf 100644 --- a/crates/mofa-foundation/src/agent/executor.rs +++ b/crates/mofa-foundation/src/agent/executor.rs @@ -548,6 +548,15 @@ impl AgentExecutor { &self.config } + /// Update the prompt context (system prompt builder). + pub async fn update_prompt_context(&self, updater: F) + where + F: FnOnce(&mut PromptContext), + { + let mut ctx = self.context.write().await; + updater(&mut ctx); + } + /// Get mutable reference to base agent pub fn base_mut(&mut self) -> &mut BaseAgent { &mut self.base @@ -643,7 +652,10 @@ mod tests { "mock" } - async fn chat(&self, _request: ChatCompletionRequest) -> AgentResult { + async fn chat( + &self, + _request: ChatCompletionRequest, + ) -> AgentResult { Ok(ChatCompletionResponse { content: Some("ok".to_string()), tool_calls: Some(Vec::::new()), diff --git a/tests/src/agent_runner.rs b/tests/src/agent_runner.rs index 307d1b307..74dc87b6e 100644 --- a/tests/src/agent_runner.rs +++ b/tests/src/agent_runner.rs @@ -5,12 +5,14 @@ use async_trait::async_trait; use chrono::{DateTime, Utc}; +use mofa_foundation::agent::context::prompt::AgentIdentity; use mofa_foundation::agent::executor::{AgentExecutor, AgentExecutorConfig}; use mofa_kernel::agent::context::AgentContext; use mofa_kernel::agent::core::MoFAAgent; use mofa_kernel::agent::error::{AgentError, AgentResult}; use mofa_foundation::agent::components::tool::as_tool; use mofa_foundation::agent::session::{JsonlSessionStorage, Session, SessionStorage}; +use crate::tools::MockTool; use mofa_kernel::agent::types::{AgentInput, AgentOutput, ChatCompletionRequest}; use mofa_kernel::agent::types::{ChatCompletionResponse, ToolCall}; use mofa_kernel::agent::AgentCapabilities; @@ -52,6 +54,9 @@ pub struct AgentRunMetadata { pub agent_state_after: AgentState, pub started_at: DateTime, pub session_snapshot: Option, + pub tool_calls: Vec, + pub llm_last_request: Option, + pub llm_last_response: Option, } /// Result of a single agent run. @@ -64,6 +69,15 @@ pub struct AgentRunResult { pub metadata: AgentRunMetadata, } +/// Captures a tool call with its input and output. +#[derive(Debug, Clone)] +pub struct ToolCallRecord { + pub tool_name: String, + pub input: serde_json::Value, + pub output: Option, + pub success: bool, +} + impl AgentRunResult { pub fn is_success(&self) -> bool { self.error.is_none() @@ -81,6 +95,7 @@ pub struct MockAgentLLMProvider { responses: RwLock>, default_response: RwLock, last_request: RwLock>, + last_response: RwLock>, } #[derive(Debug, Clone)] @@ -100,6 +115,7 @@ impl MockAgentLLMProvider { responses: RwLock::new(VecDeque::new()), default_response: RwLock::new("This is a mock response.".to_string()), last_request: RwLock::new(None), + last_response: RwLock::new(None), } } @@ -145,6 +161,10 @@ impl MockAgentLLMProvider { pub async fn last_request(&self) -> Option { self.last_request.read().await.clone() } + + pub async fn last_response(&self) -> Option { + self.last_response.read().await.clone() + } } #[async_trait] @@ -167,7 +187,7 @@ impl mofa_kernel::agent::types::LLMProvider for MockAgentLLMProvider { } }; - match response { + let response = match response { MockLlmResponse::Text(content) => Ok(ChatCompletionResponse { content: Some(content), tool_calls: Some(Vec::::new()), @@ -179,7 +199,10 @@ impl mofa_kernel::agent::types::LLMProvider for MockAgentLLMProvider { usage: None, }), MockLlmResponse::Error(message) => Err(AgentError::ExecutionFailed(message)), - } + }?; + + *self.last_response.write().await = Some(response.clone()); + Ok(response) } } @@ -198,6 +221,13 @@ impl SessionAwareExecutor { ) -> AgentResult<()> { self.executor.register_tool(tool).await } + + async fn update_prompt_context(&self, updater: F) + where + F: FnOnce(&mut mofa_foundation::agent::context::prompt::PromptContext), + { + self.executor.update_prompt_context(updater).await; + } } #[async_trait] @@ -276,6 +306,7 @@ pub struct AgentTestRunner { execution_id: String, llm: Arc, runner: AgentRunner, + mock_tools: Vec, } impl AgentTestRunner { @@ -301,6 +332,7 @@ impl AgentTestRunner { execution_id, llm, runner, + mock_tools: Vec::new(), }) } @@ -348,10 +380,45 @@ impl AgentTestRunner { .map_err(AgentRunnerError::from) } + pub async fn register_mock_tool(&mut self, tool: MockTool) -> Result<(), AgentRunnerError> { + self.register_simple_tool(tool.clone()).await?; + self.mock_tools.push(tool); + Ok(()) + } + + pub async fn configure_prompt( + &self, + identity: Option, + bootstrap_files: Option>, + ) { + self.runner + .agent() + .update_prompt_context(|ctx| { + if let Some(identity) = identity { + ctx.set_identity(identity); + } + if let Some(files) = bootstrap_files { + ctx.set_bootstrap_files(files); + } + }) + .await; + } + pub async fn run_text(&mut self, input: &str) -> Result { self.run_input(AgentInput::text(input)).await } + pub async fn run_texts( + &mut self, + inputs: &[&str], + ) -> Result, AgentRunnerError> { + let mut results = Vec::with_capacity(inputs.len()); + for input in inputs { + results.push(self.run_text(input).await?); + } + Ok(results) + } + pub async fn run_input( &mut self, input: AgentInput, @@ -367,6 +434,9 @@ impl AgentTestRunner { let runner_stats_after = self.runner.stats().await; let agent_state_after = self.runner.agent_state(); let session_snapshot = self.load_session_snapshot().await; + let tool_calls = self.collect_tool_calls().await; + let llm_last_request = self.llm.last_request().await; + let llm_last_response = self.llm.last_response().await; let (output, error) = match result { Ok(output) => (Some(output), None), @@ -387,6 +457,9 @@ impl AgentTestRunner { agent_state_after, started_at, session_snapshot, + tool_calls, + llm_last_request, + llm_last_response, }; Ok(AgentRunResult { @@ -407,4 +480,26 @@ impl AgentTestRunner { let storage = JsonlSessionStorage::new(self.workspace.path()).await.ok()?; storage.load(session_id).await.ok()? } + + async fn collect_tool_calls(&self) -> Vec { + let mut records = Vec::new(); + for tool in &self.mock_tools { + let calls = tool.history().await; + let results = tool.results().await; + for (idx, call) in calls.into_iter().enumerate() { + let result = results.get(idx).cloned(); + let (output, success) = match result { + Some(result) => (Some(result.output.clone()), result.success), + None => (None, false), + }; + records.push(ToolCallRecord { + tool_name: tool.name().to_string(), + input: call.arguments, + output, + success, + }); + } + } + records + } } diff --git a/tests/src/lib.rs b/tests/src/lib.rs index 80325edde..0c61c085a 100644 --- a/tests/src/lib.rs +++ b/tests/src/lib.rs @@ -17,6 +17,7 @@ pub use bus::MockAgentBus; pub use clock::{Clock, MockClock, SystemClock}; pub use agent_runner::{ AgentRunMetadata, AgentRunResult, AgentRunnerError, AgentTestRunner, MockAgentLLMProvider, + ToolCallRecord, }; pub use report::{ JsonFormatter, ReportFormatter, TestCaseResult, TestReport, TestReportBuilder, TestStatus, diff --git a/tests/src/tools.rs b/tests/src/tools.rs index 48356b551..daf944db6 100644 --- a/tests/src/tools.rs +++ b/tests/src/tools.rs @@ -20,6 +20,7 @@ pub struct MockTool { category: ToolCategory, pub stubbed_result: Arc>, pub call_history: Arc>>, + pub result_history: Arc>>, failure_queue: Arc>>, failure_patterns: Arc>>, result_sequence: Arc>>, @@ -37,6 +38,7 @@ impl MockTool { "Mock execution default", ))), call_history: Arc::new(RwLock::new(Vec::new())), + result_history: Arc::new(RwLock::new(Vec::new())), failure_queue: Arc::new(RwLock::new(VecDeque::new())), failure_patterns: Arc::new(RwLock::new(Vec::new())), result_sequence: Arc::new(RwLock::new(VecDeque::new())), @@ -58,6 +60,16 @@ impl MockTool { self.call_history.read().await.len() } + /// Retrieve a clone of the full result history. + pub async fn results(&self) -> Vec { + self.result_history.read().await.clone() + } + + /// Returns the most recent result, or `None` if never executed. + pub async fn last_result(&self) -> Option { + self.result_history.read().await.last().cloned() + } + /// Queue failures for the next N calls. pub async fn fail_next(&self, count: usize, error_msg: &str) { let mut queue = self.failure_queue.write().await; @@ -115,7 +127,9 @@ impl SimpleTool for MockTool { { let mut queue = self.failure_queue.write().await; if let Some(err) = queue.pop_front() { - return ToolResult::failure(err); + let result = ToolResult::failure(err); + self.result_history.write().await.push(result.clone()); + return result; } } @@ -124,7 +138,9 @@ impl SimpleTool for MockTool { let patterns = self.failure_patterns.read().await; for (pattern, err) in patterns.iter() { if input.arguments == *pattern { - return ToolResult::failure(err); + let result = ToolResult::failure(err); + self.result_history.write().await.push(result.clone()); + return result; } } } @@ -133,11 +149,14 @@ impl SimpleTool for MockTool { { let mut seq = self.result_sequence.write().await; if let Some(result) = seq.pop_front() { + self.result_history.write().await.push(result.clone()); return result; } } - self.stubbed_result.read().await.clone() + let result = self.stubbed_result.read().await.clone(); + self.result_history.write().await.push(result.clone()); + result } fn category(&self) -> ToolCategory { diff --git a/tests/tests/agent_runner_tests.rs b/tests/tests/agent_runner_tests.rs index 3b9ae1848..92e01101d 100644 --- a/tests/tests/agent_runner_tests.rs +++ b/tests/tests/agent_runner_tests.rs @@ -89,7 +89,7 @@ async fn agent_runner_executes_tool_calls() { ); runner - .register_simple_tool(tool.clone()) + .register_mock_tool(tool.clone()) .await .expect("tool registered"); @@ -111,6 +111,15 @@ async fn agent_runner_executes_tool_calls() { assert_eq!(tool.call_count().await, 1); let last_call = tool.last_call().await.expect("tool call captured"); assert_eq!(last_call.arguments, json!({ "input": "ping" })); + assert_eq!(result.metadata.tool_calls.len(), 1); + let record = &result.metadata.tool_calls[0]; + assert_eq!(record.tool_name, "echo_tool"); + assert_eq!(record.input, json!({ "input": "ping" })); + assert!(record.success); + assert_eq!( + record.output, + Some(json!("Mock execution default")) + ); runner.shutdown().await.expect("shutdown succeeds"); } @@ -149,6 +158,72 @@ async fn agent_runner_loads_bootstrap_files() { runner.shutdown().await.expect("shutdown succeeds"); } +#[tokio::test] +async fn agent_runner_supports_multi_turn_runs() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner.mock_llm().add_response("First reply").await; + runner.mock_llm().add_response("Second reply").await; + + let results = runner + .run_texts(&["turn one", "turn two"]) + .await + .expect("multi-turn run succeeds"); + + assert_eq!(results.len(), 2); + assert_eq!(results[0].output_text().as_deref(), Some("First reply")); + assert_eq!(results[1].output_text().as_deref(), Some("Second reply")); + + let snapshot = results + .last() + .and_then(|result| result.metadata.session_snapshot.as_ref()) + .expect("session snapshot captured"); + assert_eq!(snapshot.len(), 4); + + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn agent_runner_customizes_prompt_identity_and_bootstraps() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner + .write_bootstrap_file("CUSTOM.md", "Custom bootstrap content.") + .expect("bootstrap file written"); + runner + .configure_prompt( + Some(mofa_foundation::agent::context::prompt::AgentIdentity { + name: "TestAgent".to_string(), + description: "Custom identity".to_string(), + icon: None, + }), + Some(vec!["CUSTOM.md".to_string()]), + ) + .await; + + runner.mock_llm().add_response("Custom response").await; + let _ = runner + .run_text("custom prompt") + .await + .expect("run should succeed"); + + let request = runner + .mock_llm() + .last_request() + .await + .expect("request captured"); + let system_message = request + .messages + .first() + .and_then(|msg| msg.content.as_deref()) + .expect("system message content"); + + assert!(system_message.contains("TestAgent")); + assert!(system_message.contains("Custom identity")); + assert!(system_message.contains("CUSTOM.md")); + assert!(system_message.contains("Custom bootstrap content.")); + + runner.shutdown().await.expect("shutdown succeeds"); +} + #[tokio::test] async fn agent_runner_captures_llm_failure() { let mut runner = AgentTestRunner::new().await.expect("runner initializes"); From dcdb83ec5924c37e26531b8da7f72ad1261a9d2f Mon Sep 17 00:00:00 2001 From: AdityaShome Date: Mon, 23 Mar 2026 22:41:39 +0530 Subject: [PATCH 04/12] Add workspace snapshots, session assertions, and tool timing to agent runner --- tests/src/agent_runner.rs | 106 +++++++++++++++++++++++++++++- tests/src/assertions.rs | 28 ++++++++ tests/src/lib.rs | 2 +- tests/src/tools.rs | 20 +++++- tests/tests/agent_runner_tests.rs | 39 +++++++++++ 5 files changed, 188 insertions(+), 7 deletions(-) diff --git a/tests/src/agent_runner.rs b/tests/src/agent_runner.rs index 74dc87b6e..4f3c6a0b5 100644 --- a/tests/src/agent_runner.rs +++ b/tests/src/agent_runner.rs @@ -19,6 +19,8 @@ use mofa_kernel::agent::AgentCapabilities; use mofa_kernel::agent::AgentState; use mofa_runtime::runner::{AgentRunner, RunnerState, RunnerStats}; use std::collections::VecDeque; +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -57,6 +59,8 @@ pub struct AgentRunMetadata { pub tool_calls: Vec, pub llm_last_request: Option, pub llm_last_response: Option, + pub workspace_snapshot_before: WorkspaceSnapshot, + pub workspace_snapshot_after: WorkspaceSnapshot, } /// Result of a single agent run. @@ -76,6 +80,22 @@ pub struct ToolCallRecord { pub input: serde_json::Value, pub output: Option, pub success: bool, + pub duration_ms: Option, + pub timed_out: bool, +} + +/// Snapshot of files in the test workspace. +#[derive(Debug, Clone)] +pub struct WorkspaceSnapshot { + pub files: Vec, +} + +#[derive(Debug, Clone)] +pub struct WorkspaceFileSnapshot { + pub relative_path: String, + pub size_bytes: u64, + pub modified_ms: Option, + pub checksum: u64, } impl AgentRunResult { @@ -291,6 +311,13 @@ impl TempWorkspace { std::fs::write(&path, content)?; Ok(path) } + + fn snapshot(&self) -> WorkspaceSnapshot { + let mut files = Vec::new(); + collect_workspace_files(&self.root, &self.root, &mut files); + files.sort_by(|a, b| a.relative_path.cmp(&b.relative_path)); + WorkspaceSnapshot { files } + } } impl Drop for TempWorkspace { @@ -299,6 +326,57 @@ impl Drop for TempWorkspace { } } +fn collect_workspace_files(root: &Path, current: &Path, files: &mut Vec) { + let entries = match std::fs::read_dir(current) { + Ok(entries) => entries, + Err(_) => return, + }; + + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + collect_workspace_files(root, &path, files); + continue; + } + + let metadata = match entry.metadata() { + Ok(metadata) => metadata, + Err(_) => continue, + }; + + let size_bytes = metadata.len(); + let modified_ms = metadata + .modified() + .ok() + .and_then(|time| time.duration_since(std::time::UNIX_EPOCH).ok()) + .map(|duration| duration.as_millis() as u64); + + let bytes = match std::fs::read(&path) { + Ok(bytes) => bytes, + Err(_) => Vec::new(), + }; + let checksum = hash_bytes(&bytes); + let relative_path = path + .strip_prefix(root) + .unwrap_or(&path) + .to_string_lossy() + .to_string(); + + files.push(WorkspaceFileSnapshot { + relative_path, + size_bytes, + modified_ms, + checksum, + }); + } +} + +fn hash_bytes(bytes: &[u8]) -> u64 { + let mut hasher = DefaultHasher::new(); + bytes.hash(&mut hasher); + hasher.finish() +} + /// Test harness for running real agent execution paths. pub struct AgentTestRunner { workspace: TempWorkspace, @@ -427,6 +505,7 @@ impl AgentTestRunner { let runner_state_before = self.runner.state().await; let runner_stats_before = self.runner.stats().await; let agent_state_before = self.runner.agent_state(); + let workspace_snapshot_before = self.workspace.snapshot(); let timer = Instant::now(); let result = self.runner.execute(input).await; let duration = timer.elapsed(); @@ -434,6 +513,7 @@ impl AgentTestRunner { let runner_stats_after = self.runner.stats().await; let agent_state_after = self.runner.agent_state(); let session_snapshot = self.load_session_snapshot().await; + let workspace_snapshot_after = self.workspace.snapshot(); let tool_calls = self.collect_tool_calls().await; let llm_last_request = self.llm.last_request().await; let llm_last_response = self.llm.last_response().await; @@ -460,6 +540,8 @@ impl AgentTestRunner { tool_calls, llm_last_request, llm_last_response, + workspace_snapshot_before, + workspace_snapshot_after, }; Ok(AgentRunResult { @@ -488,15 +570,33 @@ impl AgentTestRunner { let results = tool.results().await; for (idx, call) in calls.into_iter().enumerate() { let result = results.get(idx).cloned(); - let (output, success) = match result { - Some(result) => (Some(result.output.clone()), result.success), - None => (None, false), + let (output, success, duration_ms, timed_out) = match result { + Some(result) => { + let duration_ms = result + .metadata + .get("duration_ms") + .and_then(|value| value.parse::().ok()); + let timed_out = result + .error + .as_ref() + .map(|err| err.contains("timed out")) + .unwrap_or(false); + ( + Some(result.output.clone()), + result.success, + duration_ms, + timed_out, + ) + } + None => (None, false, None, false), }; records.push(ToolCallRecord { tool_name: tool.name().to_string(), input: call.arguments, output, success, + duration_ms, + timed_out, }); } } diff --git a/tests/src/assertions.rs b/tests/src/assertions.rs index 1a0b58594..7d4f0dbde 100644 --- a/tests/src/assertions.rs +++ b/tests/src/assertions.rs @@ -84,3 +84,31 @@ macro_rules! assert_bus_message_sent { ); }}; } + +/// Assert a session's messages match the expected (role, content) pairs. +pub fn assert_session_messages( + session: &mofa_foundation::agent::session::Session, + expected: &[(&str, &str)], +) { + assert_eq!( + session.messages.len(), + expected.len(), + "Expected {} session messages, got {}", + expected.len(), + session.messages.len() + ); + + for (idx, (role, content)) in expected.iter().enumerate() { + let msg = &session.messages[idx]; + assert_eq!( + msg.role, *role, + "Expected role '{}' at index {}, got '{}'", + role, idx, msg.role + ); + assert_eq!( + msg.content, *content, + "Expected content '{}' at index {}, got '{}'", + content, idx, msg.content + ); + } +} diff --git a/tests/src/lib.rs b/tests/src/lib.rs index 0c61c085a..8d8af6fcf 100644 --- a/tests/src/lib.rs +++ b/tests/src/lib.rs @@ -17,7 +17,7 @@ pub use bus::MockAgentBus; pub use clock::{Clock, MockClock, SystemClock}; pub use agent_runner::{ AgentRunMetadata, AgentRunResult, AgentRunnerError, AgentTestRunner, MockAgentLLMProvider, - ToolCallRecord, + ToolCallRecord, WorkspaceFileSnapshot, WorkspaceSnapshot, }; pub use report::{ JsonFormatter, ReportFormatter, TestCaseResult, TestReport, TestReportBuilder, TestStatus, diff --git a/tests/src/tools.rs b/tests/src/tools.rs index daf944db6..fbc6bb5b6 100644 --- a/tests/src/tools.rs +++ b/tests/src/tools.rs @@ -122,12 +122,16 @@ impl SimpleTool for MockTool { async fn execute(&self, input: ToolInput) -> ToolResult { self.call_history.write().await.push(input.clone()); + let start = std::time::Instant::now(); // 1. Drain failure queue { let mut queue = self.failure_queue.write().await; if let Some(err) = queue.pop_front() { - let result = ToolResult::failure(err); + let mut result = ToolResult::failure(err); + result + .metadata + .insert("duration_ms".to_string(), start.elapsed().as_millis().to_string()); self.result_history.write().await.push(result.clone()); return result; } @@ -138,7 +142,10 @@ impl SimpleTool for MockTool { let patterns = self.failure_patterns.read().await; for (pattern, err) in patterns.iter() { if input.arguments == *pattern { - let result = ToolResult::failure(err); + let mut result = ToolResult::failure(err); + result + .metadata + .insert("duration_ms".to_string(), start.elapsed().as_millis().to_string()); self.result_history.write().await.push(result.clone()); return result; } @@ -149,12 +156,19 @@ impl SimpleTool for MockTool { { let mut seq = self.result_sequence.write().await; if let Some(result) = seq.pop_front() { + let mut result = result; + result + .metadata + .insert("duration_ms".to_string(), start.elapsed().as_millis().to_string()); self.result_history.write().await.push(result.clone()); return result; } } - let result = self.stubbed_result.read().await.clone(); + let mut result = self.stubbed_result.read().await.clone(); + result + .metadata + .insert("duration_ms".to_string(), start.elapsed().as_millis().to_string()); self.result_history.write().await.push(result.clone()); result } diff --git a/tests/tests/agent_runner_tests.rs b/tests/tests/agent_runner_tests.rs index 92e01101d..2b74a3aea 100644 --- a/tests/tests/agent_runner_tests.rs +++ b/tests/tests/agent_runner_tests.rs @@ -1,4 +1,5 @@ use mofa_testing::agent_runner::AgentTestRunner; +use mofa_testing::assertions::assert_session_messages; use mofa_testing::tools::MockTool; use serde_json::json; @@ -27,6 +28,25 @@ async fn agent_runner_executes_and_captures_output() { assert!(result.metadata.session_snapshot.is_some()); let snapshot = result.metadata.session_snapshot.as_ref().unwrap(); assert_eq!(snapshot.len(), 2); + assert_session_messages(snapshot, &[("user", "hello"), ("assistant", "Mocked response")]); + + let expected_session_path = format!("sessions/{}.jsonl", runner.session_id()); + assert!( + !result + .metadata + .workspace_snapshot_before + .files + .iter() + .any(|file| file.relative_path == expected_session_path) + ); + assert!( + result + .metadata + .workspace_snapshot_after + .files + .iter() + .any(|file| file.relative_path == expected_session_path) + ); runner.shutdown().await.expect("shutdown succeeds"); } @@ -56,6 +76,7 @@ async fn agent_runner_creates_isolated_workspaces() { .await .expect("runner B executes"); + // Session files should exist in each separate workspace. let session_a = runner_a .workspace() .join("sessions") @@ -93,6 +114,7 @@ async fn agent_runner_executes_tool_calls() { .await .expect("tool registered"); + // First response triggers a tool call; second response is the final answer. runner .mock_llm() .add_tool_call_response("echo_tool", json!({ "input": "ping" }), None) @@ -107,6 +129,7 @@ async fn agent_runner_executes_tool_calls() { .await .expect("run should succeed"); + // Tool call should be captured in both tool history and run metadata. assert_eq!(result.output_text().as_deref(), Some("Final response")); assert_eq!(tool.call_count().await, 1); let last_call = tool.last_call().await.expect("tool call captured"); @@ -120,6 +143,7 @@ async fn agent_runner_executes_tool_calls() { record.output, Some(json!("Mock execution default")) ); + assert!(record.duration_ms.is_some()); runner.shutdown().await.expect("shutdown succeeds"); } @@ -146,6 +170,7 @@ async fn agent_runner_loads_bootstrap_files() { .last_request() .await .expect("request captured"); + // Validate the system message includes the bootstrap content. let system_message = request .messages .first() @@ -160,6 +185,7 @@ async fn agent_runner_loads_bootstrap_files() { #[tokio::test] async fn agent_runner_supports_multi_turn_runs() { + // Multi-turn helper should keep the same session and extend history. let mut runner = AgentTestRunner::new().await.expect("runner initializes"); runner.mock_llm().add_response("First reply").await; runner.mock_llm().add_response("Second reply").await; @@ -173,17 +199,28 @@ async fn agent_runner_supports_multi_turn_runs() { assert_eq!(results[0].output_text().as_deref(), Some("First reply")); assert_eq!(results[1].output_text().as_deref(), Some("Second reply")); + // Session snapshot should contain two user/assistant pairs. let snapshot = results .last() .and_then(|result| result.metadata.session_snapshot.as_ref()) .expect("session snapshot captured"); assert_eq!(snapshot.len(), 4); + assert_session_messages( + snapshot, + &[ + ("user", "turn one"), + ("assistant", "First reply"), + ("user", "turn two"), + ("assistant", "Second reply"), + ], + ); runner.shutdown().await.expect("shutdown succeeds"); } #[tokio::test] async fn agent_runner_customizes_prompt_identity_and_bootstraps() { + // Custom identity + bootstrap list should appear in the system prompt. let mut runner = AgentTestRunner::new().await.expect("runner initializes"); runner .write_bootstrap_file("CUSTOM.md", "Custom bootstrap content.") @@ -210,6 +247,7 @@ async fn agent_runner_customizes_prompt_identity_and_bootstraps() { .last_request() .await .expect("request captured"); + // Validate custom identity and bootstrap content. let system_message = request .messages .first() @@ -226,6 +264,7 @@ async fn agent_runner_customizes_prompt_identity_and_bootstraps() { #[tokio::test] async fn agent_runner_captures_llm_failure() { + // LLM failures should surface in AgentRunResult with failed stats. let mut runner = AgentTestRunner::new().await.expect("runner initializes"); runner .mock_llm() From 4bf72fe041d65973403ce5d887dbfd239d4e2c25 Mon Sep 17 00:00:00 2001 From: AdityaShome Date: Mon, 23 Mar 2026 23:41:05 +0530 Subject: [PATCH 05/12] Add agent runner examples and session aware runner updates --- crates/mofa-foundation/src/agent/executor.rs | 4 +- crates/mofa-runtime/src/runner.rs | 5 ++ examples/Cargo.toml | 3 ++ examples/agent_runner_basic/Cargo.toml | 9 ++++ examples/agent_runner_basic/src/main.rs | 29 ++++++++++++ .../agent_runner_custom_session/Cargo.toml | 10 ++++ .../agent_runner_custom_session/src/main.rs | 42 +++++++++++++++++ examples/agent_runner_tools/Cargo.toml | 10 ++++ examples/agent_runner_tools/src/main.rs | 47 +++++++++++++++++++ tests/src/agent_runner.rs | 14 ++++++ tests/tests/agent_runner_tests.rs | 26 ++++++++++ 11 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 examples/agent_runner_basic/Cargo.toml create mode 100644 examples/agent_runner_basic/src/main.rs create mode 100644 examples/agent_runner_custom_session/Cargo.toml create mode 100644 examples/agent_runner_custom_session/src/main.rs create mode 100644 examples/agent_runner_tools/Cargo.toml create mode 100644 examples/agent_runner_tools/src/main.rs diff --git a/crates/mofa-foundation/src/agent/executor.rs b/crates/mofa-foundation/src/agent/executor.rs index 3219657bf..15dca5065 100644 --- a/crates/mofa-foundation/src/agent/executor.rs +++ b/crates/mofa-foundation/src/agent/executor.rs @@ -595,7 +595,9 @@ impl MoFAAgent for AgentExecutor { self.base.initialize(ctx).await?; // Additional executor-specific initialization - self.base.transition_to(AgentState::Ready)?; + if self.base.state() != AgentState::Ready { + self.base.transition_to(AgentState::Ready)?; + } Ok(()) } diff --git a/crates/mofa-runtime/src/runner.rs b/crates/mofa-runtime/src/runner.rs index 91ca5aaff..09a856636 100644 --- a/crates/mofa-runtime/src/runner.rs +++ b/crates/mofa-runtime/src/runner.rs @@ -349,6 +349,11 @@ impl AgentRunner { &self.context } + /// Update session ID in the execution context. + pub fn set_session_id(&mut self, session_id: Option) { + self.context.session_id = session_id; + } + /// 获取运行器状态 /// Get runner state pub async fn state(&self) -> RunnerState { diff --git a/examples/Cargo.toml b/examples/Cargo.toml index f952ce045..3da476607 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -1,6 +1,9 @@ [workspace] resolver = "3" members = [ + "agent_runner_basic", + "agent_runner_custom_session", + "agent_runner_tools", "cli_production_smoke", "cli_agent_logs_demo", "cli_plugin_lifecycle", diff --git a/examples/agent_runner_basic/Cargo.toml b/examples/agent_runner_basic/Cargo.toml new file mode 100644 index 000000000..e49ef8dea --- /dev/null +++ b/examples/agent_runner_basic/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "agent_runner_basic" +version.workspace = true +edition.workspace = true + +[dependencies] +anyhow.workspace = true +mofa-testing = { path = "../../tests" } +tokio.workspace = true diff --git a/examples/agent_runner_basic/src/main.rs b/examples/agent_runner_basic/src/main.rs new file mode 100644 index 000000000..f49a93ee7 --- /dev/null +++ b/examples/agent_runner_basic/src/main.rs @@ -0,0 +1,29 @@ +use anyhow::Result; +use mofa_testing::AgentTestRunner; + +#[tokio::main] +async fn main() -> Result<()> { + let mut runner = AgentTestRunner::new().await?; + runner.mock_llm().add_response("Hello from the runner").await; + + let result = runner.run_text("hi").await?; + println!("Output: {}", result.output_text().unwrap_or_default()); + println!( + "Session: {}", + result + .metadata + .session_id + .as_deref() + .unwrap_or("") + ); + println!("Workspace: {}", result.metadata.workspace_root.display()); + println!( + "Runner stats: total={} success={} failed={}", + result.metadata.runner_stats_after.total_executions, + result.metadata.runner_stats_after.successful_executions, + result.metadata.runner_stats_after.failed_executions + ); + + runner.shutdown().await?; + Ok(()) +} diff --git a/examples/agent_runner_custom_session/Cargo.toml b/examples/agent_runner_custom_session/Cargo.toml new file mode 100644 index 000000000..52620ec07 --- /dev/null +++ b/examples/agent_runner_custom_session/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "agent_runner_custom_session" +version.workspace = true +edition.workspace = true + +[dependencies] +anyhow.workspace = true +mofa-testing = { path = "../../tests" } +mofa-foundation = { path = "../../crates/mofa-foundation" } +tokio.workspace = true diff --git a/examples/agent_runner_custom_session/src/main.rs b/examples/agent_runner_custom_session/src/main.rs new file mode 100644 index 000000000..fc03881b8 --- /dev/null +++ b/examples/agent_runner_custom_session/src/main.rs @@ -0,0 +1,42 @@ +use anyhow::Result; +use mofa_foundation::agent::context::prompt::AgentIdentity; +use mofa_testing::AgentTestRunner; + +#[tokio::main] +async fn main() -> Result<()> { + let mut runner = AgentTestRunner::new().await?; + + runner.write_bootstrap_file("CUSTOM.md", "Custom bootstrap content.")?; + runner + .configure_prompt( + Some(AgentIdentity { + name: "RunnerDemo".to_string(), + description: "Custom identity for example runs".to_string(), + icon: None, + }), + Some(vec!["CUSTOM.md".to_string()]), + ) + .await; + + runner + .mock_llm() + .add_response("Custom session response") + .await; + + let result = runner + .run_text_with_session("demo-session", "hello session") + .await?; + + println!( + "Session id: {}", + result + .metadata + .session_id + .as_deref() + .unwrap_or("") + ); + println!("Output: {}", result.output_text().unwrap_or_default()); + + runner.shutdown().await?; + Ok(()) +} diff --git a/examples/agent_runner_tools/Cargo.toml b/examples/agent_runner_tools/Cargo.toml new file mode 100644 index 000000000..511578074 --- /dev/null +++ b/examples/agent_runner_tools/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "agent_runner_tools" +version.workspace = true +edition.workspace = true + +[dependencies] +anyhow.workspace = true +mofa-testing = { path = "../../tests" } +serde_json.workspace = true +tokio.workspace = true diff --git a/examples/agent_runner_tools/src/main.rs b/examples/agent_runner_tools/src/main.rs new file mode 100644 index 000000000..7399abf8b --- /dev/null +++ b/examples/agent_runner_tools/src/main.rs @@ -0,0 +1,47 @@ +use anyhow::Result; +use mofa_testing::{AgentTestRunner, MockTool}; +use serde_json::json; + +#[tokio::main] +async fn main() -> Result<()> { + let mut runner = AgentTestRunner::new().await?; + + let tool = MockTool::new( + "echo_tool", + "Echo the provided input", + json!({ + "type": "object", + "properties": { + "input": { "type": "string" } + }, + "required": ["input"] + }), + ); + + runner.register_mock_tool(tool).await?; + + runner + .mock_llm() + .add_tool_call_response("echo_tool", json!({ "input": "ping" }), None) + .await; + runner + .mock_llm() + .add_response("Tool response completed") + .await; + + let result = runner.run_text("use the tool").await?; + println!("Output: {}", result.output_text().unwrap_or_default()); + + for record in &result.metadata.tool_calls { + println!( + "Tool call: name={} input={} output={} duration_ms={:?}", + record.tool_name, + record.input, + record.output.as_ref().unwrap_or(&serde_json::Value::Null), + record.duration_ms + ); + } + + runner.shutdown().await?; + Ok(()) +} diff --git a/tests/src/agent_runner.rs b/tests/src/agent_runner.rs index 4f3c6a0b5..7af8cfdbe 100644 --- a/tests/src/agent_runner.rs +++ b/tests/src/agent_runner.rs @@ -11,6 +11,7 @@ use mofa_kernel::agent::context::AgentContext; use mofa_kernel::agent::core::MoFAAgent; use mofa_kernel::agent::error::{AgentError, AgentResult}; use mofa_foundation::agent::components::tool::as_tool; +use mofa_foundation::agent::components::tool::SimpleTool; use mofa_foundation::agent::session::{JsonlSessionStorage, Session, SessionStorage}; use crate::tools::MockTool; use mofa_kernel::agent::types::{AgentInput, AgentOutput, ChatCompletionRequest}; @@ -486,6 +487,19 @@ impl AgentTestRunner { self.run_input(AgentInput::text(input)).await } + pub async fn run_text_with_session( + &mut self, + session_id: &str, + input: &str, + ) -> Result { + let original_session = self.runner.context().session_id.clone(); + self.runner + .set_session_id(Some(session_id.to_string())); + let result = self.run_text(input).await; + self.runner.set_session_id(original_session); + result + } + pub async fn run_texts( &mut self, inputs: &[&str], diff --git a/tests/tests/agent_runner_tests.rs b/tests/tests/agent_runner_tests.rs index 2b74a3aea..250b9e62d 100644 --- a/tests/tests/agent_runner_tests.rs +++ b/tests/tests/agent_runner_tests.rs @@ -283,3 +283,29 @@ async fn agent_runner_captures_llm_failure() { runner.shutdown().await.expect("shutdown succeeds"); } + +#[tokio::test] +async fn agent_runner_allows_custom_session_keys() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner + .mock_llm() + .add_response("Custom session response") + .await; + + let result = runner + .run_text_with_session("custom-session", "hello session") + .await + .expect("run should succeed"); + + assert_eq!( + result.metadata.session_id.as_deref(), + Some("custom-session") + ); + let session_path = runner + .workspace() + .join("sessions") + .join("custom-session.jsonl"); + assert!(session_path.exists()); + + runner.shutdown().await.expect("shutdown succeeds"); +} From 5560ff7f59bb62323e2a669b56ec0ccf33da68e8 Mon Sep 17 00:00:00 2001 From: AdityaShome Date: Tue, 24 Mar 2026 04:40:44 +0530 Subject: [PATCH 06/12] Normalize workspace snapshot paths for cross platform agent runner tests --- tests/src/agent_runner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/agent_runner.rs b/tests/src/agent_runner.rs index 7af8cfdbe..ca22c1c1b 100644 --- a/tests/src/agent_runner.rs +++ b/tests/src/agent_runner.rs @@ -361,7 +361,7 @@ fn collect_workspace_files(root: &Path, current: &Path, files: &mut Vec Date: Mon, 30 Mar 2026 16:31:09 +0530 Subject: [PATCH 07/12] feat(testing): add agent runner assertions --- tests/src/assertions.rs | 142 +++++++++++++++++++++++++++ tests/tests/assertion_macro_tests.rs | 90 ++++++++++++++++- 2 files changed, 230 insertions(+), 2 deletions(-) diff --git a/tests/src/assertions.rs b/tests/src/assertions.rs index 7d4f0dbde..946efe134 100644 --- a/tests/src/assertions.rs +++ b/tests/src/assertions.rs @@ -112,3 +112,145 @@ pub fn assert_session_messages( ); } } + +/// Assert the most recent tool result matches the expected JSON output. +/// +/// # Example +/// ```ignore +/// assert_tool_last_result!(tool, json!("done")); +/// ``` +#[macro_export] +macro_rules! assert_tool_last_result { + ($tool:expr, $expected:expr) => {{ + let result = $tool + .last_result() + .await + .expect("Expected tool to have a result, but it was never executed"); + let expected = $expected; + assert_eq!( + result.output, expected, + "Expected latest tool result {:?}, got {:?}", + expected, result.output + ); + }}; +} + +/// Assert the agent run produced the expected output text. +/// +/// # Example +/// ```ignore +/// assert_agent_output_text!(result, "hello"); +/// ``` +#[macro_export] +macro_rules! assert_agent_output_text { + ($result:expr, $expected:expr) => {{ + let expected = $expected; + let actual = $result.output_text(); + assert_eq!( + actual.as_deref(), + Some(expected), + "Expected agent output {:?}, got {:?}", + expected, + actual + ); + }}; +} + +/// Assert the agent run failed with an error containing the given substring. +/// +/// # Example +/// ```ignore +/// assert_run_failed_with!(result, "timeout"); +/// ``` +#[macro_export] +macro_rules! assert_run_failed_with { + ($result:expr, $pattern:expr) => {{ + let pattern = $pattern; + let error = $result + .error + .as_ref() + .expect("Expected run to fail, but it succeeded"); + let message = error.to_string(); + assert!( + message.contains(pattern), + "Expected error containing {:?}, got {:?}", + pattern, + message + ); + }}; +} + +/// Assert the workspace snapshot contains a file with the given relative path. +/// +/// # Example +/// ```ignore +/// assert_workspace_contains_file!(snapshot, "sessions/demo.jsonl"); +/// ``` +#[macro_export] +macro_rules! assert_workspace_contains_file { + ($snapshot:expr, $relative_path:expr) => {{ + let relative_path = $relative_path; + let found = $snapshot + .files + .iter() + .any(|file| file.relative_path == relative_path); + assert!( + found, + "Expected workspace snapshot to contain {:?}, found paths: {:?}", + relative_path, + $snapshot + .files + .iter() + .map(|file| file.relative_path.as_str()) + .collect::>() + ); + }}; +} + +/// Assert the run metadata captured a tool call with the given tool name. +/// +/// # Example +/// ```ignore +/// assert_run_recorded_tool_call!(result, "echo_tool"); +/// ``` +#[macro_export] +macro_rules! assert_run_recorded_tool_call { + ($result:expr, $tool_name:expr) => {{ + let tool_name = $tool_name; + let found = $result + .metadata + .tool_calls + .iter() + .any(|record| record.tool_name == tool_name); + assert!( + found, + "Expected run metadata to contain tool call {:?}, found tool calls: {:?}", + tool_name, + $result + .metadata + .tool_calls + .iter() + .map(|record| record.tool_name.as_str()) + .collect::>() + ); + }}; +} + +/// Assert the runner total execution count after a run matches the expected value. +/// +/// # Example +/// ```ignore +/// assert_runner_total_executions!(result, 1); +/// ``` +#[macro_export] +macro_rules! assert_runner_total_executions { + ($result:expr, $expected:expr) => {{ + let expected = $expected; + let actual = $result.metadata.runner_stats_after.total_executions; + assert_eq!( + actual, expected, + "Expected runner total executions {}, got {}", + expected, actual + ); + }}; +} diff --git a/tests/tests/assertion_macro_tests.rs b/tests/tests/assertion_macro_tests.rs index 55c2af2f4..c27aee010 100644 --- a/tests/tests/assertion_macro_tests.rs +++ b/tests/tests/assertion_macro_tests.rs @@ -1,11 +1,11 @@ -//! Tests for assertion macros: assert_tool_called!, assert_tool_called_with!, -//! assert_infer_called!, assert_bus_message_sent!. +//! Tests for assertion macros and assertion helpers used by the testing crate. use mofa_foundation::agent::components::tool::SimpleTool; use mofa_foundation::orchestrator::{ModelOrchestrator, ModelProviderConfig, ModelType}; use mofa_kernel::agent::components::tool::ToolInput; use mofa_kernel::bus::CommunicationMode; use mofa_kernel::message::AgentMessage; +use mofa_testing::agent_runner::AgentTestRunner; use mofa_testing::backend::MockLLMBackend; use mofa_testing::bus::MockAgentBus; use mofa_testing::tools::MockTool; @@ -121,3 +121,89 @@ async fn assert_bus_message_sent_panics_when_no_message_from_sender() { mofa_testing::assert_bus_message_sent!(bus, "agent-2"); } + +// =================================================================== +// New assertion helpers +// =================================================================== + +#[tokio::test] +async fn assert_tool_last_result_passes_on_matching_output() { + let tool = MockTool::new("search", "Search tool", json!({"type": "object"})); + tool.execute(ToolInput::from_json(json!({"query": "rust"}))) + .await; + + mofa_testing::assert_tool_last_result!(tool, json!("Mock execution default")); +} + +#[tokio::test] +async fn assert_agent_output_text_passes_on_matching_output() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner.mock_llm().add_response("hello from runner").await; + + let result = runner.run_text("hello").await.expect("run succeeds"); + + mofa_testing::assert_agent_output_text!(result, "hello from runner"); + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn assert_run_failed_with_passes_on_matching_error() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner.mock_llm().add_error_response("mock failure").await; + + let result = runner.run_text("hello").await.expect("run completes"); + + mofa_testing::assert_run_failed_with!(result, "mock failure"); + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn assert_workspace_contains_file_passes_when_snapshot_has_file() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner.mock_llm().add_response("workspace ready").await; + + let result = runner.run_text("hello").await.expect("run succeeds"); + let expected = format!("sessions/{}.jsonl", runner.session_id()); + + mofa_testing::assert_workspace_contains_file!(result.metadata.workspace_snapshot_after, expected); + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn assert_run_recorded_tool_call_passes_when_tool_metadata_exists() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + let tool = MockTool::new( + "echo_tool", + "Echo tool", + json!({ + "type": "object", + "properties": { "input": { "type": "string" } }, + "required": ["input"] + }), + ); + runner + .register_mock_tool(tool) + .await + .expect("tool registered"); + runner + .mock_llm() + .add_tool_call_response("echo_tool", json!({ "input": "ping" }), None) + .await; + runner.mock_llm().add_response("done").await; + + let result = runner.run_text("use tool").await.expect("run succeeds"); + + mofa_testing::assert_run_recorded_tool_call!(result, "echo_tool"); + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn assert_runner_total_executions_passes_on_first_run() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner.mock_llm().add_response("counted").await; + + let result = runner.run_text("hello").await.expect("run succeeds"); + + mofa_testing::assert_runner_total_executions!(result, 1); + runner.shutdown().await.expect("shutdown succeeds"); +} From f3768d3ef2976071b967bf25db392bfe7e534e03 Mon Sep 17 00:00:00 2001 From: AdityaShome Date: Tue, 31 Mar 2026 02:52:59 +0530 Subject: [PATCH 08/12] feat(testing): add minimal TOML DSL adapter --- Cargo.toml | 3 + tests/Cargo.toml | 1 + tests/examples/simple_agent.toml | 9 +++ tests/src/dsl.rs | 95 ++++++++++++++++++++++++++++++++ tests/src/lib.rs | 2 + tests/tests/dsl_tests.rs | 23 ++++++++ 6 files changed, 133 insertions(+) create mode 100644 tests/examples/simple_agent.toml create mode 100644 tests/src/dsl.rs create mode 100644 tests/tests/dsl_tests.rs diff --git a/Cargo.toml b/Cargo.toml index f71801118..aca372a14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -86,6 +86,9 @@ lazy_static = "1.4" # Actor framework for ReAct agents ractor = "0" +# TOML deserialization (also used transitively by config) +toml = "0.8" + # Configuration file support (multi-format) config = { version = "0.14", features = [ "toml", diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 514ce577c..7eec6a9f7 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -19,3 +19,4 @@ chrono = { workspace = true } thiserror = { workspace = true } uuid = { workspace = true } regex = { workspace = true } +toml = { workspace = true } diff --git a/tests/examples/simple_agent.toml b/tests/examples/simple_agent.toml new file mode 100644 index 000000000..dc673d88a --- /dev/null +++ b/tests/examples/simple_agent.toml @@ -0,0 +1,9 @@ +name = "simple_agent_run" +prompt = "Say hello" +expected_text = "hello" + +[llm] +responses = ["hello from DSL"] + +[assert] +contains = "hello" diff --git a/tests/src/dsl.rs b/tests/src/dsl.rs new file mode 100644 index 000000000..dfcb144e4 --- /dev/null +++ b/tests/src/dsl.rs @@ -0,0 +1,95 @@ +//! Minimal TOML DSL support for the testing MVP. +//! +//! This module keeps the schema intentionally small so contributors can define +//! simple agent tests without introducing a full DSL framework yet. + +use crate::agent_runner::{AgentRunResult, AgentRunnerError, AgentTestRunner}; +use serde::Deserialize; +use std::path::Path; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum DslError { + #[error("failed to read DSL file: {0}")] + Io(#[from] std::io::Error), + + #[error("failed to parse TOML DSL: {0}")] + Toml(#[from] toml::de::Error), + + #[error("runner error: {0}")] + Runner(#[from] AgentRunnerError), + + #[error("expected output to contain `{expected}`, got `{actual}`")] + ExpectedContains { expected: String, actual: String }, + + #[error("run produced no text output")] + MissingOutput, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct TestCaseDsl { + pub name: String, + pub prompt: String, + pub expected_text: Option, + pub llm: Option, + #[serde(rename = "assert")] + pub assertions: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct LlmDsl { + #[serde(default)] + pub responses: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct AssertDsl { + pub contains: Option, +} + +impl TestCaseDsl { + pub fn from_toml_str(input: &str) -> Result { + Ok(toml::from_str(input)?) + } + + pub fn from_toml_file(path: impl AsRef) -> Result { + let input = std::fs::read_to_string(path)?; + Self::from_toml_str(&input) + } +} + +pub async fn run_test_case(case: &TestCaseDsl) -> Result { + let mut runner = AgentTestRunner::new().await?; + + // Queue deterministic LLM responses before execution so the DSL stays a thin + // adapter over the existing runner harness. + if let Some(llm) = &case.llm { + for response in &llm.responses { + runner.mock_llm().add_response(response).await; + } + } + + let result = runner.run_text(&case.prompt).await?; + + if let Some(expected) = expected_contains(case) { + let actual = result.output_text().ok_or(DslError::MissingOutput)?; + if !actual.contains(expected) { + return Err(DslError::ExpectedContains { + expected: expected.to_string(), + actual, + }); + } + } + + runner.shutdown().await?; + Ok(result) +} + +fn expected_contains(case: &TestCaseDsl) -> Option<&str> { + // Prefer the explicit assertion block when present, while keeping + // `expected_text` as a lightweight shorthand for the MVP schema. + case.assertions + .as_ref() + .and_then(|assertions| assertions.contains.as_deref()) + .or(case.expected_text.as_deref()) +} diff --git a/tests/src/lib.rs b/tests/src/lib.rs index 8d8af6fcf..38e588eb5 100644 --- a/tests/src/lib.rs +++ b/tests/src/lib.rs @@ -9,12 +9,14 @@ pub mod assertions; pub mod backend; pub mod bus; pub mod clock; +pub mod dsl; pub mod report; pub mod tools; pub use backend::MockLLMBackend; pub use bus::MockAgentBus; pub use clock::{Clock, MockClock, SystemClock}; +pub use dsl::{run_test_case, AssertDsl, DslError, LlmDsl, TestCaseDsl}; pub use agent_runner::{ AgentRunMetadata, AgentRunResult, AgentRunnerError, AgentTestRunner, MockAgentLLMProvider, ToolCallRecord, WorkspaceFileSnapshot, WorkspaceSnapshot, diff --git a/tests/tests/dsl_tests.rs b/tests/tests/dsl_tests.rs new file mode 100644 index 000000000..6eedf6c36 --- /dev/null +++ b/tests/tests/dsl_tests.rs @@ -0,0 +1,23 @@ +//! Integration test for the minimal TOML DSL adapter. + +use mofa_testing::{run_test_case, TestCaseDsl}; + +#[tokio::test] +async fn toml_dsl_runs_through_agent_runner() { + // Load the example DSL from the crate so the test exercises parsing and + // adapter execution together. + let case = TestCaseDsl::from_toml_file(concat!( + env!("CARGO_MANIFEST_DIR"), + "/examples/simple_agent.toml" + )) + .expect("DSL example should parse"); + + assert_eq!(case.name, "simple_agent_run"); + + let result = run_test_case(&case) + .await + .expect("DSL case should run successfully"); + + assert!(result.is_success()); + assert_eq!(result.output_text().as_deref(), Some("hello from DSL")); +} From b4d3d08f8da13c9e8729251d7143d1e37e3af835 Mon Sep 17 00:00:00 2001 From: AdityaShome Date: Tue, 31 Mar 2026 18:06:20 +0530 Subject: [PATCH 09/12] feat(cli): add test dsl command for TOML agent test cases --- crates/mofa-cli/Cargo.toml | 1 + crates/mofa-cli/src/cli.rs | 12 ++ crates/mofa-cli/src/commands/mod.rs | 1 + crates/mofa-cli/src/commands/test_dsl.rs | 64 +++++++ crates/mofa-cli/src/main.rs | 8 + .../tests/test_dsl_integration_tests.rs | 37 ++++ tests/examples/tool_agent.toml | 27 +++ tests/src/dsl.rs | 169 ++++++++++++++++-- tests/src/lib.rs | 5 +- tests/tests/dsl_tests.rs | 73 +++++++- 10 files changed, 383 insertions(+), 14 deletions(-) create mode 100644 crates/mofa-cli/src/commands/test_dsl.rs create mode 100644 crates/mofa-cli/tests/test_dsl_integration_tests.rs create mode 100644 tests/examples/tool_agent.toml diff --git a/crates/mofa-cli/Cargo.toml b/crates/mofa-cli/Cargo.toml index 875ae62a6..5a9d593d6 100644 --- a/crates/mofa-cli/Cargo.toml +++ b/crates/mofa-cli/Cargo.toml @@ -25,6 +25,7 @@ mofa-kernel = { path = "../mofa-kernel", version = "0.1", features = [ ] } mofa-runtime = { path = "../mofa-runtime", version = "0.1" } mofa-foundation = { path = "../mofa-foundation", version = "0.1" } +mofa-testing = { path = "../../tests", version = "0.1" } config.workspace = true tokio = { workspace = true } thiserror = { workspace = true } diff --git a/crates/mofa-cli/src/cli.rs b/crates/mofa-cli/src/cli.rs index 7e10ad479..b744655e3 100644 --- a/crates/mofa-cli/src/cli.rs +++ b/crates/mofa-cli/src/cli.rs @@ -81,6 +81,12 @@ pub enum Commands { dora: bool, }, + /// Run a testing DSL case file + TestDsl { + /// TOML DSL file to execute + file: PathBuf, + }, + /// Run a dora dataflow #[cfg(feature = "dora")] Dataflow { @@ -725,6 +731,12 @@ mod tests { assert!(parsed.is_ok(), "doctor ci strict json should parse"); } + #[test] + fn test_test_dsl_parses() { + let parsed = Cli::try_parse_from(["mofa", "test-dsl", "tests/examples/simple_agent.toml"]); + assert!(parsed.is_ok(), "test-dsl command should parse"); + } + #[test] fn test_rag_index_parses() { let parsed = Cli::try_parse_from([ diff --git a/crates/mofa-cli/src/commands/mod.rs b/crates/mofa-cli/src/commands/mod.rs index 0fb02a9d3..1b798f63f 100644 --- a/crates/mofa-cli/src/commands/mod.rs +++ b/crates/mofa-cli/src/commands/mod.rs @@ -11,5 +11,6 @@ pub mod new; pub mod plugin; pub mod rag; pub mod run; +pub mod test_dsl; pub mod session; pub mod tool; diff --git a/crates/mofa-cli/src/commands/test_dsl.rs b/crates/mofa-cli/src/commands/test_dsl.rs new file mode 100644 index 000000000..d206e094a --- /dev/null +++ b/crates/mofa-cli/src/commands/test_dsl.rs @@ -0,0 +1,64 @@ +//! `mofa test-dsl` command implementation + +use crate::CliError; +use crate::output::OutputFormat; +use mofa_testing::{DslError, run_test_case, TestCaseDsl}; +use serde::Serialize; +use serde_json::json; +use std::path::Path; + +#[derive(Debug, Serialize)] +struct TestDslSummary { + name: String, + success: bool, + output_text: Option, + duration_ms: u128, + tool_calls: Vec, + workspace_root: String, +} + +/// Execute one TOML DSL test case through the testing runner. +pub async fn run(path: &Path, format: OutputFormat) -> Result<(), CliError> { + let case = TestCaseDsl::from_toml_file(path).map_err(map_dsl_error)?; + let result = run_test_case(&case).await.map_err(map_dsl_error)?; + let summary = TestDslSummary { + name: case.name, + success: result.is_success(), + output_text: result.output_text(), + duration_ms: result.duration.as_millis(), + tool_calls: result + .metadata + .tool_calls + .iter() + .map(|record| record.tool_name.clone()) + .collect(), + workspace_root: result.metadata.workspace_root.display().to_string(), + }; + + match format { + OutputFormat::Json => { + let output = json!({ + "success": true, + "case": summary, + }); + println!("{}", serde_json::to_string_pretty(&output)?); + } + _ => { + println!("case: {}", summary.name); + println!("status: {}", if summary.success { "passed" } else { "failed" }); + if let Some(output_text) = &summary.output_text { + println!("output: {}", output_text); + } + if !summary.tool_calls.is_empty() { + println!("tool_calls: {}", summary.tool_calls.join(", ")); + } + println!("duration_ms: {}", summary.duration_ms); + } + } + + Ok(()) +} + +fn map_dsl_error(error: DslError) -> CliError { + CliError::Other(format!("DSL test failed: {error}")) +} diff --git a/crates/mofa-cli/src/main.rs b/crates/mofa-cli/src/main.rs index 749fe56f7..159343545 100644 --- a/crates/mofa-cli/src/main.rs +++ b/crates/mofa-cli/src/main.rs @@ -75,6 +75,7 @@ fn main() { async fn run_command(cli: Cli) -> CliResult<()> { use cli::Commands; + let output_format = cli.output_format.unwrap_or_default(); // Initialize context for commands that need backend services let needs_context = matches!( @@ -121,6 +122,13 @@ async fn run_command(cli: Cli) -> CliResult<()> { commands::run::run(&config, dora)?; } + Some(Commands::TestDsl { file }) => { + commands::test_dsl::run(&file, output_format) + .await + .into_report() + .attach_with(|| format!("running DSL test case from {}", file.display()))?; + } + #[cfg(feature = "dora")] Some(Commands::Dataflow { file, uv }) => { commands::run::run_dataflow(&file, uv)?; diff --git a/crates/mofa-cli/tests/test_dsl_integration_tests.rs b/crates/mofa-cli/tests/test_dsl_integration_tests.rs new file mode 100644 index 000000000..8c77ea20d --- /dev/null +++ b/crates/mofa-cli/tests/test_dsl_integration_tests.rs @@ -0,0 +1,37 @@ +//! Integration tests for `mofa test-dsl`. + +use assert_cmd::Command; +use predicates::prelude::*; + +#[test] +fn test_dsl_command_runs_example_case() { + let case_path = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/examples/simple_agent.toml" + ); + + Command::cargo_bin("mofa") + .expect("mofa bin") + .args(["test-dsl", case_path]) + .assert() + .success() + .stdout(predicate::str::contains("status: passed")) + .stdout(predicate::str::contains("output: hello from DSL")); +} + +#[test] +fn test_dsl_command_emits_json() { + let case_path = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/examples/tool_agent.toml" + ); + + Command::cargo_bin("mofa") + .expect("mofa bin") + .args(["--output-format", "json", "test-dsl", case_path]) + .assert() + .success() + .stdout(predicate::str::contains("\"success\": true")) + .stdout(predicate::str::contains("\"tool_calls\"")) + .stdout(predicate::str::contains("\"echo_tool\"")); +} diff --git a/tests/examples/tool_agent.toml b/tests/examples/tool_agent.toml new file mode 100644 index 000000000..81be62550 --- /dev/null +++ b/tests/examples/tool_agent.toml @@ -0,0 +1,27 @@ +name = "tool_agent_run" +input = "Use the echo tool and summarize the result." + +[agent] +name = "ToolAgent" +description = "Agent used to validate tool-aware DSL execution." + +[[tools]] +name = "echo_tool" +description = "Echo the provided input." +schema = { type = "object", properties = { input = { type = "string" } }, required = ["input"] } +result = "echoed from tool" + +[assert] +contains = "Tool execution complete" +tool_called = "echo_tool" + +[llm] + +[[llm.steps]] +type = "tool_call" +tool = "echo_tool" +arguments = { input = "ping" } + +[[llm.steps]] +type = "text" +content = "Tool execution complete" diff --git a/tests/src/dsl.rs b/tests/src/dsl.rs index dfcb144e4..1b265b1ac 100644 --- a/tests/src/dsl.rs +++ b/tests/src/dsl.rs @@ -4,7 +4,11 @@ //! simple agent tests without introducing a full DSL framework yet. use crate::agent_runner::{AgentRunResult, AgentRunnerError, AgentTestRunner}; +use crate::tools::MockTool; +use mofa_foundation::agent::context::prompt::AgentIdentity; +use mofa_kernel::agent::components::tool::ToolResult; use serde::Deserialize; +use serde_json::Value; use std::path::Path; use thiserror::Error; @@ -19,9 +23,15 @@ pub enum DslError { #[error("runner error: {0}")] Runner(#[from] AgentRunnerError), + #[error("test case must define either `prompt` or `input`")] + MissingPrompt, + #[error("expected output to contain `{expected}`, got `{actual}`")] ExpectedContains { expected: String, actual: String }, + #[error("expected tool `{tool}` to be called, found tool calls: {actual:?}")] + ExpectedToolCall { tool: String, actual: Vec }, + #[error("run produced no text output")] MissingOutput, } @@ -29,22 +39,67 @@ pub enum DslError { #[derive(Debug, Clone, Deserialize)] pub struct TestCaseDsl { pub name: String, - pub prompt: String, + pub prompt: Option, + pub input: Option, pub expected_text: Option, + #[serde(default)] + pub bootstrap_files: Vec, + pub agent: Option, + #[serde(default)] + pub tools: Vec, pub llm: Option, #[serde(rename = "assert")] pub assertions: Option, } +#[derive(Debug, Clone, Deserialize)] +pub struct BootstrapFileDsl { + pub path: String, + pub content: String, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct AgentDsl { + pub name: Option, + pub description: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct ToolDsl { + pub name: String, + pub description: String, + pub schema: Value, + pub result: Option, +} + #[derive(Debug, Clone, Deserialize)] pub struct LlmDsl { #[serde(default)] pub responses: Vec, + #[serde(default)] + pub steps: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct LlmStepDsl { + #[serde(rename = "type")] + pub kind: LlmStepKind, + pub content: Option, + pub tool: Option, + pub arguments: Option, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum LlmStepKind { + Text, + ToolCall, } #[derive(Debug, Clone, Deserialize)] pub struct AssertDsl { pub contains: Option, + pub tool_called: Option, } impl TestCaseDsl { @@ -56,20 +111,19 @@ impl TestCaseDsl { let input = std::fs::read_to_string(path)?; Self::from_toml_str(&input) } + + fn execution_input(&self) -> Result<&str, DslError> { + self.prompt + .as_deref() + .or(self.input.as_deref()) + .ok_or(DslError::MissingPrompt) + } } pub async fn run_test_case(case: &TestCaseDsl) -> Result { let mut runner = AgentTestRunner::new().await?; - - // Queue deterministic LLM responses before execution so the DSL stays a thin - // adapter over the existing runner harness. - if let Some(llm) = &case.llm { - for response in &llm.responses { - runner.mock_llm().add_response(response).await; - } - } - - let result = runner.run_text(&case.prompt).await?; + configure_runner_from_test_case(case, &mut runner).await?; + let result = runner.run_text(case.execution_input()?).await?; if let Some(expected) = expected_contains(case) { let actual = result.output_text().ok_or(DslError::MissingOutput)?; @@ -81,10 +135,97 @@ pub async fn run_test_case(case: &TestCaseDsl) -> Result>(); + if !actual.iter().any(|tool| tool == expected_tool) { + return Err(DslError::ExpectedToolCall { + tool: expected_tool.to_string(), + actual, + }); + } + } + runner.shutdown().await?; Ok(result) } +pub async fn configure_runner_from_test_case( + case: &TestCaseDsl, + runner: &mut AgentTestRunner, +) -> Result<(), DslError> { + if !case.bootstrap_files.is_empty() { + let mut bootstrap_paths = Vec::with_capacity(case.bootstrap_files.len()); + for file in &case.bootstrap_files { + runner.write_bootstrap_file(&file.path, &file.content)?; + bootstrap_paths.push(file.path.clone()); + } + runner + .configure_prompt(agent_identity(case.agent.as_ref()), Some(bootstrap_paths)) + .await; + } else if case.agent.is_some() { + runner + .configure_prompt(agent_identity(case.agent.as_ref()), None) + .await; + } + + for tool in &case.tools { + let mock_tool = MockTool::new(&tool.name, &tool.description, tool.schema.clone()); + if let Some(result) = &tool.result { + mock_tool + .set_result(ToolResult::success(result.clone())) + .await; + } + runner.register_mock_tool(mock_tool).await?; + } + + // Queue deterministic LLM responses before execution so the DSL stays a thin + // adapter over the existing runner harness. + if let Some(llm) = &case.llm { + if !llm.steps.is_empty() { + for step in &llm.steps { + match step.kind { + LlmStepKind::Text => { + runner + .mock_llm() + .add_response(step.content.clone().unwrap_or_default()) + .await; + } + LlmStepKind::ToolCall => { + runner + .mock_llm() + .add_tool_call_response( + step.tool.as_deref().unwrap_or_default(), + step.arguments.clone().unwrap_or(Value::Null), + step.content.clone(), + ) + .await; + } + } + } + } else { + for response in &llm.responses { + runner.mock_llm().add_response(response).await; + } + } + } + Ok(()) +} + +fn agent_identity(agent: Option<&AgentDsl>) -> Option { + let agent = agent?; + let name = agent.name.clone()?; + Some(AgentIdentity { + name, + description: agent.description.clone().unwrap_or_default(), + icon: None, + }) +} + fn expected_contains(case: &TestCaseDsl) -> Option<&str> { // Prefer the explicit assertion block when present, while keeping // `expected_text` as a lightweight shorthand for the MVP schema. @@ -93,3 +234,9 @@ fn expected_contains(case: &TestCaseDsl) -> Option<&str> { .and_then(|assertions| assertions.contains.as_deref()) .or(case.expected_text.as_deref()) } + +fn expected_tool_call(case: &TestCaseDsl) -> Option<&str> { + case.assertions + .as_ref() + .and_then(|assertions| assertions.tool_called.as_deref()) +} diff --git a/tests/src/lib.rs b/tests/src/lib.rs index 38e588eb5..91e834b82 100644 --- a/tests/src/lib.rs +++ b/tests/src/lib.rs @@ -16,7 +16,10 @@ pub mod tools; pub use backend::MockLLMBackend; pub use bus::MockAgentBus; pub use clock::{Clock, MockClock, SystemClock}; -pub use dsl::{run_test_case, AssertDsl, DslError, LlmDsl, TestCaseDsl}; +pub use dsl::{ + configure_runner_from_test_case, run_test_case, AgentDsl, AssertDsl, BootstrapFileDsl, + DslError, LlmDsl, LlmStepDsl, LlmStepKind, TestCaseDsl, ToolDsl, +}; pub use agent_runner::{ AgentRunMetadata, AgentRunResult, AgentRunnerError, AgentTestRunner, MockAgentLLMProvider, ToolCallRecord, WorkspaceFileSnapshot, WorkspaceSnapshot, diff --git a/tests/tests/dsl_tests.rs b/tests/tests/dsl_tests.rs index 6eedf6c36..00fdb1a5f 100644 --- a/tests/tests/dsl_tests.rs +++ b/tests/tests/dsl_tests.rs @@ -1,6 +1,6 @@ -//! Integration test for the minimal TOML DSL adapter. +//! Integration tests for the minimal TOML DSL adapter. -use mofa_testing::{run_test_case, TestCaseDsl}; +use mofa_testing::{configure_runner_from_test_case, run_test_case, AgentTestRunner, TestCaseDsl}; #[tokio::test] async fn toml_dsl_runs_through_agent_runner() { @@ -21,3 +21,72 @@ async fn toml_dsl_runs_through_agent_runner() { assert!(result.is_success()); assert_eq!(result.output_text().as_deref(), Some("hello from DSL")); } + +#[tokio::test] +async fn toml_dsl_supports_bootstrap_files() { + let case = TestCaseDsl::from_toml_file(concat!( + env!("CARGO_MANIFEST_DIR"), + "/examples/bootstrap_agent.toml" + )) + .expect("bootstrap DSL example should parse"); + + let mut runner = AgentTestRunner::new() + .await + .expect("runner should initialize"); + + configure_runner_from_test_case(&case, &mut runner) + .await + .expect("DSL bootstrap config should apply"); + + let _ = runner + .run_text(case.prompt.as_deref().expect("prompt should be present")) + .await + .expect("bootstrap run should succeed"); + + let request = runner + .mock_llm() + .last_request() + .await + .expect("request should be captured"); + let system_message = request + .messages + .first() + .and_then(|msg| msg.content.as_deref()) + .expect("system message content"); + + assert!(system_message.contains("AGENTS.md")); + assert!(system_message.contains("Bootstrapped instructions for the DSL test.")); + + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn toml_dsl_supports_tool_backed_runs() { + let case = TestCaseDsl::from_toml_file(concat!( + env!("CARGO_MANIFEST_DIR"), + "/examples/tool_agent.toml" + )) + .expect("tool DSL example should parse"); + + let result = run_test_case(&case) + .await + .expect("tool-backed DSL case should run successfully"); + + assert!(result.is_success()); + assert_eq!(result.output_text().as_deref(), Some("Tool execution complete")); + assert_eq!(result.metadata.tool_calls.len(), 1); + assert_eq!(result.metadata.tool_calls[0].tool_name, "echo_tool"); + + let request = result + .metadata + .llm_last_request + .as_ref() + .expect("request should be captured"); + let system_message = request + .messages + .first() + .and_then(|msg| msg.content.as_deref()) + .expect("system message content"); + assert!(system_message.contains("ToolAgent")); + assert!(system_message.contains("Agent used to validate tool-aware DSL execution.")); +} From 3dcceb501f4d0ae36cff01e76926bd6b46c13f24 Mon Sep 17 00:00:00 2001 From: AdityaShome Date: Tue, 31 Mar 2026 20:30:27 +0530 Subject: [PATCH 10/12] feat(cli): add report output for test dsl command --- crates/mofa-cli/src/cli.rs | 28 ++++++++ crates/mofa-cli/src/commands/test_dsl.rs | 64 ++++++++++++++++++- crates/mofa-cli/src/main.rs | 8 ++- .../tests/test_dsl_integration_tests.rs | 57 +++++++++++++++++ 4 files changed, 153 insertions(+), 4 deletions(-) diff --git a/crates/mofa-cli/src/cli.rs b/crates/mofa-cli/src/cli.rs index b744655e3..8f6ef1301 100644 --- a/crates/mofa-cli/src/cli.rs +++ b/crates/mofa-cli/src/cli.rs @@ -85,6 +85,14 @@ pub enum Commands { TestDsl { /// TOML DSL file to execute file: PathBuf, + + /// Optional report file path + #[arg(long)] + report_out: Option, + + /// Report file format + #[arg(long, value_enum, default_value_t = TestDslReportFormat::Json)] + report_format: TestDslReportFormat, }, /// Run a dora dataflow @@ -225,6 +233,12 @@ pub enum DatabaseType { Sqlite, } +#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq)] +pub enum TestDslReportFormat { + Json, + Text, +} + impl std::fmt::Display for DatabaseType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -737,6 +751,20 @@ mod tests { assert!(parsed.is_ok(), "test-dsl command should parse"); } + #[test] + fn test_test_dsl_report_flags_parse() { + let parsed = Cli::try_parse_from([ + "mofa", + "test-dsl", + "tests/examples/simple_agent.toml", + "--report-out", + "/tmp/report.json", + "--report-format", + "json", + ]); + assert!(parsed.is_ok(), "test-dsl report flags should parse"); + } + #[test] fn test_rag_index_parses() { let parsed = Cli::try_parse_from([ diff --git a/crates/mofa-cli/src/commands/test_dsl.rs b/crates/mofa-cli/src/commands/test_dsl.rs index d206e094a..61cde45d6 100644 --- a/crates/mofa-cli/src/commands/test_dsl.rs +++ b/crates/mofa-cli/src/commands/test_dsl.rs @@ -1,8 +1,12 @@ //! `mofa test-dsl` command implementation use crate::CliError; +use crate::cli::TestDslReportFormat; use crate::output::OutputFormat; -use mofa_testing::{DslError, run_test_case, TestCaseDsl}; +use mofa_testing::{ + DslError, JsonFormatter, ReportFormatter, TestCaseResult, TestReport, TestStatus, + TextFormatter, run_test_case, TestCaseDsl, +}; use serde::Serialize; use serde_json::json; use std::path::Path; @@ -18,9 +22,20 @@ struct TestDslSummary { } /// Execute one TOML DSL test case through the testing runner. -pub async fn run(path: &Path, format: OutputFormat) -> Result<(), CliError> { +pub async fn run( + path: &Path, + format: OutputFormat, + report_out: Option<&Path>, + report_format: TestDslReportFormat, +) -> Result<(), CliError> { let case = TestCaseDsl::from_toml_file(path).map_err(map_dsl_error)?; let result = run_test_case(&case).await.map_err(map_dsl_error)?; + let report = build_report(&case.name, &result); + + if let Some(report_out) = report_out { + write_report(report_out, report_format, &report)?; + } + let summary = TestDslSummary { name: case.name, success: result.is_success(), @@ -59,6 +74,51 @@ pub async fn run(path: &Path, format: OutputFormat) -> Result<(), CliError> { Ok(()) } +fn build_report(case_name: &str, result: &mofa_testing::AgentRunResult) -> TestReport { + let status = if result.is_success() { + TestStatus::Passed + } else { + TestStatus::Failed + }; + let error = result.error.as_ref().map(ToString::to_string); + let metadata = vec![ + ( + "execution_id".to_string(), + result.metadata.execution_id.clone(), + ), + ( + "workspace_root".to_string(), + result.metadata.workspace_root.display().to_string(), + ), + ( + "tool_calls".to_string(), + result.metadata.tool_calls.len().to_string(), + ), + ]; + + TestReport { + suite_name: "dsl".to_string(), + results: vec![TestCaseResult { + name: case_name.to_string(), + status, + duration: result.duration, + error, + metadata, + }], + total_duration: result.duration, + timestamp: result.metadata.started_at.timestamp_millis() as u64, + } +} + +fn write_report(path: &Path, format: TestDslReportFormat, report: &TestReport) -> Result<(), CliError> { + let body = match format { + TestDslReportFormat::Json => JsonFormatter.format(report), + TestDslReportFormat::Text => TextFormatter.format(report), + }; + std::fs::write(path, body)?; + Ok(()) +} + fn map_dsl_error(error: DslError) -> CliError { CliError::Other(format!("DSL test failed: {error}")) } diff --git a/crates/mofa-cli/src/main.rs b/crates/mofa-cli/src/main.rs index 159343545..44c5e16f8 100644 --- a/crates/mofa-cli/src/main.rs +++ b/crates/mofa-cli/src/main.rs @@ -122,8 +122,12 @@ async fn run_command(cli: Cli) -> CliResult<()> { commands::run::run(&config, dora)?; } - Some(Commands::TestDsl { file }) => { - commands::test_dsl::run(&file, output_format) + Some(Commands::TestDsl { + file, + report_out, + report_format, + }) => { + commands::test_dsl::run(&file, output_format, report_out.as_deref(), report_format) .await .into_report() .attach_with(|| format!("running DSL test case from {}", file.display()))?; diff --git a/crates/mofa-cli/tests/test_dsl_integration_tests.rs b/crates/mofa-cli/tests/test_dsl_integration_tests.rs index 8c77ea20d..cb4ac0784 100644 --- a/crates/mofa-cli/tests/test_dsl_integration_tests.rs +++ b/crates/mofa-cli/tests/test_dsl_integration_tests.rs @@ -2,6 +2,7 @@ use assert_cmd::Command; use predicates::prelude::*; +use tempfile::tempdir; #[test] fn test_dsl_command_runs_example_case() { @@ -35,3 +36,59 @@ fn test_dsl_command_emits_json() { .stdout(predicate::str::contains("\"tool_calls\"")) .stdout(predicate::str::contains("\"echo_tool\"")); } + +#[test] +fn test_dsl_command_writes_json_report_file() { + let case_path = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/examples/simple_agent.toml" + ); + let temp = tempdir().expect("temp dir"); + let report_path = temp.path().join("dsl-report.json"); + + Command::cargo_bin("mofa") + .expect("mofa bin") + .args([ + "test-dsl", + case_path, + "--report-out", + report_path.to_str().expect("utf8 report path"), + "--report-format", + "json", + ]) + .assert() + .success(); + + let report = std::fs::read_to_string(&report_path).expect("report file exists"); + assert!(report.contains("\"suite\": \"dsl\"")); + assert!(report.contains("\"name\": \"simple_agent_run\"")); + assert!(report.contains("\"status\": \"passed\"")); +} + +#[test] +fn test_dsl_command_writes_text_report_file() { + let case_path = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/examples/tool_agent.toml" + ); + let temp = tempdir().expect("temp dir"); + let report_path = temp.path().join("dsl-report.txt"); + + Command::cargo_bin("mofa") + .expect("mofa bin") + .args([ + "test-dsl", + case_path, + "--report-out", + report_path.to_str().expect("utf8 report path"), + "--report-format", + "text", + ]) + .assert() + .success(); + + let report = std::fs::read_to_string(&report_path).expect("report file exists"); + assert!(report.contains("=== dsl ===")); + assert!(report.contains("tool_agent_run")); + assert!(report.contains("[+]")); +} From c7099b94435e8fc2b5c32f336eb2b35dca73ae58 Mon Sep 17 00:00:00 2001 From: AdityaShome Date: Wed, 1 Apr 2026 01:39:52 +0530 Subject: [PATCH 11/12] feat(testing): add canonical run artifact for test dsl execution --- crates/mofa-cli/src/cli.rs | 16 ++ crates/mofa-cli/src/commands/test_dsl.rs | 55 +++- crates/mofa-cli/src/main.rs | 9 +- .../tests/test_dsl_integration_tests.rs | 27 ++ tests/src/artifact.rs | 255 ++++++++++++++++++ tests/src/dsl.rs | 121 +++++++-- tests/src/lib.rs | 9 +- tests/tests/artifact_tests.rs | 60 +++++ 8 files changed, 509 insertions(+), 43 deletions(-) create mode 100644 tests/src/artifact.rs create mode 100644 tests/tests/artifact_tests.rs diff --git a/crates/mofa-cli/src/cli.rs b/crates/mofa-cli/src/cli.rs index 8f6ef1301..866ef80e9 100644 --- a/crates/mofa-cli/src/cli.rs +++ b/crates/mofa-cli/src/cli.rs @@ -86,6 +86,10 @@ pub enum Commands { /// TOML DSL file to execute file: PathBuf, + /// Optional canonical artifact file path + #[arg(long)] + artifact_out: Option, + /// Optional report file path #[arg(long)] report_out: Option, @@ -765,6 +769,18 @@ mod tests { assert!(parsed.is_ok(), "test-dsl report flags should parse"); } + #[test] + fn test_test_dsl_artifact_flag_parses() { + let parsed = Cli::try_parse_from([ + "mofa", + "test-dsl", + "tests/examples/simple_agent.toml", + "--artifact-out", + "/tmp/artifact.json", + ]); + assert!(parsed.is_ok(), "test-dsl artifact flag should parse"); + } + #[test] fn test_rag_index_parses() { let parsed = Cli::try_parse_from([ diff --git a/crates/mofa-cli/src/commands/test_dsl.rs b/crates/mofa-cli/src/commands/test_dsl.rs index 61cde45d6..397e55a39 100644 --- a/crates/mofa-cli/src/commands/test_dsl.rs +++ b/crates/mofa-cli/src/commands/test_dsl.rs @@ -4,8 +4,9 @@ use crate::CliError; use crate::cli::TestDslReportFormat; use crate::output::OutputFormat; use mofa_testing::{ - DslError, JsonFormatter, ReportFormatter, TestCaseResult, TestReport, TestStatus, - TextFormatter, run_test_case, TestCaseDsl, + AgentRunArtifact, DslError, JsonFormatter, ReportFormatter, TestCaseResult, TestReport, + TestStatus, TextFormatter, TestCaseDsl, assertion_error_from_outcomes, + collect_assertion_outcomes, execute_test_case, }; use serde::Serialize; use serde_json::json; @@ -25,12 +26,19 @@ struct TestDslSummary { pub async fn run( path: &Path, format: OutputFormat, + artifact_out: Option<&Path>, report_out: Option<&Path>, report_format: TestDslReportFormat, ) -> Result<(), CliError> { let case = TestCaseDsl::from_toml_file(path).map_err(map_dsl_error)?; - let result = run_test_case(&case).await.map_err(map_dsl_error)?; - let report = build_report(&case.name, &result); + let result = execute_test_case(&case).await.map_err(map_dsl_error)?; + let assertions = collect_assertion_outcomes(&case, &result); + let artifact = AgentRunArtifact::from_run_result(&case, &result, assertions.clone()); + let report = build_report(&artifact); + + if let Some(artifact_out) = artifact_out { + write_artifact(artifact_out, &artifact)?; + } if let Some(report_out) = report_out { write_report(report_out, report_format, &report)?; @@ -71,45 +79,64 @@ pub async fn run( } } + if let Some(error) = assertion_error_from_outcomes(&assertions) { + return Err(map_dsl_error(error)); + } + Ok(()) } -fn build_report(case_name: &str, result: &mofa_testing::AgentRunResult) -> TestReport { - let status = if result.is_success() { +fn build_report(artifact: &AgentRunArtifact) -> TestReport { + let status = if artifact.status == "passed" { TestStatus::Passed } else { TestStatus::Failed }; - let error = result.error.as_ref().map(ToString::to_string); + let error = artifact + .runner_error + .clone() + .or_else(|| { + artifact + .assertions + .iter() + .find(|item| !item.passed) + .map(|item| format!("assertion failed: {}", item.kind)) + }); let metadata = vec![ ( "execution_id".to_string(), - result.metadata.execution_id.clone(), + artifact.execution_id.clone(), ), ( "workspace_root".to_string(), - result.metadata.workspace_root.display().to_string(), + artifact.workspace_root.clone(), ), ( "tool_calls".to_string(), - result.metadata.tool_calls.len().to_string(), + artifact.tool_calls.len().to_string(), ), ]; TestReport { suite_name: "dsl".to_string(), results: vec![TestCaseResult { - name: case_name.to_string(), + name: artifact.case_name.clone(), status, - duration: result.duration, + duration: std::time::Duration::from_millis(artifact.duration_ms), error, metadata, }], - total_duration: result.duration, - timestamp: result.metadata.started_at.timestamp_millis() as u64, + total_duration: std::time::Duration::from_millis(artifact.duration_ms), + timestamp: artifact.started_at_ms, } } +fn write_artifact(path: &Path, artifact: &AgentRunArtifact) -> Result<(), CliError> { + let body = serde_json::to_string_pretty(artifact)?; + std::fs::write(path, body)?; + Ok(()) +} + fn write_report(path: &Path, format: TestDslReportFormat, report: &TestReport) -> Result<(), CliError> { let body = match format { TestDslReportFormat::Json => JsonFormatter.format(report), diff --git a/crates/mofa-cli/src/main.rs b/crates/mofa-cli/src/main.rs index 44c5e16f8..089bcc6ac 100644 --- a/crates/mofa-cli/src/main.rs +++ b/crates/mofa-cli/src/main.rs @@ -124,10 +124,17 @@ async fn run_command(cli: Cli) -> CliResult<()> { Some(Commands::TestDsl { file, + artifact_out, report_out, report_format, }) => { - commands::test_dsl::run(&file, output_format, report_out.as_deref(), report_format) + commands::test_dsl::run( + &file, + output_format, + artifact_out.as_deref(), + report_out.as_deref(), + report_format, + ) .await .into_report() .attach_with(|| format!("running DSL test case from {}", file.display()))?; diff --git a/crates/mofa-cli/tests/test_dsl_integration_tests.rs b/crates/mofa-cli/tests/test_dsl_integration_tests.rs index cb4ac0784..1da5a294e 100644 --- a/crates/mofa-cli/tests/test_dsl_integration_tests.rs +++ b/crates/mofa-cli/tests/test_dsl_integration_tests.rs @@ -92,3 +92,30 @@ fn test_dsl_command_writes_text_report_file() { assert!(report.contains("tool_agent_run")); assert!(report.contains("[+]")); } + +#[test] +fn test_dsl_command_writes_canonical_artifact_file() { + let case_path = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/examples/tool_agent.toml" + ); + let temp = tempdir().expect("temp dir"); + let artifact_path = temp.path().join("dsl-artifact.json"); + + Command::cargo_bin("mofa") + .expect("mofa bin") + .args([ + "test-dsl", + case_path, + "--artifact-out", + artifact_path.to_str().expect("utf8 artifact path"), + ]) + .assert() + .success(); + + let artifact = std::fs::read_to_string(&artifact_path).expect("artifact file exists"); + assert!(artifact.contains("\"case_name\": \"tool_agent_run\"")); + assert!(artifact.contains("\"status\": \"passed\"")); + assert!(artifact.contains("\"assertions\"")); + assert!(artifact.contains("\"tool_calls\"")); +} diff --git a/tests/src/artifact.rs b/tests/src/artifact.rs new file mode 100644 index 000000000..b121cd515 --- /dev/null +++ b/tests/src/artifact.rs @@ -0,0 +1,255 @@ +//! Canonical run artifacts for DSL-backed agent test execution. +//! +//! These types provide the stable, serializable output model for DSL runs, +//! built from the existing runner result. + +use crate::agent_runner::{AgentRunResult, ToolCallRecord, WorkspaceFileSnapshot, WorkspaceSnapshot}; +use crate::dsl::{AssertionOutcome, TestCaseDsl}; +use mofa_foundation::agent::session::Session; +use serde::{Deserialize, Serialize}; + +// Top-level artifact emitted for a single DSL-backed case execution. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AgentRunArtifact { + pub case_name: String, + pub status: String, + pub output_text: Option, + pub runner_error: Option, + pub duration_ms: u64, + pub started_at_ms: u64, + pub execution_id: String, + pub session_id: Option, + pub workspace_root: String, + pub agent: AgentArtifact, + pub assertions: Vec, + pub tool_calls: Vec, + pub llm_request: Option, + pub llm_response: Option, + pub session_snapshot: Option, + pub workspace_before: WorkspaceSnapshotArtifact, + pub workspace_after: WorkspaceSnapshotArtifact, +} + +// Compact identity data for the agent used by the run. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AgentArtifact { + pub id: String, + pub name: String, +} + +// Tool execution records are flattened into the artifact for downstream checks. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ToolCallArtifact { + pub tool_name: String, + pub input: serde_json::Value, + pub output: Option, + pub success: bool, + pub duration_ms: Option, + pub timed_out: bool, +} + +// LLM request/response types keep only the fields needed for stable inspection. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LlmRequestArtifact { + pub model: Option, + pub temperature: Option, + pub max_tokens: Option, + pub messages: Vec, + pub tool_names: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LlmResponseArtifact { + pub content: Option, + pub tool_calls: Vec, + pub usage: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LlmMessageArtifact { + pub role: String, + pub content: Option, + pub tool_call_id: Option, + pub tool_calls: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LlmToolCallArtifact { + pub id: String, + pub name: String, + pub arguments: serde_json::Value, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TokenUsageArtifact { + pub prompt_tokens: u32, + pub completion_tokens: u32, + pub total_tokens: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SessionArtifact { + pub messages: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SessionMessageArtifact { + pub role: String, + pub content: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WorkspaceSnapshotArtifact { + pub files: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WorkspaceFileArtifact { + pub relative_path: String, + pub size_bytes: u64, + pub modified_ms: Option, + pub checksum: u64, +} + +impl AgentRunArtifact { + // Build the canonical artifact from the current runner result plus DSL assertion outcomes. + pub fn from_run_result( + case: &TestCaseDsl, + result: &AgentRunResult, + assertions: Vec, + ) -> Self { + Self { + case_name: case.name.clone(), + status: if result.is_success() && assertions.iter().all(|item| item.passed) { + "passed".to_string() + } else { + "failed".to_string() + }, + output_text: result.output_text(), + runner_error: result.error.as_ref().map(ToString::to_string), + duration_ms: result.duration.as_millis() as u64, + started_at_ms: result.metadata.started_at.timestamp_millis() as u64, + execution_id: result.metadata.execution_id.clone(), + session_id: result.metadata.session_id.clone(), + workspace_root: result.metadata.workspace_root.display().to_string(), + agent: AgentArtifact { + id: result.metadata.agent_id.clone(), + name: result.metadata.agent_name.clone(), + }, + assertions, + tool_calls: result + .metadata + .tool_calls + .iter() + .map(tool_call_artifact) + .collect(), + llm_request: result + .metadata + .llm_last_request + .as_ref() + .map(|request| LlmRequestArtifact { + model: request.model.clone(), + temperature: request.temperature, + max_tokens: request.max_tokens, + messages: request + .messages + .iter() + .map(|message| LlmMessageArtifact { + role: message.role.clone(), + content: message.content.clone(), + tool_call_id: message.tool_call_id.clone(), + tool_calls: message + .tool_calls + .clone() + .unwrap_or_default() + .into_iter() + .map(llm_tool_call_artifact) + .collect(), + }) + .collect(), + tool_names: request + .tools + .clone() + .unwrap_or_default() + .into_iter() + .map(|tool| tool.name) + .collect(), + }), + llm_response: result + .metadata + .llm_last_response + .as_ref() + .map(|response| LlmResponseArtifact { + content: response.content.clone(), + tool_calls: response + .tool_calls + .clone() + .unwrap_or_default() + .into_iter() + .map(llm_tool_call_artifact) + .collect(), + usage: response.usage.as_ref().map(|usage| TokenUsageArtifact { + prompt_tokens: usage.prompt_tokens, + completion_tokens: usage.completion_tokens, + total_tokens: usage.total_tokens, + }), + }), + session_snapshot: result + .metadata + .session_snapshot + .as_ref() + .map(session_artifact), + workspace_before: workspace_snapshot_artifact(&result.metadata.workspace_snapshot_before), + workspace_after: workspace_snapshot_artifact(&result.metadata.workspace_snapshot_after), + } + } +} + +fn tool_call_artifact(record: &ToolCallRecord) -> ToolCallArtifact { + ToolCallArtifact { + tool_name: record.tool_name.clone(), + input: record.input.clone(), + output: record.output.clone(), + success: record.success, + duration_ms: record.duration_ms, + timed_out: record.timed_out, + } +} + +fn llm_tool_call_artifact(tool_call: mofa_kernel::agent::types::ToolCall) -> LlmToolCallArtifact { + LlmToolCallArtifact { + id: tool_call.id, + name: tool_call.name, + arguments: tool_call.arguments, + } +} + +// Session snapshots are reduced to ordered role/content pairs for stable comparisons. +fn session_artifact(session: &Session) -> SessionArtifact { + SessionArtifact { + messages: session + .messages + .iter() + .map(|message| SessionMessageArtifact { + role: message.role.clone(), + content: message.content.clone(), + }) + .collect(), + } +} + +// Workspace snapshots preserve a compact file-level view before and after execution. +fn workspace_snapshot_artifact(snapshot: &WorkspaceSnapshot) -> WorkspaceSnapshotArtifact { + WorkspaceSnapshotArtifact { + files: snapshot.files.iter().map(workspace_file_artifact).collect(), + } +} + +fn workspace_file_artifact(file: &WorkspaceFileSnapshot) -> WorkspaceFileArtifact { + WorkspaceFileArtifact { + relative_path: file.relative_path.clone(), + size_bytes: file.size_bytes, + modified_ms: file.modified_ms, + checksum: file.checksum, + } +} diff --git a/tests/src/dsl.rs b/tests/src/dsl.rs index 1b265b1ac..deb730f57 100644 --- a/tests/src/dsl.rs +++ b/tests/src/dsl.rs @@ -7,7 +7,7 @@ use crate::agent_runner::{AgentRunResult, AgentRunnerError, AgentTestRunner}; use crate::tools::MockTool; use mofa_foundation::agent::context::prompt::AgentIdentity; use mofa_kernel::agent::components::tool::ToolResult; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use serde_json::Value; use std::path::Path; use thiserror::Error; @@ -102,6 +102,14 @@ pub struct AssertDsl { pub tool_called: Option, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AssertionOutcome { + pub kind: String, + pub expected: Value, + pub actual: Value, + pub passed: bool, +} + impl TestCaseDsl { pub fn from_toml_str(input: &str) -> Result { Ok(toml::from_str(input)?) @@ -121,35 +129,18 @@ impl TestCaseDsl { } pub async fn run_test_case(case: &TestCaseDsl) -> Result { + let result = execute_test_case(case).await?; + let assertions = collect_assertion_outcomes(case, &result); + if let Some(error) = assertion_error_from_outcomes(&assertions) { + return Err(error); + } + Ok(result) +} + +pub async fn execute_test_case(case: &TestCaseDsl) -> Result { let mut runner = AgentTestRunner::new().await?; configure_runner_from_test_case(case, &mut runner).await?; let result = runner.run_text(case.execution_input()?).await?; - - if let Some(expected) = expected_contains(case) { - let actual = result.output_text().ok_or(DslError::MissingOutput)?; - if !actual.contains(expected) { - return Err(DslError::ExpectedContains { - expected: expected.to_string(), - actual, - }); - } - } - - if let Some(expected_tool) = expected_tool_call(case) { - let actual = result - .metadata - .tool_calls - .iter() - .map(|record| record.tool_name.clone()) - .collect::>(); - if !actual.iter().any(|tool| tool == expected_tool) { - return Err(DslError::ExpectedToolCall { - tool: expected_tool.to_string(), - actual, - }); - } - } - runner.shutdown().await?; Ok(result) } @@ -240,3 +231,79 @@ fn expected_tool_call(case: &TestCaseDsl) -> Option<&str> { .as_ref() .and_then(|assertions| assertions.tool_called.as_deref()) } + +pub fn collect_assertion_outcomes(case: &TestCaseDsl, result: &AgentRunResult) -> Vec { + let mut outcomes = Vec::new(); + + if let Some(expected) = expected_contains(case) { + let actual = result.output_text(); + outcomes.push(AssertionOutcome { + kind: "contains".to_string(), + expected: Value::String(expected.to_string()), + actual: actual + .clone() + .map(Value::String) + .unwrap_or(Value::Null), + passed: actual + .as_ref() + .map(|value| value.contains(expected)) + .unwrap_or(false), + }); + } + + if let Some(expected_tool) = expected_tool_call(case) { + let actual = result + .metadata + .tool_calls + .iter() + .map(|record| Value::String(record.tool_name.clone())) + .collect::>(); + outcomes.push(AssertionOutcome { + kind: "tool_called".to_string(), + expected: Value::String(expected_tool.to_string()), + actual: Value::Array(actual.clone()), + passed: actual + .iter() + .any(|tool| tool.as_str() == Some(expected_tool)), + }); + } + + outcomes +} + +pub fn assertion_error_from_outcomes(outcomes: &[AssertionOutcome]) -> Option { + for outcome in outcomes { + if outcome.passed { + continue; + } + + match outcome.kind.as_str() { + "contains" => { + return if outcome.actual.is_null() { + Some(DslError::MissingOutput) + } else { + Some(DslError::ExpectedContains { + expected: outcome.expected.as_str().unwrap_or_default().to_string(), + actual: outcome.actual.as_str().unwrap_or_default().to_string(), + }) + }; + } + "tool_called" => { + let actual = outcome + .actual + .as_array() + .into_iter() + .flatten() + .filter_map(|value| value.as_str().map(ToString::to_string)) + .collect::>(); + return Some(DslError::ExpectedToolCall { + tool: outcome.expected.as_str().unwrap_or_default().to_string(), + actual, + }); + } + _ => continue, + } + } + + None +} diff --git a/tests/src/lib.rs b/tests/src/lib.rs index 91e834b82..2490ae668 100644 --- a/tests/src/lib.rs +++ b/tests/src/lib.rs @@ -5,6 +5,7 @@ pub mod adversarial; pub mod agent_runner; +pub mod artifact; pub mod assertions; pub mod backend; pub mod bus; @@ -17,13 +18,19 @@ pub use backend::MockLLMBackend; pub use bus::MockAgentBus; pub use clock::{Clock, MockClock, SystemClock}; pub use dsl::{ - configure_runner_from_test_case, run_test_case, AgentDsl, AssertDsl, BootstrapFileDsl, + assertion_error_from_outcomes, collect_assertion_outcomes, configure_runner_from_test_case, + execute_test_case, run_test_case, AgentDsl, AssertDsl, AssertionOutcome, BootstrapFileDsl, DslError, LlmDsl, LlmStepDsl, LlmStepKind, TestCaseDsl, ToolDsl, }; pub use agent_runner::{ AgentRunMetadata, AgentRunResult, AgentRunnerError, AgentTestRunner, MockAgentLLMProvider, ToolCallRecord, WorkspaceFileSnapshot, WorkspaceSnapshot, }; +pub use artifact::{ + AgentArtifact, AgentRunArtifact, LlmMessageArtifact, LlmRequestArtifact, LlmResponseArtifact, + LlmToolCallArtifact, SessionArtifact, SessionMessageArtifact, TokenUsageArtifact, + ToolCallArtifact, WorkspaceFileArtifact, WorkspaceSnapshotArtifact, +}; pub use report::{ JsonFormatter, ReportFormatter, TestCaseResult, TestReport, TestReportBuilder, TestStatus, TextFormatter, diff --git a/tests/tests/artifact_tests.rs b/tests/tests/artifact_tests.rs new file mode 100644 index 000000000..6876ef661 --- /dev/null +++ b/tests/tests/artifact_tests.rs @@ -0,0 +1,60 @@ +//! Tests for canonical DSL run artifact generation. + +use mofa_testing::{ + AgentRunArtifact, TestCaseDsl, assertion_error_from_outcomes, collect_assertion_outcomes, + execute_test_case, +}; + +#[tokio::test] +async fn artifact_contains_core_run_data() { + let case = TestCaseDsl::from_toml_file(concat!( + env!("CARGO_MANIFEST_DIR"), + "/examples/tool_agent.toml" + )) + .expect("tool DSL example should parse"); + + let result = execute_test_case(&case) + .await + .expect("DSL case should execute"); + let assertions = collect_assertion_outcomes(&case, &result); + let artifact = AgentRunArtifact::from_run_result(&case, &result, assertions); + + assert_eq!(artifact.case_name, "tool_agent_run"); + assert_eq!(artifact.status, "passed"); + assert_eq!(artifact.output_text.as_deref(), Some("Tool execution complete")); + assert_eq!(artifact.tool_calls.len(), 1); + assert_eq!(artifact.tool_calls[0].tool_name, "echo_tool"); + assert!(!artifact.agent.name.is_empty()); + assert!(artifact.llm_request.is_some()); + assert!(artifact.workspace_after.files.iter().any(|file| { + file.relative_path.ends_with(".jsonl") + })); +} + +#[tokio::test] +async fn artifact_captures_failed_assertions() { + let case = TestCaseDsl::from_toml_str( + r#" +name = "failing_case" +prompt = "Say hello" + +[llm] +responses = ["wrong output"] + +[assert] +contains = "expected text" +"#, + ) + .expect("inline DSL should parse"); + + let result = execute_test_case(&case) + .await + .expect("DSL case should execute"); + let assertions = collect_assertion_outcomes(&case, &result); + let artifact = AgentRunArtifact::from_run_result(&case, &result, assertions.clone()); + + assert_eq!(artifact.status, "failed"); + assert_eq!(artifact.assertions.len(), 1); + assert!(!artifact.assertions[0].passed); + assert!(assertion_error_from_outcomes(&assertions).is_some()); +} From 70b6708d879ec6a4116dc200717fde98e5b56206 Mon Sep 17 00:00:00 2001 From: AdityaShome Date: Wed, 1 Apr 2026 10:03:21 +0530 Subject: [PATCH 12/12] test(testing): add missing bootstrap DSL fixture --- tests/examples/bootstrap_agent.toml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/examples/bootstrap_agent.toml diff --git a/tests/examples/bootstrap_agent.toml b/tests/examples/bootstrap_agent.toml new file mode 100644 index 000000000..9dd75291c --- /dev/null +++ b/tests/examples/bootstrap_agent.toml @@ -0,0 +1,13 @@ +name = "bootstrap_agent_run" +prompt = "What file was loaded?" +expected_text = "Bootstrapped" + +[[bootstrap_files]] +path = "AGENTS.md" +content = "Bootstrapped instructions for the DSL test." + +[llm] +responses = ["Bootstrapped response"] + +[assert] +contains = "Bootstrapped" \ No newline at end of file