diff --git a/Cargo.toml b/Cargo.toml index f71801118..aca372a14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -86,6 +86,9 @@ lazy_static = "1.4" # Actor framework for ReAct agents ractor = "0" +# TOML deserialization (also used transitively by config) +toml = "0.8" + # Configuration file support (multi-format) config = { version = "0.14", features = [ "toml", diff --git a/crates/mofa-cli/Cargo.toml b/crates/mofa-cli/Cargo.toml index 875ae62a6..5a9d593d6 100644 --- a/crates/mofa-cli/Cargo.toml +++ b/crates/mofa-cli/Cargo.toml @@ -25,6 +25,7 @@ mofa-kernel = { path = "../mofa-kernel", version = "0.1", features = [ ] } mofa-runtime = { path = "../mofa-runtime", version = "0.1" } mofa-foundation = { path = "../mofa-foundation", version = "0.1" } +mofa-testing = { path = "../../tests", version = "0.1" } config.workspace = true tokio = { workspace = true } thiserror = { workspace = true } diff --git a/crates/mofa-cli/src/cli.rs b/crates/mofa-cli/src/cli.rs index 7e10ad479..866ef80e9 100644 --- a/crates/mofa-cli/src/cli.rs +++ b/crates/mofa-cli/src/cli.rs @@ -81,6 +81,24 @@ pub enum Commands { dora: bool, }, + /// Run a testing DSL case file + TestDsl { + /// TOML DSL file to execute + file: PathBuf, + + /// Optional canonical artifact file path + #[arg(long)] + artifact_out: Option, + + /// Optional report file path + #[arg(long)] + report_out: Option, + + /// Report file format + #[arg(long, value_enum, default_value_t = TestDslReportFormat::Json)] + report_format: TestDslReportFormat, + }, + /// Run a dora dataflow #[cfg(feature = "dora")] Dataflow { @@ -219,6 +237,12 @@ pub enum DatabaseType { Sqlite, } +#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq)] +pub enum TestDslReportFormat { + Json, + Text, +} + impl std::fmt::Display for DatabaseType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -725,6 +749,38 @@ mod tests { assert!(parsed.is_ok(), "doctor ci strict json should parse"); } + #[test] + fn test_test_dsl_parses() { + let parsed = Cli::try_parse_from(["mofa", "test-dsl", "tests/examples/simple_agent.toml"]); + assert!(parsed.is_ok(), "test-dsl command should parse"); + } + + #[test] + fn test_test_dsl_report_flags_parse() { + let parsed = Cli::try_parse_from([ + "mofa", + "test-dsl", + "tests/examples/simple_agent.toml", + "--report-out", + "/tmp/report.json", + "--report-format", + "json", + ]); + assert!(parsed.is_ok(), "test-dsl report flags should parse"); + } + + #[test] + fn test_test_dsl_artifact_flag_parses() { + let parsed = Cli::try_parse_from([ + "mofa", + "test-dsl", + "tests/examples/simple_agent.toml", + "--artifact-out", + "/tmp/artifact.json", + ]); + assert!(parsed.is_ok(), "test-dsl artifact flag should parse"); + } + #[test] fn test_rag_index_parses() { let parsed = Cli::try_parse_from([ diff --git a/crates/mofa-cli/src/commands/mod.rs b/crates/mofa-cli/src/commands/mod.rs index 0fb02a9d3..1b798f63f 100644 --- a/crates/mofa-cli/src/commands/mod.rs +++ b/crates/mofa-cli/src/commands/mod.rs @@ -11,5 +11,6 @@ pub mod new; pub mod plugin; pub mod rag; pub mod run; +pub mod test_dsl; pub mod session; pub mod tool; diff --git a/crates/mofa-cli/src/commands/test_dsl.rs b/crates/mofa-cli/src/commands/test_dsl.rs new file mode 100644 index 000000000..397e55a39 --- /dev/null +++ b/crates/mofa-cli/src/commands/test_dsl.rs @@ -0,0 +1,151 @@ +//! `mofa test-dsl` command implementation + +use crate::CliError; +use crate::cli::TestDslReportFormat; +use crate::output::OutputFormat; +use mofa_testing::{ + AgentRunArtifact, DslError, JsonFormatter, ReportFormatter, TestCaseResult, TestReport, + TestStatus, TextFormatter, TestCaseDsl, assertion_error_from_outcomes, + collect_assertion_outcomes, execute_test_case, +}; +use serde::Serialize; +use serde_json::json; +use std::path::Path; + +#[derive(Debug, Serialize)] +struct TestDslSummary { + name: String, + success: bool, + output_text: Option, + duration_ms: u128, + tool_calls: Vec, + workspace_root: String, +} + +/// Execute one TOML DSL test case through the testing runner. +pub async fn run( + path: &Path, + format: OutputFormat, + artifact_out: Option<&Path>, + report_out: Option<&Path>, + report_format: TestDslReportFormat, +) -> Result<(), CliError> { + let case = TestCaseDsl::from_toml_file(path).map_err(map_dsl_error)?; + let result = execute_test_case(&case).await.map_err(map_dsl_error)?; + let assertions = collect_assertion_outcomes(&case, &result); + let artifact = AgentRunArtifact::from_run_result(&case, &result, assertions.clone()); + let report = build_report(&artifact); + + if let Some(artifact_out) = artifact_out { + write_artifact(artifact_out, &artifact)?; + } + + if let Some(report_out) = report_out { + write_report(report_out, report_format, &report)?; + } + + let summary = TestDslSummary { + name: case.name, + success: result.is_success(), + output_text: result.output_text(), + duration_ms: result.duration.as_millis(), + tool_calls: result + .metadata + .tool_calls + .iter() + .map(|record| record.tool_name.clone()) + .collect(), + workspace_root: result.metadata.workspace_root.display().to_string(), + }; + + match format { + OutputFormat::Json => { + let output = json!({ + "success": true, + "case": summary, + }); + println!("{}", serde_json::to_string_pretty(&output)?); + } + _ => { + println!("case: {}", summary.name); + println!("status: {}", if summary.success { "passed" } else { "failed" }); + if let Some(output_text) = &summary.output_text { + println!("output: {}", output_text); + } + if !summary.tool_calls.is_empty() { + println!("tool_calls: {}", summary.tool_calls.join(", ")); + } + println!("duration_ms: {}", summary.duration_ms); + } + } + + if let Some(error) = assertion_error_from_outcomes(&assertions) { + return Err(map_dsl_error(error)); + } + + Ok(()) +} + +fn build_report(artifact: &AgentRunArtifact) -> TestReport { + let status = if artifact.status == "passed" { + TestStatus::Passed + } else { + TestStatus::Failed + }; + let error = artifact + .runner_error + .clone() + .or_else(|| { + artifact + .assertions + .iter() + .find(|item| !item.passed) + .map(|item| format!("assertion failed: {}", item.kind)) + }); + let metadata = vec![ + ( + "execution_id".to_string(), + artifact.execution_id.clone(), + ), + ( + "workspace_root".to_string(), + artifact.workspace_root.clone(), + ), + ( + "tool_calls".to_string(), + artifact.tool_calls.len().to_string(), + ), + ]; + + TestReport { + suite_name: "dsl".to_string(), + results: vec![TestCaseResult { + name: artifact.case_name.clone(), + status, + duration: std::time::Duration::from_millis(artifact.duration_ms), + error, + metadata, + }], + total_duration: std::time::Duration::from_millis(artifact.duration_ms), + timestamp: artifact.started_at_ms, + } +} + +fn write_artifact(path: &Path, artifact: &AgentRunArtifact) -> Result<(), CliError> { + let body = serde_json::to_string_pretty(artifact)?; + std::fs::write(path, body)?; + Ok(()) +} + +fn write_report(path: &Path, format: TestDslReportFormat, report: &TestReport) -> Result<(), CliError> { + let body = match format { + TestDslReportFormat::Json => JsonFormatter.format(report), + TestDslReportFormat::Text => TextFormatter.format(report), + }; + std::fs::write(path, body)?; + Ok(()) +} + +fn map_dsl_error(error: DslError) -> CliError { + CliError::Other(format!("DSL test failed: {error}")) +} diff --git a/crates/mofa-cli/src/main.rs b/crates/mofa-cli/src/main.rs index 749fe56f7..089bcc6ac 100644 --- a/crates/mofa-cli/src/main.rs +++ b/crates/mofa-cli/src/main.rs @@ -75,6 +75,7 @@ fn main() { async fn run_command(cli: Cli) -> CliResult<()> { use cli::Commands; + let output_format = cli.output_format.unwrap_or_default(); // Initialize context for commands that need backend services let needs_context = matches!( @@ -121,6 +122,24 @@ async fn run_command(cli: Cli) -> CliResult<()> { commands::run::run(&config, dora)?; } + Some(Commands::TestDsl { + file, + artifact_out, + report_out, + report_format, + }) => { + commands::test_dsl::run( + &file, + output_format, + artifact_out.as_deref(), + report_out.as_deref(), + report_format, + ) + .await + .into_report() + .attach_with(|| format!("running DSL test case from {}", file.display()))?; + } + #[cfg(feature = "dora")] Some(Commands::Dataflow { file, uv }) => { commands::run::run_dataflow(&file, uv)?; diff --git a/crates/mofa-cli/tests/test_dsl_integration_tests.rs b/crates/mofa-cli/tests/test_dsl_integration_tests.rs new file mode 100644 index 000000000..1da5a294e --- /dev/null +++ b/crates/mofa-cli/tests/test_dsl_integration_tests.rs @@ -0,0 +1,121 @@ +//! Integration tests for `mofa test-dsl`. + +use assert_cmd::Command; +use predicates::prelude::*; +use tempfile::tempdir; + +#[test] +fn test_dsl_command_runs_example_case() { + let case_path = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/examples/simple_agent.toml" + ); + + Command::cargo_bin("mofa") + .expect("mofa bin") + .args(["test-dsl", case_path]) + .assert() + .success() + .stdout(predicate::str::contains("status: passed")) + .stdout(predicate::str::contains("output: hello from DSL")); +} + +#[test] +fn test_dsl_command_emits_json() { + let case_path = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/examples/tool_agent.toml" + ); + + Command::cargo_bin("mofa") + .expect("mofa bin") + .args(["--output-format", "json", "test-dsl", case_path]) + .assert() + .success() + .stdout(predicate::str::contains("\"success\": true")) + .stdout(predicate::str::contains("\"tool_calls\"")) + .stdout(predicate::str::contains("\"echo_tool\"")); +} + +#[test] +fn test_dsl_command_writes_json_report_file() { + let case_path = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/examples/simple_agent.toml" + ); + let temp = tempdir().expect("temp dir"); + let report_path = temp.path().join("dsl-report.json"); + + Command::cargo_bin("mofa") + .expect("mofa bin") + .args([ + "test-dsl", + case_path, + "--report-out", + report_path.to_str().expect("utf8 report path"), + "--report-format", + "json", + ]) + .assert() + .success(); + + let report = std::fs::read_to_string(&report_path).expect("report file exists"); + assert!(report.contains("\"suite\": \"dsl\"")); + assert!(report.contains("\"name\": \"simple_agent_run\"")); + assert!(report.contains("\"status\": \"passed\"")); +} + +#[test] +fn test_dsl_command_writes_text_report_file() { + let case_path = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/examples/tool_agent.toml" + ); + let temp = tempdir().expect("temp dir"); + let report_path = temp.path().join("dsl-report.txt"); + + Command::cargo_bin("mofa") + .expect("mofa bin") + .args([ + "test-dsl", + case_path, + "--report-out", + report_path.to_str().expect("utf8 report path"), + "--report-format", + "text", + ]) + .assert() + .success(); + + let report = std::fs::read_to_string(&report_path).expect("report file exists"); + assert!(report.contains("=== dsl ===")); + assert!(report.contains("tool_agent_run")); + assert!(report.contains("[+]")); +} + +#[test] +fn test_dsl_command_writes_canonical_artifact_file() { + let case_path = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/examples/tool_agent.toml" + ); + let temp = tempdir().expect("temp dir"); + let artifact_path = temp.path().join("dsl-artifact.json"); + + Command::cargo_bin("mofa") + .expect("mofa bin") + .args([ + "test-dsl", + case_path, + "--artifact-out", + artifact_path.to_str().expect("utf8 artifact path"), + ]) + .assert() + .success(); + + let artifact = std::fs::read_to_string(&artifact_path).expect("artifact file exists"); + assert!(artifact.contains("\"case_name\": \"tool_agent_run\"")); + assert!(artifact.contains("\"status\": \"passed\"")); + assert!(artifact.contains("\"assertions\"")); + assert!(artifact.contains("\"tool_calls\"")); +} diff --git a/crates/mofa-foundation/src/agent/context/prompt.rs b/crates/mofa-foundation/src/agent/context/prompt.rs index 04ec30bd6..87cc8a127 100644 --- a/crates/mofa-foundation/src/agent/context/prompt.rs +++ b/crates/mofa-foundation/src/agent/context/prompt.rs @@ -142,6 +142,17 @@ impl PromptContext { self } + /// Replace the agent identity. + pub fn set_identity(&mut self, identity: AgentIdentity) { + self.agent_name = identity.name.clone(); + self.identity = identity; + } + + /// Replace the bootstrap file list. + pub fn set_bootstrap_files(&mut self, files: Vec) { + self.bootstrap_files = files; + } + /// Set skills that should always be loaded pub fn with_always_load(mut self, skills: Vec) -> Self { self.always_load = skills; diff --git a/crates/mofa-foundation/src/agent/executor.rs b/crates/mofa-foundation/src/agent/executor.rs index 605ab8fe0..15dca5065 100644 --- a/crates/mofa-foundation/src/agent/executor.rs +++ b/crates/mofa-foundation/src/agent/executor.rs @@ -548,6 +548,15 @@ impl AgentExecutor { &self.config } + /// Update the prompt context (system prompt builder). + pub async fn update_prompt_context(&self, updater: F) + where + F: FnOnce(&mut PromptContext), + { + let mut ctx = self.context.write().await; + updater(&mut ctx); + } + /// Get mutable reference to base agent pub fn base_mut(&mut self) -> &mut BaseAgent { &mut self.base @@ -586,7 +595,9 @@ impl MoFAAgent for AgentExecutor { self.base.initialize(ctx).await?; // Additional executor-specific initialization - self.base.transition_to(AgentState::Ready)?; + if self.base.state() != AgentState::Ready { + self.base.transition_to(AgentState::Ready)?; + } Ok(()) } @@ -643,7 +654,10 @@ mod tests { "mock" } - async fn chat(&self, _request: ChatCompletionRequest) -> AgentResult { + async fn chat( + &self, + _request: ChatCompletionRequest, + ) -> AgentResult { Ok(ChatCompletionResponse { content: Some("ok".to_string()), tool_calls: Some(Vec::::new()), diff --git a/crates/mofa-runtime/src/runner.rs b/crates/mofa-runtime/src/runner.rs index 91ca5aaff..09a856636 100644 --- a/crates/mofa-runtime/src/runner.rs +++ b/crates/mofa-runtime/src/runner.rs @@ -349,6 +349,11 @@ impl AgentRunner { &self.context } + /// Update session ID in the execution context. + pub fn set_session_id(&mut self, session_id: Option) { + self.context.session_id = session_id; + } + /// 获取运行器状态 /// Get runner state pub async fn state(&self) -> RunnerState { diff --git a/examples/Cargo.toml b/examples/Cargo.toml index f952ce045..3da476607 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -1,6 +1,9 @@ [workspace] resolver = "3" members = [ + "agent_runner_basic", + "agent_runner_custom_session", + "agent_runner_tools", "cli_production_smoke", "cli_agent_logs_demo", "cli_plugin_lifecycle", diff --git a/examples/agent_runner_basic/Cargo.toml b/examples/agent_runner_basic/Cargo.toml new file mode 100644 index 000000000..e49ef8dea --- /dev/null +++ b/examples/agent_runner_basic/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "agent_runner_basic" +version.workspace = true +edition.workspace = true + +[dependencies] +anyhow.workspace = true +mofa-testing = { path = "../../tests" } +tokio.workspace = true diff --git a/examples/agent_runner_basic/src/main.rs b/examples/agent_runner_basic/src/main.rs new file mode 100644 index 000000000..f49a93ee7 --- /dev/null +++ b/examples/agent_runner_basic/src/main.rs @@ -0,0 +1,29 @@ +use anyhow::Result; +use mofa_testing::AgentTestRunner; + +#[tokio::main] +async fn main() -> Result<()> { + let mut runner = AgentTestRunner::new().await?; + runner.mock_llm().add_response("Hello from the runner").await; + + let result = runner.run_text("hi").await?; + println!("Output: {}", result.output_text().unwrap_or_default()); + println!( + "Session: {}", + result + .metadata + .session_id + .as_deref() + .unwrap_or("") + ); + println!("Workspace: {}", result.metadata.workspace_root.display()); + println!( + "Runner stats: total={} success={} failed={}", + result.metadata.runner_stats_after.total_executions, + result.metadata.runner_stats_after.successful_executions, + result.metadata.runner_stats_after.failed_executions + ); + + runner.shutdown().await?; + Ok(()) +} diff --git a/examples/agent_runner_custom_session/Cargo.toml b/examples/agent_runner_custom_session/Cargo.toml new file mode 100644 index 000000000..52620ec07 --- /dev/null +++ b/examples/agent_runner_custom_session/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "agent_runner_custom_session" +version.workspace = true +edition.workspace = true + +[dependencies] +anyhow.workspace = true +mofa-testing = { path = "../../tests" } +mofa-foundation = { path = "../../crates/mofa-foundation" } +tokio.workspace = true diff --git a/examples/agent_runner_custom_session/src/main.rs b/examples/agent_runner_custom_session/src/main.rs new file mode 100644 index 000000000..fc03881b8 --- /dev/null +++ b/examples/agent_runner_custom_session/src/main.rs @@ -0,0 +1,42 @@ +use anyhow::Result; +use mofa_foundation::agent::context::prompt::AgentIdentity; +use mofa_testing::AgentTestRunner; + +#[tokio::main] +async fn main() -> Result<()> { + let mut runner = AgentTestRunner::new().await?; + + runner.write_bootstrap_file("CUSTOM.md", "Custom bootstrap content.")?; + runner + .configure_prompt( + Some(AgentIdentity { + name: "RunnerDemo".to_string(), + description: "Custom identity for example runs".to_string(), + icon: None, + }), + Some(vec!["CUSTOM.md".to_string()]), + ) + .await; + + runner + .mock_llm() + .add_response("Custom session response") + .await; + + let result = runner + .run_text_with_session("demo-session", "hello session") + .await?; + + println!( + "Session id: {}", + result + .metadata + .session_id + .as_deref() + .unwrap_or("") + ); + println!("Output: {}", result.output_text().unwrap_or_default()); + + runner.shutdown().await?; + Ok(()) +} diff --git a/examples/agent_runner_tools/Cargo.toml b/examples/agent_runner_tools/Cargo.toml new file mode 100644 index 000000000..511578074 --- /dev/null +++ b/examples/agent_runner_tools/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "agent_runner_tools" +version.workspace = true +edition.workspace = true + +[dependencies] +anyhow.workspace = true +mofa-testing = { path = "../../tests" } +serde_json.workspace = true +tokio.workspace = true diff --git a/examples/agent_runner_tools/src/main.rs b/examples/agent_runner_tools/src/main.rs new file mode 100644 index 000000000..7399abf8b --- /dev/null +++ b/examples/agent_runner_tools/src/main.rs @@ -0,0 +1,47 @@ +use anyhow::Result; +use mofa_testing::{AgentTestRunner, MockTool}; +use serde_json::json; + +#[tokio::main] +async fn main() -> Result<()> { + let mut runner = AgentTestRunner::new().await?; + + let tool = MockTool::new( + "echo_tool", + "Echo the provided input", + json!({ + "type": "object", + "properties": { + "input": { "type": "string" } + }, + "required": ["input"] + }), + ); + + runner.register_mock_tool(tool).await?; + + runner + .mock_llm() + .add_tool_call_response("echo_tool", json!({ "input": "ping" }), None) + .await; + runner + .mock_llm() + .add_response("Tool response completed") + .await; + + let result = runner.run_text("use the tool").await?; + println!("Output: {}", result.output_text().unwrap_or_default()); + + for record in &result.metadata.tool_calls { + println!( + "Tool call: name={} input={} output={} duration_ms={:?}", + record.tool_name, + record.input, + record.output.as_ref().unwrap_or(&serde_json::Value::Null), + record.duration_ms + ); + } + + runner.shutdown().await?; + Ok(()) +} diff --git a/tests/Cargo.toml b/tests/Cargo.toml index a0597654f..7eec6a9f7 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -9,10 +9,14 @@ description = "Testing utilities for the MoFA agent framework" [dependencies] mofa-kernel = { path = "../crates/mofa-kernel" } mofa-foundation = { path = "../crates/mofa-foundation" } +mofa-runtime = { path = "../crates/mofa-runtime" } tokio = { workspace = true } async-trait = { workspace = true } anyhow = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } chrono = { workspace = true } +thiserror = { workspace = true } +uuid = { workspace = true } regex = { workspace = true } +toml = { workspace = true } diff --git a/tests/examples/bootstrap_agent.toml b/tests/examples/bootstrap_agent.toml new file mode 100644 index 000000000..9dd75291c --- /dev/null +++ b/tests/examples/bootstrap_agent.toml @@ -0,0 +1,13 @@ +name = "bootstrap_agent_run" +prompt = "What file was loaded?" +expected_text = "Bootstrapped" + +[[bootstrap_files]] +path = "AGENTS.md" +content = "Bootstrapped instructions for the DSL test." + +[llm] +responses = ["Bootstrapped response"] + +[assert] +contains = "Bootstrapped" \ No newline at end of file diff --git a/tests/examples/simple_agent.toml b/tests/examples/simple_agent.toml new file mode 100644 index 000000000..dc673d88a --- /dev/null +++ b/tests/examples/simple_agent.toml @@ -0,0 +1,9 @@ +name = "simple_agent_run" +prompt = "Say hello" +expected_text = "hello" + +[llm] +responses = ["hello from DSL"] + +[assert] +contains = "hello" diff --git a/tests/examples/tool_agent.toml b/tests/examples/tool_agent.toml new file mode 100644 index 000000000..81be62550 --- /dev/null +++ b/tests/examples/tool_agent.toml @@ -0,0 +1,27 @@ +name = "tool_agent_run" +input = "Use the echo tool and summarize the result." + +[agent] +name = "ToolAgent" +description = "Agent used to validate tool-aware DSL execution." + +[[tools]] +name = "echo_tool" +description = "Echo the provided input." +schema = { type = "object", properties = { input = { type = "string" } }, required = ["input"] } +result = "echoed from tool" + +[assert] +contains = "Tool execution complete" +tool_called = "echo_tool" + +[llm] + +[[llm.steps]] +type = "tool_call" +tool = "echo_tool" +arguments = { input = "ping" } + +[[llm.steps]] +type = "text" +content = "Tool execution complete" diff --git a/tests/src/agent_runner.rs b/tests/src/agent_runner.rs new file mode 100644 index 000000000..ca22c1c1b --- /dev/null +++ b/tests/src/agent_runner.rs @@ -0,0 +1,619 @@ +//! Real agent runner harness for integration-style tests. +//! +//! Provides a lightweight wrapper around the MoFA runtime `AgentRunner` +//! with an isolated workspace and deterministic mock LLM. + +use async_trait::async_trait; +use chrono::{DateTime, Utc}; +use mofa_foundation::agent::context::prompt::AgentIdentity; +use mofa_foundation::agent::executor::{AgentExecutor, AgentExecutorConfig}; +use mofa_kernel::agent::context::AgentContext; +use mofa_kernel::agent::core::MoFAAgent; +use mofa_kernel::agent::error::{AgentError, AgentResult}; +use mofa_foundation::agent::components::tool::as_tool; +use mofa_foundation::agent::components::tool::SimpleTool; +use mofa_foundation::agent::session::{JsonlSessionStorage, Session, SessionStorage}; +use crate::tools::MockTool; +use mofa_kernel::agent::types::{AgentInput, AgentOutput, ChatCompletionRequest}; +use mofa_kernel::agent::types::{ChatCompletionResponse, ToolCall}; +use mofa_kernel::agent::AgentCapabilities; +use mofa_kernel::agent::AgentState; +use mofa_runtime::runner::{AgentRunner, RunnerState, RunnerStats}; +use std::collections::VecDeque; +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use thiserror::Error; +use tokio::sync::RwLock; +use uuid::Uuid; + +/// Errors returned by the agent runner harness itself. +#[derive(Debug, Error)] +#[non_exhaustive] +pub enum AgentRunnerError { + #[error("failed to create test workspace: {0}")] + WorkspaceIo(#[from] std::io::Error), + + #[error("agent runner failure: {0}")] + Agent(#[from] AgentError), +} + +/// Metadata captured for each run. +#[derive(Debug, Clone)] +#[non_exhaustive] +pub struct AgentRunMetadata { + pub agent_id: String, + pub agent_name: String, + pub execution_id: String, + pub session_id: Option, + pub workspace_root: PathBuf, + pub runner_state_before: RunnerState, + pub runner_state_after: RunnerState, + pub runner_stats_before: RunnerStats, + pub runner_stats_after: RunnerStats, + pub agent_state_before: AgentState, + pub agent_state_after: AgentState, + pub started_at: DateTime, + pub session_snapshot: Option, + pub tool_calls: Vec, + pub llm_last_request: Option, + pub llm_last_response: Option, + pub workspace_snapshot_before: WorkspaceSnapshot, + pub workspace_snapshot_after: WorkspaceSnapshot, +} + +/// Result of a single agent run. +#[derive(Debug)] +#[non_exhaustive] +pub struct AgentRunResult { + pub output: Option, + pub error: Option, + pub duration: Duration, + pub metadata: AgentRunMetadata, +} + +/// Captures a tool call with its input and output. +#[derive(Debug, Clone)] +pub struct ToolCallRecord { + pub tool_name: String, + pub input: serde_json::Value, + pub output: Option, + pub success: bool, + pub duration_ms: Option, + pub timed_out: bool, +} + +/// Snapshot of files in the test workspace. +#[derive(Debug, Clone)] +pub struct WorkspaceSnapshot { + pub files: Vec, +} + +#[derive(Debug, Clone)] +pub struct WorkspaceFileSnapshot { + pub relative_path: String, + pub size_bytes: u64, + pub modified_ms: Option, + pub checksum: u64, +} + +impl AgentRunResult { + pub fn is_success(&self) -> bool { + self.error.is_none() + } + + pub fn output_text(&self) -> Option { + self.output.as_ref().map(AgentOutput::to_text) + } +} + +/// Simple deterministic LLM provider for tests. +#[derive(Debug)] +pub struct MockAgentLLMProvider { + name: String, + responses: RwLock>, + default_response: RwLock, + last_request: RwLock>, + last_response: RwLock>, +} + +#[derive(Debug, Clone)] +enum MockLlmResponse { + Text(String), + ToolCall { + content: Option, + tool_calls: Vec, + }, + Error(String), +} + +impl MockAgentLLMProvider { + pub fn new(name: impl Into) -> Self { + Self { + name: name.into(), + responses: RwLock::new(VecDeque::new()), + default_response: RwLock::new("This is a mock response.".to_string()), + last_request: RwLock::new(None), + last_response: RwLock::new(None), + } + } + + pub async fn add_response(&self, response: impl Into) { + self.responses + .write() + .await + .push_back(MockLlmResponse::Text(response.into())); + } + + pub async fn add_tool_call_response( + &self, + tool_name: &str, + arguments: serde_json::Value, + content: Option, + ) { + let tool_call = ToolCall { + id: Uuid::now_v7().to_string(), + name: tool_name.to_string(), + arguments, + }; + self.responses.write().await.push_back(MockLlmResponse::ToolCall { + content, + tool_calls: vec![tool_call], + }); + } + + pub async fn add_error_response(&self, message: impl Into) { + self.responses + .write() + .await + .push_back(MockLlmResponse::Error(message.into())); + } + + pub async fn set_default_response(&self, response: impl Into) { + *self.default_response.write().await = response.into(); + } + + pub async fn pending_responses(&self) -> usize { + self.responses.read().await.len() + } + + pub async fn last_request(&self) -> Option { + self.last_request.read().await.clone() + } + + pub async fn last_response(&self) -> Option { + self.last_response.read().await.clone() + } +} + +#[async_trait] +impl mofa_kernel::agent::types::LLMProvider for MockAgentLLMProvider { + fn name(&self) -> &str { + &self.name + } + + async fn chat( + &self, + request: ChatCompletionRequest, + ) -> AgentResult { + *self.last_request.write().await = Some(request); + let response = { + let mut responses = self.responses.write().await; + if let Some(next) = responses.pop_front() { + next + } else { + MockLlmResponse::Text(self.default_response.read().await.clone()) + } + }; + + let response = match response { + MockLlmResponse::Text(content) => Ok(ChatCompletionResponse { + content: Some(content), + tool_calls: Some(Vec::::new()), + usage: None, + }), + MockLlmResponse::ToolCall { content, tool_calls } => Ok(ChatCompletionResponse { + content, + tool_calls: Some(tool_calls), + usage: None, + }), + MockLlmResponse::Error(message) => Err(AgentError::ExecutionFailed(message)), + }?; + + *self.last_response.write().await = Some(response.clone()); + Ok(response) + } +} + +struct SessionAwareExecutor { + executor: AgentExecutor, +} + +impl SessionAwareExecutor { + fn new(executor: AgentExecutor) -> Self { + Self { executor } + } + + async fn register_tool( + &self, + tool: Arc, + ) -> AgentResult<()> { + self.executor.register_tool(tool).await + } + + async fn update_prompt_context(&self, updater: F) + where + F: FnOnce(&mut mofa_foundation::agent::context::prompt::PromptContext), + { + self.executor.update_prompt_context(updater).await; + } +} + +#[async_trait] +impl MoFAAgent for SessionAwareExecutor { + fn id(&self) -> &str { + self.executor.id() + } + + fn name(&self) -> &str { + self.executor.name() + } + + fn capabilities(&self) -> &AgentCapabilities { + self.executor.capabilities() + } + + fn state(&self) -> mofa_kernel::agent::AgentState { + self.executor.state() + } + + async fn initialize(&mut self, ctx: &AgentContext) -> AgentResult<()> { + self.executor.initialize(ctx).await + } + + async fn execute( + &mut self, + input: AgentInput, + ctx: &AgentContext, + ) -> AgentResult { + let message = input.as_text().unwrap_or(""); + let session_key = ctx.session_id.as_deref().unwrap_or("default"); + let response = self.executor.process_message(session_key, message).await?; + Ok(AgentOutput::text(response)) + } + + async fn shutdown(&mut self) -> AgentResult<()> { + self.executor.shutdown().await + } +} + +struct TempWorkspace { + root: PathBuf, +} + +impl TempWorkspace { + fn new(prefix: &str) -> Result { + let root = std::env::temp_dir().join(format!("{}-{}", prefix, Uuid::now_v7())); + std::fs::create_dir_all(&root)?; + Ok(Self { root }) + } + + fn path(&self) -> &Path { + &self.root + } + + fn write_file(&self, relative_path: &Path, content: &str) -> Result { + let path = self.root.join(relative_path); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(&path, content)?; + Ok(path) + } + + fn snapshot(&self) -> WorkspaceSnapshot { + let mut files = Vec::new(); + collect_workspace_files(&self.root, &self.root, &mut files); + files.sort_by(|a, b| a.relative_path.cmp(&b.relative_path)); + WorkspaceSnapshot { files } + } +} + +impl Drop for TempWorkspace { + fn drop(&mut self) { + let _ = std::fs::remove_dir_all(&self.root); + } +} + +fn collect_workspace_files(root: &Path, current: &Path, files: &mut Vec) { + let entries = match std::fs::read_dir(current) { + Ok(entries) => entries, + Err(_) => return, + }; + + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + collect_workspace_files(root, &path, files); + continue; + } + + let metadata = match entry.metadata() { + Ok(metadata) => metadata, + Err(_) => continue, + }; + + let size_bytes = metadata.len(); + let modified_ms = metadata + .modified() + .ok() + .and_then(|time| time.duration_since(std::time::UNIX_EPOCH).ok()) + .map(|duration| duration.as_millis() as u64); + + let bytes = match std::fs::read(&path) { + Ok(bytes) => bytes, + Err(_) => Vec::new(), + }; + let checksum = hash_bytes(&bytes); + let relative_path = path + .strip_prefix(root) + .unwrap_or(&path) + .to_string_lossy() + .replace('\\', "/"); + + files.push(WorkspaceFileSnapshot { + relative_path, + size_bytes, + modified_ms, + checksum, + }); + } +} + +fn hash_bytes(bytes: &[u8]) -> u64 { + let mut hasher = DefaultHasher::new(); + bytes.hash(&mut hasher); + hasher.finish() +} + +/// Test harness for running real agent execution paths. +pub struct AgentTestRunner { + workspace: TempWorkspace, + session_id: String, + execution_id: String, + llm: Arc, + runner: AgentRunner, + mock_tools: Vec, +} + +impl AgentTestRunner { + pub async fn new() -> Result { + Self::with_config(AgentExecutorConfig::default()).await + } + + pub async fn with_config(config: AgentExecutorConfig) -> Result { + let workspace = TempWorkspace::new("mofa-agent-test")?; + let llm = Arc::new(MockAgentLLMProvider::new("mock-llm")); + let executor = AgentExecutor::with_config(llm.clone(), workspace.path(), config).await?; + let agent = SessionAwareExecutor::new(executor); + + let execution_id = Uuid::now_v7().to_string(); + let session_id = Uuid::now_v7().to_string(); + let context = AgentContext::with_session(&execution_id, &session_id); + + let runner = AgentRunner::with_context(agent, context).await?; + + Ok(Self { + workspace, + session_id, + execution_id, + llm, + runner, + mock_tools: Vec::new(), + }) + } + + pub fn workspace(&self) -> &Path { + self.workspace.path() + } + + pub fn session_id(&self) -> &str { + &self.session_id + } + + pub fn execution_id(&self) -> &str { + &self.execution_id + } + + pub fn mock_llm(&self) -> Arc { + Arc::clone(&self.llm) + } + + pub fn write_bootstrap_file( + &self, + filename: &str, + content: &str, + ) -> Result { + self.workspace.write_file(Path::new(filename), content) + } + + pub fn write_workspace_file( + &self, + relative_path: impl AsRef, + content: &str, + ) -> Result { + self.workspace.write_file(relative_path.as_ref(), content) + } + + pub async fn register_simple_tool(&self, tool: T) -> Result<(), AgentRunnerError> + where + T: mofa_foundation::agent::components::tool::SimpleTool + Send + Sync + 'static, + { + let tool_ref = as_tool(tool); + self.runner + .agent() + .register_tool(tool_ref) + .await + .map_err(AgentRunnerError::from) + } + + pub async fn register_mock_tool(&mut self, tool: MockTool) -> Result<(), AgentRunnerError> { + self.register_simple_tool(tool.clone()).await?; + self.mock_tools.push(tool); + Ok(()) + } + + pub async fn configure_prompt( + &self, + identity: Option, + bootstrap_files: Option>, + ) { + self.runner + .agent() + .update_prompt_context(|ctx| { + if let Some(identity) = identity { + ctx.set_identity(identity); + } + if let Some(files) = bootstrap_files { + ctx.set_bootstrap_files(files); + } + }) + .await; + } + + pub async fn run_text(&mut self, input: &str) -> Result { + self.run_input(AgentInput::text(input)).await + } + + pub async fn run_text_with_session( + &mut self, + session_id: &str, + input: &str, + ) -> Result { + let original_session = self.runner.context().session_id.clone(); + self.runner + .set_session_id(Some(session_id.to_string())); + let result = self.run_text(input).await; + self.runner.set_session_id(original_session); + result + } + + pub async fn run_texts( + &mut self, + inputs: &[&str], + ) -> Result, AgentRunnerError> { + let mut results = Vec::with_capacity(inputs.len()); + for input in inputs { + results.push(self.run_text(input).await?); + } + Ok(results) + } + + pub async fn run_input( + &mut self, + input: AgentInput, + ) -> Result { + let started_at = Utc::now(); + let runner_state_before = self.runner.state().await; + let runner_stats_before = self.runner.stats().await; + let agent_state_before = self.runner.agent_state(); + let workspace_snapshot_before = self.workspace.snapshot(); + let timer = Instant::now(); + let result = self.runner.execute(input).await; + let duration = timer.elapsed(); + let runner_state_after = self.runner.state().await; + let runner_stats_after = self.runner.stats().await; + let agent_state_after = self.runner.agent_state(); + let session_snapshot = self.load_session_snapshot().await; + let workspace_snapshot_after = self.workspace.snapshot(); + let tool_calls = self.collect_tool_calls().await; + let llm_last_request = self.llm.last_request().await; + let llm_last_response = self.llm.last_response().await; + + let (output, error) = match result { + Ok(output) => (Some(output), None), + Err(err) => (None, Some(err)), + }; + + let metadata = AgentRunMetadata { + agent_id: self.runner.agent().id().to_string(), + agent_name: self.runner.agent().name().to_string(), + execution_id: self.runner.context().execution_id.clone(), + session_id: self.runner.context().session_id.clone(), + workspace_root: self.workspace.path().to_path_buf(), + runner_state_before, + runner_state_after, + runner_stats_before, + runner_stats_after, + agent_state_before, + agent_state_after, + started_at, + session_snapshot, + tool_calls, + llm_last_request, + llm_last_response, + workspace_snapshot_before, + workspace_snapshot_after, + }; + + Ok(AgentRunResult { + output, + error, + duration, + metadata, + }) + } + + pub async fn shutdown(self) -> Result<(), AgentRunnerError> { + self.runner.shutdown().await?; + Ok(()) + } + + async fn load_session_snapshot(&self) -> Option { + let session_id = self.runner.context().session_id.as_deref()?; + let storage = JsonlSessionStorage::new(self.workspace.path()).await.ok()?; + storage.load(session_id).await.ok()? + } + + async fn collect_tool_calls(&self) -> Vec { + let mut records = Vec::new(); + for tool in &self.mock_tools { + let calls = tool.history().await; + let results = tool.results().await; + for (idx, call) in calls.into_iter().enumerate() { + let result = results.get(idx).cloned(); + let (output, success, duration_ms, timed_out) = match result { + Some(result) => { + let duration_ms = result + .metadata + .get("duration_ms") + .and_then(|value| value.parse::().ok()); + let timed_out = result + .error + .as_ref() + .map(|err| err.contains("timed out")) + .unwrap_or(false); + ( + Some(result.output.clone()), + result.success, + duration_ms, + timed_out, + ) + } + None => (None, false, None, false), + }; + records.push(ToolCallRecord { + tool_name: tool.name().to_string(), + input: call.arguments, + output, + success, + duration_ms, + timed_out, + }); + } + } + records + } +} diff --git a/tests/src/artifact.rs b/tests/src/artifact.rs new file mode 100644 index 000000000..b121cd515 --- /dev/null +++ b/tests/src/artifact.rs @@ -0,0 +1,255 @@ +//! Canonical run artifacts for DSL-backed agent test execution. +//! +//! These types provide the stable, serializable output model for DSL runs, +//! built from the existing runner result. + +use crate::agent_runner::{AgentRunResult, ToolCallRecord, WorkspaceFileSnapshot, WorkspaceSnapshot}; +use crate::dsl::{AssertionOutcome, TestCaseDsl}; +use mofa_foundation::agent::session::Session; +use serde::{Deserialize, Serialize}; + +// Top-level artifact emitted for a single DSL-backed case execution. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AgentRunArtifact { + pub case_name: String, + pub status: String, + pub output_text: Option, + pub runner_error: Option, + pub duration_ms: u64, + pub started_at_ms: u64, + pub execution_id: String, + pub session_id: Option, + pub workspace_root: String, + pub agent: AgentArtifact, + pub assertions: Vec, + pub tool_calls: Vec, + pub llm_request: Option, + pub llm_response: Option, + pub session_snapshot: Option, + pub workspace_before: WorkspaceSnapshotArtifact, + pub workspace_after: WorkspaceSnapshotArtifact, +} + +// Compact identity data for the agent used by the run. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AgentArtifact { + pub id: String, + pub name: String, +} + +// Tool execution records are flattened into the artifact for downstream checks. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ToolCallArtifact { + pub tool_name: String, + pub input: serde_json::Value, + pub output: Option, + pub success: bool, + pub duration_ms: Option, + pub timed_out: bool, +} + +// LLM request/response types keep only the fields needed for stable inspection. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LlmRequestArtifact { + pub model: Option, + pub temperature: Option, + pub max_tokens: Option, + pub messages: Vec, + pub tool_names: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LlmResponseArtifact { + pub content: Option, + pub tool_calls: Vec, + pub usage: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LlmMessageArtifact { + pub role: String, + pub content: Option, + pub tool_call_id: Option, + pub tool_calls: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LlmToolCallArtifact { + pub id: String, + pub name: String, + pub arguments: serde_json::Value, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TokenUsageArtifact { + pub prompt_tokens: u32, + pub completion_tokens: u32, + pub total_tokens: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SessionArtifact { + pub messages: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SessionMessageArtifact { + pub role: String, + pub content: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WorkspaceSnapshotArtifact { + pub files: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WorkspaceFileArtifact { + pub relative_path: String, + pub size_bytes: u64, + pub modified_ms: Option, + pub checksum: u64, +} + +impl AgentRunArtifact { + // Build the canonical artifact from the current runner result plus DSL assertion outcomes. + pub fn from_run_result( + case: &TestCaseDsl, + result: &AgentRunResult, + assertions: Vec, + ) -> Self { + Self { + case_name: case.name.clone(), + status: if result.is_success() && assertions.iter().all(|item| item.passed) { + "passed".to_string() + } else { + "failed".to_string() + }, + output_text: result.output_text(), + runner_error: result.error.as_ref().map(ToString::to_string), + duration_ms: result.duration.as_millis() as u64, + started_at_ms: result.metadata.started_at.timestamp_millis() as u64, + execution_id: result.metadata.execution_id.clone(), + session_id: result.metadata.session_id.clone(), + workspace_root: result.metadata.workspace_root.display().to_string(), + agent: AgentArtifact { + id: result.metadata.agent_id.clone(), + name: result.metadata.agent_name.clone(), + }, + assertions, + tool_calls: result + .metadata + .tool_calls + .iter() + .map(tool_call_artifact) + .collect(), + llm_request: result + .metadata + .llm_last_request + .as_ref() + .map(|request| LlmRequestArtifact { + model: request.model.clone(), + temperature: request.temperature, + max_tokens: request.max_tokens, + messages: request + .messages + .iter() + .map(|message| LlmMessageArtifact { + role: message.role.clone(), + content: message.content.clone(), + tool_call_id: message.tool_call_id.clone(), + tool_calls: message + .tool_calls + .clone() + .unwrap_or_default() + .into_iter() + .map(llm_tool_call_artifact) + .collect(), + }) + .collect(), + tool_names: request + .tools + .clone() + .unwrap_or_default() + .into_iter() + .map(|tool| tool.name) + .collect(), + }), + llm_response: result + .metadata + .llm_last_response + .as_ref() + .map(|response| LlmResponseArtifact { + content: response.content.clone(), + tool_calls: response + .tool_calls + .clone() + .unwrap_or_default() + .into_iter() + .map(llm_tool_call_artifact) + .collect(), + usage: response.usage.as_ref().map(|usage| TokenUsageArtifact { + prompt_tokens: usage.prompt_tokens, + completion_tokens: usage.completion_tokens, + total_tokens: usage.total_tokens, + }), + }), + session_snapshot: result + .metadata + .session_snapshot + .as_ref() + .map(session_artifact), + workspace_before: workspace_snapshot_artifact(&result.metadata.workspace_snapshot_before), + workspace_after: workspace_snapshot_artifact(&result.metadata.workspace_snapshot_after), + } + } +} + +fn tool_call_artifact(record: &ToolCallRecord) -> ToolCallArtifact { + ToolCallArtifact { + tool_name: record.tool_name.clone(), + input: record.input.clone(), + output: record.output.clone(), + success: record.success, + duration_ms: record.duration_ms, + timed_out: record.timed_out, + } +} + +fn llm_tool_call_artifact(tool_call: mofa_kernel::agent::types::ToolCall) -> LlmToolCallArtifact { + LlmToolCallArtifact { + id: tool_call.id, + name: tool_call.name, + arguments: tool_call.arguments, + } +} + +// Session snapshots are reduced to ordered role/content pairs for stable comparisons. +fn session_artifact(session: &Session) -> SessionArtifact { + SessionArtifact { + messages: session + .messages + .iter() + .map(|message| SessionMessageArtifact { + role: message.role.clone(), + content: message.content.clone(), + }) + .collect(), + } +} + +// Workspace snapshots preserve a compact file-level view before and after execution. +fn workspace_snapshot_artifact(snapshot: &WorkspaceSnapshot) -> WorkspaceSnapshotArtifact { + WorkspaceSnapshotArtifact { + files: snapshot.files.iter().map(workspace_file_artifact).collect(), + } +} + +fn workspace_file_artifact(file: &WorkspaceFileSnapshot) -> WorkspaceFileArtifact { + WorkspaceFileArtifact { + relative_path: file.relative_path.clone(), + size_bytes: file.size_bytes, + modified_ms: file.modified_ms, + checksum: file.checksum, + } +} diff --git a/tests/src/assertions.rs b/tests/src/assertions.rs index 1a0b58594..946efe134 100644 --- a/tests/src/assertions.rs +++ b/tests/src/assertions.rs @@ -84,3 +84,173 @@ macro_rules! assert_bus_message_sent { ); }}; } + +/// Assert a session's messages match the expected (role, content) pairs. +pub fn assert_session_messages( + session: &mofa_foundation::agent::session::Session, + expected: &[(&str, &str)], +) { + assert_eq!( + session.messages.len(), + expected.len(), + "Expected {} session messages, got {}", + expected.len(), + session.messages.len() + ); + + for (idx, (role, content)) in expected.iter().enumerate() { + let msg = &session.messages[idx]; + assert_eq!( + msg.role, *role, + "Expected role '{}' at index {}, got '{}'", + role, idx, msg.role + ); + assert_eq!( + msg.content, *content, + "Expected content '{}' at index {}, got '{}'", + content, idx, msg.content + ); + } +} + +/// Assert the most recent tool result matches the expected JSON output. +/// +/// # Example +/// ```ignore +/// assert_tool_last_result!(tool, json!("done")); +/// ``` +#[macro_export] +macro_rules! assert_tool_last_result { + ($tool:expr, $expected:expr) => {{ + let result = $tool + .last_result() + .await + .expect("Expected tool to have a result, but it was never executed"); + let expected = $expected; + assert_eq!( + result.output, expected, + "Expected latest tool result {:?}, got {:?}", + expected, result.output + ); + }}; +} + +/// Assert the agent run produced the expected output text. +/// +/// # Example +/// ```ignore +/// assert_agent_output_text!(result, "hello"); +/// ``` +#[macro_export] +macro_rules! assert_agent_output_text { + ($result:expr, $expected:expr) => {{ + let expected = $expected; + let actual = $result.output_text(); + assert_eq!( + actual.as_deref(), + Some(expected), + "Expected agent output {:?}, got {:?}", + expected, + actual + ); + }}; +} + +/// Assert the agent run failed with an error containing the given substring. +/// +/// # Example +/// ```ignore +/// assert_run_failed_with!(result, "timeout"); +/// ``` +#[macro_export] +macro_rules! assert_run_failed_with { + ($result:expr, $pattern:expr) => {{ + let pattern = $pattern; + let error = $result + .error + .as_ref() + .expect("Expected run to fail, but it succeeded"); + let message = error.to_string(); + assert!( + message.contains(pattern), + "Expected error containing {:?}, got {:?}", + pattern, + message + ); + }}; +} + +/// Assert the workspace snapshot contains a file with the given relative path. +/// +/// # Example +/// ```ignore +/// assert_workspace_contains_file!(snapshot, "sessions/demo.jsonl"); +/// ``` +#[macro_export] +macro_rules! assert_workspace_contains_file { + ($snapshot:expr, $relative_path:expr) => {{ + let relative_path = $relative_path; + let found = $snapshot + .files + .iter() + .any(|file| file.relative_path == relative_path); + assert!( + found, + "Expected workspace snapshot to contain {:?}, found paths: {:?}", + relative_path, + $snapshot + .files + .iter() + .map(|file| file.relative_path.as_str()) + .collect::>() + ); + }}; +} + +/// Assert the run metadata captured a tool call with the given tool name. +/// +/// # Example +/// ```ignore +/// assert_run_recorded_tool_call!(result, "echo_tool"); +/// ``` +#[macro_export] +macro_rules! assert_run_recorded_tool_call { + ($result:expr, $tool_name:expr) => {{ + let tool_name = $tool_name; + let found = $result + .metadata + .tool_calls + .iter() + .any(|record| record.tool_name == tool_name); + assert!( + found, + "Expected run metadata to contain tool call {:?}, found tool calls: {:?}", + tool_name, + $result + .metadata + .tool_calls + .iter() + .map(|record| record.tool_name.as_str()) + .collect::>() + ); + }}; +} + +/// Assert the runner total execution count after a run matches the expected value. +/// +/// # Example +/// ```ignore +/// assert_runner_total_executions!(result, 1); +/// ``` +#[macro_export] +macro_rules! assert_runner_total_executions { + ($result:expr, $expected:expr) => {{ + let expected = $expected; + let actual = $result.metadata.runner_stats_after.total_executions; + assert_eq!( + actual, expected, + "Expected runner total executions {}, got {}", + expected, actual + ); + }}; +} diff --git a/tests/src/dsl.rs b/tests/src/dsl.rs new file mode 100644 index 000000000..deb730f57 --- /dev/null +++ b/tests/src/dsl.rs @@ -0,0 +1,309 @@ +//! Minimal TOML DSL support for the testing MVP. +//! +//! This module keeps the schema intentionally small so contributors can define +//! simple agent tests without introducing a full DSL framework yet. + +use crate::agent_runner::{AgentRunResult, AgentRunnerError, AgentTestRunner}; +use crate::tools::MockTool; +use mofa_foundation::agent::context::prompt::AgentIdentity; +use mofa_kernel::agent::components::tool::ToolResult; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::path::Path; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum DslError { + #[error("failed to read DSL file: {0}")] + Io(#[from] std::io::Error), + + #[error("failed to parse TOML DSL: {0}")] + Toml(#[from] toml::de::Error), + + #[error("runner error: {0}")] + Runner(#[from] AgentRunnerError), + + #[error("test case must define either `prompt` or `input`")] + MissingPrompt, + + #[error("expected output to contain `{expected}`, got `{actual}`")] + ExpectedContains { expected: String, actual: String }, + + #[error("expected tool `{tool}` to be called, found tool calls: {actual:?}")] + ExpectedToolCall { tool: String, actual: Vec }, + + #[error("run produced no text output")] + MissingOutput, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct TestCaseDsl { + pub name: String, + pub prompt: Option, + pub input: Option, + pub expected_text: Option, + #[serde(default)] + pub bootstrap_files: Vec, + pub agent: Option, + #[serde(default)] + pub tools: Vec, + pub llm: Option, + #[serde(rename = "assert")] + pub assertions: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct BootstrapFileDsl { + pub path: String, + pub content: String, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct AgentDsl { + pub name: Option, + pub description: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct ToolDsl { + pub name: String, + pub description: String, + pub schema: Value, + pub result: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct LlmDsl { + #[serde(default)] + pub responses: Vec, + #[serde(default)] + pub steps: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct LlmStepDsl { + #[serde(rename = "type")] + pub kind: LlmStepKind, + pub content: Option, + pub tool: Option, + pub arguments: Option, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum LlmStepKind { + Text, + ToolCall, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct AssertDsl { + pub contains: Option, + pub tool_called: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AssertionOutcome { + pub kind: String, + pub expected: Value, + pub actual: Value, + pub passed: bool, +} + +impl TestCaseDsl { + pub fn from_toml_str(input: &str) -> Result { + Ok(toml::from_str(input)?) + } + + pub fn from_toml_file(path: impl AsRef) -> Result { + let input = std::fs::read_to_string(path)?; + Self::from_toml_str(&input) + } + + fn execution_input(&self) -> Result<&str, DslError> { + self.prompt + .as_deref() + .or(self.input.as_deref()) + .ok_or(DslError::MissingPrompt) + } +} + +pub async fn run_test_case(case: &TestCaseDsl) -> Result { + let result = execute_test_case(case).await?; + let assertions = collect_assertion_outcomes(case, &result); + if let Some(error) = assertion_error_from_outcomes(&assertions) { + return Err(error); + } + Ok(result) +} + +pub async fn execute_test_case(case: &TestCaseDsl) -> Result { + let mut runner = AgentTestRunner::new().await?; + configure_runner_from_test_case(case, &mut runner).await?; + let result = runner.run_text(case.execution_input()?).await?; + runner.shutdown().await?; + Ok(result) +} + +pub async fn configure_runner_from_test_case( + case: &TestCaseDsl, + runner: &mut AgentTestRunner, +) -> Result<(), DslError> { + if !case.bootstrap_files.is_empty() { + let mut bootstrap_paths = Vec::with_capacity(case.bootstrap_files.len()); + for file in &case.bootstrap_files { + runner.write_bootstrap_file(&file.path, &file.content)?; + bootstrap_paths.push(file.path.clone()); + } + runner + .configure_prompt(agent_identity(case.agent.as_ref()), Some(bootstrap_paths)) + .await; + } else if case.agent.is_some() { + runner + .configure_prompt(agent_identity(case.agent.as_ref()), None) + .await; + } + + for tool in &case.tools { + let mock_tool = MockTool::new(&tool.name, &tool.description, tool.schema.clone()); + if let Some(result) = &tool.result { + mock_tool + .set_result(ToolResult::success(result.clone())) + .await; + } + runner.register_mock_tool(mock_tool).await?; + } + + // Queue deterministic LLM responses before execution so the DSL stays a thin + // adapter over the existing runner harness. + if let Some(llm) = &case.llm { + if !llm.steps.is_empty() { + for step in &llm.steps { + match step.kind { + LlmStepKind::Text => { + runner + .mock_llm() + .add_response(step.content.clone().unwrap_or_default()) + .await; + } + LlmStepKind::ToolCall => { + runner + .mock_llm() + .add_tool_call_response( + step.tool.as_deref().unwrap_or_default(), + step.arguments.clone().unwrap_or(Value::Null), + step.content.clone(), + ) + .await; + } + } + } + } else { + for response in &llm.responses { + runner.mock_llm().add_response(response).await; + } + } + } + Ok(()) +} + +fn agent_identity(agent: Option<&AgentDsl>) -> Option { + let agent = agent?; + let name = agent.name.clone()?; + Some(AgentIdentity { + name, + description: agent.description.clone().unwrap_or_default(), + icon: None, + }) +} + +fn expected_contains(case: &TestCaseDsl) -> Option<&str> { + // Prefer the explicit assertion block when present, while keeping + // `expected_text` as a lightweight shorthand for the MVP schema. + case.assertions + .as_ref() + .and_then(|assertions| assertions.contains.as_deref()) + .or(case.expected_text.as_deref()) +} + +fn expected_tool_call(case: &TestCaseDsl) -> Option<&str> { + case.assertions + .as_ref() + .and_then(|assertions| assertions.tool_called.as_deref()) +} + +pub fn collect_assertion_outcomes(case: &TestCaseDsl, result: &AgentRunResult) -> Vec { + let mut outcomes = Vec::new(); + + if let Some(expected) = expected_contains(case) { + let actual = result.output_text(); + outcomes.push(AssertionOutcome { + kind: "contains".to_string(), + expected: Value::String(expected.to_string()), + actual: actual + .clone() + .map(Value::String) + .unwrap_or(Value::Null), + passed: actual + .as_ref() + .map(|value| value.contains(expected)) + .unwrap_or(false), + }); + } + + if let Some(expected_tool) = expected_tool_call(case) { + let actual = result + .metadata + .tool_calls + .iter() + .map(|record| Value::String(record.tool_name.clone())) + .collect::>(); + outcomes.push(AssertionOutcome { + kind: "tool_called".to_string(), + expected: Value::String(expected_tool.to_string()), + actual: Value::Array(actual.clone()), + passed: actual + .iter() + .any(|tool| tool.as_str() == Some(expected_tool)), + }); + } + + outcomes +} + +pub fn assertion_error_from_outcomes(outcomes: &[AssertionOutcome]) -> Option { + for outcome in outcomes { + if outcome.passed { + continue; + } + + match outcome.kind.as_str() { + "contains" => { + return if outcome.actual.is_null() { + Some(DslError::MissingOutput) + } else { + Some(DslError::ExpectedContains { + expected: outcome.expected.as_str().unwrap_or_default().to_string(), + actual: outcome.actual.as_str().unwrap_or_default().to_string(), + }) + }; + } + "tool_called" => { + let actual = outcome + .actual + .as_array() + .into_iter() + .flatten() + .filter_map(|value| value.as_str().map(ToString::to_string)) + .collect::>(); + return Some(DslError::ExpectedToolCall { + tool: outcome.expected.as_str().unwrap_or_default().to_string(), + actual, + }); + } + _ => continue, + } + } + + None +} diff --git a/tests/src/lib.rs b/tests/src/lib.rs index 3368dc49b..2490ae668 100644 --- a/tests/src/lib.rs +++ b/tests/src/lib.rs @@ -4,16 +4,33 @@ //! control for testing MoFA agents. pub mod adversarial; +pub mod agent_runner; +pub mod artifact; pub mod assertions; pub mod backend; pub mod bus; pub mod clock; +pub mod dsl; pub mod report; pub mod tools; pub use backend::MockLLMBackend; pub use bus::MockAgentBus; pub use clock::{Clock, MockClock, SystemClock}; +pub use dsl::{ + assertion_error_from_outcomes, collect_assertion_outcomes, configure_runner_from_test_case, + execute_test_case, run_test_case, AgentDsl, AssertDsl, AssertionOutcome, BootstrapFileDsl, + DslError, LlmDsl, LlmStepDsl, LlmStepKind, TestCaseDsl, ToolDsl, +}; +pub use agent_runner::{ + AgentRunMetadata, AgentRunResult, AgentRunnerError, AgentTestRunner, MockAgentLLMProvider, + ToolCallRecord, WorkspaceFileSnapshot, WorkspaceSnapshot, +}; +pub use artifact::{ + AgentArtifact, AgentRunArtifact, LlmMessageArtifact, LlmRequestArtifact, LlmResponseArtifact, + LlmToolCallArtifact, SessionArtifact, SessionMessageArtifact, TokenUsageArtifact, + ToolCallArtifact, WorkspaceFileArtifact, WorkspaceSnapshotArtifact, +}; pub use report::{ JsonFormatter, ReportFormatter, TestCaseResult, TestReport, TestReportBuilder, TestStatus, TextFormatter, diff --git a/tests/src/tools.rs b/tests/src/tools.rs index 48356b551..fbc6bb5b6 100644 --- a/tests/src/tools.rs +++ b/tests/src/tools.rs @@ -20,6 +20,7 @@ pub struct MockTool { category: ToolCategory, pub stubbed_result: Arc>, pub call_history: Arc>>, + pub result_history: Arc>>, failure_queue: Arc>>, failure_patterns: Arc>>, result_sequence: Arc>>, @@ -37,6 +38,7 @@ impl MockTool { "Mock execution default", ))), call_history: Arc::new(RwLock::new(Vec::new())), + result_history: Arc::new(RwLock::new(Vec::new())), failure_queue: Arc::new(RwLock::new(VecDeque::new())), failure_patterns: Arc::new(RwLock::new(Vec::new())), result_sequence: Arc::new(RwLock::new(VecDeque::new())), @@ -58,6 +60,16 @@ impl MockTool { self.call_history.read().await.len() } + /// Retrieve a clone of the full result history. + pub async fn results(&self) -> Vec { + self.result_history.read().await.clone() + } + + /// Returns the most recent result, or `None` if never executed. + pub async fn last_result(&self) -> Option { + self.result_history.read().await.last().cloned() + } + /// Queue failures for the next N calls. pub async fn fail_next(&self, count: usize, error_msg: &str) { let mut queue = self.failure_queue.write().await; @@ -110,12 +122,18 @@ impl SimpleTool for MockTool { async fn execute(&self, input: ToolInput) -> ToolResult { self.call_history.write().await.push(input.clone()); + let start = std::time::Instant::now(); // 1. Drain failure queue { let mut queue = self.failure_queue.write().await; if let Some(err) = queue.pop_front() { - return ToolResult::failure(err); + let mut result = ToolResult::failure(err); + result + .metadata + .insert("duration_ms".to_string(), start.elapsed().as_millis().to_string()); + self.result_history.write().await.push(result.clone()); + return result; } } @@ -124,7 +142,12 @@ impl SimpleTool for MockTool { let patterns = self.failure_patterns.read().await; for (pattern, err) in patterns.iter() { if input.arguments == *pattern { - return ToolResult::failure(err); + let mut result = ToolResult::failure(err); + result + .metadata + .insert("duration_ms".to_string(), start.elapsed().as_millis().to_string()); + self.result_history.write().await.push(result.clone()); + return result; } } } @@ -133,11 +156,21 @@ impl SimpleTool for MockTool { { let mut seq = self.result_sequence.write().await; if let Some(result) = seq.pop_front() { + let mut result = result; + result + .metadata + .insert("duration_ms".to_string(), start.elapsed().as_millis().to_string()); + self.result_history.write().await.push(result.clone()); return result; } } - self.stubbed_result.read().await.clone() + let mut result = self.stubbed_result.read().await.clone(); + result + .metadata + .insert("duration_ms".to_string(), start.elapsed().as_millis().to_string()); + self.result_history.write().await.push(result.clone()); + result } fn category(&self) -> ToolCategory { diff --git a/tests/tests/agent_runner_tests.rs b/tests/tests/agent_runner_tests.rs new file mode 100644 index 000000000..250b9e62d --- /dev/null +++ b/tests/tests/agent_runner_tests.rs @@ -0,0 +1,311 @@ +use mofa_testing::agent_runner::AgentTestRunner; +use mofa_testing::assertions::assert_session_messages; +use mofa_testing::tools::MockTool; +use serde_json::json; + +#[tokio::test] +async fn agent_runner_executes_and_captures_output() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner + .mock_llm() + .add_response("Mocked response") + .await; + + let result = runner + .run_text("hello") + .await + .expect("run should succeed"); + + assert!(result.is_success()); + assert_eq!(result.output_text().as_deref(), Some("Mocked response")); + assert_eq!( + result.metadata.session_id.as_deref(), + Some(runner.session_id()) + ); + assert_eq!(result.metadata.execution_id, runner.execution_id()); + assert_eq!(result.metadata.runner_stats_before.total_executions, 0); + assert_eq!(result.metadata.runner_stats_after.total_executions, 1); + assert!(result.metadata.session_snapshot.is_some()); + let snapshot = result.metadata.session_snapshot.as_ref().unwrap(); + assert_eq!(snapshot.len(), 2); + assert_session_messages(snapshot, &[("user", "hello"), ("assistant", "Mocked response")]); + + let expected_session_path = format!("sessions/{}.jsonl", runner.session_id()); + assert!( + !result + .metadata + .workspace_snapshot_before + .files + .iter() + .any(|file| file.relative_path == expected_session_path) + ); + assert!( + result + .metadata + .workspace_snapshot_after + .files + .iter() + .any(|file| file.relative_path == expected_session_path) + ); + + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn agent_runner_creates_isolated_workspaces() { + let mut runner_a = AgentTestRunner::new().await.expect("runner A initializes"); + let mut runner_b = AgentTestRunner::new().await.expect("runner B initializes"); + + assert_ne!(runner_a.workspace(), runner_b.workspace()); + + runner_a + .mock_llm() + .add_response("Response A") + .await; + runner_b + .mock_llm() + .add_response("Response B") + .await; + + let _ = runner_a + .run_text("hi") + .await + .expect("runner A executes"); + let _ = runner_b + .run_text("hi") + .await + .expect("runner B executes"); + + // Session files should exist in each separate workspace. + let session_a = runner_a + .workspace() + .join("sessions") + .join(format!("{}.jsonl", runner_a.session_id())); + let session_b = runner_b + .workspace() + .join("sessions") + .join(format!("{}.jsonl", runner_b.session_id())); + + assert!(session_a.exists()); + assert!(session_b.exists()); + + runner_a.shutdown().await.expect("shutdown A"); + runner_b.shutdown().await.expect("shutdown B"); +} + +#[tokio::test] +async fn agent_runner_executes_tool_calls() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + + let tool = MockTool::new( + "echo_tool", + "Echo the provided input", + json!({ + "type": "object", + "properties": { + "input": { "type": "string" } + }, + "required": ["input"] + }), + ); + + runner + .register_mock_tool(tool.clone()) + .await + .expect("tool registered"); + + // First response triggers a tool call; second response is the final answer. + runner + .mock_llm() + .add_tool_call_response("echo_tool", json!({ "input": "ping" }), None) + .await; + runner + .mock_llm() + .add_response("Final response") + .await; + + let result = runner + .run_text("use tool") + .await + .expect("run should succeed"); + + // Tool call should be captured in both tool history and run metadata. + assert_eq!(result.output_text().as_deref(), Some("Final response")); + assert_eq!(tool.call_count().await, 1); + let last_call = tool.last_call().await.expect("tool call captured"); + assert_eq!(last_call.arguments, json!({ "input": "ping" })); + assert_eq!(result.metadata.tool_calls.len(), 1); + let record = &result.metadata.tool_calls[0]; + assert_eq!(record.tool_name, "echo_tool"); + assert_eq!(record.input, json!({ "input": "ping" })); + assert!(record.success); + assert_eq!( + record.output, + Some(json!("Mock execution default")) + ); + assert!(record.duration_ms.is_some()); + + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn agent_runner_loads_bootstrap_files() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner + .write_bootstrap_file("AGENTS.md", "Bootstrap content for agent test.") + .expect("bootstrap file written"); + + runner + .mock_llm() + .add_response("Bootstrapped response") + .await; + + let _ = runner + .run_text("check prompt") + .await + .expect("run should succeed"); + + let request = runner + .mock_llm() + .last_request() + .await + .expect("request captured"); + // Validate the system message includes the bootstrap content. + let system_message = request + .messages + .first() + .and_then(|msg| msg.content.as_deref()) + .expect("system message content"); + + assert!(system_message.contains("AGENTS.md")); + assert!(system_message.contains("Bootstrap content for agent test.")); + + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn agent_runner_supports_multi_turn_runs() { + // Multi-turn helper should keep the same session and extend history. + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner.mock_llm().add_response("First reply").await; + runner.mock_llm().add_response("Second reply").await; + + let results = runner + .run_texts(&["turn one", "turn two"]) + .await + .expect("multi-turn run succeeds"); + + assert_eq!(results.len(), 2); + assert_eq!(results[0].output_text().as_deref(), Some("First reply")); + assert_eq!(results[1].output_text().as_deref(), Some("Second reply")); + + // Session snapshot should contain two user/assistant pairs. + let snapshot = results + .last() + .and_then(|result| result.metadata.session_snapshot.as_ref()) + .expect("session snapshot captured"); + assert_eq!(snapshot.len(), 4); + assert_session_messages( + snapshot, + &[ + ("user", "turn one"), + ("assistant", "First reply"), + ("user", "turn two"), + ("assistant", "Second reply"), + ], + ); + + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn agent_runner_customizes_prompt_identity_and_bootstraps() { + // Custom identity + bootstrap list should appear in the system prompt. + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner + .write_bootstrap_file("CUSTOM.md", "Custom bootstrap content.") + .expect("bootstrap file written"); + runner + .configure_prompt( + Some(mofa_foundation::agent::context::prompt::AgentIdentity { + name: "TestAgent".to_string(), + description: "Custom identity".to_string(), + icon: None, + }), + Some(vec!["CUSTOM.md".to_string()]), + ) + .await; + + runner.mock_llm().add_response("Custom response").await; + let _ = runner + .run_text("custom prompt") + .await + .expect("run should succeed"); + + let request = runner + .mock_llm() + .last_request() + .await + .expect("request captured"); + // Validate custom identity and bootstrap content. + let system_message = request + .messages + .first() + .and_then(|msg| msg.content.as_deref()) + .expect("system message content"); + + assert!(system_message.contains("TestAgent")); + assert!(system_message.contains("Custom identity")); + assert!(system_message.contains("CUSTOM.md")); + assert!(system_message.contains("Custom bootstrap content.")); + + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn agent_runner_captures_llm_failure() { + // LLM failures should surface in AgentRunResult with failed stats. + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner + .mock_llm() + .add_error_response("mock failure") + .await; + + let result = runner + .run_text("trigger failure") + .await + .expect("run should return result"); + + assert!(!result.is_success()); + let error = result.error.expect("error captured"); + assert!(error.to_string().contains("mock failure")); + assert_eq!(result.metadata.runner_stats_after.failed_executions, 1); + + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn agent_runner_allows_custom_session_keys() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner + .mock_llm() + .add_response("Custom session response") + .await; + + let result = runner + .run_text_with_session("custom-session", "hello session") + .await + .expect("run should succeed"); + + assert_eq!( + result.metadata.session_id.as_deref(), + Some("custom-session") + ); + let session_path = runner + .workspace() + .join("sessions") + .join("custom-session.jsonl"); + assert!(session_path.exists()); + + runner.shutdown().await.expect("shutdown succeeds"); +} diff --git a/tests/tests/artifact_tests.rs b/tests/tests/artifact_tests.rs new file mode 100644 index 000000000..6876ef661 --- /dev/null +++ b/tests/tests/artifact_tests.rs @@ -0,0 +1,60 @@ +//! Tests for canonical DSL run artifact generation. + +use mofa_testing::{ + AgentRunArtifact, TestCaseDsl, assertion_error_from_outcomes, collect_assertion_outcomes, + execute_test_case, +}; + +#[tokio::test] +async fn artifact_contains_core_run_data() { + let case = TestCaseDsl::from_toml_file(concat!( + env!("CARGO_MANIFEST_DIR"), + "/examples/tool_agent.toml" + )) + .expect("tool DSL example should parse"); + + let result = execute_test_case(&case) + .await + .expect("DSL case should execute"); + let assertions = collect_assertion_outcomes(&case, &result); + let artifact = AgentRunArtifact::from_run_result(&case, &result, assertions); + + assert_eq!(artifact.case_name, "tool_agent_run"); + assert_eq!(artifact.status, "passed"); + assert_eq!(artifact.output_text.as_deref(), Some("Tool execution complete")); + assert_eq!(artifact.tool_calls.len(), 1); + assert_eq!(artifact.tool_calls[0].tool_name, "echo_tool"); + assert!(!artifact.agent.name.is_empty()); + assert!(artifact.llm_request.is_some()); + assert!(artifact.workspace_after.files.iter().any(|file| { + file.relative_path.ends_with(".jsonl") + })); +} + +#[tokio::test] +async fn artifact_captures_failed_assertions() { + let case = TestCaseDsl::from_toml_str( + r#" +name = "failing_case" +prompt = "Say hello" + +[llm] +responses = ["wrong output"] + +[assert] +contains = "expected text" +"#, + ) + .expect("inline DSL should parse"); + + let result = execute_test_case(&case) + .await + .expect("DSL case should execute"); + let assertions = collect_assertion_outcomes(&case, &result); + let artifact = AgentRunArtifact::from_run_result(&case, &result, assertions.clone()); + + assert_eq!(artifact.status, "failed"); + assert_eq!(artifact.assertions.len(), 1); + assert!(!artifact.assertions[0].passed); + assert!(assertion_error_from_outcomes(&assertions).is_some()); +} diff --git a/tests/tests/assertion_macro_tests.rs b/tests/tests/assertion_macro_tests.rs index 55c2af2f4..c27aee010 100644 --- a/tests/tests/assertion_macro_tests.rs +++ b/tests/tests/assertion_macro_tests.rs @@ -1,11 +1,11 @@ -//! Tests for assertion macros: assert_tool_called!, assert_tool_called_with!, -//! assert_infer_called!, assert_bus_message_sent!. +//! Tests for assertion macros and assertion helpers used by the testing crate. use mofa_foundation::agent::components::tool::SimpleTool; use mofa_foundation::orchestrator::{ModelOrchestrator, ModelProviderConfig, ModelType}; use mofa_kernel::agent::components::tool::ToolInput; use mofa_kernel::bus::CommunicationMode; use mofa_kernel::message::AgentMessage; +use mofa_testing::agent_runner::AgentTestRunner; use mofa_testing::backend::MockLLMBackend; use mofa_testing::bus::MockAgentBus; use mofa_testing::tools::MockTool; @@ -121,3 +121,89 @@ async fn assert_bus_message_sent_panics_when_no_message_from_sender() { mofa_testing::assert_bus_message_sent!(bus, "agent-2"); } + +// =================================================================== +// New assertion helpers +// =================================================================== + +#[tokio::test] +async fn assert_tool_last_result_passes_on_matching_output() { + let tool = MockTool::new("search", "Search tool", json!({"type": "object"})); + tool.execute(ToolInput::from_json(json!({"query": "rust"}))) + .await; + + mofa_testing::assert_tool_last_result!(tool, json!("Mock execution default")); +} + +#[tokio::test] +async fn assert_agent_output_text_passes_on_matching_output() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner.mock_llm().add_response("hello from runner").await; + + let result = runner.run_text("hello").await.expect("run succeeds"); + + mofa_testing::assert_agent_output_text!(result, "hello from runner"); + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn assert_run_failed_with_passes_on_matching_error() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner.mock_llm().add_error_response("mock failure").await; + + let result = runner.run_text("hello").await.expect("run completes"); + + mofa_testing::assert_run_failed_with!(result, "mock failure"); + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn assert_workspace_contains_file_passes_when_snapshot_has_file() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner.mock_llm().add_response("workspace ready").await; + + let result = runner.run_text("hello").await.expect("run succeeds"); + let expected = format!("sessions/{}.jsonl", runner.session_id()); + + mofa_testing::assert_workspace_contains_file!(result.metadata.workspace_snapshot_after, expected); + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn assert_run_recorded_tool_call_passes_when_tool_metadata_exists() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + let tool = MockTool::new( + "echo_tool", + "Echo tool", + json!({ + "type": "object", + "properties": { "input": { "type": "string" } }, + "required": ["input"] + }), + ); + runner + .register_mock_tool(tool) + .await + .expect("tool registered"); + runner + .mock_llm() + .add_tool_call_response("echo_tool", json!({ "input": "ping" }), None) + .await; + runner.mock_llm().add_response("done").await; + + let result = runner.run_text("use tool").await.expect("run succeeds"); + + mofa_testing::assert_run_recorded_tool_call!(result, "echo_tool"); + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn assert_runner_total_executions_passes_on_first_run() { + let mut runner = AgentTestRunner::new().await.expect("runner initializes"); + runner.mock_llm().add_response("counted").await; + + let result = runner.run_text("hello").await.expect("run succeeds"); + + mofa_testing::assert_runner_total_executions!(result, 1); + runner.shutdown().await.expect("shutdown succeeds"); +} diff --git a/tests/tests/dsl_tests.rs b/tests/tests/dsl_tests.rs new file mode 100644 index 000000000..00fdb1a5f --- /dev/null +++ b/tests/tests/dsl_tests.rs @@ -0,0 +1,92 @@ +//! Integration tests for the minimal TOML DSL adapter. + +use mofa_testing::{configure_runner_from_test_case, run_test_case, AgentTestRunner, TestCaseDsl}; + +#[tokio::test] +async fn toml_dsl_runs_through_agent_runner() { + // Load the example DSL from the crate so the test exercises parsing and + // adapter execution together. + let case = TestCaseDsl::from_toml_file(concat!( + env!("CARGO_MANIFEST_DIR"), + "/examples/simple_agent.toml" + )) + .expect("DSL example should parse"); + + assert_eq!(case.name, "simple_agent_run"); + + let result = run_test_case(&case) + .await + .expect("DSL case should run successfully"); + + assert!(result.is_success()); + assert_eq!(result.output_text().as_deref(), Some("hello from DSL")); +} + +#[tokio::test] +async fn toml_dsl_supports_bootstrap_files() { + let case = TestCaseDsl::from_toml_file(concat!( + env!("CARGO_MANIFEST_DIR"), + "/examples/bootstrap_agent.toml" + )) + .expect("bootstrap DSL example should parse"); + + let mut runner = AgentTestRunner::new() + .await + .expect("runner should initialize"); + + configure_runner_from_test_case(&case, &mut runner) + .await + .expect("DSL bootstrap config should apply"); + + let _ = runner + .run_text(case.prompt.as_deref().expect("prompt should be present")) + .await + .expect("bootstrap run should succeed"); + + let request = runner + .mock_llm() + .last_request() + .await + .expect("request should be captured"); + let system_message = request + .messages + .first() + .and_then(|msg| msg.content.as_deref()) + .expect("system message content"); + + assert!(system_message.contains("AGENTS.md")); + assert!(system_message.contains("Bootstrapped instructions for the DSL test.")); + + runner.shutdown().await.expect("shutdown succeeds"); +} + +#[tokio::test] +async fn toml_dsl_supports_tool_backed_runs() { + let case = TestCaseDsl::from_toml_file(concat!( + env!("CARGO_MANIFEST_DIR"), + "/examples/tool_agent.toml" + )) + .expect("tool DSL example should parse"); + + let result = run_test_case(&case) + .await + .expect("tool-backed DSL case should run successfully"); + + assert!(result.is_success()); + assert_eq!(result.output_text().as_deref(), Some("Tool execution complete")); + assert_eq!(result.metadata.tool_calls.len(), 1); + assert_eq!(result.metadata.tool_calls[0].tool_name, "echo_tool"); + + let request = result + .metadata + .llm_last_request + .as_ref() + .expect("request should be captured"); + let system_message = request + .messages + .first() + .and_then(|msg| msg.content.as_deref()) + .expect("system message content"); + assert!(system_message.contains("ToolAgent")); + assert!(system_message.contains("Agent used to validate tool-aware DSL execution.")); +}