mofa-org · AdityaShome · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026
diff --git a/Cargo.toml b/Cargo.toml
@@ -86,6 +86,9 @@ lazy_static = "1.4"
 # Actor framework for ReAct agents
 ractor = "0"
 
+# TOML deserialization (also used transitively by config)
+toml = "0.8"
+
 # Configuration file support (multi-format)
 config = { version = "0.14", features = [
     "toml",

diff --git a/crates/mofa-cli/Cargo.toml b/crates/mofa-cli/Cargo.toml
@@ -25,6 +25,7 @@ mofa-kernel = { path = "../mofa-kernel", version = "0.1", features = [
 ] }
 mofa-runtime = { path = "../mofa-runtime", version = "0.1" }
 mofa-foundation = { path = "../mofa-foundation", version = "0.1" }
+mofa-testing = { path = "../../tests", version = "0.1" }
 config.workspace = true
 tokio = { workspace = true }
 thiserror = { workspace = true }
@@ -90,6 +91,7 @@ tokio-stream = "0.1"
 assert_cmd = "2"
 predicates = "3"
 tempfile = "3"
+axum = { workspace = true }
 
 [features]
 default = []

diff --git a/crates/mofa-cli/src/cli.rs b/crates/mofa-cli/src/cli.rs
@@ -81,6 +81,40 @@ pub enum Commands {
         dora: bool,
     },
 
+    /// Run a testing DSL case file
+    TestDsl {
+        /// TOML DSL file to execute
+        file: PathBuf,
+
+        /// Optional canonical artifact file path
+        #[arg(long)]
+        artifact_out: Option<PathBuf>,
+
+        /// Optional report file path
+        #[arg(long)]
+        report_out: Option<PathBuf>,
+
+        /// Compare the current artifact against a saved baseline artifact
+        #[arg(long)]
+        baseline_in: Option<PathBuf>,
+
+        /// Write the current artifact to a baseline file
+        #[arg(long)]
+        baseline_out: Option<PathBuf>,
+
+        /// Write machine-readable comparison output (requires --baseline-in)
+        #[arg(long, requires = "baseline_in")]
+        comparison_out: Option<PathBuf>,
+
+        /// Exit non-zero when baseline comparison mismatches (no effect without --baseline-in)
+        #[arg(long)]
+        fail_on_diff: bool,
+
+        /// Report file format
+        #[arg(long, value_enum, default_value_t = TestDslReportFormat::Json)]
+        report_format: TestDslReportFormat,
+    },
+
     /// Run a dora dataflow
     #[cfg(feature = "dora")]
     Dataflow {
@@ -219,6 +253,12 @@ pub enum DatabaseType {
     Sqlite,
 }
 
+/// File format used for the report written by `test-dsl --report-out`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
+pub enum TestDslReportFormat {
+    Json,
+    Text,
+}
+
 impl std::fmt::Display for DatabaseType {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
@@ -725,6 +765,77 @@ mod tests {
         assert!(parsed.is_ok(), "doctor ci strict json should parse");
     }
 
+    #[test]
+    fn test_test_dsl_parses() {
+        // The positional DSL file is the only argument the subcommand requires.
+        let argv = ["mofa", "test-dsl", "tests/examples/simple_agent.toml"];
+        let outcome = Cli::try_parse_from(argv);
+        assert!(outcome.is_ok(), "test-dsl command should parse");
+    }
+
+    #[test]
+    fn test_test_dsl_report_flags_parse() {
+        // Exercises `--report-out` together with an explicit `--report-format`.
+        let argv = [
+            "mofa",
+            "test-dsl",
+            "tests/examples/simple_agent.toml",
+            "--report-out",
+            "/tmp/report.json",
+            "--report-format",
+            "json",
+        ];
+        assert!(
+            Cli::try_parse_from(argv).is_ok(),
+            "test-dsl report flags should parse"
+        );
+    }
+
+    #[test]
+    fn test_test_dsl_artifact_flag_parses() {
+        // Exercises `--artifact-out` on its own.
+        let argv = [
+            "mofa",
+            "test-dsl",
+            "tests/examples/simple_agent.toml",
+            "--artifact-out",
+            "/tmp/artifact.json",
+        ];
+        assert!(
+            Cli::try_parse_from(argv).is_ok(),
+            "test-dsl artifact flag should parse"
+        );
+    }
+
+    #[test]
+    fn test_test_dsl_baseline_flags_parse() {
+        // Reading one baseline and writing another in the same invocation must be accepted.
+        let argv = [
+            "mofa",
+            "test-dsl",
+            "tests/examples/simple_agent.toml",
+            "--baseline-in",
+            "/tmp/baseline.json",
+            "--baseline-out",
+            "/tmp/new-baseline.json",
+        ];
+        assert!(
+            Cli::try_parse_from(argv).is_ok(),
+            "test-dsl baseline flags should parse"
+        );
+    }
+
+    #[test]
+    fn test_test_dsl_comparison_flag_parse() {
+        // `--comparison-out` is supplied together with the `--baseline-in` it depends on.
+        let argv = [
+            "mofa",
+            "test-dsl",
+            "tests/examples/simple_agent.toml",
+            "--baseline-in",
+            "/tmp/baseline.json",
+            "--comparison-out",
+            "/tmp/comparison.json",
+        ];
+        assert!(
+            Cli::try_parse_from(argv).is_ok(),
+            "test-dsl comparison flag should parse"
+        );
+    }
+
+    #[test]
+    fn test_test_dsl_fail_on_diff_flag_parse() {
+        // `--fail-on-diff` is a bare switch and is accepted even without a baseline.
+        let argv = [
+            "mofa",
+            "test-dsl",
+            "tests/examples/simple_agent.toml",
+            "--fail-on-diff",
+        ];
+        assert!(
+            Cli::try_parse_from(argv).is_ok(),
+            "test-dsl fail-on-diff flag should parse"
+        );
+    }
+
     #[test]
     fn test_rag_index_parses() {
         let parsed = Cli::try_parse_from([

diff --git a/crates/mofa-cli/src/commands/mod.rs b/crates/mofa-cli/src/commands/mod.rs
@@ -11,5 +11,6 @@ pub mod new;
 pub mod plugin;
 pub mod rag;
 pub mod run;
+pub mod test_dsl;
 pub mod session;
 pub mod tool;
diff --git a/crates/mofa-cli/src/commands/test_dsl.rs b/crates/mofa-cli/src/commands/test_dsl.rs
@@ -0,0 +1,213 @@
+//! `mofa test-dsl` command implementation
+
+use crate::CliError;
+use crate::cli::TestDslReportFormat;
+use crate::output::OutputFormat;
+use mofa_testing::{
+    AgentRunArtifact, AgentRunArtifactComparison, DslError, JsonFormatter,
+    ReportFormatter,
+    TestCaseResult, TestReport, TestStatus, TextFormatter, TestCaseDsl,
+    assertion_error_from_outcomes, collect_assertion_outcomes, execute_test_case,
+};
+use serde::Serialize;
+use serde_json::json;
+use std::path::Path;
+
+/// Per-case summary printed by `run`; serialized under the `case` key of the JSON output.
+#[derive(Debug, Serialize)]
+struct TestDslSummary {
+    /// Name of the executed DSL case.
+    name: String,
+    /// Whether the run result reported success (`is_success()` on the run result).
+    success: bool,
+    /// Output text of the run, when the run result provides one.
+    output_text: Option<String>,
+    /// Run duration in milliseconds.
+    duration_ms: u128,
+    /// Names of the tools recorded in the run metadata, in recorded order.
+    tool_calls: Vec<String>,
+    /// Display form of the workspace root recorded in the run metadata.
+    workspace_root: String,
+    /// Result of the baseline comparison; `None` when no baseline was supplied.
+    baseline_matches: Option<bool>,
+}
+
+/// Execute one TOML DSL test case through the testing runner.
+///
+/// The case at `path` is parsed and executed, its assertion outcomes are folded into an
+/// [`AgentRunArtifact`], the requested output files are written, and a summary is printed
+/// to stdout.
+///
+/// # Arguments
+///
+/// * `path` - TOML DSL case file to execute.
+/// * `format` - stdout format: `OutputFormat::Json` prints a JSON document, any other
+///   value prints plain `key: value` lines.
+/// * `artifact_out` - when set, the run artifact is written there as pretty JSON.
+/// * `report_out` - when set, a single-case report is written there in `report_format`.
+/// * `baseline_in` - when set, a saved artifact is loaded and compared with this run.
+/// * `baseline_out` - when set, this run's artifact is written there.
+/// * `comparison_out` - when set, the baseline comparison is written there; requires
+///   `baseline_in`.
+/// * `fail_on_diff` - turn a baseline mismatch into an error; has no effect without
+///   `baseline_in`.
+/// * `report_format` - format of the file written to `report_out`.
+///
+/// # Errors
+///
+/// Returns [`CliError`] when `comparison_out` is given without `baseline_in`, when the
+/// case cannot be loaded or executed, when reading the baseline or writing any output
+/// file fails, when `fail_on_diff` is set and the baseline does not match, or when
+/// `assertion_error_from_outcomes` reports an error for the collected outcomes.
+// The signature mirrors the CLI flags one-to-one, hence the long parameter list.
+#[allow(clippy::too_many_arguments)]
+pub async fn run(
+    path: &Path,
+    format: OutputFormat,
+    artifact_out: Option<&Path>,
+    report_out: Option<&Path>,
+    baseline_in: Option<&Path>,
+    baseline_out: Option<&Path>,
+    comparison_out: Option<&Path>,
+    fail_on_diff: bool,
+    report_format: TestDslReportFormat,
+) -> Result<(), CliError> {
+    // Reject an unusable flag combination before running anything or touching the filesystem.
+    if comparison_out.is_some() && baseline_in.is_none() {
+        return Err(CliError::Other(
+            "comparison output requires --baseline-in".to_string(),
+        ));
+    }
+
+    let case = TestCaseDsl::from_toml_file(path).map_err(map_dsl_error)?;
+    // Load the baseline up front so a missing or malformed file fails before the case runs.
+    let baseline = baseline_in.map(read_artifact).transpose()?;
+
+    let result = execute_test_case(&case).await.map_err(map_dsl_error)?;
+    let assertions = collect_assertion_outcomes(&case, &result);
+    let artifact = AgentRunArtifact::from_run_result(&case, &result, assertions.clone());
+    let report = build_report(&artifact);
+    let baseline_diff = baseline.as_ref().map(|baseline| artifact.compare_to(baseline));
+
+    if let Some(artifact_out) = artifact_out {
+        write_artifact(artifact_out, &artifact)?;
+    }
+
+    if let Some(baseline_out) = baseline_out {
+        write_artifact(baseline_out, &artifact)?;
+    }
+
+    // `baseline` and `baseline_diff` are `Some` exactly when `--baseline-in` was given,
+    // which the guard at the top guarantees whenever `comparison_out` is set.
+    if let (Some(comparison_out), Some(baseline), Some(diff)) =
+        (comparison_out, baseline.as_ref(), baseline_diff.as_ref())
+    {
+        let comparison =
+            AgentRunArtifactComparison::from_artifacts(&artifact, baseline, diff.clone());
+        write_comparison(comparison_out, &comparison)?;
+    }
+
+    if let Some(report_out) = report_out {
+        write_report(report_out, report_format, &report)?;
+    }
+
+    // Decide the final outcome before printing so the JSON `success` flag agrees with
+    // the value this function returns.
+    let assertion_error = assertion_error_from_outcomes(&assertions);
+    let baseline_mismatch = fail_on_diff && matches!(&baseline_diff, Some(diff) if !diff.matches);
+    let passed = assertion_error.is_none() && !baseline_mismatch;
+
+    let summary = TestDslSummary {
+        name: case.name,
+        success: result.is_success(),
+        output_text: result.output_text(),
+        duration_ms: result.duration.as_millis(),
+        tool_calls: result
+            .metadata
+            .tool_calls
+            .iter()
+            .map(|record| record.tool_name.clone())
+            .collect(),
+        workspace_root: result.metadata.workspace_root.display().to_string(),
+        baseline_matches: baseline_diff.as_ref().map(|diff| diff.matches),
+    };
+
+    match format {
+        OutputFormat::Json => {
+            let output = json!({
+                "success": passed,
+                "case": summary,
+                "baseline": baseline_diff,
+            });
+            println!("{}", serde_json::to_string_pretty(&output)?);
+        }
+        _ => {
+            println!("case: {}", summary.name);
+            println!("status: {}", if summary.success { "passed" } else { "failed" });
+            if let Some(output_text) = &summary.output_text {
+                println!("output: {}", output_text);
+            }
+            if !summary.tool_calls.is_empty() {
+                println!("tool_calls: {}", summary.tool_calls.join(", "));
+            }
+            println!("duration_ms: {}", summary.duration_ms);
+            if let Some(diff) = &baseline_diff {
+                println!("baseline: {}", if diff.matches { "matched" } else { "mismatch" });
+                for difference in &diff.differences {
+                    println!("difference: {}", difference.field);
+                }
+            }
+        }
+    }
+
+    // A baseline mismatch is reported ahead of an assertion failure.
+    if baseline_mismatch {
+        return Err(CliError::Other("baseline comparison mismatch".to_string()));
+    }
+
+    if let Some(error) = assertion_error {
+        return Err(map_dsl_error(error));
+    }
+
+    Ok(())
+}
+
+/// Wrap a single run artifact in a one-case [`TestReport`] under the `dsl` suite.
+///
+/// The case counts as passed only when the artifact's status is the string `passed`.
+/// The error text is the runner error when present, otherwise the kind of the first
+/// failed assertion, otherwise `None`.
+fn build_report(artifact: &AgentRunArtifact) -> TestReport {
+    // The suite holds exactly one case, so case and suite durations are the same value.
+    let elapsed = std::time::Duration::from_millis(artifact.duration_ms);
+
+    let status = if artifact.status != "passed" {
+        TestStatus::Failed
+    } else {
+        TestStatus::Passed
+    };
+
+    // A runner error takes precedence over assertion failures.
+    let error = match &artifact.runner_error {
+        Some(message) => Some(message.clone()),
+        None => artifact
+            .assertions
+            .iter()
+            .find(|outcome| !outcome.passed)
+            .map(|outcome| format!("assertion failed: {}", outcome.kind)),
+    };
+
+    let tool_call_count = artifact.tool_calls.len().to_string();
+    let metadata = vec![
+        ("execution_id".to_string(), artifact.execution_id.clone()),
+        ("workspace_root".to_string(), artifact.workspace_root.clone()),
+        ("tool_calls".to_string(), tool_call_count),
+    ];
+
+    let case_result = TestCaseResult {
+        name: artifact.case_name.clone(),
+        status,
+        duration: elapsed,
+        error,
+        metadata,
+    };
+
+    TestReport {
+        suite_name: "dsl".to_string(),
+        results: vec![case_result],
+        total_duration: elapsed,
+        timestamp: artifact.started_at_ms,
+    }
+}
+
+/// Serialize `artifact` as pretty-printed JSON and write it to `path`.
+fn write_artifact(path: &Path, artifact: &AgentRunArtifact) -> Result<(), CliError> {
+    let json_text = serde_json::to_string_pretty(artifact)?;
+    std::fs::write(path, json_text).map_err(CliError::from)
+}
+
+/// Write the machine-readable baseline comparison to `path` as pretty-printed JSON.
+fn write_comparison(
+    path: &Path,
+    comparison: &AgentRunArtifactComparison,
+) -> Result<(), CliError> {
+    let json_text = serde_json::to_string_pretty(comparison)?;
+    std::fs::write(path, json_text).map_err(CliError::from)
+}
+
+/// Read the file at `path` and deserialize its JSON contents into an [`AgentRunArtifact`].
+fn read_artifact(path: &Path) -> Result<AgentRunArtifact, CliError> {
+    let json_text = std::fs::read_to_string(path)?;
+    let artifact = serde_json::from_str(&json_text)?;
+    Ok(artifact)
+}
+
+/// Render `report` with the formatter selected by `format` and write the result to `path`.
+fn write_report(
+    path: &Path,
+    format: TestDslReportFormat,
+    report: &TestReport,
+) -> Result<(), CliError> {
+    let rendered = match format {
+        TestDslReportFormat::Text => TextFormatter.format(report),
+        TestDslReportFormat::Json => JsonFormatter.format(report),
+    };
+    std::fs::write(path, rendered).map_err(CliError::from)
+}
+
+/// Convert a DSL error into a `CliError::Other` whose message is prefixed with `DSL test failed: `.
+fn map_dsl_error(error: DslError) -> CliError {
+    let message = format!("DSL test failed: {error}");
+    CliError::Other(message)
+}