diff --git a/examples/01_forecasting_workflow.py b/examples/01_forecasting_workflow.py
index a1cc7d71..65c869b3 100644
--- a/examples/01_forecasting_workflow.py
+++ b/examples/01_forecasting_workflow.py
@@ -110,9 +110,9 @@ def main():
     if result.warnings:
         print(f" Warnings: {result.warnings}")
 
-    # Invalid pipeline: Forecaster -> Forecaster
-    print("\n❌ Testing: ['NaiveForecaster', 'ExponentialSmoothing']")
-    result = validator.validate_pipeline(["NaiveForecaster", "ExponentialSmoothing"])
+    # Invalid pipeline: Forecaster -> Transformer
+    print("\n❌ Testing: ['NaiveForecaster', 'Imputer']")
+    result = validator.validate_pipeline(["NaiveForecaster", "Imputer"])
     print(f" Valid: {result.valid}")
     if result.errors:
         print(f" Errors: {result.errors}")
diff --git a/examples/02_llm_query_simulation.py b/examples/02_llm_query_simulation.py
index d147e961..27d1ff28 100644
--- a/examples/02_llm_query_simulation.py
+++ b/examples/02_llm_query_simulation.py
@@ -97,15 +97,15 @@ def simulate_query_1():
 
 def simulate_query_2():
     """
-    Query: "Compare ARIMA and Theta for my sunspot data"
+    Query: "Compare ARIMA and Theta for my airline data"
     """
     print("\n" + "=" * 70)
     print(" QUERY 2: Compare Two Forecasters")
     print("=" * 70)
-    print('\nUser: "Compare NaiveForecaster and ThetaForecaster for sunspot data"')
+    print('\nUser: "Compare NaiveForecaster and ThetaForecaster for airline data"')
 
     # Step 1: LLM plans comparison
-    print_llm_thought("I'll describe both estimators and run them on sunspot data")
+    print_llm_thought("I'll describe both estimators and run them on airline data")
 
     # Step 2: Describe first estimator
     print_tool_call("describe_estimator", {"estimator": "NaiveForecaster"})
@@ -138,21 +138,21 @@ def simulate_query_2():
     # Step 5: Run predictions
     if h1:
         print_tool_call(
-            "fit_predict", {"estimator_handle": h1, "dataset": "sunspots", "horizon": 6}
+            "fit_predict", {"estimator_handle": h1, "dataset": "airline", "horizon": 6}
         )
-        pred1 = fit_predict_tool(h1, "sunspots", 6)
+        pred1 = fit_predict_tool(h1, "airline", 6)
         print_result({"success": pred1["success"], "horizon": pred1.get("horizon")})
 
     if h2:
         print_tool_call(
-            "fit_predict", {"estimator_handle": h2, "dataset": "sunspots", "horizon": 6}
+            "fit_predict", {"estimator_handle": h2, "dataset": "airline", "horizon": 6}
         )
-        pred2 = fit_predict_tool(h2, "sunspots", 6)
+        pred2 = fit_predict_tool(h2, "airline", 6)
         print_result({"success": pred2["success"], "horizon": pred2.get("horizon")})
 
     # Step 6: Generate comparison
     print("\n🤖 LLM Response:")
-    print(" Comparison of NaiveForecaster vs ThetaForecaster on Sunspots:")
+    print(" Comparison of NaiveForecaster vs ThetaForecaster on airline:")
     print(" - NaiveForecaster: Simple baseline, uses last season's values")
     print(" - ThetaForecaster: Decomposition-based, better for trended data")
     if h1 and pred1["success"]:
@@ -163,19 +163,19 @@ def simulate_query_2():
 
 def simulate_query_3():
     """
-    Query: "Can I use ARIMA after LogTransformer?"
+    Query: "Can I use ARIMA after Detrender?"
    """
    print("\n" + "=" * 70)
    print(" QUERY 3: Validate Pipeline Composition")
    print("=" * 70)
-    print('\nUser: "Can I build a pipeline with Imputer -> Detrend -> NaiveForecaster?"')
+    print('\nUser: "Can I build a pipeline with Imputer -> Detrender -> NaiveForecaster?"')
 
     # Step 1: LLM uses composition validator
     print_llm_thought("Let me validate this pipeline composition...")
     validator = get_composition_validator()
-    pipeline = ["Imputer", "Detrend", "NaiveForecaster"]
+    pipeline = ["Imputer", "Detrender", "NaiveForecaster"]
     print_tool_call("validate_pipeline", {"components": pipeline})
     result = validator.validate_pipeline(pipeline)
     print_result(result.to_dict())
diff --git a/tests/test_examples.py b/tests/test_examples.py
new file mode 100644
index 00000000..8e45589d
--- /dev/null
+++ b/tests/test_examples.py
@@ -0,0 +1,48 @@
+"""Smoke tests for documented agentic/MCP workflow examples."""
+
+import subprocess
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+EXAMPLES_DIR = REPO_ROOT / "examples"
+
+
+def _run_example(name: str) -> str:
+    """Run an example script and return stdout."""
+    result = subprocess.run(
+        [sys.executable, str(EXAMPLES_DIR / name)],
+        cwd=REPO_ROOT,
+        capture_output=True,
+        text=True,
+        check=True,
+    )
+    return result.stdout
+
+
+def test_forecasting_workflow_example_runs_successfully():
+    """The end-to-end forecasting workflow example should finish cleanly."""
+    output = _run_example("01_forecasting_workflow.py")
+
+    assert "Demo Complete" in output
+    assert "Valid: False" in output
+    assert "Error:" not in output
+
+
+def test_llm_query_simulation_example_runs_successfully():
+    """The LLM-style query simulation should not hide failed workflow steps."""
+    output = _run_example("02_llm_query_simulation.py")
+
+    assert "All LLM Query Simulations Complete" in output
+    assert "ThetaForecaster predictions generated successfully" in output
+    assert "NaiveForecaster predictions generated successfully" in output
+    assert '"success": false' not in output
+    assert "Unknown estimator: Detrend" not in output
+
+
+def test_pipeline_demo_example_runs_successfully():
+    """The pipeline demo should continue to show a successful two-call workflow."""
+    output = _run_example("04_mcp_pipeline_demo.py")
+
+    assert "SUCCESS! LLM created and used a complete pipeline" in output
+    assert '"success": true' in output