Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
218 changes: 118 additions & 100 deletions examples/sql_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
functionality with SQL databases (SQLite in this example).
"""

import importlib.util
import sqlite3
import tempfile
from pathlib import Path
Expand All @@ -13,103 +14,116 @@

from sktime_mcp.runtime.executor import get_executor

# Create a sample SQLite database
db_path = Path(tempfile.gettempdir()) / "sample_sales.db"

# Create sample data
sample_data = pd.DataFrame(
{
"date": pd.date_range(start="2020-01-01", periods=200, freq="D"),
"sales": [100 + i + (i % 7) * 5 for i in range(200)],
"temperature": [20 + (i % 10) for i in range(200)],
"region": ["North" if i % 2 == 0 else "South" for i in range(200)],
}
)

# Write to SQLite
conn = sqlite3.connect(db_path)
sample_data.to_sql("sales", conn, if_exists="replace", index=False)
conn.close()

print(f"Created sample SQLite database: {db_path}")
print("Table: sales")
print(f"Rows: {len(sample_data)}")

# Example 1: Load all data with SQL query
print("\n" + "=" * 60)
print("Example 1: Load with SQL query")
print("=" * 60)

config = {
"type": "sql",
"connection_string": f"sqlite:///{db_path}",
"query": "SELECT date, sales, temperature FROM sales WHERE region = 'North'",
"time_column": "date",
"target_column": "sales",
"exog_columns": ["temperature"],
}

executor = get_executor()

result = executor.load_data_source(config)
print(f"\nLoad result: {result['success']}")

if result["success"]:
metadata = result["metadata"]
print("\nMetadata:")
print(f" Source: {metadata['source']}")
print(f" Rows: {metadata['rows']}")
print(f" Columns: {metadata['columns']}")
print(f" Date range: {metadata['start_date']} to {metadata['end_date']}")

# Fit and predict
estimator_result = executor.instantiate("ARIMA", {"order": (1, 1, 1)})

if estimator_result["success"]:
predictions = executor.fit_predict(
estimator_result["handle"],
"",
7,
data_handle=result["data_handle"],
def _has_sql_support() -> bool:
"""Return whether the optional SQL dependency is available."""
return importlib.util.find_spec("sqlalchemy") is not None


def main():
"""Run the SQL example."""
# Example 3: PostgreSQL connection (commented out - requires PostgreSQL)
if not _has_sql_support():
print("Optional dependency missing: SQLAlchemy is not installed.")
print("Install SQL support with: pip install 'sktime-mcp[sql]'")
print("Skipping runnable SQLite examples and showing connection templates only.\n")
else:
# Create a sample SQLite database
db_path = Path(tempfile.gettempdir()) / "sample_sales.db"

# Create sample data
sample_data = pd.DataFrame(
{
"date": pd.date_range(start="2020-01-01", periods=200, freq="D"),
"sales": [100 + i + (i % 7) * 5 for i in range(200)],
"temperature": [20 + (i % 10) for i in range(200)],
"region": ["North" if i % 2 == 0 else "South" for i in range(200)],
}
)

if predictions["success"]:
print("\nForecast for next 7 days (North region):")
for step, value in list(predictions["predictions"].items())[:7]:
print(f" Day {step}: {value:.2f}")

executor.release_data_handle(result["data_handle"])

# Example 2: Load with table name and filters
print("\n" + "=" * 60)
print("Example 2: Load with table name and filters")
print("=" * 60)

config2 = {
"type": "sql",
"dialect": "sqlite",
"database": str(db_path),
"table": "sales",
"filters": {
"region": "South",
},
"time_column": "date",
"target_column": "sales",
}

result2 = executor.load_data_source(config2)
print(f"\nLoad result: {result2['success']}")

if result2["success"]:
print(f"Rows loaded: {result2['metadata']['rows']}")
executor.release_data_handle(result2["data_handle"])

# Example 3: PostgreSQL connection (commented out - requires PostgreSQL)
print("\n" + "=" * 60)
print("Example 3: PostgreSQL connection (template)")
print("=" * 60)

print("""
# Write to SQLite
conn = sqlite3.connect(db_path)
sample_data.to_sql("sales", conn, if_exists="replace", index=False)
conn.close()

print(f"Created sample SQLite database: {db_path}")
print("Table: sales")
print(f"Rows: {len(sample_data)}")

# Example 1: Load all data with SQL query
print("\n" + "=" * 60)
print("Example 1: Load with SQL query")
print("=" * 60)

config = {
"type": "sql",
"connection_string": f"sqlite:///{db_path}",
"query": "SELECT date, sales, temperature FROM sales WHERE region = 'North'",
"time_column": "date",
"target_column": "sales",
"exog_columns": ["temperature"],
}

executor = get_executor()

result = executor.load_data_source(config)
print(f"\nLoad result: {result['success']}")

if result["success"]:
metadata = result["metadata"]
print("\nMetadata:")
print(f" Source: {metadata['source']}")
print(f" Rows: {metadata['rows']}")
print(f" Columns: {metadata['columns']}")
print(f" Date range: {metadata['start_date']} to {metadata['end_date']}")

# Fit and predict
estimator_result = executor.instantiate("ARIMA", {"order": (1, 1, 1)})

if estimator_result["success"]:
predictions = executor.fit_predict(
estimator_result["handle"],
"",
7,
data_handle=result["data_handle"],
)

if predictions["success"]:
print("\nForecast for next 7 days (North region):")
for step, value in list(predictions["predictions"].items())[:7]:
print(f" Day {step}: {value:.2f}")

executor.release_data_handle(result["data_handle"])

# Example 2: Load with table name and filters
print("\n" + "=" * 60)
print("Example 2: Load with table name and filters")
print("=" * 60)

config2 = {
"type": "sql",
"dialect": "sqlite",
"database": str(db_path),
"table": "sales",
"filters": {
"region": "South",
},
"time_column": "date",
"target_column": "sales",
}

result2 = executor.load_data_source(config2)
print(f"\nLoad result: {result2['success']}")

if result2["success"]:
print(f"Rows loaded: {result2['metadata']['rows']}")
executor.release_data_handle(result2["data_handle"])

print("\n" + "=" * 60)
print("Example 3: PostgreSQL connection (template)")
print("=" * 60)

print("""
# PostgreSQL example (requires psycopg2-binary):
config_postgres = {
"type": "sql",
Expand All @@ -130,12 +144,12 @@
result = executor.load_data_source(config_postgres)
""")

# Example 4: MySQL connection (commented out - requires MySQL)
print("\n" + "=" * 60)
print("Example 4: MySQL connection (template)")
print("=" * 60)
# Example 4: MySQL connection (commented out - requires MySQL)
print("\n" + "=" * 60)
print("Example 4: MySQL connection (template)")
print("=" * 60)

print("""
print("""
# MySQL example (requires pymysql):
config_mysql = {
"type": "sql",
Expand All @@ -156,4 +170,8 @@
result = executor.load_data_source(config_mysql)
""")

print("\nExample completed!")
print("\nExample completed!")


if __name__ == "__main__":
main()
65 changes: 65 additions & 0 deletions tests/test_example_data_workflows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Smoke tests for pipeline and own-data workflow examples."""

import importlib.util
import subprocess
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parents[1]
EXAMPLES_DIR = REPO_ROOT / "examples"


def _run_example(name: str) -> str:
    """Execute an example script in a subprocess and return its captured stdout.

    Raises ``subprocess.CalledProcessError`` (via ``check=True``) when the
    script exits with a non-zero status.
    """
    script_path = EXAMPLES_DIR / name
    completed = subprocess.run(
        [sys.executable, str(script_path)],
        cwd=REPO_ROOT,
        capture_output=True,
        text=True,
        check=True,
    )
    return completed.stdout


def test_pipeline_instantiation_example_runs_successfully():
    """The pipeline-instantiation example should remain runnable."""
    stdout = _run_example("03_pipeline_instantiation.py")

    # Each fragment marks a section of the example that must have executed.
    for fragment in (
        "All examples completed!",
        "Success: True",
        "Predictions:",
        "Validation errors:",
    ):
        assert fragment in stdout


def test_pandas_example_runs_successfully():
    """The in-memory own-data example should continue to work end to end."""
    stdout = _run_example("pandas_example.py")

    # Each fragment marks a phase of the example that must have completed.
    for fragment in (
        "Method 1: Using DataSourceRegistry",
        "Method 2: Using Executor",
        "Predictions: True",
        "Cleanup:",
    ):
        assert fragment in stdout


def test_csv_example_runs_successfully():
    """The CSV/TSV own-data example should remain runnable."""
    stdout = _run_example("csv_example.py")

    # Both file formats and the forecasting step must appear in the output.
    for fragment in (
        "Loading data from CSV file",
        "Loading data from TSV file",
        "Forecast for next 10 days:",
        "Example completed!",
    ):
        assert fragment in stdout


def test_sql_example_handles_optional_dependency_cleanly():
    """The SQL example should either run or explain the missing optional dependency clearly."""
    stdout = _run_example("sql_example.py")

    # The example runs in a subprocess with the same interpreter, so checking
    # for sqlalchemy here mirrors what the example itself detects.
    sqlalchemy_available = importlib.util.find_spec("sqlalchemy") is not None
    if sqlalchemy_available:
        assert "Example 1: Load with SQL query" in stdout
        assert "Load result: True" in stdout
    else:
        assert "Optional dependency missing: SQLAlchemy is not installed." in stdout
        assert "Skipping runnable SQLite examples" in stdout

    # The closing line is printed on both paths.
    assert "Example completed!" in stdout
Loading