# Patch summary: wrap examples/sql_example.py in main() so the runnable SQLite
# examples are skipped when SQLAlchemy is not importable, and add smoke tests
# that execute the example scripts as subprocesses.
diff --git a/examples/sql_example.py b/examples/sql_example.py
index 82a0275e..6e9bafd1 100644
--- a/examples/sql_example.py
+++ b/examples/sql_example.py
@@ -5,6 +5,7 @@
 functionality with SQL databases (SQLite in this example).
 """
 
+import importlib.util
 import sqlite3
 import tempfile
 from pathlib import Path
@@ -13,103 +14,116 @@
 
 from sktime_mcp.runtime.executor import get_executor
 
-# Create a sample SQLite database
-db_path = Path(tempfile.gettempdir()) / "sample_sales.db"
-
-# Create sample data
-sample_data = pd.DataFrame(
-    {
-        "date": pd.date_range(start="2020-01-01", periods=200, freq="D"),
-        "sales": [100 + i + (i % 7) * 5 for i in range(200)],
-        "temperature": [20 + (i % 10) for i in range(200)],
-        "region": ["North" if i % 2 == 0 else "South" for i in range(200)],
-    }
-)
-
-# Write to SQLite
-conn = sqlite3.connect(db_path)
-sample_data.to_sql("sales", conn, if_exists="replace", index=False)
-conn.close()
-
-print(f"Created sample SQLite database: {db_path}")
-print("Table: sales")
-print(f"Rows: {len(sample_data)}")
-
-# Example 1: Load all data with SQL query
-print("\n" + "=" * 60)
-print("Example 1: Load with SQL query")
-print("=" * 60)
-
-config = {
-    "type": "sql",
-    "connection_string": f"sqlite:///{db_path}",
-    "query": "SELECT date, sales, temperature FROM sales WHERE region = 'North'",
-    "time_column": "date",
-    "target_column": "sales",
-    "exog_columns": ["temperature"],
-}
-
-executor = get_executor()
-
-result = executor.load_data_source(config)
-print(f"\nLoad result: {result['success']}")
 
-if result["success"]:
-    metadata = result["metadata"]
-    print("\nMetadata:")
-    print(f" Source: {metadata['source']}")
-    print(f" Rows: {metadata['rows']}")
-    print(f" Columns: {metadata['columns']}")
-    print(f" Date range: {metadata['start_date']} to {metadata['end_date']}")
-
-    # Fit and predict
-    estimator_result = executor.instantiate("ARIMA", {"order": (1, 1, 1)})
-
-    if estimator_result["success"]:
-        predictions = executor.fit_predict(
-            estimator_result["handle"],
-            "",
-            7,
-            data_handle=result["data_handle"],
+def _has_sql_support() -> bool:
+    """Return whether the optional SQL dependency is available."""
+    return importlib.util.find_spec("sqlalchemy") is not None
+
+
+def main():
+    """Run the SQLite examples if SQLAlchemy is installed, then print DB templates."""
+    if not _has_sql_support():
+        print("Optional dependency missing: SQLAlchemy is not installed.")
+        print("Install SQL support with: pip install 'sktime-mcp[sql]'")
+        print("Skipping runnable SQLite examples and showing connection templates only.\n")
+    else:
+        # Create a sample SQLite database
+        db_path = Path(tempfile.gettempdir()) / "sample_sales.db"
+
+        # Create sample data
+        sample_data = pd.DataFrame(
+            {
+                "date": pd.date_range(start="2020-01-01", periods=200, freq="D"),
+                "sales": [100 + i + (i % 7) * 5 for i in range(200)],
+                "temperature": [20 + (i % 10) for i in range(200)],
+                "region": ["North" if i % 2 == 0 else "South" for i in range(200)],
+            }
         )
 
-        if predictions["success"]:
-            print("\nForecast for next 7 days (North region):")
-            for step, value in list(predictions["predictions"].items())[:7]:
-                print(f" Day {step}: {value:.2f}")
-
-    executor.release_data_handle(result["data_handle"])
-
-# Example 2: Load with table name and filters
-print("\n" + "=" * 60)
-print("Example 2: Load with table name and filters")
-print("=" * 60)
-
-config2 = {
-    "type": "sql",
-    "dialect": "sqlite",
-    "database": str(db_path),
-    "table": "sales",
-    "filters": {
-        "region": "South",
-    },
-    "time_column": "date",
-    "target_column": "sales",
-}
-
-result2 = executor.load_data_source(config2)
-print(f"\nLoad result: {result2['success']}")
-
-if result2["success"]:
-    print(f"Rows loaded: {result2['metadata']['rows']}")
-    executor.release_data_handle(result2["data_handle"])
-
-# Example 3: PostgreSQL connection (commented out - requires PostgreSQL)
-print("\n" + "=" * 60)
-print("Example 3: PostgreSQL connection (template)")
-print("=" * 60)
-
-print("""
+        # Write to SQLite
+        conn = sqlite3.connect(db_path)
+        sample_data.to_sql("sales", conn, if_exists="replace", index=False)
+        conn.close()
+
+        print(f"Created sample SQLite database: {db_path}")
+        print("Table: sales")
+        print(f"Rows: {len(sample_data)}")
+
+        # Example 1: Load all data with SQL query
+        print("\n" + "=" * 60)
+        print("Example 1: Load with SQL query")
+        print("=" * 60)
+
+        config = {
+            "type": "sql",
+            "connection_string": f"sqlite:///{db_path}",
+            "query": "SELECT date, sales, temperature FROM sales WHERE region = 'North'",
+            "time_column": "date",
+            "target_column": "sales",
+            "exog_columns": ["temperature"],
+        }
+
+        executor = get_executor()
+
+        result = executor.load_data_source(config)
+        print(f"\nLoad result: {result['success']}")
+
+        if result["success"]:
+            metadata = result["metadata"]
+            print("\nMetadata:")
+            print(f" Source: {metadata['source']}")
+            print(f" Rows: {metadata['rows']}")
+            print(f" Columns: {metadata['columns']}")
+            print(f" Date range: {metadata['start_date']} to {metadata['end_date']}")
+
+            # Fit and predict
+            estimator_result = executor.instantiate("ARIMA", {"order": (1, 1, 1)})
+
+            if estimator_result["success"]:
+                predictions = executor.fit_predict(
+                    estimator_result["handle"],
+                    "",
+                    7,
+                    data_handle=result["data_handle"],
+                )
+
+                if predictions["success"]:
+                    print("\nForecast for next 7 days (North region):")
+                    for step, value in list(predictions["predictions"].items())[:7]:
+                        print(f" Day {step}: {value:.2f}")
+
+            executor.release_data_handle(result["data_handle"])
+
+        # Example 2: Load with table name and filters
+        print("\n" + "=" * 60)
+        print("Example 2: Load with table name and filters")
+        print("=" * 60)
+
+        config2 = {
+            "type": "sql",
+            "dialect": "sqlite",
+            "database": str(db_path),
+            "table": "sales",
+            "filters": {
+                "region": "South",
+            },
+            "time_column": "date",
+            "target_column": "sales",
+        }
+
+        result2 = executor.load_data_source(config2)
+        print(f"\nLoad result: {result2['success']}")
+
+        if result2["success"]:
+            print(f"Rows loaded: {result2['metadata']['rows']}")
+            executor.release_data_handle(result2["data_handle"])
+
+    # Example 3: PostgreSQL connection (commented out - requires PostgreSQL)
+    print("\n" + "=" * 60)
+    print("Example 3: PostgreSQL connection (template)")
+    print("=" * 60)
+
+    print("""
 # PostgreSQL example (requires psycopg2-binary):
 config_postgres = {
     "type": "sql",
@@ -130,12 +144,12 @@
 result = executor.load_data_source(config_postgres)
 """)
 
-# Example 4: MySQL connection (commented out - requires MySQL)
-print("\n" + "=" * 60)
-print("Example 4: MySQL connection (template)")
-print("=" * 60)
+    # Example 4: MySQL connection (commented out - requires MySQL)
+    print("\n" + "=" * 60)
+    print("Example 4: MySQL connection (template)")
+    print("=" * 60)
 
-print("""
+    print("""
 # MySQL example (requires pymysql):
 config_mysql = {
     "type": "sql",
@@ -156,4 +170,8 @@
 result = executor.load_data_source(config_mysql)
 """)
 
-print("\nExample completed!")
+    print("\nExample completed!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_example_data_workflows.py b/tests/test_example_data_workflows.py
new file mode 100644
index 00000000..a4857610
--- /dev/null
+++ b/tests/test_example_data_workflows.py
@@ -0,0 +1,65 @@
+"""Smoke tests for pipeline and own-data workflow examples."""
+
+import importlib.util
+import subprocess
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+EXAMPLES_DIR = REPO_ROOT / "examples"
+
+
+def _run_example(name: str) -> str:
+    """Run an example script in a subprocess and return stdout; non-zero exit raises."""
+    result = subprocess.run(
+        [sys.executable, str(EXAMPLES_DIR / name)],
+        cwd=REPO_ROOT,
+        capture_output=True,
+        text=True,
+        check=True,
+    )
+    return result.stdout
+
+
+def test_pipeline_instantiation_example_runs_successfully():
+    """The pipeline-instantiation example should remain runnable."""
+    output = _run_example("03_pipeline_instantiation.py")
+
+    assert "All examples completed!" in output
+    assert "Success: True" in output
+    assert "Predictions:" in output
+    assert "Validation errors:" in output
+
+
+def test_pandas_example_runs_successfully():
+    """The in-memory own-data example should continue to work end to end."""
+    output = _run_example("pandas_example.py")
+
+    assert "Method 1: Using DataSourceRegistry" in output
+    assert "Method 2: Using Executor" in output
+    assert "Predictions: True" in output
+    assert "Cleanup:" in output
+
+
+def test_csv_example_runs_successfully():
+    """The CSV/TSV own-data example should remain runnable."""
+    output = _run_example("csv_example.py")
+
+    assert "Loading data from CSV file" in output
+    assert "Loading data from TSV file" in output
+    assert "Forecast for next 10 days:" in output
+    assert "Example completed!" in output
+
+
+def test_sql_example_handles_optional_dependency_cleanly():
+    """The SQL example should either run or explain the missing optional dependency clearly."""
+    output = _run_example("sql_example.py")
+
+    if importlib.util.find_spec("sqlalchemy") is None:
+        assert "Optional dependency missing: SQLAlchemy is not installed." in output
+        assert "Skipping runnable SQLite examples" in output
+    else:
+        assert "Example 1: Load with SQL query" in output
+        assert "Load result: True" in output
+
+    assert "Example completed!" in output