Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
218 changes: 118 additions & 100 deletions examples/sql_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
functionality with SQL databases (SQLite in this example).
"""

import importlib.util
import sqlite3
import tempfile
from pathlib import Path
Expand All @@ -13,103 +14,116 @@

from sktime_mcp.runtime.executor import get_executor

# Create a sample SQLite database
db_path = Path(tempfile.gettempdir()) / "sample_sales.db"

# Create sample data
sample_data = pd.DataFrame(
{
"date": pd.date_range(start="2020-01-01", periods=200, freq="D"),
"sales": [100 + i + (i % 7) * 5 for i in range(200)],
"temperature": [20 + (i % 10) for i in range(200)],
"region": ["North" if i % 2 == 0 else "South" for i in range(200)],
}
)

# Write to SQLite
conn = sqlite3.connect(db_path)
sample_data.to_sql("sales", conn, if_exists="replace", index=False)
conn.close()

print(f"Created sample SQLite database: {db_path}")
print("Table: sales")
print(f"Rows: {len(sample_data)}")

# Example 1: Load all data with SQL query
print("\n" + "=" * 60)
print("Example 1: Load with SQL query")
print("=" * 60)

config = {
"type": "sql",
"connection_string": f"sqlite:///{db_path}",
"query": "SELECT date, sales, temperature FROM sales WHERE region = 'North'",
"time_column": "date",
"target_column": "sales",
"exog_columns": ["temperature"],
}

executor = get_executor()

result = executor.load_data_source(config)
print(f"\nLoad result: {result['success']}")

if result["success"]:
metadata = result["metadata"]
print("\nMetadata:")
print(f" Source: {metadata['source']}")
print(f" Rows: {metadata['rows']}")
print(f" Columns: {metadata['columns']}")
print(f" Date range: {metadata['start_date']} to {metadata['end_date']}")

# Fit and predict
estimator_result = executor.instantiate("ARIMA", {"order": (1, 1, 1)})

if estimator_result["success"]:
predictions = executor.fit_predict(
estimator_result["handle"],
"",
7,
data_handle=result["data_handle"],
def _has_sql_support() -> bool:
"""Return whether the optional SQL dependency is available."""
return importlib.util.find_spec("sqlalchemy") is not None


def main():
"""Run the SQL example."""
# Example 3: PostgreSQL connection (commented out - requires PostgreSQL)
if not _has_sql_support():
print("Optional dependency missing: SQLAlchemy is not installed.")
print("Install SQL support with: pip install 'sktime-mcp[sql]'")
print("Skipping runnable SQLite examples and showing connection templates only.\n")
else:
# Create a sample SQLite database
db_path = Path(tempfile.gettempdir()) / "sample_sales.db"

# Create sample data
sample_data = pd.DataFrame(
{
"date": pd.date_range(start="2020-01-01", periods=200, freq="D"),
"sales": [100 + i + (i % 7) * 5 for i in range(200)],
"temperature": [20 + (i % 10) for i in range(200)],
"region": ["North" if i % 2 == 0 else "South" for i in range(200)],
}
)

if predictions["success"]:
print("\nForecast for next 7 days (North region):")
for step, value in list(predictions["predictions"].items())[:7]:
print(f" Day {step}: {value:.2f}")

executor.release_data_handle(result["data_handle"])

# Example 2: Load with table name and filters
print("\n" + "=" * 60)
print("Example 2: Load with table name and filters")
print("=" * 60)

config2 = {
"type": "sql",
"dialect": "sqlite",
"database": str(db_path),
"table": "sales",
"filters": {
"region": "South",
},
"time_column": "date",
"target_column": "sales",
}

result2 = executor.load_data_source(config2)
print(f"\nLoad result: {result2['success']}")

if result2["success"]:
print(f"Rows loaded: {result2['metadata']['rows']}")
executor.release_data_handle(result2["data_handle"])

# Example 3: PostgreSQL connection (commented out - requires PostgreSQL)
print("\n" + "=" * 60)
print("Example 3: PostgreSQL connection (template)")
print("=" * 60)

print("""
# Write to SQLite
conn = sqlite3.connect(db_path)
sample_data.to_sql("sales", conn, if_exists="replace", index=False)
conn.close()

print(f"Created sample SQLite database: {db_path}")
print("Table: sales")
print(f"Rows: {len(sample_data)}")

# Example 1: Load all data with SQL query
print("\n" + "=" * 60)
print("Example 1: Load with SQL query")
print("=" * 60)

config = {
"type": "sql",
"connection_string": f"sqlite:///{db_path}",
"query": "SELECT date, sales, temperature FROM sales WHERE region = 'North'",
"time_column": "date",
"target_column": "sales",
"exog_columns": ["temperature"],
}

executor = get_executor()

result = executor.load_data_source(config)
print(f"\nLoad result: {result['success']}")

if result["success"]:
metadata = result["metadata"]
print("\nMetadata:")
print(f" Source: {metadata['source']}")
print(f" Rows: {metadata['rows']}")
print(f" Columns: {metadata['columns']}")
print(f" Date range: {metadata['start_date']} to {metadata['end_date']}")

# Fit and predict
estimator_result = executor.instantiate("ARIMA", {"order": (1, 1, 1)})

if estimator_result["success"]:
predictions = executor.fit_predict(
estimator_result["handle"],
"",
7,
data_handle=result["data_handle"],
)

if predictions["success"]:
print("\nForecast for next 7 days (North region):")
for step, value in list(predictions["predictions"].items())[:7]:
print(f" Day {step}: {value:.2f}")

executor.release_data_handle(result["data_handle"])

# Example 2: Load with table name and filters
print("\n" + "=" * 60)
print("Example 2: Load with table name and filters")
print("=" * 60)

config2 = {
"type": "sql",
"dialect": "sqlite",
"database": str(db_path),
"table": "sales",
"filters": {
"region": "South",
},
"time_column": "date",
"target_column": "sales",
}

result2 = executor.load_data_source(config2)
print(f"\nLoad result: {result2['success']}")

if result2["success"]:
print(f"Rows loaded: {result2['metadata']['rows']}")
executor.release_data_handle(result2["data_handle"])

print("\n" + "=" * 60)
print("Example 3: PostgreSQL connection (template)")
print("=" * 60)

print("""
# PostgreSQL example (requires psycopg2-binary):
config_postgres = {
"type": "sql",
Expand All @@ -130,12 +144,12 @@
result = executor.load_data_source(config_postgres)
""")

# Example 4: MySQL connection (commented out - requires MySQL)
print("\n" + "=" * 60)
print("Example 4: MySQL connection (template)")
print("=" * 60)
# Example 4: MySQL connection (commented out - requires MySQL)
print("\n" + "=" * 60)
print("Example 4: MySQL connection (template)")
print("=" * 60)

print("""
print("""
# MySQL example (requires pymysql):
config_mysql = {
"type": "sql",
Expand All @@ -156,4 +170,8 @@
result = executor.load_data_source(config_mysql)
""")

print("\nExample completed!")
print("\nExample completed!")


if __name__ == "__main__":
main()
65 changes: 65 additions & 0 deletions tests/test_example_data_workflows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Smoke tests for pipeline and own-data workflow examples."""

import importlib.util
import subprocess
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parents[1]
EXAMPLES_DIR = REPO_ROOT / "examples"


def _run_example(name: str) -> str:
    """Execute an example script in a subprocess and return its captured stdout.

    Raises ``subprocess.CalledProcessError`` (via ``check=True``) when the
    script exits with a non-zero status.
    """
    script_path = EXAMPLES_DIR / name
    completed = subprocess.run(
        [sys.executable, str(script_path)],
        cwd=REPO_ROOT,
        capture_output=True,
        text=True,
        check=True,
    )
    return completed.stdout


def test_pipeline_instantiation_example_runs_successfully():
    """The pipeline-instantiation example should remain runnable."""
    stdout = _run_example("03_pipeline_instantiation.py")

    # Each fragment marks a section of the example that must have executed.
    for fragment in (
        "All examples completed!",
        "Success: True",
        "Predictions:",
        "Validation errors:",
    ):
        assert fragment in stdout


def test_pandas_example_runs_successfully():
    """The in-memory own-data example should continue to work end to end."""
    stdout = _run_example("pandas_example.py")

    # Each fragment marks a phase of the example that must have completed.
    for fragment in (
        "Method 1: Using DataSourceRegistry",
        "Method 2: Using Executor",
        "Predictions: True",
        "Cleanup:",
    ):
        assert fragment in stdout


def test_csv_example_runs_successfully():
    """The CSV/TSV own-data example should remain runnable."""
    stdout = _run_example("csv_example.py")

    # Both file formats and the forecasting step must appear in the output.
    for fragment in (
        "Loading data from CSV file",
        "Loading data from TSV file",
        "Forecast for next 10 days:",
        "Example completed!",
    ):
        assert fragment in stdout


def test_sql_example_handles_optional_dependency_cleanly():
    """The SQL example should either run or explain the missing optional dependency clearly."""
    stdout = _run_example("sql_example.py")

    # The example runs in a subprocess with the same interpreter, so checking
    # for sqlalchemy here mirrors what the example itself detects.
    sqlalchemy_available = importlib.util.find_spec("sqlalchemy") is not None
    if sqlalchemy_available:
        assert "Example 1: Load with SQL query" in stdout
        assert "Load result: True" in stdout
    else:
        assert "Optional dependency missing: SQLAlchemy is not installed." in stdout
        assert "Skipping runnable SQLite examples" in stdout

    # The closing line is printed on both paths.
    assert "Example completed!" in stdout
Loading