Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 60 additions & 41 deletions src/sktime_mcp/runtime/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,46 +788,54 @@ def format_data_handle(

# 3. Infer and set frequency
if auto_infer_freq:
freq = y.index.freq

if freq is None:
# Try to infer
freq = pd.infer_freq(y.index)
# Guard: integer / RangeIndex data should not go through
# datetime-specific frequency inference (fixes #390).
if isinstance(y.index, pd.RangeIndex) or not hasattr(y.index, "freq"):
changes_made["frequency_set"] = True
changes_made["frequency"] = "Integer"
else:
freq = y.index.freq

if freq is None:
# Manual inference
time_diffs = y.index.to_series().diff().dropna()
if len(time_diffs) > 0:
most_common_diff = time_diffs.mode()[0]

if most_common_diff == pd.Timedelta(days=1):
freq = "D"
elif most_common_diff == pd.Timedelta(hours=1):
freq = "h"
elif most_common_diff == pd.Timedelta(minutes=1):
freq = "min"
elif most_common_diff == pd.Timedelta(seconds=1):
freq = "s"
elif most_common_diff == pd.Timedelta(days=7):
freq = "W"
elif most_common_diff.days >= 28 and most_common_diff.days <= 31:
freq = "MS"
else:
freq = "D"

# Create complete date range
if freq:
full_range = pd.date_range(start=y.index.min(), end=y.index.max(), freq=freq)

n_gaps = len(full_range) - len(y)

y = y.reindex(full_range)
if X is not None:
X = X.reindex(full_range)

changes_made["gaps_filled"] = n_gaps
changes_made["frequency_set"] = True
changes_made["frequency"] = freq
# Try to infer
freq = pd.infer_freq(y.index)

if freq is None:
# Manual inference
time_diffs = y.index.to_series().diff().dropna()
if len(time_diffs) > 0:
most_common_diff = time_diffs.mode()[0]

if most_common_diff == pd.Timedelta(days=1):
freq = "D"
elif most_common_diff == pd.Timedelta(hours=1):
freq = "h"
elif most_common_diff == pd.Timedelta(minutes=1):
freq = "min"
elif most_common_diff == pd.Timedelta(seconds=1):
freq = "s"
elif most_common_diff == pd.Timedelta(days=7):
freq = "W"
elif most_common_diff.days >= 28 and most_common_diff.days <= 31:
freq = "MS"
else:
freq = "D"

# Create complete date range
if freq:
full_range = pd.date_range(
start=y.index.min(), end=y.index.max(), freq=freq
)

n_gaps = len(full_range) - len(y)

y = y.reindex(full_range)
if X is not None:
X = X.reindex(full_range)

changes_made["gaps_filled"] = n_gaps
changes_made["frequency_set"] = True
changes_made["frequency"] = freq

# 4. Fill missing values
if fill_missing and y.isna().any():
Expand All @@ -837,8 +845,13 @@ def format_data_handle(
X = X.ffill().bfill()
changes_made["missing_filled"] = n_missing

# 5. Set frequency explicitly on index
if hasattr(y.index, "freq") and changes_made.get("frequency"):
# 5. Set frequency explicitly on index (only for datetime-like indexes)
if (
hasattr(y.index, "freq")
and not isinstance(y.index, pd.RangeIndex)
and changes_made.get("frequency")
and changes_made["frequency"] != "Integer"
):
y.index.freq = changes_made["frequency"]
if X is not None:
X.index.freq = changes_made["frequency"]
Expand All @@ -853,7 +866,13 @@ def format_data_handle(
"metadata": {
**data_info["metadata"],
"formatted": True,
"frequency": str(y.index.freq) if y.index.freq else changes_made.get("frequency"),
"frequency": (
str(y.index.freq)
if hasattr(y.index, "freq")
and not isinstance(y.index, pd.RangeIndex)
and y.index.freq
else changes_made.get("frequency")
),
"rows": len(y),
"start_date": str(y.index.min()),
"end_date": str(y.index.max()),
Expand Down
67 changes: 67 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,73 @@ def fake_save_model(**kwargs):
assert calls["serialization_format"] == "pickle"


class TestRangeIndexFormatting:
    """Regression tests for formatting RangeIndex (integer-indexed) series (issue #390)."""

    @staticmethod
    def _executor_with_series(handle, values, name):
        """Build an Executor whose data-handle store holds one series.

        The series is created without an explicit index, so pandas gives it
        its default integer index. The handle entry mirrors the dict layout
        these tests inject directly into ``_data_handles``.
        """
        import pandas as pd

        from sktime_mcp.runtime.executor import Executor

        executor = Executor()
        executor._data_handles[handle] = {
            "y": pd.Series(values, name=name),
            "X": None,
            "metadata": {"columns": [name]},
            "validation": {},
            "config": {},
        }
        return executor

    def test_format_data_handle_rangeindex_no_crash(self):
        """Formatting integer-indexed data with frequency inference enabled succeeds."""
        # Store the series by hand, as if load_data_source had created it.
        executor = self._executor_with_series(
            "data_test_int", [1, 2, 3, 4, 5], "target"
        )

        result = executor.format_data_handle("data_test_int", auto_infer_freq=True)

        assert result["success"], f"Expected success but got: {result}"
        reported_changes = result["changes_made"]
        assert reported_changes["frequency"] == "Integer"
        assert reported_changes["frequency_set"] is True

    def test_format_data_handle_rangeindex_metadata(self):
        """Returned metadata reports 'Integer' as the frequency for RangeIndex data."""
        executor = self._executor_with_series("data_test_meta", [10, 20, 30], "value")

        result = executor.format_data_handle("data_test_meta")

        assert result["success"]
        assert result["metadata"]["frequency"] == "Integer"

    def test_load_data_source_rangeindex_end_to_end(self):
        """Loading inline data that carries no datetime index succeeds."""
        from sktime_mcp.runtime.executor import Executor

        source_config = {
            "type": "pandas",
            "data": {"y": [1, 2, 3, 4]},
        }

        result = Executor().load_data_source(source_config)

        assert result["success"], f"Expected success but got: {result}"


class TestSearchEstimatorsLimit:
"""Tests for the limit parameter validation in search_estimators_tool."""

Expand Down