sktime · Shashankss1205 · Jun 1, 2026 · Jun 4, 2026 · Jun 11, 2026
diff --git a/README.md b/README.md
@@ -584,38 +584,76 @@ Load custom files, SQL database queries, URLs, or inline JSON into the server as
   }
   ```
 
-#### 18. `save_data`
-Persist an in-memory `data_handle` (such as predictions or transformed series) back to disk.
+#### 18. `inspect_data`
+Inspect a loaded data handle and return rich metadata (mtype, scitype, shape, columns, dtypes, frequency, cutoff, missing counts, head preview, summary statistics).
 * **Arguments:**
-  * `data_handle` (`str`, required): In-memory data handle to save.
-  * `path` (`str`, required): Local filesystem path where the file will be saved.
-  * `format` (`str`, optional): Output format (inferred from path extension if omitted: `"csv"`, `"parquet"`, `"json"`).
+  * `data_handle` (`str`, required): Handle to inspect.
 * **Returns:**
   ```json
   {
     "success": true,
-    "saved_path": "/home/user/forecasts.csv",
-    "message": "Data saved successfully."
+    "data_handle": "data_abc123",
+    "mtype": "pd.Series",
+    "scitype": "Series",
+    "shape": [60],
+    "cutoff": "2024-12-01",
+    "n_missing": 0,
+    "head": {},
+    "summary_stats": {}
+  }
+  ```
+
+#### 19. `split_data`
+Split a time series handle into temporal train/test sets, returning two new handles.
+* **Arguments:**
+  * `data_handle` (`str`, required): Handle to split.
+  * `test_size` (`float`, optional): Fraction of observations to hold out for the test set, in the exclusive range (0, 1). Provide exactly one of `test_size` or `fh`.
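+  * `fh` (`int | list[int]`, optional): Forecast horizon for the test window — an integer holds out that many final steps; a list of relative indices holds out `max(fh)` final steps (e.g. `[1, 5, 10]` reserves 10 steps). Mutually exclusive with `test_size`.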
+* **Returns:**
+  ```json
+  {
+    "success": true,
+    "train_handle": "data_train123",
+    "test_handle": "data_test456",
+    "cutoff": "2024-06-01",
+    "train_size": 48,
+    "n_test": 12
   }
   ```
 
-#### 19. `format_time_series`
-Clean, fill missing values, deduplicate, and standardize loaded time series data.
+#### 20. `transform_data`
+Transform a data handle: either format it (auto-fix frequency, duplicate timestamps, and missing values) or convert it to a different sktime mtype.
 * **Arguments:**
-  * `data_handle` (`str`, required): Target data handle.
-  * `auto_infer_freq` (`bool`, optional, default=`true`): Re-infer time delta frequency.
-  * `fill_missing` (`bool`, optional, default=`true`): Interpolate missing values using forward/backward fills.
-  * `remove_duplicates` (`bool`, optional, default=`true`): Deduplicate timestamps.
+  * `data_handle` (`str`, required): Handle to transform.
+  * `action` (`str`, optional, default=`"format"`): `"format"` or `"convert"`.
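+  * `auto_infer_freq`, `fill_missing`, `remove_duplicates` (`bool`, optional, default=`true`): Used only when `action="format"` — respectively infer and set the index frequency, forward/backward-fill missing values, and drop duplicate timestamps (keeping the first).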
+  * `to_mtype` (`str`, optional): Required when `action="convert"` (e.g. `"pd.DataFrame"`).
 * **Returns:**
   ```json
   {
     "success": true,
     "data_handle": "data_abc123",
-    "changes_applied": ["inferred frequency: M", "filled 3 missing values"]
+    "changes_applied": ["Inferred and set frequency to 'MS'"]
+  }
+  ```
+
+#### 21. `save_data`
+Persist an in-memory `data_handle` (target series and exogenous features) to disk.
+* **Arguments:**
+  * `data_handle` (`str`, required): In-memory data handle to save.
+  * `path` (`str`, required): Local filesystem path where the file will be saved.
+  * `format` (`str`, optional, default=`"csv"`): Output format — `"csv"`, `"parquet"`, or `"json"`. The format is set by this argument and is not inferred from the `path` extension.
+* **Returns:**
+  ```json
+  {
+    "success": true,
+    "saved_path": "/home/user/forecasts.csv",
+    "format": "csv",
+    "rows": 60
   }
   ```
 
-#### 20. `release_data_handle`
+#### 22. `release_data_handle`
 Free a data handle and its contents from server memory.
 * **Arguments:**
   * `data_handle` (`str`, required): Handle ID to release.
@@ -633,7 +671,7 @@ Free a data handle and its contents from server memory.
 
 These tools manage the serialization of estimator instances and generation of production-ready source code.
 
-#### 21. `save_model`
+#### 23. `save_model`
 Serialize an estimator blueprint or fitted model handle to disk using sktime-MLflow integration.
 * **Arguments:**
   * `estimator_handle` (`str`, required): Estimator or pipeline handle to save.
@@ -649,7 +687,7 @@ Serialize an estimator blueprint or fitted model handle to disk using sktime-MLf
   }
   ```
 
-#### 22. `load_model`
+#### 24. `load_model`
 Reload a serialized blueprint or fitted model back into an active `estimator_handle`.
 * **Arguments:**
   * `path` (`str`, required): Filesystem path to the model directory.
@@ -662,7 +700,7 @@ Reload a serialized blueprint or fitted model back into an active `estimator_han
   }
   ```
 
-#### 23. `export_code`
+#### 25. `export_code`
 Generate standalone, executable Python code to reproduce an estimator's structure and execution.
 * **Arguments:**
   * `handle` (`str`, required): Handle ID of the estimator/pipeline.

diff --git a/src/sktime_mcp/server.py b/src/sktime_mcp/server.py
@@ -51,7 +51,7 @@
     fit_predict_async_tool,
     fit_predict_tool,
 )
-from sktime_mcp.tools.format_tools import format_time_series_tool
+from sktime_mcp.tools.inspect_data import inspect_data_tool
 from sktime_mcp.tools.instantiate import (
     instantiate_estimator_tool,
     instantiate_pipeline_tool,
@@ -69,7 +69,10 @@
     get_available_tags,
     list_estimators_tool,
 )
+from sktime_mcp.tools.save_data import save_data_tool
 from sktime_mcp.tools.save_model import save_model_tool
+from sktime_mcp.tools.split_data import split_data_tool
+from sktime_mcp.tools.transform_data import transform_data_tool
 
 
 # ---------------------------------------------------------------------------
@@ -519,34 +522,152 @@ async def list_tools() -> list[Tool]:
             },
         ),
         Tool(
-            name="format_time_series",
-            description="Automatically format time series data (frequency, duplicates, missing values)",
+            name="inspect_data",
+            description=(
+                "Inspect a loaded data handle and return rich metadata for understanding "
+                "the series before modelling. Returns mtype, scitype, shape, column names, "
+                "dtypes, index level names, inferred frequency, cutoff (last training "
+                "timestamp), total missing-value count, a 5-row head preview, and "
+                "per-column summary statistics. Works on handles from load_data_source, "
+                "split_data, or transform_data. Does not modify the data."
+            ),
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "data_handle": {
+                        "type": "string",
+                        "description": (
+                            "Data handle ID to inspect (from load_data_source, split_data, "
+                            "or transform_data)."
+                        ),
+                    },
+                },
+                "required": ["data_handle"],
+            },
+        ),
+        Tool(
+            name="split_data",
+            description=(
+                "Split a time series data handle into temporal train and test sets, "
+                "registering both halves as new data handles. Provide exactly one of "
+                "test_size (fraction in (0, 1)) or fh (forecast horizon). fh may be an "
+                "integer (hold out that many final steps) or a list of relative horizon "
+                "indices (hold out max(fh) final steps). Returns train_handle, "
+                "test_handle, cutoff timestamp, train_size, and n_test."
+            ),
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "data_handle": {
+                        "type": "string",
+                        "description": "Data handle ID to split (from load_data_source or transform_data).",
+                    },
+                    "test_size": {
+                        "type": "number",
+                        "description": (
+                            "Fraction of observations to hold out for the test set, "
+                            "exclusive range (0.0, 1.0). Mutually exclusive with fh."
+                        ),
+                    },
+                    "fh": {
+                        "description": (
+                            "Forecast horizon for the test window. Integer: hold out that "
+                            "many final time steps. List of ints: hold out max(fh) final "
+                            "steps (e.g. fh=[1,5,10] reserves 10 steps). "
+                            "Mutually exclusive with test_size."
+                        ),
+                    },
+                },
+                "required": ["data_handle"],
+            },
+        ),
+        Tool(
+            name="transform_data",
+            description=(
+                "Transform a loaded data handle and return a new handle. "
+                "action='format' (default): auto-fix common time series issues — "
+                "infer/set frequency, remove duplicate timestamps, fill index gaps, "
+                "and forward/backward-fill missing values; returns changes_applied. "
+                "action='convert': convert y to a different sktime mtype via convert_to() "
+                "(requires to_mtype, e.g. 'pd.DataFrame', 'pd.Series', 'np.ndarray'). "
+                "Replaces the legacy format_time_series tool."
+            ),
             inputSchema={
                 "type": "object",
                 "properties": {
                     "data_handle": {
                         "type": "string",
-                        "description": "Handle from load_data_source",
+                        "description": "Data handle ID to transform.",
+                    },
+                    "action": {
+                        "type": "string",
+                        "description": (
+                            "Transformation to apply: 'format' (default) or 'convert'."
+                        ),
+                        "enum": ["format", "convert"],
+                        "default": "format",
                     },
                     "auto_infer_freq": {
                         "type": "boolean",
-                        "description": "Automatically infer and set frequency (default: True)",
+                        "description": "(format only) Infer and set DatetimeIndex frequency (default: true).",
                         "default": True,
                     },
                     "fill_missing": {
                         "type": "boolean",
-                        "description": "Fill missing values with forward/backward fill (default: True)",
+                        "description": "(format only) Forward/backward fill missing values (default: true).",
                         "default": True,
                     },
                     "remove_duplicates": {
                         "type": "boolean",
-                        "description": "Remove duplicate timestamps (default: True)",
+                        "description": "(format only) Drop duplicate timestamps, keeping first (default: true).",
                         "default": True,
                     },
+                    "to_mtype": {
+                        "type": "string",
+                        "description": (
+                            "(convert only, required) Target sktime mtype string, "
+                            "e.g. 'pd.DataFrame', 'pd.Series', 'np.ndarray'."
+                        ),
+                    },
                 },
                 "required": ["data_handle"],
             },
         ),
+        Tool(
+            name="save_data",
+            description=(
+                "Persist the target series (y) and any exogenous features (X) behind a "
+                "data handle to a local file. Combines y and X into one table. Creates "
+                "parent directories as needed. Supported formats: csv (default, writes "
+                "index as first column), parquet, json (records orient, ISO dates)."
+            ),
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "data_handle": {
+                        "type": "string",
+                        "description": (
+                            "Data handle ID to export (from load_data_source, split_data, "
+                            "or transform_data)."
+                        ),
+                    },
+                    "path": {
+                        "type": "string",
+                        "description": (
+                            "Destination file path. Format is controlled by the format "
+                            "argument, not the file extension."
+                        ),
+                    },
+                    "format": {
+                        "type": "string",
+                        "description": "Output format: csv (default), parquet, or json.",
+                        "enum": ["csv", "parquet", "json"],
+                        "default": "csv",
+                    },
+                },
+                "required": ["data_handle", "path"],
+            },
+        ),
         # -- Export / Persistence --------------------------------------------
         Tool(
             name="export_code",
@@ -840,12 +961,31 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
         elif name == "release_data_handle":
             result = release_data_handle_tool(arguments["data_handle"])
 
-        elif name == "format_time_series":
-            result = format_time_series_tool(
-                arguments["data_handle"],
-                arguments.get("auto_infer_freq", True),
-                arguments.get("fill_missing", True),
-                arguments.get("remove_duplicates", True),
+        elif name == "inspect_data":
+            result = inspect_data_tool(arguments["data_handle"])
+
+        elif name == "split_data":
+            result = split_data_tool(
+                data_handle=arguments["data_handle"],
+                test_size=arguments.get("test_size"),
+                fh=arguments.get("fh"),
+            )
+
+        elif name == "transform_data":
+            result = transform_data_tool(
+                data_handle=arguments["data_handle"],
+                action=arguments.get("action", "format"),
+                auto_infer_freq=arguments.get("auto_infer_freq", True),
+                fill_missing=arguments.get("fill_missing", True),
+                remove_duplicates=arguments.get("remove_duplicates", True),
+                to_mtype=arguments.get("to_mtype"),
+            )
+
+        elif name == "save_data":
+            result = save_data_tool(
+                data_handle=arguments["data_handle"],
+                path=arguments["path"],
+                format=arguments.get("format", "csv"),
             )
 
         elif name == "auto_format_on_load":

diff --git a/src/sktime_mcp/tools/__init__.py b/src/sktime_mcp/tools/__init__.py
@@ -13,6 +13,7 @@
     fit_predict_tool,
 )
 from sktime_mcp.tools.format_tools import format_time_series_tool
+from sktime_mcp.tools.inspect_data import inspect_data_tool
 from sktime_mcp.tools.instantiate import (
     instantiate_estimator_tool,
     instantiate_pipeline_tool,
@@ -30,7 +31,10 @@
     get_available_tags,
     list_estimators_tool,
 )
+from sktime_mcp.tools.save_data import save_data_tool
 from sktime_mcp.tools.save_model import save_model_tool
+from sktime_mcp.tools.split_data import split_data_tool
+from sktime_mcp.tools.transform_data import transform_data_tool
 
 __all__ = [
     "list_estimators_tool",
@@ -49,6 +53,10 @@
     "release_data_handle_tool",
     "list_available_data_tool",
     "format_time_series_tool",
+    "inspect_data_tool",
+    "split_data_tool",
+    "transform_data_tool",
+    "save_data_tool",
     "export_code_tool",
     "save_model_tool",
     "check_job_status_tool",