From eef103cfd59f967f0e0649748df38fb7f5c24d2a Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Fri, 7 Feb 2025 18:40:20 +0100
Subject: [PATCH 01/20] remove select_features and make
 RemoveEmptyFeaturesEncoderStep passthrough

---
 src/tabpfn/misc/debug_versions.py |  1 +
 src/tabpfn/model/encoders.py      | 49 +++++--------------------------
 2 files changed, 9 insertions(+), 41 deletions(-)

diff --git a/src/tabpfn/misc/debug_versions.py b/src/tabpfn/misc/debug_versions.py
index 320a239b9..0f93230f9 100644
--- a/src/tabpfn/misc/debug_versions.py
+++ b/src/tabpfn/misc/debug_versions.py
@@ -1,4 +1,5 @@
 # ruff: noqa
+# mypy: ignore-errors
 """This file is taken from PyTorch and modified to work with TabPFN,
 also inspired from sklearn's show_versions function.
 This collects useful debug information that can be used to report issues.
diff --git a/src/tabpfn/model/encoders.py b/src/tabpfn/model/encoders.py
index 3441bdbcb..6a3de8138 100644
--- a/src/tabpfn/model/encoders.py
+++ b/src/tabpfn/model/encoders.py
@@ -105,44 +105,6 @@ def normalize_data(
     return data
 
 
-def select_features(x: torch.Tensor, sel: torch.Tensor) -> torch.Tensor:
-    """Select features from the input tensor based on the selection mask,
-    and arrange them contiguously in the last dimension.
-    If batch size is bigger than 1, we pad the features with zeros to make the number of features fixed.
-
-    Args:
-        x: The input tensor of shape (sequence_length, batch_size, total_features)
-        sel: The boolean selection mask indicating which features to keep of shape (batch_size, total_features)
-
-    Returns:
-        The tensor with selected features.
-        The shape is (sequence_length, batch_size, number_of_selected_features) if batch_size is 1.
-        The shape is (sequence_length, batch_size, total_features) if batch_size is greater than 1.
-    """
-    B, total_features = sel.shape
-    sequence_length = x.shape[0]
-
-    # If B == 1, we don't need to append zeros, as the number of features don't need to be fixed.
-    if B == 1:
-        return x[:, :, sel[0]]
-
-    new_x = torch.zeros(
-        (sequence_length, B, total_features),
-        device=x.device,
-        dtype=x.dtype,
-    )
-
-    # For each batch, compute the number of selected features.
-    sel_counts = sel.sum(dim=-1)  # shape: (B,)
-
-    for b in range(B):
-        s = int(sel_counts[b])
-        if s > 0:
-            new_x[:, b, :s] = x[:, b, sel[b]]
-
-    return new_x
-
-
 def remove_outliers(
     X: torch.Tensor,
     n_sigma: float = 4,
@@ -507,7 +469,11 @@ def _transform(
 
 
 class RemoveEmptyFeaturesEncoderStep(SeqEncStep):
-    """Encoder step to remove empty (constant) features."""
+    """Encoder step to remove empty (constant) features.
+
+    This step was changed to NOT do anything: the removal of empty features is
+    now done elsewhere, but the saved model still expects this encoder step.
+    TODO: remove.
+    """
 
     def __init__(self, **kwargs: Any):
         """Initialize the RemoveEmptyFeaturesEncoderStep.
@@ -525,7 +491,7 @@ def _fit(self, x: torch.Tensor, **kwargs: Any) -> None:
             x: The input tensor.
             **kwargs: Additional keyword arguments (unused).
         """
-        self.sel = (x[1:] == x[0]).sum(0) != (x.shape[0] - 1)
+        # self.sel = (x[1:] == x[0]).sum(0) != (x.shape[0] - 1)
 
     def _transform(self, x: torch.Tensor, **kwargs: Any) -> tuple[torch.Tensor]:
         """Remove empty features from the input tensor.
 
         Args:
             x: The input tensor.
             **kwargs: Additional keyword arguments (unused).
 
         Returns:
             A tuple containing the transformed tensor with empty features removed.
         """
-        return (select_features(x, self.sel),)
+        # return (select_features(x, self.sel),)
+        return (x,)
 
 
 class RemoveDuplicateFeaturesEncoderStep(SeqEncStep):

From 91034b2f46ad87f087784ff9772f38ee5c5a3bfc Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Fri, 7 Feb 2025 19:13:53 +0100
Subject: [PATCH 02/20] remove cat_ind argument from forward (TODO check it's
 not used)

---
 src/tabpfn/inference.py         |  7 ++-----
 src/tabpfn/model/transformer.py | 31 -------------------------------
 2 files changed, 2 insertions(+), 36 deletions(-)

diff --git a/src/tabpfn/inference.py b/src/tabpfn/inference.py
index c88461f1e..4386fb1c2 100644
--- a/src/tabpfn/inference.py
+++ b/src/tabpfn/inference.py
@@ -161,7 +161,7 @@ def iter_outputs(
         if self.force_inference_dtype is not None:
             self.model = self.model.type(self.force_inference_dtype)
 
-        for config, preprocessor, X_train, y_train, cat_ix in itr:
+        for config, preprocessor, X_train, y_train, _cat_ix in itr:
             X_train = torch.as_tensor(X_train, dtype=torch.float32, device=device)  # noqa: PLW2901
             X_test = preprocessor.transform(X).X
@@ -193,7 +193,6 @@ def iter_outputs(
             output = self.model(
                 *(style, X_full, y_train),
                 only_return_standard_out=only_return_standard_out,
-                categorical_inds=cat_ix,
                 single_eval_pos=len(y_train),
             )
@@ -291,7 +290,7 @@ def iter_outputs(
             self.model = self.model.to(device)
         if self.force_inference_dtype is not None:
             self.model = self.model.type(self.force_inference_dtype)
-        for preprocessor, X_train, y_train, config, cat_ix in zip(
+        for preprocessor, X_train, y_train, config, _cat_ix in zip(
             self.preprocessors,
             self.X_trains,
             self.y_trains,
@@ -332,7 +331,6 @@ def iter_outputs(
             output = self.model(
                 *(style, X_full, y_train),
                 only_return_standard_out=only_return_standard_out,
-                categorical_inds=cat_ix,
                 single_eval_pos=len(y_train),
             )
@@ -497,7 +495,6 @@ def iter_outputs(
             output = model(
                 *(style, X_test, None),
                 only_return_standard_out=only_return_standard_out,
-                categorical_inds=cat_ix,
                 single_eval_pos=None,
             )
diff --git a/src/tabpfn/model/transformer.py b/src/tabpfn/model/transformer.py
index 181feb3a4..6b30db91d 100644
--- a/src/tabpfn/model/transformer.py
+++ b/src/tabpfn/model/transformer.py
@@ -373,8 +373,6 @@ def forward(self, *args: Any, **kwargs: Any) -> dict[str, torch.Tensor]:  # noqa
             Whether to only return the standard output.
         data_dags: Any
             The data DAGs for each example.
-        categorical_inds: list[int]
-            The indices of categorical features.
         freeze_kv: bool
             Whether to freeze the key and value weights.
 
@@ -388,7 +386,6 @@ def forward(self, *args: Any, **kwargs: Any) -> dict[str, torch.Tensor]:  # noqa
             "only_return_standard_out",
             "style",
             "data_dags",
-            "categorical_inds",
             "freeze_kv",
             "train_x",
             "train_y",
@@ -428,7 +425,6 @@ def _forward(  # noqa: PLR0912, C901
         only_return_standard_out: bool = True,
         style: torch.Tensor | None = None,
         data_dags: list[Any] | None = None,
-        categorical_inds: list[int] | None = None,
         half_layers: bool = False,
     ) -> Any | dict[str, torch.Tensor]:
         """The core forward pass of the model.
@@ -441,7 +437,6 @@ def _forward(  # noqa: PLR0912, C901
             only_return_standard_out: Whether to only return the standard output.
             style: The style vector.
             data_dags: The data DAGs for each example in the batch.
-            categorical_inds: The indices of categorical features.
             half_layers: Whether to use half the layers.
 
         Returns:
@@ -507,24 +502,6 @@ def _forward(  # noqa: PLR0912, C901
             n=self.features_per_group,
         )  # s b f -> b s #groups #features_per_group
 
-        # We have to re-work categoricals based on the subgroup they fall into.
-        categorical_inds_to_use: list[list[int]] | None = None
-        if categorical_inds is not None:
-            new_categorical_inds = []
-            n_subgroups = x["main"].shape[2]
-
-            for subgroup in range(n_subgroups):
-                subgroup_lower = subgroup * self.features_per_group
-                subgroup_upper = (subgroup + 1) * self.features_per_group
-                subgroup_indices = [
-                    i - subgroup_lower
-                    for i in categorical_inds
-                    if subgroup_lower <= i < subgroup_upper
-                ]
-                new_categorical_inds.append(subgroup_indices)
-
-            categorical_inds_to_use = new_categorical_inds
-
         for k in y:
             if y[k].ndim == 1:
                 y[k] = y[k].unsqueeze(-1)
@@ -576,13 +553,6 @@ def _forward(  # noqa: PLR0912, C901
                 " to the ys that are not fully provided (test set missing)",
             )
 
-        extra_encoders_args = {}
-        if categorical_inds_to_use is not None and isinstance(
-            self.encoder,
-            SequentialEncoder,
-        ):
-            extra_encoders_args["categorical_inds"] = categorical_inds_to_use
-
         for k in x:
             x[k] = einops.rearrange(x[k], "b s f n -> s (b f) n")
@@ -591,7 +561,6 @@ def _forward(  # noqa: PLR0912, C901
                 x,
                 single_eval_pos=single_eval_pos_,
                 cache_trainset_representation=self.cache_trainset_representation,
-                **extra_encoders_args,
             ),
             "s (b f) e -> b s f e",
             b=embedded_y.shape[0],

From d2e89e7761fab60dadb4b577ce28cd355bb6a361 Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Tue, 11 Feb 2025 14:40:30 +0100
Subject: [PATCH 03/20] allow to use onnx model inside sklearn interface

---
 src/tabpfn/base.py              |  36 ++++
 src/tabpfn/classifier.py        |  22 ++-
 src/tabpfn/inference.py         |   7 +-
 src/tabpfn/misc/__init__.py     |   0
 src/tabpfn/misc/onnx_wrapper.py | 297 ++++++++++++++++++++++++++++++++
 src/tabpfn/model/memory.py      |   5 +
 src/tabpfn/regressor.py         |  33 +++-
 7 files changed, 387 insertions(+), 13 deletions(-)
 create mode 100644 src/tabpfn/misc/__init__.py
 create mode 100644 src/tabpfn/misc/onnx_wrapper.py

diff --git a/src/tabpfn/base.py b/src/tabpfn/base.py
index a8030f752..c57fca5cf 100644
--- a/src/tabpfn/base.py
+++ b/src/tabpfn/base.py
@@ -33,6 +33,7 @@
 if TYPE_CHECKING:
     import numpy as np
 
+    from tabpfn.misc.onnx_wrapper import ONNXModelWrapper
     from tabpfn.model.bar_distribution import FullSupportBarDistribution
     from tabpfn.model.config import InferenceConfig
     from tabpfn.model.transformer import PerFeatureTransformer
@@ -111,6 +112,36 @@ def initialize_tabpfn_model(
     return model, config_, bar_distribution
 
 
+def load_onnx_model(
+    model_path: str | Path,
+) -> ONNXModelWrapper:
+    """Load a TabPFN model in ONNX format.
+
+    Args:
+        model_path: Path to the ONNX model file.
+
+    Returns:
+        The loaded ONNX model wrapped in a PyTorch-compatible interface.
+
+    Raises:
+        ImportError: If onnxruntime is not installed.
+        FileNotFoundError: If the model file doesn't exist.
+    """
+    try:
+        from tabpfn.misc.onnx_wrapper import ONNXModelWrapper
+    except ImportError as err:
+        raise ImportError(
+            "onnxruntime is required to load ONNX models. "
+            "Install it with: pip install onnxruntime",
+        ) from err
+
+    model_path = Path(model_path)
+    if not model_path.exists():
+        raise FileNotFoundError(f"ONNX model not found at: {model_path}")
+
+    return ONNXModelWrapper(str(model_path))
+
+
 def determine_precision(
     inference_precision: torch.dtype | Literal["autocast", "auto"],
     device_: torch.device,
@@ -168,6 +199,7 @@ def create_inference_engine(  # noqa: PLR0913
     forced_inference_dtype_: torch.dtype | None,
     memory_saving_mode: bool | Literal["auto"] | float | int,
     use_autocast_: bool,
+    use_onnx: bool = False,
 ) -> InferenceEngine:
     """Creates the appropriate TabPFN inference engine based on `fit_mode`.
@@ -190,6 +222,7 @@ def create_inference_engine(  # noqa: PLR0913
         forced_inference_dtype_: If not None, the forced dtype for inference.
         memory_saving_mode: GPU/CPU memory saving settings.
         use_autocast_: Whether we use torch.autocast for inference.
+        use_onnx: Whether to use ONNX runtime for model inference.
     """
     engine: (
         InferenceEngineOnDemand
@@ -208,6 +241,7 @@ def create_inference_engine(  # noqa: PLR0913
             dtype_byte_size=byte_size,
             force_inference_dtype=forced_inference_dtype_,
             save_peak_mem=memory_saving_mode,
+            use_onnx=use_onnx,
         )
     elif fit_mode == "fit_preprocessors":
         engine = InferenceEngineCachePreprocessing.prepare(
@@ -221,6 +255,7 @@ def create_inference_engine(  # noqa: PLR0913
             dtype_byte_size=byte_size,
             force_inference_dtype=forced_inference_dtype_,
             save_peak_mem=memory_saving_mode,
+            use_onnx=use_onnx,
         )
     elif fit_mode == "fit_with_cache":
         engine = InferenceEngineCacheKV.prepare(
@@ -236,6 +271,7 @@ def create_inference_engine(  # noqa: PLR0913
             force_inference_dtype=forced_inference_dtype_,
             save_peak_mem=memory_saving_mode,
             autocast=use_autocast_,
+            use_onnx=use_onnx,
         )
     else:
         raise ValueError(f"Invalid fit_mode: {fit_mode}")
diff --git a/src/tabpfn/classifier.py b/src/tabpfn/classifier.py
index c52b805fb..c17d130cd 100644
--- a/src/tabpfn/classifier.py
+++ b/src/tabpfn/classifier.py
@@ -32,6 +32,7 @@
     create_inference_engine,
     determine_precision,
     initialize_tabpfn_model,
+    load_onnx_model,
 )
 from tabpfn.config import ModelInterfaceConfig
 from tabpfn.constants import (
@@ -149,6 +150,7 @@ def __init__(  # noqa: PLR0913
         random_state: int | np.random.RandomState | np.random.Generator | None = 0,
         n_jobs: int = -1,
         inference_config: dict | ModelInterfaceConfig | None = None,
+        use_onnx: bool = False,
     ) -> None:
         """A TabPFN interface for classification.
 
@@ -338,6 +340,9 @@ def __init__(  # noqa: PLR0913
                 - If `dict`, the key-value pairs are used to update the default
                   `ModelInterfaceConfig`. Raises an error if an unknown key is passed.
                 - If `ModelInterfaceConfig`, the object is used as the configuration.
+
+            use_onnx:
+                Whether to use an ONNX compiled model.
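+
+                A minimal usage sketch (illustrative only; it assumes an ONNX
+                model has already been exported for the classifier):
+
+                    >>> clf = TabPFNClassifier(use_onnx=True)
+                    >>> clf.fit(X_train, y_train)
+                    >>> proba = clf.predict_proba(X_test)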
         """
         super().__init__()
         self.n_estimators = n_estimators
@@ -360,6 +365,7 @@ def __init__(  # noqa: PLR0913
         self.random_state = random_state
         self.n_jobs = n_jobs
         self.inference_config = inference_config
+        self.use_onnx = use_onnx
 
     # TODO: We can remove this from scikit-learn lower bound of 1.6
     def _more_tags(self) -> dict[str, Any]:
@@ -384,12 +390,15 @@ def fit(self, X: XType, y: YType) -> Self:
         static_seed, rng = infer_random_state(self.random_state)
 
         # Load the model and config
-        self.model_, self.config_, _ = initialize_tabpfn_model(
-            model_path=self.model_path,
-            which="classifier",
-            fit_mode=self.fit_mode,
-            static_seed=static_seed,
-        )
+        if self.use_onnx:
+            self.model_ = load_onnx_model("model_classifier.onnx")
+        else:
+            self.model_, self.config_, _ = initialize_tabpfn_model(
+                model_path=self.model_path,
+                which="classifier",
+                fit_mode=self.fit_mode,
+                static_seed=static_seed,
+            )
 
         # Determine device and precision
         self.device_ = infer_device_and_type(self.device)
@@ -501,6 +510,7 @@ def fit(self, X: XType, y: YType) -> Self:
             forced_inference_dtype_=self.forced_inference_dtype_,
             memory_saving_mode=self.memory_saving_mode,
             use_autocast_=self.use_autocast_,
+            use_onnx=self.use_onnx,
         )
 
         return self
diff --git a/src/tabpfn/inference.py b/src/tabpfn/inference.py
index 4386fb1c2..f7614a96b 100644
--- a/src/tabpfn/inference.py
+++ b/src/tabpfn/inference.py
@@ -223,9 +223,10 @@ class InferenceEngineCachePreprocessing(InferenceEngine):
     preprocessors: Sequence[SequentialFeatureTransformer]
     model: PerFeatureTransformer
     force_inference_dtype: torch.dtype | None
+    use_onnx: bool = False
 
     @classmethod
-    def prepare(
+    def prepare(  # noqa: PLR0913
         cls,
         X_train: np.ndarray,
         y_train: np.ndarray,
@@ -238,6 +239,7 @@ def prepare(
         dtype_byte_size: int,
         force_inference_dtype: torch.dtype | None,
         save_peak_mem: bool | Literal["auto"] | float | int,
+        use_onnx: bool = False,
     ) -> InferenceEngineCachePreprocessing:
         """Prepare the inference engine.
 
@@ -252,6 +254,7 @@ def prepare(
             dtype_byte_size: The byte size of the dtype.
             force_inference_dtype: The dtype to force inference to.
             save_peak_mem: Whether to save peak memory usage.
+            use_onnx: Whether to use ONNX for inference.
 
         Returns:
             The prepared inference engine.
@@ -276,6 +279,7 @@ def prepare(
             dtype_byte_size=dtype_byte_size,
             force_inference_dtype=force_inference_dtype,
             save_peak_mem=save_peak_mem,
+            use_onnx=use_onnx,
         )
 
     @override
@@ -320,6 +324,7 @@ def iter_outputs(
             device=device,
             dtype_byte_size=self.dtype_byte_size,
             safety_factor=1.2,  # TODO(Arjun): make customizable
+            use_onnx=self.use_onnx,
         )
 
         style = None
diff --git a/src/tabpfn/misc/__init__.py b/src/tabpfn/misc/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/onnx_wrapper.py
new file mode 100644
index 000000000..addd2d131
--- /dev/null
+++ b/src/tabpfn/misc/onnx_wrapper.py
@@ -0,0 +1,297 @@
+"""Module providing wrappers to use ONNX models with a PyTorch-like interface.
+
+This module defines wrappers for ONNX models as well as helper functions to export
+and validate ONNX models derived from TabPFN models.
+"""
+
+from __future__ import annotations
+
+import argparse
+
+import numpy as np
+import onnx
+import onnxruntime as ort
+import sklearn.datasets
+import torch
+from torch import nn
+
+from tabpfn import TabPFNClassifier, TabPFNRegressor
+
+
+class ONNXModelWrapper:
+    """Wrap ONNX model to match the PyTorch model interface."""
+
+    def __init__(self, model_path: str):
+        """Initialize the ONNX model wrapper.
+
+        Args:
+            model_path: Path to the ONNX model file.
+        """
+        self.session = ort.InferenceSession(
+            model_path,
+            providers=["CPUExecutionProvider"],  # TODO: Add GPU support
+        )
+
+    def to(
+        self,
+        device: torch.device,  # noqa: ARG002
+    ) -> ONNXModelWrapper:
+        """Moves the model to the specified device.
+
+        This is a no-op for the ONNX model wrapper. GPU support is not implemented.
+
+        Args:
+            device: The target device (unused).
+
+        Returns:
+            self
+        """
+        # TODO: Add GPU support by changing provider
+        return self
+
+    def type(
+        self,
+        dtype: torch.dtype,  # noqa: ARG002
+    ) -> ONNXModelWrapper:
+        """Changes the model data type.
+
+        The ONNX runtime handles dtype conversion internally; this method does nothing.
+
+        Args:
+            dtype: The target data type (unused).
+
+        Returns:
+            self
+        """
+        return self
+
+    def cpu(self) -> ONNXModelWrapper:
+        """Moves the model to CPU.
+
+        This is a no-op for the ONNX model wrapper.
+
+        Returns:
+            self
+        """
+        return self
+
+    def eval(self) -> ONNXModelWrapper:
+        """Sets the model to evaluation mode.
+
+        For the ONNX model wrapper, this does nothing and simply returns self.
+
+        Returns:
+            self
+        """
+        return self
+
+    def __call__(
+        self,
+        style: torch.Tensor | None,  # noqa: ARG002
+        X: torch.Tensor,
+        y: torch.Tensor | None,
+        *,
+        single_eval_pos: int | None = None,
+        only_return_standard_out: bool = False,  # noqa: ARG002
+    ) -> torch.Tensor:
+        """Run inference using the ONNX model.
+
+        Args:
+            style: Unused tensor placeholder.
+            X: Input tensor.
+            y: Target tensor.
+            single_eval_pos: Position to evaluate at. Defaults to -1 if not provided.
+            only_return_standard_out: Flag to return only the standard output.
+
+        Returns:
+            A torch tensor containing the model output.
+
+        Note that only_return_standard_out is not used in the ONNX runtime.
+        """
+        # Convert inputs to numpy
+        X_np = X.cpu().numpy() if isinstance(X, torch.Tensor) else X
+        y_np = y.cpu().numpy() if isinstance(y, torch.Tensor) and y is not None else y
+
+        # Prepare ONNX inputs
+        onnx_inputs = {
+            "X": X_np,
+            "y": y_np if y_np is not None else np.zeros((0,), dtype=np.float32),
+            "single_eval_pos": np.array(
+                single_eval_pos if single_eval_pos is not None else -1,
+                dtype=np.int64,
+            ),
+        }
+
+        # Run inference
+        outputs = self.session.run(None, onnx_inputs)
+
+        # Convert back to a torch tensor
+        return torch.from_numpy(outputs[0])
+
+
+class ModelWrapper(nn.Module):
+    """A wrapper class to embed an ONNX model within the PyTorch nn.Module interface."""
+
+    def __init__(self, original_model):
+        """Initialize the ModelWrapper.
+
+        Args:
+            original_model: The original model object to wrap.
+        """
+        super().__init__()
+        self.model = original_model
+
+    def forward(self, X, y, single_eval_pos, only_return_standard_out):
+        """Perform a forward pass.
+
+        Args:
+            X: Input tensor.
+            y: Target tensor.
+            single_eval_pos: Position for evaluation.
+            only_return_standard_out: Whether to return only standard outputs.
+
+        Returns:
+            The output tensor from the model.
+        """
+        return self.model(
+            None,
+            X,
+            y,
+            single_eval_pos=single_eval_pos,
+            only_return_standard_out=only_return_standard_out,
+        )
+
+
+def export_model(
+    output_path: str,
+    model_type: str = "classifier",
+) -> None:
+    """Export the TabPFN model to the ONNX format.
+
+    This function creates a sample model based on the specified
+    model_type ('classifier' or 'regressor'), trains it on a small dataset,
+    and exports the model to ONNX format with dynamic axes.
+
+    Args:
+        output_path: The file path where the ONNX model should be saved.
+        model_type: The type of model to export ('classifier' or 'regressor').
+    """
+    # Load sample dataset for initialization
+    if model_type == "classifier":
+        X, y = sklearn.datasets.load_iris(return_X_y=True)
+    else:  # regressor
+        X, y = sklearn.datasets.load_diabetes(return_X_y=True)
+
+    with torch.no_grad():
+        # Initialize and fit the model
+        if model_type == "classifier":
+            model = TabPFNClassifier(n_estimators=1, device="cpu", random_state=42)
+        else:
+            model = TabPFNRegressor(n_estimators=1, device="cpu", random_state=42)
+
+        model.fit(X, y)
+        model.predict(X)
+
+        # Create sample input tensors
+        X = torch.randn(
+            (X.shape[0] * 2, 1, X.shape[1] + 1),
+            generator=torch.Generator().manual_seed(42),
+        )
+        # make the first feature categorical
+        X[:, 0, 0] = torch.randint(0, 10, (X.shape[0],))
+
+        if model_type == "classifier":
+            y = (
+                torch.rand(y.shape, generator=torch.Generator().manual_seed(42))
+                .round()
+                .to(torch.float32)
+            )
+        else:
+            y = torch.rand(y.shape, generator=torch.Generator().manual_seed(42))
+
+        single_eval_pos = torch.tensor(
+            y.shape[0],
+            dtype=torch.int64,
+        )  # Convert to tensor
+
+        only_return_standard_out = torch.tensor(
+            data=True,
+            dtype=torch.bool,
+        )  # Convert to tensor
+
+        # Define dynamic axes for variable input sizes
+        dynamic_axes = {
+            "X": {0: "num_datapoints", 1: "batch_size", 2: "num_features"},
+            "y": {0: "num_labels"},
+            "single_eval_pos": {},
+            "only_return_standard_out": {},
+        }
+
+        # Export the model
+        torch.onnx.export(
+            ModelWrapper(model.model_).eval(),
+            (X, y, single_eval_pos, only_return_standard_out),
+            output_path,
+            input_names=[
+                "X",
+                "y",
+                "single_eval_pos",
+                "only_return_standard_out",
+            ],
+            output_names=["output"],
+            opset_version=17,
+            dynamic_axes=dynamic_axes,
+        )
+
+
+def check_onnx_model(model_path: str) -> None:
+    """Validate the ONNX model.
+
+    Loads the ONNX model and runs a checker to ensure that the model is valid.
+
+    Args:
+        model_path: The path to the ONNX model file.
+    """
+    onnx_model = onnx.load(model_path)  # Load the ONNX model
+    onnx.checker.check_model(onnx_model)  # Check if the model is valid
+
+
+def check_input_names(model_path: str) -> None:
+    """Load the ONNX model to check its input names.
+
+    Args:
+        model_path: The path to the ONNX model file.
+    """
+    onnx.load(model_path)
+    # get input names from graph
+    graph = onnx.load(model_path).graph
+    [input_node.name for input_node in graph.input]
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Export TabPFN models to ONNX format",
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="model",
+        help=(
+            "Base output path for the ONNX models (will append _classifier.onnx and "
+            "_regressor.onnx)"
+        ),
+    )
+
+    args = parser.parse_args()
+
+    # Export both models with appropriate suffixes
+    classifier_path = f"{args.output}_classifier.onnx"
+    regressor_path = f"{args.output}_regressor.onnx"
+
+    export_model(classifier_path, "classifier")
+    check_onnx_model(classifier_path)
+    check_input_names(classifier_path)
+
+    export_model(regressor_path, "regressor")
+    check_onnx_model(regressor_path)
+    check_input_names(regressor_path)
diff --git a/src/tabpfn/model/memory.py b/src/tabpfn/model/memory.py
index 9fd624b69..12215bf79 100644
--- a/src/tabpfn/model/memory.py
+++ b/src/tabpfn/model/memory.py
@@ -362,6 +362,7 @@ def reset_peak_memory_if_required(
     dtype_byte_size: int,
     safety_factor: float = 5.0,
     n_train_samples: int | None = None,
+    use_onnx: bool = False,
 ) -> None:
     """Reset the peak memory if required.
@@ -381,7 +382,11 @@ def reset_peak_memory_if_required(
         safety_factor (float): The safety factor to apply.
         n_train_samples (int): The number of training samples
             (to be used only for cache_kv mode)
+        use_onnx (bool): Whether we're using an ONNX compiled model.
     """
+    if use_onnx:
+        # TODO: Implement memory estimation for ONNX
+        return
     save_peak_mem_is_num = isinstance(
         save_peak_mem,
         (float, int),
diff --git a/src/tabpfn/regressor.py b/src/tabpfn/regressor.py
index b405d8385..a8ba7cd6e 100644
--- a/src/tabpfn/regressor.py
+++ b/src/tabpfn/regressor.py
@@ -36,6 +36,7 @@
     create_inference_engine,
     determine_precision,
     initialize_tabpfn_model,
+    load_onnx_model,
 )
 from tabpfn.config import ModelInterfaceConfig
 from tabpfn.model.bar_distribution import FullSupportBarDistribution
@@ -149,6 +150,9 @@ class TabPFNRegressor(RegressorMixin, BaseEstimator):
     _USABLE_OUTPUT_TYPES = _OUTPUT_TYPES + _OUTPUT_TYPES_COMPOSITE
     """The output types supported by the model."""
 
+    use_onnx: bool
+    """Whether to use ONNX for inference."""
+
     def __init__(  # noqa: PLR0913
         self,
         *,
@@ -169,6 +173,7 @@ def __init__(  # noqa: PLR0913
         random_state: int | np.random.RandomState | np.random.Generator | None = 0,
         n_jobs: int = -1,
         inference_config: dict | ModelInterfaceConfig | None = None,
+        use_onnx: bool = False,
     ) -> None:
         """A TabPFN interface for regression.
 
@@ -346,6 +351,9 @@ def __init__(  # noqa: PLR0913
                 - If `dict`, the key-value pairs are used to update the default
                   `ModelInterfaceConfig`. Raises an error if an unknown key is passed.
                 - If `ModelInterfaceConfig`, the object is used as the configuration.
+
+            use_onnx:
+                Whether to use an ONNX compiled model.
         """
         super().__init__()
         self.n_estimators = n_estimators
@@ -367,6 +375,7 @@ def __init__(  # noqa: PLR0913
         self.random_state = random_state
         self.n_jobs = n_jobs
         self.inference_config = inference_config
+        self.use_onnx = use_onnx
 
     # TODO: We can remove this from scikit-learn lower bound of 1.6
     def _more_tags(self) -> dict[str, Any]:
@@ -393,12 +402,23 @@ def fit(self, X: XType, y: YType) -> Self:
         static_seed, rng = infer_random_state(self.random_state)
 
         # Load the model and config
-        self.model_, self.config_, self.bardist_ = initialize_tabpfn_model(
-            model_path=self.model_path,
-            which="regressor",
-            fit_mode=self.fit_mode,
-            static_seed=static_seed,
-        )
+        if self.use_onnx:
+            self.model_ = load_onnx_model("model_regressor.onnx")
+            # Initialize bardist_ for ONNX mode
+            # TODO: faster way to do this
+            _, self.config_, self.bardist_ = initialize_tabpfn_model(
+                model_path=self.model_path,
+                which="regressor",
+                fit_mode=self.fit_mode,
+                static_seed=static_seed,
+            )
+        else:
+            self.model_, self.config_, self.bardist_ = initialize_tabpfn_model(
+                model_path=self.model_path,
+                which="regressor",
+                fit_mode=self.fit_mode,
+                static_seed=static_seed,
+            )
 
         # Determine device and precision
         self.device_ = infer_device_and_type(self.device)
@@ -515,6 +535,7 @@ def fit(self, X: XType, y: YType) -> Self:
             forced_inference_dtype_=self.forced_inference_dtype_,
             memory_saving_mode=self.memory_saving_mode,
             use_autocast_=self.use_autocast_,
+            use_onnx=self.use_onnx,
         )
 
         return self

From 720dc1650d5f8db94199d973b6183a5ccb35da8a Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Tue, 11 Feb 2025 14:51:01 +0100
Subject: [PATCH 04/20] allow to move to gpu

---
 src/tabpfn/misc/onnx_wrapper.py | 38 ++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/onnx_wrapper.py
index addd2d131..1e00758b5 100644
--- a/src/tabpfn/misc/onnx_wrapper.py
+++ b/src/tabpfn/misc/onnx_wrapper.py
@@ -27,26 +27,43 @@ def __init__(self, model_path: str):
         Args:
             model_path: Path to the ONNX model file.
         """
+        self.model_path = model_path
+        self.providers = ["CPUExecutionProvider"]
         self.session = ort.InferenceSession(
             model_path,
-            providers=["CPUExecutionProvider"],  # TODO: Add GPU support
+            providers=self.providers,
         )
 
     def to(
         self,
-        device: torch.device,  # noqa: ARG002
+        device: torch.device,
     ) -> ONNXModelWrapper:
         """Moves the model to the specified device.
 
-        This is a no-op for the ONNX model wrapper. GPU support is not implemented.
-
         Args:
-            device: The target device (unused).
+            device: The target device (cuda or cpu).
 
         Returns:
             self
         """
-        # TODO: Add GPU support by changing provider
+        if device.type == "cuda":
+            # Check if CUDA is available in ONNX Runtime
+            cuda_provider = "CUDAExecutionProvider"
+            if cuda_provider in ort.get_available_providers():
+                self.providers = [cuda_provider, "CPUExecutionProvider"]
+                # Reinitialize session with CUDA provider
+                self.session = ort.InferenceSession(
+                    self.model_path,
+                    providers=self.providers,
+                )
+            else:
+                pass
+        else:
+            self.providers = ["CPUExecutionProvider"]
+            self.session = ort.InferenceSession(
+                self.model_path,
+                providers=self.providers,
+            )
         return self
 
     def type(
@@ -105,8 +122,6 @@ def __call__(
         Returns:
             A torch tensor containing the model output.
-
-        Note that only_return_standard_out is not used in the ONNX runtime.
         """
         # Convert inputs to numpy
         X_np = X.cpu().numpy() if isinstance(X, torch.Tensor) else X
@@ -125,8 +140,11 @@ def __call__(
         # Run inference
         outputs = self.session.run(None, onnx_inputs)
 
-        # Convert back to a torch tensor
-        return torch.from_numpy(outputs[0])
+        # Convert back to torch tensor and move to the appropriate device
+        output_tensor = torch.from_numpy(outputs[0])
+        if "CUDAExecutionProvider" in self.providers:
+            output_tensor = output_tensor.cuda()
+        return output_tensor

From 523c0510586546e8e0223a38b2a5033ef8bada18 Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Mon, 10 Mar 2025 14:25:44 +0000
Subject: [PATCH 05/20] only init onnx session once

---
 src/tabpfn/base.py              |  4 +++-
 src/tabpfn/classifier.py        | 14 +++++------
 src/tabpfn/misc/onnx_wrapper.py | 42 ++++++++++++++++++++-------------
 src/tabpfn/regressor.py         | 14 +++++------
 4 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/src/tabpfn/base.py b/src/tabpfn/base.py
index c57fca5cf..cd3d96d0f 100644
--- a/src/tabpfn/base.py
+++ b/src/tabpfn/base.py
@@ -114,11 +114,13 @@ def initialize_tabpfn_model(
 
 def load_onnx_model(
     model_path: str | Path,
+    device: torch.device,
 ) -> ONNXModelWrapper:
     """Load a TabPFN model in ONNX format.
 
     Args:
         model_path: Path to the ONNX model file.
+        device: The device to run the model on.
 
     Returns:
         The loaded ONNX model wrapped in a PyTorch-compatible interface.
@@ -139,7 +141,7 @@ def load_onnx_model(
     if not model_path.exists():
         raise FileNotFoundError(f"ONNX model not found at: {model_path}")
 
-    return ONNXModelWrapper(str(model_path))
+    return ONNXModelWrapper(str(model_path), device)
 
 
 def determine_precision(
diff --git a/src/tabpfn/classifier.py b/src/tabpfn/classifier.py
index c17d130cd..9682a2379 100644
--- a/src/tabpfn/classifier.py
+++ b/src/tabpfn/classifier.py
@@ -389,9 +389,15 @@ def fit(self, X: XType, y: YType) -> Self:
         """
         static_seed, rng = infer_random_state(self.random_state)
 
+        # Determine device and precision
+        self.device_ = infer_device_and_type(self.device)
+        (self.use_autocast_, self.forced_inference_dtype_, byte_size) = (
+            determine_precision(self.inference_precision, self.device_)
+        )
+
         # Load the model and config
         if self.use_onnx:
-            self.model_ = load_onnx_model("model_classifier.onnx")
+            self.model_ = load_onnx_model("model_classifier.onnx", self.device_)
         else:
             self.model_, self.config_, _ = initialize_tabpfn_model(
                 model_path=self.model_path,
@@ -400,12 +406,6 @@ def fit(self, X: XType, y: YType) -> Self:
                 static_seed=static_seed,
             )
 
-        # Determine device and precision
-        self.device_ = infer_device_and_type(self.device)
-        (self.use_autocast_, self.forced_inference_dtype_, byte_size) = (
-            determine_precision(self.inference_precision, self.device_)
-        )
-
         # Build the interface_config
         self.interface_config_ = ModelInterfaceConfig.from_user_input(
             inference_config=self.inference_config,
diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/onnx_wrapper.py
index 1e00758b5..32cb380e6 100644
--- a/src/tabpfn/misc/onnx_wrapper.py
+++ b/src/tabpfn/misc/onnx_wrapper.py
@@ -21,14 +21,21 @@ class ONNXModelWrapper:
     """Wrap ONNX model to match the PyTorch model interface."""
 
-    def __init__(self, model_path: str):
+    def __init__(self, model_path: str, device: torch.device):
         """Initialize the ONNX model wrapper.
 
         Args:
             model_path: Path to the ONNX model file.
+            device: The device to run the model on.
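+
+            A minimal construction sketch (the file name here is an assumption;
+            any exported TabPFN ONNX model works):
+
+                >>> wrapper = ONNXModelWrapper("model_classifier.onnx", torch.device("cpu"))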
         """
         self.model_path = model_path
-        self.providers = ["CPUExecutionProvider"]
+        self.device = device
+        if device.type == "cuda":
+            self.providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
+        elif device.type == "cpu":
+            self.providers = ["CPUExecutionProvider"]
+        else:
+            raise ValueError(f"Invalid device: {device}")
         self.session = ort.InferenceSession(
             model_path,
             providers=self.providers,
         )
@@ -46,24 +53,27 @@ def to(
         Returns:
             self
         """
-        if device.type == "cuda":
-            # Check if CUDA is available in ONNX Runtime
-            cuda_provider = "CUDAExecutionProvider"
-            if cuda_provider in ort.get_available_providers():
-                self.providers = [cuda_provider, "CPUExecutionProvider"]
-                # Reinitialize session with CUDA provider
+        # Only recreate session if device type has changed
+        if device.type != self.device.type:
+            if device.type == "cuda":
+                # Check if CUDA is available in ONNX Runtime
+                cuda_provider = "CUDAExecutionProvider"
+                if cuda_provider in ort.get_available_providers():
+                    self.providers = [cuda_provider, "CPUExecutionProvider"]
+                    # Reinitialize session with CUDA provider
+                    self.session = ort.InferenceSession(
+                        self.model_path,
+                        providers=self.providers,
+                    )
+                # If CUDA is not available, keep current session
+            else:
+                self.providers = ["CPUExecutionProvider"]
                 self.session = ort.InferenceSession(
                     self.model_path,
                     providers=self.providers,
                 )
-            else:
-                pass
-        else:
-            self.providers = ["CPUExecutionProvider"]
-            self.session = ort.InferenceSession(
-                self.model_path,
-                providers=self.providers,
-            )
+            # Update the device
+            self.device = device
         return self
 
     def type(
diff --git a/src/tabpfn/regressor.py b/src/tabpfn/regressor.py
index a8ba7cd6e..7b81cafc4 100644
--- a/src/tabpfn/regressor.py
+++ b/src/tabpfn/regressor.py
@@ -401,9 +401,15 @@ def fit(self, X: XType, y: YType) -> Self:
         """
         static_seed, rng = infer_random_state(self.random_state)
 
+        # Determine device and precision
+        self.device_ = infer_device_and_type(self.device)
+        (self.use_autocast_, self.forced_inference_dtype_, byte_size) = (
+            determine_precision(self.inference_precision, self.device_)
+        )
+
         # Load the model and config
         if self.use_onnx:
-            self.model_ = load_onnx_model("model_regressor.onnx")
+            self.model_ = load_onnx_model("model_regressor.onnx", self.device_)
             # Initialize bardist_ for ONNX mode
             # TODO: faster way to do this
             _, self.config_, self.bardist_ = initialize_tabpfn_model(
                 model_path=self.model_path,
                 which="regressor",
                 fit_mode=self.fit_mode,
                 static_seed=static_seed,
             )
         else:
             self.model_, self.config_, self.bardist_ = initialize_tabpfn_model(
                 model_path=self.model_path,
                 which="regressor",
                 fit_mode=self.fit_mode,
                 static_seed=static_seed,
             )
 
-        # Determine device and precision
-        self.device_ = infer_device_and_type(self.device)
-        (self.use_autocast_, self.forced_inference_dtype_, byte_size) = (
-            determine_precision(self.inference_precision, self.device_)
-        )
-
         # Build the interface_config
         self.interface_config_ = ModelInterfaceConfig.from_user_input(
             inference_config=self.inference_config,

From 82e37c03c74f7b87d746b7fe6285f4fe4102b5f2 Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Mon, 10 Mar 2025 18:24:53 +0000
Subject: [PATCH 06/20] a few improvements

---
 src/tabpfn/misc/onnx_wrapper.py | 9 +++++----
 src/tabpfn/model/encoders.py    | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/onnx_wrapper.py
index 32cb380e6..568817a4d 100644
--- a/src/tabpfn/misc/onnx_wrapper.py
+++ b/src/tabpfn/misc/onnx_wrapper.py
@@ -249,7 +249,7 @@ def export_model(
         # Define dynamic axes for variable input sizes
         dynamic_axes = {
-            "X": {0: "num_datapoints", 1: "batch_size", 2: "num_features"},
+            "X": {0: "num_datapoints", 2: "num_features"},
             "y": {0: "num_labels"},
             "single_eval_pos": {},
             "only_return_standard_out": {},
@@ -291,9 +291,10 @@ def check_input_names(model_path: str) -> None:
         model_path: The path to the ONNX model file.
     """
     onnx.load(model_path)
-    # get input names from graph
-    graph = onnx.load(model_path).graph
-    [input_node.name for input_node in graph.input]
+
+    # Print input names
+
+    # Print output names
 
 
 if __name__ == "__main__":
diff --git a/src/tabpfn/model/encoders.py b/src/tabpfn/model/encoders.py
index 6a3de8138..4d3c08ff9 100644
--- a/src/tabpfn/model/encoders.py
+++ b/src/tabpfn/model/encoders.py
@@ -90,7 +90,7 @@ def normalize_data(
     mean = torch_nanmean(data, axis=0)  # type: ignore
     std = torch_nanstd(data, axis=0) + 1e-20
 
-    if len(data) == 1 or normalize_positions == 1:
+    if data.shape[0] == 1 or normalize_positions == 1:
         std[:] = 1.0
 
     if std_only:

From 3c2ecbca7c6a3183998b21210259f1f02cf1885c Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Fri, 21 Mar 2025 13:52:32 +0100
Subject: [PATCH 07/20] improve test when generating new onnx model

---
 src/tabpfn/misc/onnx_wrapper.py | 93 +++++++++++++++++++++++++++++++--
 1 file changed, 90 insertions(+), 3 deletions(-)

diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/onnx_wrapper.py
index 568817a4d..e3a29a502 100644
--- a/src/tabpfn/misc/onnx_wrapper.py
+++ b/src/tabpfn/misc/onnx_wrapper.py
@@ -250,7 +250,7 @@ def export_model(
         # Define dynamic axes for variable input sizes
         dynamic_axes = {
             "X": {0: "num_datapoints", 2: "num_features"},
-            "y": {0: "num_labels"},
+            "y": {0: "num_datapoints"},
             "single_eval_pos": {},
             "only_return_standard_out": {},
         }
@@ -292,11 +292,92 @@ def check_input_names(model_path: str) -> None:
     """
     onnx.load(model_path)
 
-    # Print output names
+
+def test_models(
+    model_path_classifier: str,
+    model_path_regressor: str,
+) -> None:
+    """Test both TabPFNClassifier and TabPFNRegressor with and without ONNX.
+
+    This function validates that both the original PyTorch models and the
+    exported ONNX models work correctly on simple datasets.
+
+    Args:
+        model_path_classifier: Path to the exported ONNX classifier model.
+        model_path_regressor: Path to the exported ONNX regressor model.
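+
+    A usage sketch (assuming the default file names produced by this module's
+    __main__ block):
+
+        >>> test_models("model_classifier.onnx", "model_regressor.onnx")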
+    """
+    from sklearn.datasets import load_diabetes, load_iris
+    from sklearn.metrics import accuracy_score, mean_squared_error
+    from sklearn.model_selection import train_test_split
+
+    from tabpfn import TabPFNClassifier, TabPFNRegressor
+
+    # Test classifier
+    def _test_classifier(use_onnx: bool = False) -> float:
+        # Load dataset
+        X, y = load_iris(return_X_y=True)
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=0.2, random_state=42
+        )
+
+        # Create and fit model
+        if use_onnx:
+            model = TabPFNClassifier(n_estimators=1, use_onnx=True)
+        else:
+            model = TabPFNClassifier(n_estimators=1, use_onnx=False)
+
+        model.fit(X_train, y_train)
+
+        # Make predictions
+        y_pred = model.predict(X_test)
+        return accuracy_score(y_test, y_pred)
+
+    # Test regressor
+    def _test_regressor(use_onnx: bool = False) -> float:
+        # Load dataset
+        X, y = load_diabetes(return_X_y=True)
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=0.2, random_state=42
+        )
+
+        # Create and fit model
+        if use_onnx:
+            model = TabPFNRegressor(n_estimators=1, use_onnx=True)
+        else:
+            model = TabPFNRegressor(n_estimators=1, use_onnx=False)
+
+        model.fit(X_train, y_train)
+
+        # Make predictions (mean)
+        y_pred_mean = model.predict(X_test)
+        return mean_squared_error(y_test, y_pred_mean)
+
+    # Test with PyTorch backend
+    clf_acc_torch = _test_classifier(use_onnx=False)
+    reg_mse_torch = _test_regressor(use_onnx=False)
+
+    # Test with ONNX backend
+    try:
+        clf_acc_onnx = _test_classifier(use_onnx=True)
+        reg_mse_onnx = _test_regressor(use_onnx=True)
+
+        # Compare results
+
+        # Check if results are similar
+        accuracy_diff = abs(clf_acc_torch - clf_acc_onnx)
+        mse_ratio = reg_mse_torch / max(reg_mse_onnx, 1e-10)
+
+        if accuracy_diff > 0.1 or mse_ratio < 0.5 or mse_ratio > 2.0:
+            pass
+        else:
+            pass
+
+    except Exception:
+        pass
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="Export TabPFN models to ONNX format",
     )
     parser.add_argument(
         "--output",
         type=str,
         default="model",
         help=(
             "Base output path for the ONNX models (will append _classifier.onnx and "
             "_regressor.onnx)"
         ),
     )
 
     args = parser.parse_args()
 
     # Export both models with appropriate suffixes
     classifier_path = f"{args.output}_classifier.onnx"
     regressor_path = f"{args.output}_regressor.onnx"
 
     export_model(classifier_path, "classifier")
     check_onnx_model(classifier_path)
     check_input_names(classifier_path)
 
     export_model(regressor_path, "regressor")
     check_onnx_model(regressor_path)
     check_input_names(regressor_path)
+
+    # Run tests if requested
+    if args.output == "model":
+        test_models(classifier_path, regressor_path)
+    else:
+        pass

From dd542cd24469358f45892eb86cc5954907aa2544 Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Fri, 21 Mar 2025 14:11:40 +0100
Subject: [PATCH 08/20] improve test when generating new onnx model fix

---
 src/tabpfn/misc/onnx_wrapper.py | 118 ++++++++++++++++++++------------
 1 file changed, 75 insertions(+), 43 deletions(-)

diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/onnx_wrapper.py
index e3a29a502..a6d77b62a 100644
--- a/src/tabpfn/misc/onnx_wrapper.py
+++ b/src/tabpfn/misc/onnx_wrapper.py
@@ -222,7 +222,7 @@ def export_model(
         # Create sample input tensors
         X = torch.randn(
-            (X.shape[0] * 2, 1, X.shape[1] + 1),
+            (X.shape[0] * 4, 1, X.shape[1] + 1),
             generator=torch.Generator().manual_seed(42),
         )
         # make the first feature categorical
@@ -230,12 +230,12 @@ def export_model(
         if model_type == "classifier":
             y = (
-                torch.rand(y.shape, generator=torch.Generator().manual_seed(42))
+                torch.rand((y.shape[0] * 3,), generator=torch.Generator().manual_seed(42))
                 .round()
                 .to(torch.float32)
             )
         else:
-            y = torch.rand(y.shape, generator=torch.Generator().manual_seed(42))
+            y = torch.rand((y.shape[0] * 3,), generator=torch.Generator().manual_seed(42))
 
         single_eval_pos = torch.tensor(
             y.shape[0],
@@ -290,8 +290,13 @@ def check_input_names(model_path: str) -> None:
     Args:
         model_path: The path to the ONNX model file.
     """
-    onnx.load(model_path)
+    model = onnx.load(model_path)
+    print("--------------------------------")
+    print("----INPUTS----")
+    print(model.graph.input)
+    print("----OUTPUTS----")
+    print(model.graph.output)
+    print("--------------------------------")
 
-    # Print output names
 
 def test_models(
     model_path_classifier: str,
     model_path_regressor: str,
 ) -> None:
     """Test both TabPFNClassifier and TabPFNRegressor with and without ONNX.
 
     This function validates that both the original PyTorch models and the
     exported ONNX models work correctly on simple datasets.
 
     Args:
         model_path_classifier: Path to the exported ONNX classifier model.
         model_path_regressor: Path to the exported ONNX regressor model.
     """
+    import numpy as np
+    from sklearn.datasets import load_iris, load_diabetes
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import accuracy_score, mean_squared_error
+    from tabpfn import TabPFNClassifier, TabPFNRegressor
 
     # Test classifier
     def _test_classifier(use_onnx: bool = False) -> float:
+        print(f"\n{'='*20} Testing TabPFNClassifier (use_onnx={use_onnx}) {'='*20}")
+
         # Load dataset
         X, y = load_iris(return_X_y=True)
-        X_train, X_test, y_train, y_test = train_test_split(
-            X, y, test_size=0.2, random_state=42
-        )
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
         # Create and fit model
         if use_onnx:
-            model = TabPFNClassifier(n_estimators=1, use_onnx=True)
+            model = TabPFNClassifier(n_estimators=2, use_onnx=True)
         else:
-            model = TabPFNClassifier(n_estimators=1, use_onnx=False)
+            model = TabPFNClassifier(n_estimators=2, use_onnx=False)
 
         model.fit(X_train, y_train)
 
         # Make predictions
         y_pred = model.predict(X_test)
-        return accuracy_score(y_test, y_pred)
+        accuracy = accuracy_score(y_test, y_pred)
+
+        print(f"Accuracy: {accuracy:.4f}")
+
+        # Test predict_proba
+        proba = model.predict_proba(X_test)
+        print(f"Probability shape: {proba.shape}")
+
+        return accuracy
 
     # Test regressor
     def _test_regressor(use_onnx: bool = False) -> float:
+        print(f"\n{'='*20} Testing TabPFNRegressor (use_onnx={use_onnx}) {'='*20}")
+
         # Load dataset
         X, y = load_diabetes(return_X_y=True)
-        X_train, X_test, y_train, y_test = train_test_split(
-            X, y, test_size=0.2, random_state=42
-        )
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
         # Create and fit model
         if use_onnx:
-            model = TabPFNRegressor(n_estimators=1, use_onnx=True)
+            model = TabPFNRegressor(n_estimators=2, use_onnx=True)
         else:
-            model = TabPFNRegressor(n_estimators=1, use_onnx=False)
+            model = TabPFNRegressor(n_estimators=2, use_onnx=False)
 
         model.fit(X_train, y_train)
 
         # Make predictions (mean)
         y_pred_mean = model.predict(X_test)
-        return mean_squared_error(y_test, y_pred_mean)
+        mse_mean = mean_squared_error(y_test, y_pred_mean)
+        print(f"MSE (mean): {mse_mean:.4f}")
+
+        # Make predictions (median)
+        y_pred_median = model.predict(X_test, output_type="median")
+        mse_median = mean_squared_error(y_test, y_pred_median)
+        print(f"MSE (median): {mse_median:.4f}")
+
+        # Test quantiles
+        quantiles = model.predict(X_test, output_type="quantiles", quantiles=[0.1, 0.5, 0.9])
+        print(f"Quantile predictions shape (0.1): {quantiles[0].shape}")
+
+        return mse_mean
+
+    print("Testing TabPFN models with PyTorch and ONNX backends")
 
     # Test with PyTorch backend
     clf_acc_torch = _test_classifier(use_onnx=False)
     reg_mse_torch = _test_regressor(use_onnx=False)
 
     # Test with ONNX backend
     try:
         clf_acc_onnx = _test_classifier(use_onnx=True)
         reg_mse_onnx = _test_regressor(use_onnx=True)
 
         # Compare results
+        print("\n" + "="*60)
+        print(f"Classifier accuracy - PyTorch: {clf_acc_torch:.4f}, ONNX: {clf_acc_onnx:.4f}")
+        print(f"Regressor MSE - PyTorch: {reg_mse_torch:.4f}, ONNX: {reg_mse_onnx:.4f}")
 
         # Check if results are similar
         accuracy_diff = abs(clf_acc_torch - clf_acc_onnx)
         mse_ratio = reg_mse_torch / max(reg_mse_onnx, 1e-10)
 
         if accuracy_diff > 0.1 or mse_ratio < 0.5 or mse_ratio > 2.0:
-            pass
+            print("\nWARNING: Large difference between PyTorch and ONNX model results!")
         else:
-            pass
+            print("\nSUCCESS: PyTorch and ONNX models produce similar results.")
 
-    except Exception:
-        pass
+    except Exception as e:
+        print("\n" + "="*60)
+        print(f"Error testing ONNX models: {e}")
+        print("Make sure ONNX models are correctly exported.")
 
 
 if __name__ == "__main__":
     export_model(regressor_path, "regressor")
     check_onnx_model(regressor_path)
     check_input_names(regressor_path)
 
     # Run tests if requested
     if args.output == "model":
         test_models(classifier_path, regressor_path)
     else:
-        pass
+        print("using custom output path, the model won't be tested for performance as part of sklearn wrappers")

From 55cdbcb58becb5af88e7ab84b7d586666d2459c5 Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Mon, 24 Mar 2025 12:39:17 +0100
Subject: [PATCH 09/20] make onnx export work by removing predict, and fetch
 the onnx model from the right cache

---
 src/tabpfn/base.py                            |  21 +-
 src/tabpfn/classifier.py                      |   7 +-
 src/tabpfn/inference.py                       |  37 ++-
 .../{onnx_wrapper.py => compile_to_onnx.py}   | 237 +++++++++---------
 src/tabpfn/regressor.py                       |   7 +-
 src/tabpfn/utils.py                           |  66 +++--
 6 files changed, 225 insertions(+), 150 deletions(-)
 rename src/tabpfn/misc/{onnx_wrapper.py => compile_to_onnx.py} (72%)

diff --git a/src/tabpfn/base.py b/src/tabpfn/base.py
index cd3d96d0f..d79d6288b 100644
--- a/src/tabpfn/base.py
+++ b/src/tabpfn/base.py
@@ -26,6 +26,7 @@
     InferenceEngineOnDemand,
 )
 from tabpfn.utils import (
+    get_model_path,
     infer_fp16_inference_mode,
     load_model_criterion_config,
 )
@@ -33,7 +34,7 @@
 if TYPE_CHECKING:
     import numpy as np
 
-    from tabpfn.misc.onnx_wrapper import ONNXModelWrapper
+    from tabpfn.misc.compile_to_onnx import ONNXModelWrapper
     from tabpfn.model.bar_distribution import FullSupportBarDistribution
     from tabpfn.model.config import InferenceConfig
     from tabpfn.model.transformer import PerFeatureTransformer
@@ -79,8 +80,6 @@ def initialize_tabpfn_model(
     """
     # Handle auto model_path
     download = True
-    if isinstance(model_path, str) and model_path == "auto":
-        model_path = None  # type: ignore
 
     # Load model with potential caching
     if which == "classifier":
@@ -114,12 +113,16 @@ def initialize_tabpfn_model(
 
 def load_onnx_model(
     model_path: str | Path,
+    which: Literal["classifier", "regressor"],
+    version: Literal["v2"],
     device: torch.device,
 ) -> ONNXModelWrapper:
     """Load a TabPFN model in ONNX format.
 
     Args:
         model_path: Path to the ONNX model file.
+        which: Which TabPFN model to load.
+        version: The version of the model.
         device: The device to run the model on.
 
     Returns:
         The loaded ONNX model wrapped in a PyTorch-compatible interface.
 
     Raises:
         ImportError: If onnxruntime is not installed.
         FileNotFoundError: If the model file doesn't exist.
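+
+    Example (a sketch; the concrete file location is resolved by
+    ``get_model_path``, so the arguments shown are assumptions):
+
+        >>> model = load_onnx_model(
+        ...     "auto", which="classifier", version="v2", device=torch.device("cpu")
+        ... )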
     """
+    model_path = get_model_path(model_path, which, version, use_onnx=True)
     try:
         from tabpfn.misc.compile_to_onnx import ONNXModelWrapper
     except ImportError as err:
         raise ImportError(
             "onnxruntime is required to load ONNX models. "
             "Install it with: pip install onnxruntime",
         ) from err
 
     model_path = Path(model_path)
     if not model_path.exists():
-        raise FileNotFoundError(f"ONNX model not found at: {model_path}")
+        raise FileNotFoundError(
+            f"ONNX model not found at: {model_path}, "
+            "please compile the model by running "
+            "`from tabpfn.misc.compile_to_onnx import compile_onnx_models; "
+            "compile_onnx_models()`",
+        )
 
     return ONNXModelWrapper(str(model_path), device)
 
 
 def determine_precision(
 def create_inference_engine(  # noqa: PLR0913
     *,
     X_train: np.ndarray,
     y_train: np.ndarray,
-    model: PerFeatureTransformer,
+    model: PerFeatureTransformer | ONNXModelWrapper,
     ensemble_configs: Any,
     cat_ix: list[int],
     fit_mode: Literal["low_memory", "fit_preprocessors", "fit_with_cache"],
diff --git a/src/tabpfn/classifier.py b/src/tabpfn/classifier.py
index 9682a2379..02b6d4495 100644
--- a/src/tabpfn/classifier.py
+++ b/src/tabpfn/classifier.py
@@ -397,7 +397,12 @@ def fit(self, X: XType, y: YType) -> Self:
         # Load the model and config
         if self.use_onnx:
-            self.model_ = load_onnx_model("model_classifier.onnx", self.device_)
+            self.model_ = load_onnx_model(
+                self.model_path,
+                which="classifier",
+                version="v2",
+                device=self.device_,
+            )
         else:
             self.model_, self.config_, _ = initialize_tabpfn_model(
                 model_path=self.model_path,
diff --git a/src/tabpfn/inference.py b/src/tabpfn/inference.py
index f7614a96b..f94819e61 100644
--- a/src/tabpfn/inference.py
+++ b/src/tabpfn/inference.py
@@ -19,6 +19,7 @@
 from tabpfn.preprocessing import fit_preprocessing
 
 if TYPE_CHECKING:
+    from tabpfn.misc.compile_to_onnx import ONNXModelWrapper
     from tabpfn.model.preprocessing import SequentialFeatureTransformer
     from tabpfn.model.transformer import PerFeatureTransformer
     from tabpfn.preprocessing import EnsembleConfig
@@ -62,6 +63,7 @@ def iter_outputs(
         *,
         device: torch.device,
         autocast: bool,
+        only_return_standard_out: bool = True,
     ) -> Iterator[tuple[torch.Tensor, EnsembleConfig]]:
         """Iterate over the outputs of the model.
 
@@ -71,6 +73,7 @@ def iter_outputs(
             X: The input data to make predictions on.
             device: The device to run the model on.
             autocast: Whether to use torch.autocast during inference.
+            only_return_standard_out: Whether to only return the standard output.
         """
         ...
 
@@ -90,9 +93,11 @@ class InferenceEngineOnDemand(InferenceEngine):
     cat_ix: list[int]
     static_seed: int
     n_workers: int
-    model: PerFeatureTransformer
+    model: PerFeatureTransformer | ONNXModelWrapper
     force_inference_dtype: torch.dtype | None
+    use_onnx: bool = False
 
+    # ruff: noqa: PLR0913
     @classmethod
     def prepare(
         cls,
@@ -100,13 +105,14 @@ def prepare(
         y_train: np.ndarray,
         *,
         cat_ix: list[int],
-        model: PerFeatureTransformer,
+        model: PerFeatureTransformer | ONNXModelWrapper,
         ensemble_configs: Sequence[EnsembleConfig],
         rng: np.random.Generator,
         n_workers: int,
         dtype_byte_size: int,
         force_inference_dtype: torch.dtype | None,
         save_peak_mem: bool | Literal["auto"] | float | int,
+        use_onnx: bool = False,
     ) -> InferenceEngineOnDemand:
         """Prepare the inference engine.
 
@@ -121,6 +127,7 @@ def prepare(
             dtype_byte_size: The byte size of the dtype.
             force_inference_dtype: The dtype to force inference to.
             save_peak_mem: Whether to save peak memory usage.
+            use_onnx: Whether to use ONNX models instead of PyTorch models.
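+
+        Returns:
+            The prepared inference engine.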
         """
         # We save it as a static seed to be reproducible across predicts
         static_seed = rng.integers(0, int(np.iinfo(np.int32).max))
@@ -135,6 +142,7 @@ def prepare(
             dtype_byte_size=dtype_byte_size,
             force_inference_dtype=force_inference_dtype,
             save_peak_mem=save_peak_mem,
+            use_onnx=use_onnx,
         )
 
     @override
@@ -178,6 +186,7 @@ def iter_outputs(
             dtype_byte_size=self.dtype_byte_size,
             device=device,
             safety_factor=1.2,  # TODO(Arjun): make customizable
+            use_onnx=self.use_onnx,
         )
 
         if self.force_inference_dtype is not None:
@@ -221,18 +230,18 @@ class InferenceEngineCachePreprocessing(InferenceEngine):
     cat_ixs: Sequence[list[int]]
     ensemble_configs: Sequence[EnsembleConfig]
     preprocessors: Sequence[SequentialFeatureTransformer]
-    model: PerFeatureTransformer
+    model: PerFeatureTransformer | ONNXModelWrapper
     force_inference_dtype: torch.dtype | None
     use_onnx: bool = False
 
     @classmethod
-    def prepare(  # noqa: PLR0913
+    def prepare(
         cls,
         X_train: np.ndarray,
         y_train: np.ndarray,
         *,
         cat_ix: list[int],
-        model: PerFeatureTransformer,
+        model: PerFeatureTransformer | ONNXModelWrapper,
         ensemble_configs: Sequence[EnsembleConfig],
         n_workers: int,
         rng: np.random.Generator,
@@ -254,7 +263,7 @@ def prepare(
             dtype_byte_size: The byte size of the dtype.
             force_inference_dtype: The dtype to force inference to.
             save_peak_mem: Whether to save peak memory usage.
-            use_onnx: Whether to use ONNX for inference.
+            use_onnx: Whether to use ONNX models instead of PyTorch models.
 
         Returns:
             The prepared inference engine.
@@ -359,12 +368,13 @@ class InferenceEngineCacheKV(InferenceEngine):
     preprocessors: list[SequentialFeatureTransformer]
     configs: list[EnsembleConfig]
     cat_ixs: list[list[int]]
-    models: list[PerFeatureTransformer]
+    models: list[PerFeatureTransformer | ONNXModelWrapper]
     n_train_samples: list[int]
     force_inference_dtype: torch.dtype | None
+    use_onnx: bool = False
 
     @classmethod
-    def prepare(  # noqa: PLR0913
+    def prepare(
         cls,
         X_train: np.ndarray,
         y_train: np.ndarray,
         *,
         cat_ix: list[int],
         ensemble_configs: Sequence[EnsembleConfig],
         n_workers: int,
-        model: PerFeatureTransformer,
+        model: PerFeatureTransformer | ONNXModelWrapper,
         device: torch.device,
         rng: np.random.Generator,
         dtype_byte_size: int,
@@ -380,6 +390,7 @@ def prepare(
         save_peak_mem: bool | Literal["auto"] | float | int,
         autocast: bool,
         only_return_standard_out: bool = True,
+        use_onnx: bool = False,
     ) -> InferenceEngineCacheKV:
         """Prepare the inference engine.
 
@@ -397,6 +408,7 @@ def prepare(
             save_peak_mem: Whether to save peak memory usage.
             autocast: Whether to use torch.autocast during inference.
             only_return_standard_out: Whether to only return the standard output
+            use_onnx: Whether to use ONNX models instead of PyTorch models.
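+
+        Returns:
+            The prepared inference engine.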
         """
         itr = fit_preprocessing(
             configs=ensemble_configs,
             X_train=X_train,
             y_train=y_train,
             random_state=rng,
             cat_ix=cat_ix,
             n_workers=n_workers,
             parallel_mode="as-ready",
         )
-        models: list[PerFeatureTransformer] = []
+        models: list[PerFeatureTransformer | ONNXModelWrapper] = []
         preprocessors: list[SequentialFeatureTransformer] = []
         correct_order_configs: list[EnsembleConfig] = []
         cat_ixs: list[list[int]] = []
@@ -435,7 +447,6 @@ def prepare(
             ens_model.forward(
                 *(None, X, y),
                 only_return_standard_out=only_return_standard_out,
-                categorical_inds=preprocessor_cat_ix,
                 single_eval_pos=len(X),
             )
@@ -453,6 +464,7 @@ def prepare(
             dtype_byte_size=dtype_byte_size,
             force_inference_dtype=force_inference_dtype,
             save_peak_mem=save_peak_mem,
+            use_onnx=use_onnx,
         )
 
     @override
@@ -464,7 +476,7 @@ def iter_outputs(
         autocast: bool,
         only_return_standard_out: bool = True,
     ) -> Iterator[tuple[torch.Tensor | dict, EnsembleConfig]]:
-        for preprocessor, model, config, cat_ix, X_train_len in zip(
+        for preprocessor, model, config, _cat_ix, X_train_len in zip(
             self.preprocessors,
             self.models,
             self.configs,
@@ -484,6 +496,7 @@ def iter_outputs(
             dtype_byte_size=self.dtype_byte_size,
             safety_factor=1.2,  # TODO(Arjun): make customizable
             n_train_samples=X_train_len,
+            use_onnx=self.use_onnx,
         )
 
             model = model.to(device)  # noqa: PLW2901
diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/compile_to_onnx.py
similarity index 72%
rename from src/tabpfn/misc/onnx_wrapper.py
rename to src/tabpfn/misc/compile_to_onnx.py
index a6d77b62a..ef3e16b8a 100644
--- a/src/tabpfn/misc/onnx_wrapper.py
+++ b/src/tabpfn/misc/compile_to_onnx.py
@@ -6,7 +6,8 @@
 
 from __future__ import annotations
 
-import argparse
+import os
+import sys
 
 import numpy as np
 import onnx
 import onnxruntime as ort
 import sklearn.datasets
 import torch
 from torch import nn
 
 from tabpfn import TabPFNClassifier, TabPFNRegressor
+from tabpfn.utils import _user_cache_dir
@@ -120,7 +122,7 @@ def __call__(
         *,
         single_eval_pos: int | None = None,
         only_return_standard_out: bool = False,  # noqa: ARG002
-    ) -> torch.Tensor:
+    ) -> dict[str, torch.Tensor]:
         """Run inference using the ONNX model.
 
         if "CUDAExecutionProvider" in self.providers:
             output_tensor = output_tensor.cuda()
         return output_tensor
+
+    def forward(
+        self,
+        style: torch.Tensor | None,
+        X: torch.Tensor,
+        y: torch.Tensor | None,
+        *,
+        single_eval_pos: int | None = None,
+        only_return_standard_out: bool = False,
+    ) -> dict[str, torch.Tensor]:
+        """Forward pass that delegates to __call__.
+
+        Args:
+            style: Unused tensor placeholder.
+            X: Input tensor.
+            y: Target tensor.
+            single_eval_pos: Position to evaluate at. Defaults to -1 if not provided.
+            only_return_standard_out: Flag to return only the standard output.
+
+        Returns:
+            A torch tensor containing the model output.
+        """
+        return self.__call__(
+            style,
+            X,
+            y,
+            single_eval_pos=single_eval_pos,
+            only_return_standard_out=only_return_standard_out,
+        )
 
 
 class ModelWrapper(nn.Module):
-    """A wrapper class to embed an ONNX model within the PyTorch nn.Module interface."""
+    """A wrapper class exposing the PyTorch model through the nn.Module interface.
+    Only used for exporting the model to ONNX format.
+    """
 
-    def __init__(self, original_model):
+    def __init__(self, original_model: nn.Module):
         """Initialize the ModelWrapper.
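+
+        The wrapped object is the underlying PyTorch TabPFN model; wrapping it
+        exposes the purely positional ``forward`` signature that
+        ``torch.onnx.export`` traces.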
Args:
@@ -169,7 +202,11 @@ def __init__(self, original_model):
        super().__init__()
        self.model = original_model

-    def forward(self, X, y, single_eval_pos, only_return_standard_out):
+    def forward(self, X: torch.Tensor,
+            y: torch.Tensor,
+            single_eval_pos: torch.Tensor,
+            only_return_standard_out: torch.Tensor,
+    ) -> dict[str, torch.Tensor]:
        """Perform a forward pass.

        Args:
@@ -218,11 +255,11 @@ def export_model(
        model = TabPFNRegressor(n_estimators=1, device="cpu", random_state=42)
        model.fit(X, y)

-    model.predict(X)
+    # NOTE: Calling model.predict(X) at this point would break the export process.

    # Create sample input tensors
    X = torch.randn(
-        (X.shape[0] * 4, 1, X.shape[1] + 1),
+        (X.shape[0] * 2, 1, X.shape[1] + 1),
        generator=torch.Generator().manual_seed(42),
    )
    # make the first feature categorical
@@ -230,12 +267,12 @@ def export_model(

    if model_type == "classifier":
        y = (
-            torch.rand((y.shape[0] * 3,), generator=torch.Generator().manual_seed(42))
+            torch.rand(y.shape, generator=torch.Generator().manual_seed(42))
            .round()
            .to(torch.float32)
        )
    else:
-        y = torch.rand((y.shape[0] * 3,), generator=torch.Generator().manual_seed(42))
+        y = torch.rand(y.shape, generator=torch.Generator().manual_seed(42))

    single_eval_pos = torch.tensor(
        y.shape[0],
@@ -290,145 +327,118 @@ def check_input_names(model_path: str) -> None:
    Args:
        model_path: The path to the ONNX model file.
    """
-    model = onnx.load(model_path)
-    print("--------------------------------")
-    print("----INPUTS----")
-    print(model.graph.input)
-    print("----OUTPUTS----")
-    print(model.graph.output)
-    print("--------------------------------")
+    onnx.load(model_path)  # loading fails loudly if the model graph is malformed


-def test_models(
-    model_path_classifier: str,
-    model_path_regressor: str,
-) -> None:
+def test_models() -> None:
    """Test both TabPFNClassifier and TabPFNRegressor with and without ONNX.
-
-    This function validates that both the original PyTorch models and the
+
+    This function validates that both the original PyTorch models and the
    exported ONNX models work correctly on simple datasets.
-
-    Args:
-        model_path_classifier: Path to the exported ONNX classifier model.
-        model_path_regressor: Path to the exported ONNX regressor model.
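The export call itself follows the standard `torch.onnx.export` pattern, mirrored by the interface tests later in this series; roughly, with `wrapped`, `X`, and `y` being the `ModelWrapper` around a fitted model's `model_` attribute and the sample tensors built above:

```python
import torch

# Sketch of the export step; `wrapped`, `X`, and `y` are assumed to exist.
dynamic_axes = {
    "X": {0: "num_datapoints", 1: "batch_size", 2: "num_features"},
    "y": {0: "num_labels"},
}
torch.onnx.export(
    wrapped.eval(),
    (X, y, y.shape[0], True),
    "tabpfn-v2-regressor.onnx",
    input_names=["X", "y", "single_eval_pos", "only_return_standard_out"],
    output_names=["output"],
    opset_version=17,  # torch>=2.1 supports opset 17
    dynamic_axes=dynamic_axes,
)
```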
""" - import numpy as np - from sklearn.datasets import load_iris, load_diabetes - from sklearn.model_selection import train_test_split + from sklearn.datasets import load_diabetes, load_iris from sklearn.metrics import accuracy_score, mean_squared_error + from sklearn.model_selection import train_test_split + from tabpfn import TabPFNClassifier, TabPFNRegressor - + # Test classifier - def _test_classifier(use_onnx: bool = False) -> float: - print(f"\n{'='*20} Testing TabPFNClassifier (use_onnx={use_onnx}) {'='*20}") - + def _test_classifier(*, use_onnx: bool = False) -> float: # Load dataset X, y = load_iris(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - + X_train, X_test, y_train, y_test = train_test_split( + X, + y, + test_size=0.2, + random_state=42, + ) + # Create and fit model if use_onnx: model = TabPFNClassifier(n_estimators=2, use_onnx=True) else: model = TabPFNClassifier(n_estimators=2, use_onnx=False) - + model.fit(X_train, y_train) - + # Make predictions y_pred = model.predict(X_test) - accuracy = accuracy_score(y_test, y_pred) - - print(f"Accuracy: {accuracy:.4f}") - - # Test predict_proba - proba = model.predict_proba(X_test) - print(f"Probability shape: {proba.shape}") - - return accuracy - + return accuracy_score(y_test, y_pred) + # Test regressor - def _test_regressor(use_onnx: bool = False) -> float: - print(f"\n{'='*20} Testing TabPFNRegressor (use_onnx={use_onnx}) {'='*20}") - + def _test_regressor(*, use_onnx: bool = False) -> float: # Load dataset X, y = load_diabetes(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - + X_train, X_test, y_train, y_test = train_test_split( + X, + y, + test_size=0.2, + random_state=42, + ) + # Create and fit model if use_onnx: model = TabPFNRegressor(n_estimators=2, use_onnx=True) else: model = TabPFNRegressor(n_estimators=2, use_onnx=False) - + model.fit(X_train, y_train) - + # Make predictions (mean) y_pred_mean = model.predict(X_test) - mse_mean = mean_squared_error(y_test, y_pred_mean) - print(f"MSE (mean): {mse_mean:.4f}") - - # Make predictions (median) - y_pred_median = model.predict(X_test, output_type="median") - mse_median = mean_squared_error(y_test, y_pred_median) - print(f"MSE (median): {mse_median:.4f}") - - # Test quantiles - quantiles = model.predict(X_test, output_type="quantiles", quantiles=[0.1, 0.5, 0.9]) - print(f"Quantile predictions shape (0.1): {quantiles[0].shape}") - - return mse_mean - - print("Testing TabPFN models with PyTorch and ONNX backends") - + y_pred_full = model.predict(X_test, output_type="full") + assert len(y_pred_full.keys()) > 2 + return mean_squared_error(y_test, y_pred_mean) + # Test with PyTorch backend clf_acc_torch = _test_classifier(use_onnx=False) reg_mse_torch = _test_regressor(use_onnx=False) - + # Test with ONNX backend - try: - clf_acc_onnx = _test_classifier(use_onnx=True) - reg_mse_onnx = _test_regressor(use_onnx=True) - - # Compare results - print("\n" + "="*60) - print(f"Classifier accuracy - PyTorch: {clf_acc_torch:.4f}, ONNX: {clf_acc_onnx:.4f}") - print(f"Regressor MSE - PyTorch: {reg_mse_torch:.4f}, ONNX: {reg_mse_onnx:.4f}") - - # Check if results are similar - accuracy_diff = abs(clf_acc_torch - clf_acc_onnx) - mse_ratio = reg_mse_torch / max(reg_mse_onnx, 1e-10) - - if accuracy_diff > 0.1 or mse_ratio < 0.5 or mse_ratio > 2.0: - print("\nWARNING: Large difference between PyTorch and ONNX model results!") - else: - print("\nSUCCESS: PyTorch and ONNX 
models produce similar results.") - - except Exception as e: - print("\n" + "="*60) - print(f"Error testing ONNX models: {e}") - print("Make sure ONNX models are correctly exported.") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Export TabPFN models to ONNX format", - ) - parser.add_argument( - "--output", - type=str, - default="model", - help=( - "Base output path for the ONNX models (will append _classifier.onnx and " - "_regressor.onnx)" - ), - ) - - args = parser.parse_args() + clf_acc_onnx = _test_classifier(use_onnx=True) + reg_mse_onnx = _test_regressor(use_onnx=True) + + # Compare results + + # Check if results are similar + accuracy_diff = abs(clf_acc_torch - clf_acc_onnx) + mse_ratio = reg_mse_torch / max(reg_mse_onnx, 1e-10) + + if accuracy_diff > 0.1 or mse_ratio < 0.5 or mse_ratio > 2.0: + raise ValueError( + "FAILED: the performance of the ONNX model is not " + "similar to the PyTorch model. \n" + f"Accuracy PyTorch: {clf_acc_torch}, Accuracy ONNX: {clf_acc_onnx}, \n" + f"MSE PyTorch: {reg_mse_torch}, MSE ONNX: {reg_mse_onnx}" + ) + else: + print("SUCCESS: the performance of the ONNX model is " + "similar to the PyTorch model. \n" + f"Accuracy PyTorch: {clf_acc_torch}, Accuracy ONNX: {clf_acc_onnx}, \n" + f"MSE PyTorch: {reg_mse_torch}, MSE ONNX: {reg_mse_onnx}") + + + +def compile_onnx_models(suffix: str = ""): + """Compile the ONNX models. + + Args: + suffix: The suffix to append to the file names of the ONNX models. + """ + USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") + if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": + cache_dir = USER_TABPFN_CACHE_DIR_LOCATION + else: + cache_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") # Export both models with appropriate suffixes - classifier_path = f"{args.output}_classifier.onnx" - regressor_path = f"{args.output}_regressor.onnx" + classifier_path = f"{cache_dir}/tabpfn-v2-classifier{suffix}.onnx" + regressor_path = f"{cache_dir}/tabpfn-v2-regressor{suffix}.onnx" export_model(classifier_path, "classifier") check_onnx_model(classifier_path) @@ -437,9 +447,8 @@ def _test_regressor(use_onnx: bool = False) -> float: export_model(regressor_path, "regressor") check_onnx_model(regressor_path) check_input_names(regressor_path) - - # Run tests if requested - if args.output == "model": - test_models(classifier_path, regressor_path) + + if not len(suffix): + test_models() else: - print("using custom output path, the model won't be tested for performance as part of sklearn wrappers") + print("model name suffix is not empty, skipping test") diff --git a/src/tabpfn/regressor.py b/src/tabpfn/regressor.py index 7b81cafc4..51cd715b9 100644 --- a/src/tabpfn/regressor.py +++ b/src/tabpfn/regressor.py @@ -409,7 +409,12 @@ def fit(self, X: XType, y: YType) -> Self: # Load the model and config if self.use_onnx: - self.model_ = load_onnx_model("model_regressor.onnx", self.device_) + self.model_ = load_onnx_model( + self.model_path, + which="regressor", + version="v2", + device=self.device_, + ) # Initialize bardist_ for ONNX mode # TODO: faster way to do this _, self.config_, self.bardist_ = initialize_tabpfn_model( diff --git a/src/tabpfn/utils.py b/src/tabpfn/utils.py index 298507a5b..0c18cf244 100644 --- a/src/tabpfn/utils.py +++ b/src/tabpfn/utils.py @@ -286,6 +286,53 @@ def _user_cache_dir(platform: str, appname: str = "tabpfn") -> Path: return use_instead_path +def get_cache_dir() -> Path: + """Get the cache directory for the TabPFN model. 
+ + Returns: + The cache directory for the TabPFN model. + """ + USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") + if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": + cache_dir = Path(USER_TABPFN_CACHE_DIR_LOCATION) + else: + cache_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") + return cache_dir + + +def get_model_path( + model_path: str | Path | None, + which: Literal["classifier", "regressor"], + version: Literal["v2"], + *, + use_onnx: bool = False, +) -> Path: + """Get the model path for the given task. + + Args: + model_path: The path to the model. + which: The task to get the model path for. + version: The version of the model. + use_onnx: Whether to use ONNX models instead of PyTorch models. + + Returns: + The model path. + """ + if isinstance(model_path, str) and model_path == "auto": + model_path = None # type: ignore + if model_path is None: + USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") + if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": + model_dir = Path(USER_TABPFN_CACHE_DIR_LOCATION) + else: + model_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") + if use_onnx: + model_name = f"tabpfn-{version}-{which}.onnx" + else: + model_name = f"tabpfn-{version}-{which}.ckpt" + return model_dir / model_name + + @overload def load_model_criterion_config( model_path: str | Path | None, @@ -348,22 +395,9 @@ def load_model_criterion_config( Returns: The model, criterion, and config. """ - if model_path is None: - USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") - if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": - model_dir = Path(USER_TABPFN_CACHE_DIR_LOCATION) - else: - model_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") - - model_name = f"tabpfn-{version}-{which}.ckpt" - model_path = model_dir / model_name - else: - if not isinstance(model_path, (str, Path)): - raise ValueError(f"Invalid model_path: {model_path}") - - model_path = Path(model_path) - model_dir = model_path.parent - model_name = model_path.name + model_path = get_model_path(model_path, which, version) + model_dir = model_path.parent + model_name = model_path.name model_dir.mkdir(parents=True, exist_ok=True) if not model_path.exists(): From 71f65579c23b9034a0dd9b361e84730d0981d40d Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 15:10:54 +0100 Subject: [PATCH 10/20] fix tests --- tests/test_classifier_interface.py | 3 --- tests/test_regressor_interface.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/tests/test_classifier_interface.py b/tests/test_classifier_interface.py index 90969d306..05c711065 100644 --- a/tests/test_classifier_interface.py +++ b/tests/test_classifier_interface.py @@ -235,7 +235,6 @@ def forward( y, single_eval_pos, only_return_standard_out, - categorical_inds, ): return self.model( None, @@ -243,7 +242,6 @@ def forward( y, single_eval_pos=single_eval_pos, only_return_standard_out=only_return_standard_out, - categorical_inds=categorical_inds, ) @@ -281,7 +279,6 @@ def test_onnx_exportable_cpu(X_y: tuple[np.ndarray, np.ndarray]) -> None: "y", "single_eval_pos", "only_return_standard_out", - "categorical_inds", ], output_names=["output"], opset_version=17, # using 17 since we use torch>=2.1 diff --git a/tests/test_regressor_interface.py b/tests/test_regressor_interface.py index 20e285376..5a1a7ee13 100644 --- a/tests/test_regressor_interface.py +++ b/tests/test_regressor_interface.py @@ -232,7 +232,6 @@ def forward( y, single_eval_pos, 
only_return_standard_out,
-            categorical_inds,
        ):
            return self.model(
                None,
@@ -240,7 +239,6 @@ def forward(
                y,
                single_eval_pos=single_eval_pos,
                only_return_standard_out=only_return_standard_out,
-                categorical_inds=categorical_inds,
            )


@@ -277,7 +275,6 @@ def test_onnx_exportable_cpu(X_y: tuple[np.ndarray, np.ndarray]) -> None:
                "y",
                "single_eval_pos",
                "only_return_standard_out",
-                "categorical_inds",
            ],
            output_names=["output"],
            opset_version=17,  # using 17 since we use torch>=2.1

From a47b254ca82f17670aa9dbbf7534813b11a0d549 Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Mon, 24 Mar 2025 16:00:22 +0100
Subject: [PATCH 11/20] finish merge + mypy

---
 src/tabpfn/base.py                 |  9 ++--
 src/tabpfn/misc/compile_to_onnx.py | 46 +++++++++----------
 src/tabpfn/model/loading.py        | 73 +++++++++++++++++-------------
 tests/test_classifier_interface.py |  2 +-
 tests/test_regressor_interface.py  |  2 +-
 5 files changed, 69 insertions(+), 63 deletions(-)

diff --git a/src/tabpfn/base.py b/src/tabpfn/base.py
index c491c3aa1..8ca7b52e4 100644
--- a/src/tabpfn/base.py
+++ b/src/tabpfn/base.py
@@ -27,9 +27,11 @@
    InferenceEngineCachePreprocessing,
    InferenceEngineOnDemand,
 )
-from tabpfn.model.loading import load_model_criterion_config
+from tabpfn.model.loading import (
+    load_model_criterion_config,
+    resolve_model_path,
+)
 from tabpfn.utils import (
-    get_model_path,
    infer_fp16_inference_mode,
 )

@@ -135,7 +137,6 @@ def load_onnx_model(
        ImportError: If onnxruntime is not installed.
        FileNotFoundError: If the model file doesn't exist.
    """
-    model_path = get_model_path(model_path, which, version, use_onnx=True)
    try:
        from tabpfn.misc.compile_to_onnx import ONNXModelWrapper
    except ImportError as err:
@@ -144,7 +145,7 @@ def load_onnx_model(
            "Install it with: pip install onnxruntime",
        ) from err

-    model_path = Path(model_path)
+    model_path, _, _ = resolve_model_path(model_path, which, version, use_onnx=True)
    if not model_path.exists():
        raise FileNotFoundError(
            f"ONNX model not found at: {model_path}, "
diff --git a/src/tabpfn/misc/compile_to_onnx.py b/src/tabpfn/misc/compile_to_onnx.py
index ef3e16b8a..a1225de16 100644
--- a/src/tabpfn/misc/compile_to_onnx.py
+++ b/src/tabpfn/misc/compile_to_onnx.py
@@ -1,3 +1,4 @@
+# ruff: noqa: T201
 """Module providing wrappers to use ONNX models with a PyTorch-like interface.

 This module defines wrappers for ONNX models as well as helper functions to export
@@ -6,9 +7,6 @@

 from __future__ import annotations

-import os
-import sys
-
 import numpy as np
 import onnx
 import onnxruntime as ort
@@ -17,7 +15,7 @@
 from torch import nn

 from tabpfn import TabPFNClassifier, TabPFNRegressor
-from tabpfn.utils import _user_cache_dir
+from tabpfn.model.loading import resolve_model_path


 class ONNXModelWrapper:
@@ -157,7 +155,7 @@ def __call__(
        if "CUDAExecutionProvider" in self.providers:
            output_tensor = output_tensor.cuda()
        return output_tensor
-    
+
    def forward(
@@ -202,10 +200,12 @@ def __init__(self, original_model: nn.Module):
        super().__init__()
        self.model = original_model

-    def forward(self, X: torch.Tensor,
-            y: torch.Tensor,
-            single_eval_pos: torch.Tensor,
-            only_return_standard_out: torch.Tensor,
+    def forward(
+        self,
+        X: torch.Tensor,
+        y: torch.Tensor,
+        single_eval_pos: torch.Tensor,
+        only_return_standard_out: torch.Tensor,
    ) -> dict[str, torch.Tensor]:
        """Perform a forward pass.
@@ -416,29 +416,25 @@ def _test_regressor(*, use_onnx: bool = False) -> float: f"Accuracy PyTorch: {clf_acc_torch}, Accuracy ONNX: {clf_acc_onnx}, \n" f"MSE PyTorch: {reg_mse_torch}, MSE ONNX: {reg_mse_onnx}" ) - else: - print("SUCCESS: the performance of the ONNX model is " - "similar to the PyTorch model. \n" - f"Accuracy PyTorch: {clf_acc_torch}, Accuracy ONNX: {clf_acc_onnx}, \n" - f"MSE PyTorch: {reg_mse_torch}, MSE ONNX: {reg_mse_onnx}") - + print( + "SUCCESS: the performance of the ONNX model is " + "similar to the PyTorch model. \n" + f"Accuracy PyTorch: {clf_acc_torch}, Accuracy ONNX: {clf_acc_onnx}, \n" + f"MSE PyTorch: {reg_mse_torch}, MSE ONNX: {reg_mse_onnx}" + ) -def compile_onnx_models(suffix: str = ""): +def compile_onnx_models(suffix: str = "") -> None: """Compile the ONNX models. Args: suffix: The suffix to append to the file names of the ONNX models. """ - USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") - if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": - cache_dir = USER_TABPFN_CACHE_DIR_LOCATION - else: - cache_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") - - # Export both models with appropriate suffixes - classifier_path = f"{cache_dir}/tabpfn-v2-classifier{suffix}.onnx" - regressor_path = f"{cache_dir}/tabpfn-v2-regressor{suffix}.onnx" + classifier_path, _, _ = resolve_model_path(None, "classifier", "v2", use_onnx=True) + regressor_path, _, _ = resolve_model_path(None, "regressor", "v2", use_onnx=True) + # add suffix to the file names + classifier_path = str(classifier_path) + suffix + regressor_path = str(regressor_path) + suffix export_model(classifier_path, "classifier") check_onnx_model(classifier_path) diff --git a/src/tabpfn/model/loading.py b/src/tabpfn/model/loading.py index 7253e8c25..85fb9aefe 100644 --- a/src/tabpfn/model/loading.py +++ b/src/tabpfn/model/loading.py @@ -101,7 +101,7 @@ def _get_model_source(version: ModelVersion, model_type: ModelType) -> ModelSour ) -def _suppress_hf_token_warning(): +def _suppress_hf_token_warning() -> None: """Suppress warning about missing HuggingFace token.""" import warnings @@ -279,15 +279,16 @@ def download_model( def download_all_models(to: Path) -> None: """Download all v2 classifier and regressor models into a local directory.""" to.mkdir(parents=True, exist_ok=True) + for model_source, model_type in [ - (ModelSource.get_classifier_v2(), "classifier"), - (ModelSource.get_regressor_v2(), "regressor"), + (ModelSource.get_classifier_v2(), ModelType.CLASSIFIER), + (ModelSource.get_regressor_v2(), ModelType.REGRESSOR), ]: for ckpt_name in model_source.filenames: download_model( to=to / ckpt_name, version="v2", - which=model_type, + which=model_type.value, model_name=ckpt_name, ) @@ -340,6 +341,41 @@ def _user_cache_dir(platform: str, appname: str = "tabpfn") -> Path: return use_instead_path +def resolve_model_path( + model_path: None | str | Path, + which: Literal["regressor", "classifier"], + version: Literal["v2"] = "v2", + *, + use_onnx: bool = False, +) -> tuple[Path, Path, str]: + if isinstance(model_path, str) and model_path == "auto": + model_path = None + + if model_path is None: + USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") + if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": + model_dir = Path(USER_TABPFN_CACHE_DIR_LOCATION) + else: + model_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") + if use_onnx: + model_name = f"tabpfn-{version}-{which}.onnx" + else: + model_name = f"tabpfn-{version}-{which}.ckpt" + 
model_path = model_dir / model_name + else: + if not isinstance(model_path, (str, Path)): + raise ValueError(f"Invalid model_path: {model_path}") + + model_path = Path(model_path) + model_dir = model_path.parent + if use_onnx and not model_path.name.endswith(".onnx"): + model_name = model_path.name.replace(".ckpt", ".onnx") + else: + model_name = model_path.name + + return model_path, model_dir, model_name + + @overload def load_model_criterion_config( model_path: str | Path | None, @@ -370,31 +406,6 @@ def load_model_criterion_config( ) -> tuple[PerFeatureTransformer, FullSupportBarDistribution, InferenceConfig]: ... -def resolve_model_path( - model_path: None | str | Path, - which: Literal["regressor", "classifier"], - version: Literal["v2"] = "v2", -) -> tuple[Path, Path, str, str]: - if model_path is None: - USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") - if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": - model_dir = Path(USER_TABPFN_CACHE_DIR_LOCATION) - else: - model_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") - - model_name = f"tabpfn-{version}-{which}.ckpt" - model_path = model_dir / model_name - else: - if not isinstance(model_path, (str, Path)): - raise ValueError(f"Invalid model_path: {model_path}") - - model_path = Path(model_path) - model_dir = model_path.parent - model_name = model_path.name - - return model_path, model_dir, model_name, which - - def load_model_criterion_config( model_path: None | str | Path, *, @@ -427,9 +438,7 @@ def load_model_criterion_config( Returns: The model, criterion, and config. """ - (model_path, model_dir, model_name, which) = resolve_model_path( - model_path, which, version - ) + model_path, model_dir, model_name = resolve_model_path(model_path, which, version) model_dir.mkdir(parents=True, exist_ok=True) if not model_path.exists(): diff --git a/tests/test_classifier_interface.py b/tests/test_classifier_interface.py index a1f76ed88..0c4ae1cd3 100644 --- a/tests/test_classifier_interface.py +++ b/tests/test_classifier_interface.py @@ -288,7 +288,7 @@ def test_onnx_exportable_cpu(X_y: tuple[np.ndarray, np.ndarray]) -> None: } torch.onnx.export( ModelWrapper(classifier.model_).eval(), - (X, y, y.shape[0], True, []), + (X, y, y.shape[0], True), io.BytesIO(), input_names=[ "X", diff --git a/tests/test_regressor_interface.py b/tests/test_regressor_interface.py index 87d423b1e..6e117bcd7 100644 --- a/tests/test_regressor_interface.py +++ b/tests/test_regressor_interface.py @@ -282,7 +282,7 @@ def test_onnx_exportable_cpu(X_y: tuple[np.ndarray, np.ndarray]) -> None: } torch.onnx.export( ModelWrapper(regressor.model_).eval(), - (X, y, y.shape[0], True, []), + (X, y, y.shape[0], True), io.BytesIO(), input_names=[ "X", From 4c31a9a52bb80831134a0c5736b91a7ef20f5f0f Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 17:15:53 +0100 Subject: [PATCH 12/20] improve onnx export tests --- src/tabpfn/base.py | 3 +- src/tabpfn/misc/compile_to_onnx.py | 7 +- src/tabpfn/model/loading.py | 3 +- tests/test_classifier_interface.py | 46 ------------ tests/test_export_onnx.py | 113 +++++++++++++++++++++++++++++ tests/test_regressor_interface.py | 44 ----------- 6 files changed, 121 insertions(+), 95 deletions(-) create mode 100644 tests/test_export_onnx.py diff --git a/src/tabpfn/base.py b/src/tabpfn/base.py index 8ca7b52e4..058157575 100644 --- a/src/tabpfn/base.py +++ b/src/tabpfn/base.py @@ -151,7 +151,8 @@ def load_onnx_model( f"ONNX model not found at: {model_path}, " "please compile the model by running " 
"`from tabpfn.misc.compile_to_onnx import compile_onnx_models; " - "compile_onnx_models()`", + "compile_onnx_models()`" + "or change `model_path`.", ) return ONNXModelWrapper(str(model_path), device) diff --git a/src/tabpfn/misc/compile_to_onnx.py b/src/tabpfn/misc/compile_to_onnx.py index a1225de16..0a8204845 100644 --- a/src/tabpfn/misc/compile_to_onnx.py +++ b/src/tabpfn/misc/compile_to_onnx.py @@ -424,11 +424,12 @@ def _test_regressor(*, use_onnx: bool = False) -> float: ) -def compile_onnx_models(suffix: str = "") -> None: +def compile_onnx_models(suffix: str = "", *, skip_test: bool = False) -> None: """Compile the ONNX models. Args: suffix: The suffix to append to the file names of the ONNX models. + skip_test: Whether to skip the performance test of the ONNX models. """ classifier_path, _, _ = resolve_model_path(None, "classifier", "v2", use_onnx=True) regressor_path, _, _ = resolve_model_path(None, "regressor", "v2", use_onnx=True) @@ -444,7 +445,7 @@ def compile_onnx_models(suffix: str = "") -> None: check_onnx_model(regressor_path) check_input_names(regressor_path) - if not len(suffix): + if not len(suffix) and not skip_test: test_models() - else: + elif not skip_test: print("model name suffix is not empty, skipping test") diff --git a/src/tabpfn/model/loading.py b/src/tabpfn/model/loading.py index 85fb9aefe..bd5a9b140 100644 --- a/src/tabpfn/model/loading.py +++ b/src/tabpfn/model/loading.py @@ -369,7 +369,8 @@ def resolve_model_path( model_path = Path(model_path) model_dir = model_path.parent if use_onnx and not model_path.name.endswith(".onnx"): - model_name = model_path.name.replace(".ckpt", ".onnx") + # More general approach - replace any extension with .onnx + model_name = model_path.stem + ".onnx" else: model_name = model_path.name diff --git a/tests/test_classifier_interface.py b/tests/test_classifier_interface.py index 0c4ae1cd3..076376984 100644 --- a/tests/test_classifier_interface.py +++ b/tests/test_classifier_interface.py @@ -1,8 +1,5 @@ from __future__ import annotations -import io -import os -import sys import typing from itertools import product from typing import Callable, Literal @@ -259,49 +256,6 @@ def forward( ) -@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") -def test_onnx_exportable_cpu(X_y: tuple[np.ndarray, np.ndarray]) -> None: - if os.name == "nt": - pytest.skip("onnx export is not tested on windows") - if sys.version_info >= (3, 13): - pytest.xfail("onnx is not yet supported on Python 3.13") - X, y = X_y - with torch.no_grad(): - classifier = TabPFNClassifier(n_estimators=1, device="cpu", random_state=42) - # load the model so we can access it via classifier.model_ - classifier.fit(X, y) - # this is necessary if cuda is available - classifier.predict(X) - # replicate the above call with random tensors of same shape - X = torch.randn( - (X.shape[0] * 2, 1, X.shape[1] + 1), - generator=torch.Generator().manual_seed(42), - ) - y = ( - torch.rand(y.shape, generator=torch.Generator().manual_seed(42)) - .round() - .to(torch.float32) - ) - dynamic_axes = { - "X": {0: "num_datapoints", 1: "batch_size", 2: "num_features"}, - "y": {0: "num_labels"}, - } - torch.onnx.export( - ModelWrapper(classifier.model_).eval(), - (X, y, y.shape[0], True), - io.BytesIO(), - input_names=[ - "X", - "y", - "single_eval_pos", - "only_return_standard_out", - ], - output_names=["output"], - opset_version=17, # using 17 since we use torch>=2.1 - dynamic_axes=dynamic_axes, - ) - - @pytest.mark.parametrize("data_source", ["train", "test"]) def 
test_get_embeddings(X_y: tuple[np.ndarray, np.ndarray], data_source: str) -> None: """Test that get_embeddings returns valid embeddings for a fitted model.""" diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py new file mode 100644 index 000000000..ed826df35 --- /dev/null +++ b/tests/test_export_onnx.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import os +import sys + +import numpy as np +import pytest + +from tabpfn import TabPFNClassifier, TabPFNRegressor +from tabpfn.misc.compile_to_onnx import compile_onnx_models + + +@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") +def test_onnx_missing_model_error(): + """Test that appropriate error is raised when trying to + use ONNX with a missing model. Here we specify a model path + that does not exist to simulate the case where the model + has not been compiled. + """ + if os.name == "nt": + pytest.skip("ONNX export is not tested on Windows") + if sys.version_info >= (3, 13): + pytest.xfail("ONNX is not yet supported on Python 3.13") + + try: + import onnx # noqa: F401 + import onnxruntime # noqa: F401 + except ImportError: + pytest.skip("ONNX or ONNX Runtime not available") + + # Generate synthetic data + rng = np.random.default_rng() + X = rng.standard_normal((50, 10)).astype(np.float32) + y = rng.integers(0, 2, size=50) + + # Try to use ONNX backend when model doesn't exist + classifier = TabPFNClassifier( + device="cpu", use_onnx=True, model_path="/fake_dir/tabpfn_classifier_v2.ckpt" + ) + + # Expect a FileNotFoundError with a specific message + with pytest.raises( + FileNotFoundError, + match=( + r"ONNX model not found at:.*please compile the model by " + r"running.*compile_onnx_models\(\)" + ), + ): + classifier.fit(X, y) + + +@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") +def test_onnx_export_and_inference(): + """Test that TabPFN models can be exported to ONNX + and produce correct predictions. 
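The error exercised above also documents its own fix; a short sketch of the recovery path a user would follow (assuming `onnx` and `onnxruntime` are installed):

```python
from tabpfn import TabPFNClassifier
from tabpfn.misc.compile_to_onnx import compile_onnx_models

# One-time export into the cache directory; skip_test=True avoids the
# built-in PyTorch-vs-ONNX comparison run.
compile_onnx_models(skip_test=True)

clf = TabPFNClassifier(device="cpu", use_onnx=True)  # now resolves the .onnx file
```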
+ """ + if os.name == "nt": + pytest.skip("ONNX export is not tested on Windows") + if sys.version_info >= (3, 13): + pytest.xfail("ONNX is not yet supported on Python 3.13") + + try: + import onnx # noqa: F401 + import onnxruntime # noqa: F401 + except ImportError: + pytest.skip("ONNX or ONNX Runtime not available") + + # Compile the model to ONNX format (using default output directory) + compile_onnx_models(skip_test=True) + + # Generate synthetic data for testing + n_samples = 100 + n_features = 10 + rng = np.random.default_rng() + X = rng.standard_normal((n_samples, n_features)).astype(np.float32) + y = rng.integers(0, 2, size=n_samples) + + # Split into train/test + train_size = 80 + X_train, X_test = X[:train_size], X[train_size:] + y_train, _y_test = y[:train_size], y[train_size:] + + # Test with PyTorch backend + classifier_torch = TabPFNClassifier(device="cpu", use_onnx=False) + classifier_torch.fit(X_train, y_train) + + # Get predictions with PyTorch backend + torch_probs = classifier_torch.predict_proba(X_test) + torch_preds = classifier_torch.predict(X_test) + + # Test with ONNX backend + classifier_onnx = TabPFNClassifier(device="cpu", use_onnx=True) + classifier_onnx.fit(X_train, y_train) + + # Get predictions with ONNX backend + onnx_probs = classifier_onnx.predict_proba(X_test) + onnx_preds = classifier_onnx.predict(X_test) + + # Check that the predictions roughly match + np.testing.assert_allclose(torch_probs, onnx_probs, rtol=1e-2, atol=1e-2) + np.testing.assert_array_equal(torch_preds, onnx_preds) + + # same for regressor + regressor_torch = TabPFNRegressor(device="cpu", use_onnx=False) + regressor_torch.fit(X_train, y_train) + regressor_onnx = TabPFNRegressor(device="cpu", use_onnx=True) + regressor_onnx.fit(X_train, y_train) + + torch_preds = regressor_torch.predict(X_test) + onnx_preds = regressor_onnx.predict(X_test) + + # Check that the predictions roughly match + np.testing.assert_allclose(torch_preds, onnx_preds, rtol=1e-2, atol=1e-2) diff --git a/tests/test_regressor_interface.py b/tests/test_regressor_interface.py index 6e117bcd7..c7733da47 100644 --- a/tests/test_regressor_interface.py +++ b/tests/test_regressor_interface.py @@ -1,8 +1,6 @@ from __future__ import annotations -import io import os -import sys import typing from itertools import product from typing import Callable, Literal @@ -254,48 +252,6 @@ def forward( ) -# WARNING: unstable for scipy<1.11.0 -@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") -def test_onnx_exportable_cpu(X_y: tuple[np.ndarray, np.ndarray]) -> None: - if os.name == "nt": - pytest.skip("onnx export is not tested on windows") - if sys.version_info >= (3, 13): - pytest.xfail("onnx is not yet supported on Python 3.13") - X, y = X_y - with torch.no_grad(): - regressor = TabPFNRegressor(n_estimators=1, device="cpu", random_state=43) - # load the model so we can access it via classifier.model_ - regressor.fit(X, y) - # this is necessary if cuda is available - regressor.predict(X) - # replicate the above call with random tensors of same shape - X = torch.randn( - (X.shape[0] * 2, 1, X.shape[1] + 1), - generator=torch.Generator().manual_seed(42), - ) - y = (torch.randn(y.shape, generator=torch.Generator().manual_seed(42)) > 0).to( - torch.float32, - ) - dynamic_axes = { - "X": {0: "num_datapoints", 1: "batch_size", 2: "num_features"}, - "y": {0: "num_labels"}, - } - torch.onnx.export( - ModelWrapper(regressor.model_).eval(), - (X, y, y.shape[0], True), - io.BytesIO(), - input_names=[ - "X", - "y", - "single_eval_pos", - 
"only_return_standard_out", - ], - output_names=["output"], - opset_version=17, # using 17 since we use torch>=2.1 - dynamic_axes=dynamic_axes, - ) - - @pytest.mark.parametrize("data_source", ["train", "test"]) def test_get_embeddings(X_y: tuple[np.ndarray, np.ndarray], data_source: str) -> None: """Test that get_embeddings returns valid embeddings for a fitted model.""" From dd84e0a50e8bd741389043a940da0f5c2b0ad308 Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 17:19:43 +0100 Subject: [PATCH 13/20] add onnxruntime requirements to dev and ci --- .github/workflows/pull_request.yml | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index d8114c456..29ebdb218 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -80,7 +80,7 @@ jobs: uv pip install --system pytest psutil # onnx is not supported on python 3.13 yet https://github.com/onnx/onnx/issues/6339 if [[ "${{ matrix.python-version }}" != "3.13" ]]; then - uv pip install --system onnx + uv pip install --system onnx onnxruntime fi shell: bash diff --git a/pyproject.toml b/pyproject.toml index 90dd591e4..cacebfb2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ dev = [ # Test "pytest", "onnx", # required for onnx export tests + "onnxruntime", "psutil", # required for testing internal memory tool on windows # Docs "mkdocs", From 1bb3a64b7b9190865e1435686c2dacedd9508c2f Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 17:23:20 +0100 Subject: [PATCH 14/20] skip test on python3.13 --- tests/test_export_onnx.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py index ed826df35..ea90f5cc4 100644 --- a/tests/test_export_onnx.py +++ b/tests/test_export_onnx.py @@ -7,7 +7,6 @@ import pytest from tabpfn import TabPFNClassifier, TabPFNRegressor -from tabpfn.misc.compile_to_onnx import compile_onnx_models @pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") @@ -65,6 +64,8 @@ def test_onnx_export_and_inference(): except ImportError: pytest.skip("ONNX or ONNX Runtime not available") + from tabpfn.misc.compile_to_onnx import compile_onnx_models + # Compile the model to ONNX format (using default output directory) compile_onnx_models(skip_test=True) From 968087ba2ce2fbb4ac1cd1053f4dfc996f281957 Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 22:14:38 +0100 Subject: [PATCH 15/20] use the same onnx session if you fit twice --- src/tabpfn/base.py | 17 ++++++--------- src/tabpfn/classifier.py | 32 +++++++++++++++++++++------ src/tabpfn/misc/compile_to_onnx.py | 8 ++++--- src/tabpfn/model/loading.py | 8 +++---- src/tabpfn/regressor.py | 35 +++++++++++++++++++++++------- tests/test_export_onnx.py | 5 ++++- 6 files changed, 72 insertions(+), 33 deletions(-) diff --git a/src/tabpfn/base.py b/src/tabpfn/base.py index 058157575..94314f673 100644 --- a/src/tabpfn/base.py +++ b/src/tabpfn/base.py @@ -29,7 +29,6 @@ ) from tabpfn.model.loading import ( load_model_criterion_config, - resolve_model_path, ) from tabpfn.utils import ( infer_fp16_inference_mode, @@ -47,7 +46,7 @@ @overload def initialize_tabpfn_model( - model_path: str | Path | Literal["auto"], + model_path: Path, which: Literal["regressor"], fit_mode: Literal["low_memory", "fit_preprocessors", "fit_with_cache"], static_seed: int, @@ -56,7 +55,7 @@ def initialize_tabpfn_model( @overload def initialize_tabpfn_model( - 
model_path: str | Path | Literal["auto"],
+    model_path: Path,
     which: Literal["classifier"],
     fit_mode: Literal["low_memory", "fit_preprocessors", "fit_with_cache"],
     static_seed: int,
@@ -64,7 +63,7 @@ def initialize_tabpfn_model(


 def initialize_tabpfn_model(
-    model_path: str | Path | Literal["auto"],
+    model_path: Path,
     which: Literal["classifier", "regressor"],
     fit_mode: Literal["low_memory", "fit_preprocessors", "fit_with_cache"],
     static_seed: int,
@@ -117,9 +116,7 @@ def initialize_tabpfn_model(


 def load_onnx_model(
-    model_path: str | Path,
-    which: Literal["classifier", "regressor"],
-    version: Literal["v2"],
+    model_path: Path,
     device: torch.device,
 ) -> ONNXModelWrapper:
     """Load a TabPFN model in ONNX format.
@@ -142,10 +139,10 @@ def load_onnx_model(
     except ImportError as err:
         raise ImportError(
             "onnxruntime is required to load ONNX models. "
-            "Install it with: pip install onnxruntime",
+            "Install it with: pip install onnxruntime-gpu "
+            "or pip install onnxruntime",
         ) from err

-    model_path, _, _ = resolve_model_path(model_path, which, version, use_onnx=True)
     if not model_path.exists():
         raise FileNotFoundError(
             f"ONNX model not found at: {model_path}, "
@@ -155,7 +152,7 @@ def load_onnx_model(
     )

-    return ONNXModelWrapper(str(model_path), device)
+    return ONNXModelWrapper(model_path, device)


 def determine_precision(
diff --git a/src/tabpfn/classifier.py b/src/tabpfn/classifier.py
index 6c363d563..cdfa806ab 100644
--- a/src/tabpfn/classifier.py
+++ b/src/tabpfn/classifier.py
@@ -44,6 +44,7 @@
     XType,
     YType,
 )
+from tabpfn.model.loading import resolve_model_path
 from tabpfn.preprocessing import (
     ClassifierEnsembleConfig,
     EnsembleConfig,
@@ -69,7 +70,9 @@
     from torch.types import _dtype

     from tabpfn.inference import InferenceEngine
+    from tabpfn.misc.compile_to_onnx import ONNXModelWrapper
     from tabpfn.model.config import InferenceConfig
+    from tabpfn.model.transformer import PerFeatureTransformer

 try:
     from sklearn.base import Tags
@@ -132,6 +135,9 @@ class TabPFNClassifier(ClassifierMixin, BaseEstimator):
     preprocessor_: ColumnTransformer
     """The column transformer used to preprocess the input data to be numeric."""

+    model_: PerFeatureTransformer | ONNXModelWrapper
+    """The loaded model used for inference."""
+
     def __init__(  # noqa: PLR0913
         self,
         *,
@@ -399,7 +405,7 @@ def fit(self, X: XType, y: YType) -> Self:
             determine_precision(self.inference_precision, self.device_)
         )

+        model_path, _, _ = resolve_model_path(
+            self.model_path,
+            which="classifier",
+            version="v2",
+            use_onnx=self.use_onnx,
+        )
         # Load the model and config
         if self.use_onnx:
-            self.model_ = load_onnx_model(
-                self.model_path,
-                which="classifier",
-                version="v2",
-                device=self.device_,
-            )
+            # if the model was already loaded with the same config,
+            # use the same ONNX session
+            if hasattr(self, "model_") and (model_path, self.device_) != (
+                self.model_.model_path,
+                self.model_.device,
+            ):
+                print("Using same ONNX session as last fit call")  # noqa: T201
+            else:
+                self.model_ = load_onnx_model(
+                    model_path,
+                    device=self.device_,
+                )
         else:
             self.model_, self.config_, _ = initialize_tabpfn_model(
-                model_path=self.model_path,
+                model_path=model_path,
                 which="classifier",
                 fit_mode=self.fit_mode,
                 static_seed=static_seed,
diff --git a/src/tabpfn/misc/compile_to_onnx.py b/src/tabpfn/misc/compile_to_onnx.py
index 0a8204845..4ee9ee627 100644
--- a/src/tabpfn/misc/compile_to_onnx.py
+++ b/src/tabpfn/misc/compile_to_onnx.py
@@ -7,6 +7,8 @@

 from __future__ import annotations

+from pathlib import Path
+
import numpy as np import onnx import onnxruntime as ort @@ -21,7 +23,7 @@ class ONNXModelWrapper: """Wrap ONNX model to match the PyTorch model interface.""" - def __init__(self, model_path: str, device: torch.device): + def __init__(self, model_path: Path, device: torch.device): """Initialize the ONNX model wrapper. Args: @@ -434,8 +436,8 @@ def compile_onnx_models(suffix: str = "", *, skip_test: bool = False) -> None: classifier_path, _, _ = resolve_model_path(None, "classifier", "v2", use_onnx=True) regressor_path, _, _ = resolve_model_path(None, "regressor", "v2", use_onnx=True) # add suffix to the file names - classifier_path = str(classifier_path) + suffix - regressor_path = str(regressor_path) + suffix + classifier_path = classifier_path.stem + suffix + ".onnx" + regressor_path = regressor_path.stem + suffix + ".onnx" export_model(classifier_path, "classifier") check_onnx_model(classifier_path) diff --git a/src/tabpfn/model/loading.py b/src/tabpfn/model/loading.py index bd5a9b140..ba0052975 100644 --- a/src/tabpfn/model/loading.py +++ b/src/tabpfn/model/loading.py @@ -379,7 +379,7 @@ def resolve_model_path( @overload def load_model_criterion_config( - model_path: str | Path | None, + model_path: Path, *, check_bar_distribution_criterion: Literal[False], cache_trainset_representation: bool, @@ -396,7 +396,7 @@ def load_model_criterion_config( @overload def load_model_criterion_config( - model_path: str | Path | None, + model_path: Path, *, check_bar_distribution_criterion: Literal[True], cache_trainset_representation: bool, @@ -408,7 +408,7 @@ def load_model_criterion_config( def load_model_criterion_config( - model_path: None | str | Path, + model_path: Path, *, check_bar_distribution_criterion: bool, cache_trainset_representation: bool, @@ -439,7 +439,7 @@ def load_model_criterion_config( Returns: The model, criterion, and config. 
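For intuition, the path resolution used above behaves roughly as follows (illustrative expectations; the cache directory itself varies per platform and per `TABPFN_MODEL_CACHE_DIR`):

```python
from tabpfn.model.loading import resolve_model_path

# Default lookup: cache dir plus the canonical file name.
path, model_dir, name = resolve_model_path(None, "classifier", "v2", use_onnx=True)
assert name == "tabpfn-v2-classifier.onnx"

# Explicit checkpoint path: the returned name swaps the extension for .onnx.
path, model_dir, name = resolve_model_path("/tmp/my.ckpt", "regressor", use_onnx=True)
assert name == "my.onnx"
```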
""" - model_path, model_dir, model_name = resolve_model_path(model_path, which, version) + model_dir, model_name = model_path.parent, model_path.name model_dir.mkdir(parents=True, exist_ok=True) if not model_path.exists(): diff --git a/src/tabpfn/regressor.py b/src/tabpfn/regressor.py index 43092fffb..5421bcc85 100644 --- a/src/tabpfn/regressor.py +++ b/src/tabpfn/regressor.py @@ -43,6 +43,7 @@ ) from tabpfn.config import ModelInterfaceConfig from tabpfn.model.bar_distribution import FullSupportBarDistribution +from tabpfn.model.loading import resolve_model_path from tabpfn.model.preprocessing import ( ReshapeFeatureDistributionsStep, ) @@ -80,7 +81,9 @@ from tabpfn.inference import ( InferenceEngine, ) + from tabpfn.misc.compile_to_onnx import ONNXModelWrapper from tabpfn.model.config import InferenceConfig + from tabpfn.model.transformer import PerFeatureTransformer try: from sklearn.base import Tags @@ -160,6 +163,9 @@ class TabPFNRegressor(RegressorMixin, BaseEstimator): preprocessor_: ColumnTransformer """The column transformer used to preprocess the input data to be numeric.""" + model_: PerFeatureTransformer | ONNXModelWrapper + """The loaded model used for inference.""" + # TODO: consider moving the following to constants.py _OUTPUT_TYPES_BASIC = ("mean", "median", "mode") """The basic output types supported by the model.""" @@ -430,25 +436,38 @@ def fit(self, X: XType, y: YType) -> Self: determine_precision(self.inference_precision, self.device_) ) + model_path, _, _ = resolve_model_path( + self.model_path, + which="regressor", + version="v2", + use_onnx=self.use_onnx, + ) + # Load the model and config if self.use_onnx: - self.model_ = load_onnx_model( - self.model_path, - which="regressor", - version="v2", - device=self.device_, - ) + # if the model was already loaded with the same config, + # use the same ONNX session + if hasattr(self, "model_") and (model_path, self.device_) == ( + self.model_.model_path, + self.model_.device, + ): + print("Using same ONNX session as last fit call") # noqa: T201 + else: + self.model_ = load_onnx_model( + model_path, + device=self.device_, + ) # Initialize bardist_ for ONNX mode # TODO: faster way to do this _, self.config_, self.bardist_ = initialize_tabpfn_model( - model_path=self.model_path, + model_path=model_path.with_stem(model_path.stem).with_suffix(".ckpt"), which="regressor", fit_mode=self.fit_mode, static_seed=static_seed, ) else: self.model_, self.config_, self.bardist_ = initialize_tabpfn_model( - model_path=self.model_path, + model_path=model_path, which="regressor", fit_mode=self.fit_mode, static_seed=static_seed, diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py index ea90f5cc4..cf8a424fa 100644 --- a/tests/test_export_onnx.py +++ b/tests/test_export_onnx.py @@ -99,7 +99,6 @@ def test_onnx_export_and_inference(): # Check that the predictions roughly match np.testing.assert_allclose(torch_probs, onnx_probs, rtol=1e-2, atol=1e-2) - np.testing.assert_array_equal(torch_preds, onnx_preds) # same for regressor regressor_torch = TabPFNRegressor(device="cpu", use_onnx=False) @@ -112,3 +111,7 @@ def test_onnx_export_and_inference(): # Check that the predictions roughly match np.testing.assert_allclose(torch_preds, onnx_preds, rtol=1e-2, atol=1e-2) + + +# TODO: test deterministic +# TODO: test that fitting twice works as intended From 3f13d243c04ecd6ae02af10ba3c862fdf969881d Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 22:51:46 +0100 Subject: [PATCH 16/20] fix bug + add tests --- src/tabpfn/classifier.py | 
2 +- tests/test_export_onnx.py | 168 +++++++++++++++++++++++++++++++++++++- 2 files changed, 167 insertions(+), 3 deletions(-) diff --git a/src/tabpfn/classifier.py b/src/tabpfn/classifier.py index cdfa806ab..5bd4f5212 100644 --- a/src/tabpfn/classifier.py +++ b/src/tabpfn/classifier.py @@ -415,7 +415,7 @@ def fit(self, X: XType, y: YType) -> Self: if self.use_onnx: # if the model was already loaded with the same config # use the same ONNX session - if hasattr(self, "model_") and (model_path, self.device_) != ( + if hasattr(self, "model_") and (model_path, self.device_) == ( self.model_.model_path, self.model_.device, ): diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py index cf8a424fa..f884d9d8c 100644 --- a/tests/test_export_onnx.py +++ b/tests/test_export_onnx.py @@ -2,9 +2,11 @@ import os import sys +from typing import Literal import numpy as np import pytest +import torch from tabpfn import TabPFNClassifier, TabPFNRegressor @@ -113,5 +115,167 @@ def test_onnx_export_and_inference(): np.testing.assert_allclose(torch_preds, onnx_preds, rtol=1e-2, atol=1e-2) -# TODO: test deterministic -# TODO: test that fitting twice works as intended +@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") +@pytest.mark.parametrize("which", ["classifier", "regressor"]) +def test_onnx_session_reuse(which: Literal["classifier", "regressor"]): + """Test that the ONNX session is reused when fitting a model multiple times + with the same model path and device. + """ + if os.name == "nt": + pytest.skip("ONNX export is not tested on Windows") + if sys.version_info >= (3, 13): + pytest.xfail("ONNX is not yet supported on Python 3.13") + + try: + import onnx # noqa: F401 + import onnxruntime # noqa: F401 + except ImportError: + pytest.skip("ONNX or ONNX Runtime not available") + + # Generate synthetic data + rng = np.random.default_rng(42) + X1 = rng.standard_normal((50, 10)).astype(np.float32) + y1 = rng.integers(0, 2, size=50) + + X2 = rng.standard_normal((40, 10)).astype(np.float32) + y2 = rng.integers(0, 2, size=40) + + # Create a classifier with ONNX backend + if which == "classifier": + sklearn_model = TabPFNClassifier(device="cpu", use_onnx=True) + else: + sklearn_model = TabPFNRegressor(device="cpu", use_onnx=True) + + # First fit + sklearn_model.fit(X1, y1) + + # Get reference to the first model + first_model = sklearn_model.model_ + + # Mock print function to check if message is displayed + import builtins + + original_print = builtins.print + printed_messages = [] + + def mock_print(*args, **kwargs): + message = " ".join(str(arg) for arg in args) + printed_messages.append(message) + original_print(*args, **kwargs) + + # Replace print with our mock + builtins.print = mock_print + + try: + # Second fit with same configuration + sklearn_model.fit(X2, y2) + + # Assert that the model object is the same (session reused) + assert sklearn_model.model_ is first_model + + # Check that the print message appears + assert any( + "Using same ONNX session as last fit call" in msg + for msg in printed_messages + ) + + # Now test with a different device to force new session + if torch.cuda.is_available(): + # Change device to force new session + sklearn_model.device = "cuda" + sklearn_model.fit(X1, y1) + + # Should be a different model object now + assert sklearn_model.model_ is not first_model + + # Restore device + sklearn_model.device = "cpu" + sklearn_model.fit(X1, y1) + + # Should be a new model again + assert sklearn_model.model_ is not first_model + finally: + # Restore original print 
function + builtins.print = original_print + + +@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") +@pytest.mark.parametrize("which", ["classifier", "regressor"]) +def test_onnx_deterministic(which: Literal["classifier", "regressor"]): + """Test that TabPFN models using ONNX are deterministic when using the same seed.""" + if os.name == "nt": + pytest.skip("ONNX export is not tested on Windows") + if sys.version_info >= (3, 13): + pytest.xfail("ONNX is not yet supported on Python 3.13") + + try: + import onnx # noqa: F401 + import onnxruntime # noqa: F401 + except ImportError: + pytest.skip("ONNX or ONNX Runtime not available") + + from tabpfn.misc.compile_to_onnx import compile_onnx_models + + # Compile the model to ONNX format if needed + compile_onnx_models(skip_test=True) + + # Generate synthetic data + rng = np.random.default_rng(42) + X_train = rng.standard_normal((50, 10)).astype(np.float32) + + if which == "classifier": + y_train = rng.integers(0, 3, size=50) # 3 classes + X_test = rng.standard_normal((20, 10)).astype(np.float32) + + # First model with fixed seed + model1 = TabPFNClassifier(device="cpu", use_onnx=True, random_state=123) + model1.fit(X_train, y_train) + pred1 = model1.predict(X_test) + proba1 = model1.predict_proba(X_test) + + # Second model with same seed + model2 = TabPFNClassifier(device="cpu", use_onnx=True, random_state=123) + model2.fit(X_train, y_train) + pred2 = model2.predict(X_test) + proba2 = model2.predict_proba(X_test) + + # Predictions should be identical + np.testing.assert_array_equal(pred1, pred2) + np.testing.assert_array_equal(proba1, proba2) + + # Third model with different seed + model3 = TabPFNClassifier(device="cpu", use_onnx=True, random_state=456) + model3.fit(X_train, y_train) + pred3 = model3.predict(X_test) + proba3 = model3.predict_proba(X_test) + + # Predictions should be different (with high probability) + # We use assert_raises to verify they're different + with pytest.raises(AssertionError): + np.testing.assert_array_equal(proba1, proba3) + + else: # regressor + y_train = rng.standard_normal(50) + X_test = rng.standard_normal((20, 10)).astype(np.float32) + + # First model with fixed seed + model1 = TabPFNRegressor(device="cpu", use_onnx=True, random_state=123) + model1.fit(X_train, y_train) + pred1 = model1.predict(X_test) + + # Second model with same seed + model2 = TabPFNRegressor(device="cpu", use_onnx=True, random_state=123) + model2.fit(X_train, y_train) + pred2 = model2.predict(X_test) + + # Predictions should be identical + np.testing.assert_array_equal(pred1, pred2) + + # Third model with different seed + model3 = TabPFNRegressor(device="cpu", use_onnx=True, random_state=456) + model3.fit(X_train, y_train) + pred3 = model3.predict(X_test) + + # Predictions should be different (with high probability) + with pytest.raises(AssertionError): + np.testing.assert_array_equal(pred1, pred3) From d6d906d4ca84d1f74c7a51721c657c013695ba94 Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 23:22:11 +0100 Subject: [PATCH 17/20] fail if device is CUDA but CUDAExecutionProvider not available --- src/tabpfn/misc/compile_to_onnx.py | 28 ++++++++++++------ tests/test_export_onnx.py | 46 ++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/src/tabpfn/misc/compile_to_onnx.py b/src/tabpfn/misc/compile_to_onnx.py index 4ee9ee627..1137bf57c 100644 --- a/src/tabpfn/misc/compile_to_onnx.py +++ b/src/tabpfn/misc/compile_to_onnx.py @@ -20,6 +20,17 @@ from tabpfn.model.loading import 
resolve_model_path +def _check_cuda_provider(device: torch.device) -> None: + if ( + device.type == "cuda" + and "CUDAExecutionProvider" not in ort.get_available_providers() + ): + raise ValueError( + "Device is cuda but CUDAExecutionProvider is not available in ONNX. " + "Check that you installed onnxruntime-gpu and have a GPU." + ) + + class ONNXModelWrapper: """Wrap ONNX model to match the PyTorch model interface.""" @@ -32,6 +43,7 @@ def __init__(self, model_path: Path, device: torch.device): """ self.model_path = model_path self.device = device + _check_cuda_provider(self.device) if device.type == "cuda": self.providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] elif device.type == "cpu": @@ -56,18 +68,16 @@ def to( self """ # Only recreate session if device type has changed + _check_cuda_provider(device) if device.type != self.device.type: if device.type == "cuda": - # Check if CUDA is available in ONNX Runtime cuda_provider = "CUDAExecutionProvider" - if cuda_provider in ort.get_available_providers(): - self.providers = [cuda_provider, "CPUExecutionProvider"] - # Reinitialize session with CUDA provider - self.session = ort.InferenceSession( - self.model_path, - providers=self.providers, - ) - # If CUDA is not available, keep current session + self.providers = [cuda_provider, "CPUExecutionProvider"] + # Reinitialize session with CUDA provider + self.session = ort.InferenceSession( + self.model_path, + providers=self.providers, + ) else: self.providers = ["CPUExecutionProvider"] self.session = ort.InferenceSession( diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py index f884d9d8c..559cf566b 100644 --- a/tests/test_export_onnx.py +++ b/tests/test_export_onnx.py @@ -279,3 +279,49 @@ def test_onnx_deterministic(which: Literal["classifier", "regressor"]): # Predictions should be different (with high probability) with pytest.raises(AssertionError): np.testing.assert_array_equal(pred1, pred3) + + +@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") +@pytest.mark.parametrize("model_class", [TabPFNClassifier, TabPFNRegressor]) +def test_cuda_provider_missing_error(model_class): + """Test that TabPFN models raise the correct error when trying to use CUDA + without CUDAExecutionProvider available in ONNX Runtime. 
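A user-side preflight for the same condition is one call to the onnxruntime API; for example:

```python
import onnxruntime as ort

# Typically ["CUDAExecutionProvider", "CPUExecutionProvider"] with
# onnxruntime-gpu installed, and ["CPUExecutionProvider", ...] otherwise.
if "CUDAExecutionProvider" not in ort.get_available_providers():
    print("onnxruntime-gpu is not installed (or no GPU); use device='cpu'.")
```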
+ """ + if os.name == "nt": + pytest.skip("ONNX export is not tested on Windows") + if sys.version_info >= (3, 13): + pytest.xfail("ONNX is not yet supported on Python 3.13") + + try: + import onnxruntime as ort + except ImportError: + pytest.skip("ONNX Runtime not available") + + # Generate synthetic data + rng = np.random.default_rng(42) + X = rng.standard_normal((20, 5)).astype(np.float32) + y = ( + rng.integers(0, 2, size=20) + if model_class == TabPFNClassifier + else rng.standard_normal(20) + ) + + # Mock ort.get_available_providers to return only CPUExecutionProvider + original_get_providers = ort.get_available_providers + + try: + # Replace providers with only CPU + ort.get_available_providers = lambda: ["CPUExecutionProvider"] + + # Create model with CUDA device and ONNX enabled + model = model_class(device="cuda", use_onnx=True) + + # The error should be raised during fit + with pytest.raises( + ValueError, + match="Device is cuda but CUDAExecutionProvider is not available in ONNX", + ): + model.fit(X, y) + finally: + # Restore original function + ort.get_available_providers = original_get_providers From e1ada196fa248c62dca764b1a0abc55cc70d4370 Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Tue, 25 Mar 2025 10:31:49 +0100 Subject: [PATCH 18/20] new py3.11 ci tests and skip onnx tests on 3.9 --- .github/workflows/pull_request.yml | 19 +++++++-- tests/test_export_onnx.py | 64 +++++++----------------------- 2 files changed, 29 insertions(+), 54 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 29ebdb218..1653c6e81 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -36,6 +36,15 @@ jobs: - os: windows-latest python-version: "3.9" dependency-set: minimum + - os: ubuntu-latest + python-version: "3.11" + dependency-set: direct-install + - os: macos-latest + python-version: "3.11" + dependency-set: direct-install + - os: windows-latest + python-version: "3.11" + dependency-set: direct-install - os: ubuntu-latest python-version: "3.13" dependency-set: maximum @@ -73,10 +82,12 @@ jobs: - name: Install dependencies run: | - uv pip install --system --no-deps . - # onnx is required for onnx export tests - # we don't install all dev dependencies here for speed - uv pip install --system -r requirements.txt + if [[ "${{ matrix.dependency-set }}" == "direct-install" ]]; then + uv pip install --system . + else + uv pip install --system --no-deps . + uv pip install --system -r requirements.txt + fi uv pip install --system pytest psutil # onnx is not supported on python 3.13 yet https://github.com/onnx/onnx/issues/6339 if [[ "${{ matrix.python-version }}" != "3.13" ]]; then diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py index 559cf566b..51f47f155 100644 --- a/tests/test_export_onnx.py +++ b/tests/test_export_onnx.py @@ -11,24 +11,29 @@ from tabpfn import TabPFNClassifier, TabPFNRegressor -@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") -def test_onnx_missing_model_error(): - """Test that appropriate error is raised when trying to - use ONNX with a missing model. Here we specify a model path - that does not exist to simulate the case where the model - has not been compiled. 
- """ +# Common fixture to handle all the skip conditions for ONNX tests +@pytest.fixture(autouse=True, scope="module") +def check_onnx_compatible(): if os.name == "nt": pytest.skip("ONNX export is not tested on Windows") if sys.version_info >= (3, 13): pytest.xfail("ONNX is not yet supported on Python 3.13") - + if sys.version_info <= (3, 9): + pytest.skip("our onnx export doesn't work on python 3.9") try: import onnx # noqa: F401 import onnxruntime # noqa: F401 except ImportError: pytest.skip("ONNX or ONNX Runtime not available") + +@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") +def test_onnx_missing_model_error(): + """Test that appropriate error is raised when trying to + use ONNX with a missing model. Here we specify a model path + that does not exist to simulate the case where the model + has not been compiled. + """ # Generate synthetic data rng = np.random.default_rng() X = rng.standard_normal((50, 10)).astype(np.float32) @@ -55,17 +60,6 @@ def test_onnx_export_and_inference(): """Test that TabPFN models can be exported to ONNX and produce correct predictions. """ - if os.name == "nt": - pytest.skip("ONNX export is not tested on Windows") - if sys.version_info >= (3, 13): - pytest.xfail("ONNX is not yet supported on Python 3.13") - - try: - import onnx # noqa: F401 - import onnxruntime # noqa: F401 - except ImportError: - pytest.skip("ONNX or ONNX Runtime not available") - from tabpfn.misc.compile_to_onnx import compile_onnx_models # Compile the model to ONNX format (using default output directory) @@ -121,17 +115,6 @@ def test_onnx_session_reuse(which: Literal["classifier", "regressor"]): """Test that the ONNX session is reused when fitting a model multiple times with the same model path and device. """ - if os.name == "nt": - pytest.skip("ONNX export is not tested on Windows") - if sys.version_info >= (3, 13): - pytest.xfail("ONNX is not yet supported on Python 3.13") - - try: - import onnx # noqa: F401 - import onnxruntime # noqa: F401 - except ImportError: - pytest.skip("ONNX or ONNX Runtime not available") - # Generate synthetic data rng = np.random.default_rng(42) X1 = rng.standard_normal((50, 10)).astype(np.float32) @@ -203,17 +186,6 @@ def mock_print(*args, **kwargs): @pytest.mark.parametrize("which", ["classifier", "regressor"]) def test_onnx_deterministic(which: Literal["classifier", "regressor"]): """Test that TabPFN models using ONNX are deterministic when using the same seed.""" - if os.name == "nt": - pytest.skip("ONNX export is not tested on Windows") - if sys.version_info >= (3, 13): - pytest.xfail("ONNX is not yet supported on Python 3.13") - - try: - import onnx # noqa: F401 - import onnxruntime # noqa: F401 - except ImportError: - pytest.skip("ONNX or ONNX Runtime not available") - from tabpfn.misc.compile_to_onnx import compile_onnx_models # Compile the model to ONNX format if needed @@ -287,15 +259,7 @@ def test_cuda_provider_missing_error(model_class): """Test that TabPFN models raise the correct error when trying to use CUDA without CUDAExecutionProvider available in ONNX Runtime. 
""" - if os.name == "nt": - pytest.skip("ONNX export is not tested on Windows") - if sys.version_info >= (3, 13): - pytest.xfail("ONNX is not yet supported on Python 3.13") - - try: - import onnxruntime as ort - except ImportError: - pytest.skip("ONNX Runtime not available") + import onnxruntime as ort # Generate synthetic data rng = np.random.default_rng(42) From 9acec27abd5f7c580346112a2e51f975346bd0db Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Tue, 25 Mar 2025 10:53:16 +0100 Subject: [PATCH 19/20] fix tests --- src/tabpfn/misc/compile_to_onnx.py | 15 +++++++++------ src/tabpfn/regressor.py | 2 +- tests/test_export_onnx.py | 3 +-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/tabpfn/misc/compile_to_onnx.py b/src/tabpfn/misc/compile_to_onnx.py index 1137bf57c..89e902069 100644 --- a/src/tabpfn/misc/compile_to_onnx.py +++ b/src/tabpfn/misc/compile_to_onnx.py @@ -240,7 +240,7 @@ def forward( def export_model( - output_path: str, + output_path: Path, model_type: str = "classifier", ) -> None: """Export the TabPFN model to the ONNX format. @@ -321,7 +321,7 @@ def export_model( ) -def check_onnx_model(model_path: str) -> None: +def check_onnx_model(model_path: Path) -> None: """Validate the ONNX model. Loads the ONNX model and runs a checker to ensure that the model is valid. @@ -333,7 +333,7 @@ def check_onnx_model(model_path: str) -> None: onnx.checker.check_model(onnx_model) # Check if the model is valid -def check_input_names(model_path: str) -> None: +def check_input_names(model_path: Path) -> None: """Load the ONNX model to check its input names. Args: @@ -445,9 +445,12 @@ def compile_onnx_models(suffix: str = "", *, skip_test: bool = False) -> None: """ classifier_path, _, _ = resolve_model_path(None, "classifier", "v2", use_onnx=True) regressor_path, _, _ = resolve_model_path(None, "regressor", "v2", use_onnx=True) - # add suffix to the file names - classifier_path = classifier_path.stem + suffix + ".onnx" - regressor_path = regressor_path.stem + suffix + ".onnx" + + # Add suffix before the .onnx extension + stem = classifier_path.stem + classifier_path = classifier_path.with_name(f"{stem}{suffix}").with_suffix(".onnx") + stem = regressor_path.stem + regressor_path = regressor_path.with_name(f"{stem}{suffix}").with_suffix(".onnx") export_model(classifier_path, "classifier") check_onnx_model(classifier_path) diff --git a/src/tabpfn/regressor.py b/src/tabpfn/regressor.py index 5421bcc85..b3ae7ffa3 100644 --- a/src/tabpfn/regressor.py +++ b/src/tabpfn/regressor.py @@ -460,7 +460,7 @@ def fit(self, X: XType, y: YType) -> Self: # Initialize bardist_ for ONNX mode # TODO: faster way to do this _, self.config_, self.bardist_ = initialize_tabpfn_model( - model_path=model_path.with_stem(model_path.stem).with_suffix(".ckpt"), + model_path=model_path.with_suffix(".ckpt"), which="regressor", fit_mode=self.fit_mode, static_seed=static_seed, diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py index 51f47f155..057ca3033 100644 --- a/tests/test_export_onnx.py +++ b/tests/test_export_onnx.py @@ -11,14 +11,13 @@ from tabpfn import TabPFNClassifier, TabPFNRegressor -# Common fixture to handle all the skip conditions for ONNX tests @pytest.fixture(autouse=True, scope="module") def check_onnx_compatible(): if os.name == "nt": pytest.skip("ONNX export is not tested on Windows") if sys.version_info >= (3, 13): pytest.xfail("ONNX is not yet supported on Python 3.13") - if sys.version_info <= (3, 9): + if sys.version_info < (3, 10): pytest.skip("our onnx export doesn't 
work on python 3.9") try: import onnx # noqa: F401 From 0f1848deb157d5d042e64cdb5c37c22c6384f151 Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Tue, 25 Mar 2025 10:59:47 +0100 Subject: [PATCH 20/20] fail nicely if someones try to export to onnx on python3.9 --- src/tabpfn/misc/compile_to_onnx.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/tabpfn/misc/compile_to_onnx.py b/src/tabpfn/misc/compile_to_onnx.py index 89e902069..4cac826a6 100644 --- a/src/tabpfn/misc/compile_to_onnx.py +++ b/src/tabpfn/misc/compile_to_onnx.py @@ -7,6 +7,7 @@ from __future__ import annotations +import sys from pathlib import Path import numpy as np @@ -31,6 +32,20 @@ def _check_cuda_provider(device: torch.device) -> None: ) +def _check_onnx_setup() -> None: + try: + import onnx # noqa: F401 + except ImportError: + raise ImportError( + "ONNX is not installed. " "Please install it using `pip install onnx`." + ) from None + if sys.version_info < (3, 10): + raise ValueError( + "TabPFN ONNX export is not yet supported on Python 3.9. " + "Please upgrade to Python 3.10 or higher." + ) from None + + class ONNXModelWrapper: """Wrap ONNX model to match the PyTorch model interface.""" @@ -443,6 +458,8 @@ def compile_onnx_models(suffix: str = "", *, skip_test: bool = False) -> None: suffix: The suffix to append to the file names of the ONNX models. skip_test: Whether to skip the performance test of the ONNX models. """ + _check_onnx_setup() + classifier_path, _, _ = resolve_model_path(None, "classifier", "v2", use_onnx=True) regressor_path, _, _ = resolve_model_path(None, "regressor", "v2", use_onnx=True)
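Note on the pathlib change in PATCH 19: a minimal standalone sketch of what the
new suffix logic in compile_onnx_models does. The base file name and the
"_test" suffix below are hypothetical; resolve_model_path supplies the real
paths.

    from pathlib import Path

    # Mirror of the patched logic: splice the suffix between the stem and the
    # .onnx extension while keeping the parent directory intact.
    path = Path("/cache/tabpfn-v2-classifier.onnx")  # hypothetical location
    suffix = "_test"
    stem = path.stem  # "tabpfn-v2-classifier"
    path = path.with_name(f"{stem}{suffix}").with_suffix(".onnx")
    assert path == Path("/cache/tabpfn-v2-classifier_test.onnx")
    # The old string concatenation (path.stem + suffix + ".onnx") returned a
    # bare str and silently dropped the directory, which is what broke the
    # tests. Caveat: a suffix containing a dot would confuse with_suffix.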
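And a hedged sketch of what the guard added in PATCH 20 means for callers of
compile_onnx_models. The import path matches the one used in the tests above;
the behaviour on Python 3.9 is inferred from the added check, not verified
here.

    import sys

    from tabpfn.misc.compile_to_onnx import compile_onnx_models

    if sys.version_info >= (3, 10):
        # Exports both the classifier and regressor models to ONNX.
        compile_onnx_models(suffix="_test", skip_test=True)
    else:
        # On Python 3.9 the call now fails fast in _check_onnx_setup with a
        # clear ValueError instead of failing later inside the export.
        print("TabPFN ONNX export requires Python 3.10+")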
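Side note on the mocking pattern in test_cuda_provider_missing_error
(PATCH 17): pytest's monkeypatch fixture restores the patched attribute
automatically, so the manual try/finally is not strictly needed. A hedged
equivalent, assuming the same provider-check code path:

    import numpy as np
    import onnxruntime as ort
    import pytest

    from tabpfn import TabPFNClassifier

    def test_cuda_provider_missing_error_monkeypatch(monkeypatch):
        # monkeypatch undoes this setattr after the test finishes.
        monkeypatch.setattr(
            ort, "get_available_providers", lambda: ["CPUExecutionProvider"]
        )
        rng = np.random.default_rng(42)
        X = rng.standard_normal((20, 5)).astype(np.float32)
        y = rng.integers(0, 2, size=20)
        model = TabPFNClassifier(device="cuda", use_onnx=True)
        with pytest.raises(
            ValueError, match="CUDAExecutionProvider is not available"
        ):
            model.fit(X, y)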