From eef103cfd59f967f0e0649748df38fb7f5c24d2a Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Fri, 7 Feb 2025 18:40:20 +0100
Subject: [PATCH 01/20] remove select_features and make
 RemoveEmptyFeaturesEncoderStep passthrough

---
 src/tabpfn/misc/debug_versions.py |  1 +
 src/tabpfn/model/encoders.py      | 49 +++++--------------------------
 2 files changed, 9 insertions(+), 41 deletions(-)

diff --git a/src/tabpfn/misc/debug_versions.py b/src/tabpfn/misc/debug_versions.py
index 320a239b9..0f93230f9 100644
--- a/src/tabpfn/misc/debug_versions.py
+++ b/src/tabpfn/misc/debug_versions.py
@@ -1,4 +1,5 @@
 # ruff: noqa
+# mypy: ignore-errors
 """This file is taken from PyTorch and modified to work with TabPFN,
 also inspired from sklearn's show_versions function.
 This collects useful debug information that can be used to report issues.
diff --git a/src/tabpfn/model/encoders.py b/src/tabpfn/model/encoders.py
index 3441bdbcb..6a3de8138 100644
--- a/src/tabpfn/model/encoders.py
+++ b/src/tabpfn/model/encoders.py
@@ -105,44 +105,6 @@ def normalize_data(
     return data
 
 
-def select_features(x: torch.Tensor, sel: torch.Tensor) -> torch.Tensor:
-    """Select features from the input tensor based on the selection mask,
-    and arrange them contiguously in the last dimension.
-    If batch size is bigger than 1, we pad the features with zeros to make the number of features fixed.
-
-    Args:
-        x: The input tensor of shape (sequence_length, batch_size, total_features)
-        sel: The boolean selection mask indicating which features to keep of shape (batch_size, total_features)
-
-    Returns:
-        The tensor with selected features.
-        The shape is (sequence_length, batch_size, number_of_selected_features) if batch_size is 1.
-        The shape is (sequence_length, batch_size, total_features) if batch_size is greater than 1.
-    """
-    B, total_features = sel.shape
-    sequence_length = x.shape[0]
-
-    # If B == 1, we don't need to append zeros, as the number of features don't need to be fixed.
-    if B == 1:
-        return x[:, :, sel[0]]
-
-    new_x = torch.zeros(
-        (sequence_length, B, total_features),
-        device=x.device,
-        dtype=x.dtype,
-    )
-
-    # For each batch, compute the number of selected features.
-    sel_counts = sel.sum(dim=-1)  # shape: (B,)
-
-    for b in range(B):
-        s = int(sel_counts[b])
-        if s > 0:
-            new_x[:, b, :s] = x[:, b, sel[b]]
-
-    return new_x
-
-
 def remove_outliers(
     X: torch.Tensor,
     n_sigma: float = 4,
@@ -507,7 +469,11 @@ def _transform(
 
 
 class RemoveEmptyFeaturesEncoderStep(SeqEncStep):
-    """Encoder step to remove empty (constant) features."""
+    """Encoder step to remove empty (constant) features.
+
+    This step was changed to NOT do anything: the removal of empty features is
+    now done elsewhere, but the saved model still expects this encoder step.
+    TODO: remove.
+    """
 
     def __init__(self, **kwargs: Any):
         """Initialize the RemoveEmptyFeaturesEncoderStep.
@@ -525,7 +491,7 @@ def _fit(self, x: torch.Tensor, **kwargs: Any) -> None:
             x: The input tensor.
             **kwargs: Additional keyword arguments (unused).
         """
-        self.sel = (x[1:] == x[0]).sum(0) != (x.shape[0] - 1)
+        # self.sel = (x[1:] == x[0]).sum(0) != (x.shape[0] - 1)
 
     def _transform(self, x: torch.Tensor, **kwargs: Any) -> tuple[torch.Tensor]:
         """Remove empty features from the input tensor.
 
         Args:
             x: The input tensor.
             **kwargs: Additional keyword arguments (unused).
 
         Returns:
             A tuple containing the transformed tensor with empty features removed.
         """
-        return (select_features(x, self.sel),)
+        # return (select_features(x, self.sel),)
+        return (x,)
 
 
 class RemoveDuplicateFeaturesEncoderStep(SeqEncStep):

From 91034b2f46ad87f087784ff9772f38ee5c5a3bfc Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Fri, 7 Feb 2025 19:13:53 +0100
Subject: [PATCH 02/20] remove cat_ind argument from forward (TODO check it's
 not used)

---
 src/tabpfn/inference.py         |  7 ++-----
 src/tabpfn/model/transformer.py | 31 -------------------------------
 2 files changed, 2 insertions(+), 36 deletions(-)

diff --git a/src/tabpfn/inference.py b/src/tabpfn/inference.py
index c88461f1e..4386fb1c2 100644
--- a/src/tabpfn/inference.py
+++ b/src/tabpfn/inference.py
@@ -161,7 +161,7 @@ def iter_outputs(
         if self.force_inference_dtype is not None:
             self.model = self.model.type(self.force_inference_dtype)
 
-        for config, preprocessor, X_train, y_train, cat_ix in itr:
+        for config, preprocessor, X_train, y_train, _cat_ix in itr:
             X_train = torch.as_tensor(X_train, dtype=torch.float32, device=device)  # noqa: PLW2901
             X_test = preprocessor.transform(X).X
@@ -193,7 +193,6 @@ def iter_outputs(
             output = self.model(
                 *(style, X_full, y_train),
                 only_return_standard_out=only_return_standard_out,
-                categorical_inds=cat_ix,
                 single_eval_pos=len(y_train),
             )
@@ -291,7 +290,7 @@ def iter_outputs(
             self.model = self.model.to(device)
         if self.force_inference_dtype is not None:
             self.model = self.model.type(self.force_inference_dtype)
-        for preprocessor, X_train, y_train, config, cat_ix in zip(
+        for preprocessor, X_train, y_train, config, _cat_ix in zip(
             self.preprocessors,
             self.X_trains,
             self.y_trains,
@@ -332,7 +331,6 @@ def iter_outputs(
             output = self.model(
                 *(style, X_full, y_train),
                 only_return_standard_out=only_return_standard_out,
-                categorical_inds=cat_ix,
                 single_eval_pos=len(y_train),
             )
@@ -497,7 +495,6 @@ def iter_outputs(
             output = model(
                 *(style, X_test, None),
                 only_return_standard_out=only_return_standard_out,
-                categorical_inds=cat_ix,
                 single_eval_pos=None,
             )
diff --git a/src/tabpfn/model/transformer.py b/src/tabpfn/model/transformer.py
index 181feb3a4..6b30db91d 100644
--- a/src/tabpfn/model/transformer.py
+++ b/src/tabpfn/model/transformer.py
@@ -373,8 +373,6 @@ def forward(self, *args: Any, **kwargs: Any) -> dict[str, torch.Tensor]:  # noqa
             Whether to only return the standard output.
         data_dags: Any
             The data DAGs for each example.
-        categorical_inds: list[int]
-            The indices of categorical features.
         freeze_kv: bool
             Whether to freeze the key and value weights.
 
@@ -388,7 +386,6 @@ def forward(self, *args: Any, **kwargs: Any) -> dict[str, torch.Tensor]:  # noqa
             "only_return_standard_out",
             "style",
             "data_dags",
-            "categorical_inds",
             "freeze_kv",
             "train_x",
             "train_y",
@@ -428,7 +425,6 @@ def _forward(  # noqa: PLR0912, C901
         only_return_standard_out: bool = True,
         style: torch.Tensor | None = None,
         data_dags: list[Any] | None = None,
-        categorical_inds: list[int] | None = None,
         half_layers: bool = False,
     ) -> Any | dict[str, torch.Tensor]:
         """The core forward pass of the model.
@@ -441,7 +437,6 @@ def _forward(  # noqa: PLR0912, C901
             only_return_standard_out: Whether to only return the standard output.
             style: The style vector.
             data_dags: The data DAGs for each example in the batch.
-            categorical_inds: The indices of categorical features.
             half_layers: Whether to use half the layers.
 
         Returns:
@@ -507,24 +502,6 @@ def _forward(  # noqa: PLR0912, C901
             n=self.features_per_group,
         )  # s b f -> b s #groups #features_per_group
 
-        # We have to re-work categoricals based on the subgroup they fall into.
-        categorical_inds_to_use: list[list[int]] | None = None
-        if categorical_inds is not None:
-            new_categorical_inds = []
-            n_subgroups = x["main"].shape[2]
-
-            for subgroup in range(n_subgroups):
-                subgroup_lower = subgroup * self.features_per_group
-                subgroup_upper = (subgroup + 1) * self.features_per_group
-                subgroup_indices = [
-                    i - subgroup_lower
-                    for i in categorical_inds
-                    if subgroup_lower <= i < subgroup_upper
-                ]
-                new_categorical_inds.append(subgroup_indices)
-
-            categorical_inds_to_use = new_categorical_inds
-
         for k in y:
             if y[k].ndim == 1:
                 y[k] = y[k].unsqueeze(-1)
@@ -576,13 +553,6 @@ def _forward(  # noqa: PLR0912, C901
                 " to the ys that are not fully provided (test set missing)",
             )
 
-        extra_encoders_args = {}
-        if categorical_inds_to_use is not None and isinstance(
-            self.encoder,
-            SequentialEncoder,
-        ):
-            extra_encoders_args["categorical_inds"] = categorical_inds_to_use
-
         for k in x:
             x[k] = einops.rearrange(x[k], "b s f n -> s (b f) n")
@@ -591,7 +561,6 @@ def _forward(  # noqa: PLR0912, C901
                 x,
                 single_eval_pos=single_eval_pos_,
                 cache_trainset_representation=self.cache_trainset_representation,
-                **extra_encoders_args,
             ),
             "s (b f) e -> b s f e",
             b=embedded_y.shape[0],

From d2e89e7761fab60dadb4b577ce28cd355bb6a361 Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Tue, 11 Feb 2025 14:40:30 +0100
Subject: [PATCH 03/20] allow to use onnx model inside sklearn interface

---
 src/tabpfn/base.py              |  36 ++++
 src/tabpfn/classifier.py        |  22 ++-
 src/tabpfn/inference.py         |   7 +-
 src/tabpfn/misc/__init__.py     |   0
 src/tabpfn/misc/onnx_wrapper.py | 297 ++++++++++++++++++++++++++++++++
 src/tabpfn/model/memory.py      |   5 +
 src/tabpfn/regressor.py         |  33 +++-
 7 files changed, 387 insertions(+), 13 deletions(-)
 create mode 100644 src/tabpfn/misc/__init__.py
 create mode 100644 src/tabpfn/misc/onnx_wrapper.py

diff --git a/src/tabpfn/base.py b/src/tabpfn/base.py
index a8030f752..c57fca5cf 100644
--- a/src/tabpfn/base.py
+++ b/src/tabpfn/base.py
@@ -33,6 +33,7 @@
 if TYPE_CHECKING:
     import numpy as np
 
+    from tabpfn.misc.onnx_wrapper import ONNXModelWrapper
     from tabpfn.model.bar_distribution import FullSupportBarDistribution
     from tabpfn.model.config import InferenceConfig
     from tabpfn.model.transformer import PerFeatureTransformer
@@ -111,6 +112,36 @@ def initialize_tabpfn_model(
     return model, config_, bar_distribution
 
 
+def load_onnx_model(
+    model_path: str | Path,
+) -> ONNXModelWrapper:
+    """Load a TabPFN model in ONNX format.
+
+    Args:
+        model_path: Path to the ONNX model file.
+
+    Returns:
+        The loaded ONNX model wrapped in a PyTorch-compatible interface.
+
+    Raises:
+        ImportError: If onnxruntime is not installed.
+        FileNotFoundError: If the model file doesn't exist.
+    """
+    try:
+        from tabpfn.misc.onnx_wrapper import ONNXModelWrapper
+    except ImportError as err:
+        raise ImportError(
+            "onnxruntime is required to load ONNX models. "
+            "Install it with: pip install onnxruntime",
+        ) from err
+
+    model_path = Path(model_path)
+    if not model_path.exists():
+        raise FileNotFoundError(f"ONNX model not found at: {model_path}")
+
+    return ONNXModelWrapper(str(model_path))
+
+
 def determine_precision(
     inference_precision: torch.dtype | Literal["autocast", "auto"],
     device_: torch.device,
@@ -168,6 +199,7 @@ def create_inference_engine(  # noqa: PLR0913
     forced_inference_dtype_: torch.dtype | None,
     memory_saving_mode: bool | Literal["auto"] | float | int,
     use_autocast_: bool,
+    use_onnx: bool = False,
 ) -> InferenceEngine:
     """Creates the appropriate TabPFN inference engine based on `fit_mode`.
@@ -190,6 +222,7 @@ def create_inference_engine(  # noqa: PLR0913
         forced_inference_dtype_: If not None, the forced dtype for inference.
         memory_saving_mode: GPU/CPU memory saving settings.
         use_autocast_: Whether we use torch.autocast for inference.
+        use_onnx: Whether to use ONNX runtime for model inference.
     """
     engine: (
         InferenceEngineOnDemand
@@ -208,6 +241,7 @@ def create_inference_engine(  # noqa: PLR0913
             dtype_byte_size=byte_size,
             force_inference_dtype=forced_inference_dtype_,
             save_peak_mem=memory_saving_mode,
+            use_onnx=use_onnx,
         )
     elif fit_mode == "fit_preprocessors":
         engine = InferenceEngineCachePreprocessing.prepare(
@@ -221,6 +255,7 @@ def create_inference_engine(  # noqa: PLR0913
             dtype_byte_size=byte_size,
             force_inference_dtype=forced_inference_dtype_,
             save_peak_mem=memory_saving_mode,
+            use_onnx=use_onnx,
         )
     elif fit_mode == "fit_with_cache":
         engine = InferenceEngineCacheKV.prepare(
@@ -236,6 +271,7 @@ def create_inference_engine(  # noqa: PLR0913
             force_inference_dtype=forced_inference_dtype_,
             save_peak_mem=memory_saving_mode,
             autocast=use_autocast_,
+            use_onnx=use_onnx,
         )
     else:
         raise ValueError(f"Invalid fit_mode: {fit_mode}")
diff --git a/src/tabpfn/classifier.py b/src/tabpfn/classifier.py
index c52b805fb..c17d130cd 100644
--- a/src/tabpfn/classifier.py
+++ b/src/tabpfn/classifier.py
@@ -32,6 +32,7 @@
     create_inference_engine,
     determine_precision,
     initialize_tabpfn_model,
+    load_onnx_model,
 )
 from tabpfn.config import ModelInterfaceConfig
 from tabpfn.constants import (
@@ -149,6 +150,7 @@ def __init__(  # noqa: PLR0913
         random_state: int | np.random.RandomState | np.random.Generator | None = 0,
         n_jobs: int = -1,
         inference_config: dict | ModelInterfaceConfig | None = None,
+        use_onnx: bool = False,
     ) -> None:
         """A TabPFN interface for classification.
 
@@ -338,6 +340,9 @@ def __init__(  # noqa: PLR0913
                 - If `dict`, the key-value pairs are used to update the default
                   `ModelInterfaceConfig`. Raises an error if an unknown key is passed.
                 - If `ModelInterfaceConfig`, the object is used as the configuration.
+
+            use_onnx:
+                Whether to use an ONNX compiled model.
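+
+                A minimal usage sketch (illustrative only; it assumes an ONNX
+                model has already been exported for the classifier):
+
+                    >>> clf = TabPFNClassifier(use_onnx=True)
+                    >>> clf.fit(X_train, y_train)
+                    >>> proba = clf.predict_proba(X_test)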
         """
         super().__init__()
         self.n_estimators = n_estimators
@@ -360,6 +365,7 @@ def __init__(  # noqa: PLR0913
         self.random_state = random_state
         self.n_jobs = n_jobs
         self.inference_config = inference_config
+        self.use_onnx = use_onnx
 
     # TODO: We can remove this from scikit-learn lower bound of 1.6
     def _more_tags(self) -> dict[str, Any]:
@@ -384,12 +390,15 @@ def fit(self, X: XType, y: YType) -> Self:
         static_seed, rng = infer_random_state(self.random_state)
 
         # Load the model and config
-        self.model_, self.config_, _ = initialize_tabpfn_model(
-            model_path=self.model_path,
-            which="classifier",
-            fit_mode=self.fit_mode,
-            static_seed=static_seed,
-        )
+        if self.use_onnx:
+            self.model_ = load_onnx_model("model_classifier.onnx")
+        else:
+            self.model_, self.config_, _ = initialize_tabpfn_model(
+                model_path=self.model_path,
+                which="classifier",
+                fit_mode=self.fit_mode,
+                static_seed=static_seed,
+            )
 
         # Determine device and precision
         self.device_ = infer_device_and_type(self.device)
@@ -501,6 +510,7 @@ def fit(self, X: XType, y: YType) -> Self:
             forced_inference_dtype_=self.forced_inference_dtype_,
             memory_saving_mode=self.memory_saving_mode,
             use_autocast_=self.use_autocast_,
+            use_onnx=self.use_onnx,
         )
 
         return self
diff --git a/src/tabpfn/inference.py b/src/tabpfn/inference.py
index 4386fb1c2..f7614a96b 100644
--- a/src/tabpfn/inference.py
+++ b/src/tabpfn/inference.py
@@ -223,9 +223,10 @@ class InferenceEngineCachePreprocessing(InferenceEngine):
     preprocessors: Sequence[SequentialFeatureTransformer]
     model: PerFeatureTransformer
     force_inference_dtype: torch.dtype | None
+    use_onnx: bool = False
 
     @classmethod
-    def prepare(
+    def prepare(  # noqa: PLR0913
         cls,
         X_train: np.ndarray,
         y_train: np.ndarray,
@@ -238,6 +239,7 @@ def prepare(
         dtype_byte_size: int,
         force_inference_dtype: torch.dtype | None,
         save_peak_mem: bool | Literal["auto"] | float | int,
+        use_onnx: bool = False,
     ) -> InferenceEngineCachePreprocessing:
         """Prepare the inference engine.
 
@@ -252,6 +254,7 @@ def prepare(
             dtype_byte_size: The byte size of the dtype.
             force_inference_dtype: The dtype to force inference to.
             save_peak_mem: Whether to save peak memory usage.
+            use_onnx: Whether to use ONNX for inference.
 
         Returns:
             The prepared inference engine.
@@ -276,6 +279,7 @@ def prepare(
             dtype_byte_size=dtype_byte_size,
             force_inference_dtype=force_inference_dtype,
             save_peak_mem=save_peak_mem,
+            use_onnx=use_onnx,
         )
 
     @override
@@ -320,6 +324,7 @@ def iter_outputs(
             device=device,
             dtype_byte_size=self.dtype_byte_size,
             safety_factor=1.2,  # TODO(Arjun): make customizable
+            use_onnx=self.use_onnx,
         )
 
         style = None
diff --git a/src/tabpfn/misc/__init__.py b/src/tabpfn/misc/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/onnx_wrapper.py
new file mode 100644
index 000000000..addd2d131
--- /dev/null
+++ b/src/tabpfn/misc/onnx_wrapper.py
@@ -0,0 +1,297 @@
+"""Module providing wrappers to use ONNX models with a PyTorch-like interface.
+
+This module defines wrappers for ONNX models as well as helper functions to export
+and validate ONNX models derived from TabPFN models.
+"""
+
+from __future__ import annotations
+
+import argparse
+
+import numpy as np
+import onnx
+import onnxruntime as ort
+import sklearn.datasets
+import torch
+from torch import nn
+
+from tabpfn import TabPFNClassifier, TabPFNRegressor
+
+
+class ONNXModelWrapper:
+    """Wrap ONNX model to match the PyTorch model interface."""
+
+    def __init__(self, model_path: str):
+        """Initialize the ONNX model wrapper.
+
+        Args:
+            model_path: Path to the ONNX model file.
+        """
+        self.session = ort.InferenceSession(
+            model_path,
+            providers=["CPUExecutionProvider"],  # TODO: Add GPU support
+        )
+
+    def to(
+        self,
+        device: torch.device,  # noqa: ARG002
+    ) -> ONNXModelWrapper:
+        """Moves the model to the specified device.
+
+        This is a no-op for the ONNX model wrapper. GPU support is not implemented.
+
+        Args:
+            device: The target device (unused).
+
+        Returns:
+            self
+        """
+        # TODO: Add GPU support by changing provider
+        return self
+
+    def type(
+        self,
+        dtype: torch.dtype,  # noqa: ARG002
+    ) -> ONNXModelWrapper:
+        """Changes the model data type.
+
+        The ONNX runtime handles dtype conversion internally; this method does nothing.
+
+        Args:
+            dtype: The target data type (unused).
+
+        Returns:
+            self
+        """
+        return self
+
+    def cpu(self) -> ONNXModelWrapper:
+        """Moves the model to CPU.
+
+        This is a no-op for the ONNX model wrapper.
+
+        Returns:
+            self
+        """
+        return self
+
+    def eval(self) -> ONNXModelWrapper:
+        """Sets the model to evaluation mode.
+
+        For the ONNX model wrapper, this does nothing and simply returns self.
+
+        Returns:
+            self
+        """
+        return self
+
+    def __call__(
+        self,
+        style: torch.Tensor | None,  # noqa: ARG002
+        X: torch.Tensor,
+        y: torch.Tensor | None,
+        *,
+        single_eval_pos: int | None = None,
+        only_return_standard_out: bool = False,  # noqa: ARG002
+    ) -> torch.Tensor:
+        """Run inference using the ONNX model.
+
+        Args:
+            style: Unused tensor placeholder.
+            X: Input tensor.
+            y: Target tensor.
+            single_eval_pos: Position to evaluate at. Defaults to -1 if not provided.
+            only_return_standard_out: Flag to return only the standard output.
+
+        Returns:
+            A torch tensor containing the model output.
+
+        Note that only_return_standard_out is not used in the ONNX runtime.
+        """
+        # Convert inputs to numpy
+        X_np = X.cpu().numpy() if isinstance(X, torch.Tensor) else X
+        y_np = y.cpu().numpy() if isinstance(y, torch.Tensor) and y is not None else y
+
+        # Prepare ONNX inputs
+        onnx_inputs = {
+            "X": X_np,
+            "y": y_np if y_np is not None else np.zeros((0,), dtype=np.float32),
+            "single_eval_pos": np.array(
+                single_eval_pos if single_eval_pos is not None else -1,
+                dtype=np.int64,
+            ),
+        }
+
+        # Run inference
+        outputs = self.session.run(None, onnx_inputs)
+
+        # Convert back to a torch tensor
+        return torch.from_numpy(outputs[0])
+
+
+class ModelWrapper(nn.Module):
+    """A wrapper class to embed an ONNX model within the PyTorch nn.Module interface."""
+
+    def __init__(self, original_model):
+        """Initialize the ModelWrapper.
+
+        Args:
+            original_model: The original model object to wrap.
+        """
+        super().__init__()
+        self.model = original_model
+
+    def forward(self, X, y, single_eval_pos, only_return_standard_out):
+        """Perform a forward pass.
+
+        Args:
+            X: Input tensor.
+            y: Target tensor.
+            single_eval_pos: Position for evaluation.
+            only_return_standard_out: Whether to return only standard outputs.
+
+        Returns:
+            The output tensor from the model.
+        """
+        return self.model(
+            None,
+            X,
+            y,
+            single_eval_pos=single_eval_pos,
+            only_return_standard_out=only_return_standard_out,
+        )
+
+
+def export_model(
+    output_path: str,
+    model_type: str = "classifier",
+) -> None:
+    """Export the TabPFN model to the ONNX format.
+
+    This function creates a sample model based on the specified
+    model_type ('classifier' or 'regressor'), trains it on a small dataset,
+    and exports the model to ONNX format with dynamic axes.
+
+    Args:
+        output_path: The file path where the ONNX model should be saved.
+        model_type: The type of model to export ('classifier' or 'regressor').
+    """
+    # Load sample dataset for initialization
+    if model_type == "classifier":
+        X, y = sklearn.datasets.load_iris(return_X_y=True)
+    else:  # regressor
+        X, y = sklearn.datasets.load_diabetes(return_X_y=True)
+
+    with torch.no_grad():
+        # Initialize and fit the model
+        if model_type == "classifier":
+            model = TabPFNClassifier(n_estimators=1, device="cpu", random_state=42)
+        else:
+            model = TabPFNRegressor(n_estimators=1, device="cpu", random_state=42)
+
+        model.fit(X, y)
+        model.predict(X)
+
+        # Create sample input tensors
+        X = torch.randn(
+            (X.shape[0] * 2, 1, X.shape[1] + 1),
+            generator=torch.Generator().manual_seed(42),
+        )
+        # make the first feature categorical
+        X[:, 0, 0] = torch.randint(0, 10, (X.shape[0],))
+
+        if model_type == "classifier":
+            y = (
+                torch.rand(y.shape, generator=torch.Generator().manual_seed(42))
+                .round()
+                .to(torch.float32)
+            )
+        else:
+            y = torch.rand(y.shape, generator=torch.Generator().manual_seed(42))
+
+        single_eval_pos = torch.tensor(
+            y.shape[0],
+            dtype=torch.int64,
+        )  # Convert to tensor
+
+        only_return_standard_out = torch.tensor(
+            data=True,
+            dtype=torch.bool,
+        )  # Convert to tensor
+
+        # Define dynamic axes for variable input sizes
+        dynamic_axes = {
+            "X": {0: "num_datapoints", 1: "batch_size", 2: "num_features"},
+            "y": {0: "num_labels"},
+            "single_eval_pos": {},
+            "only_return_standard_out": {},
+        }
+
+        # Export the model
+        torch.onnx.export(
+            ModelWrapper(model.model_).eval(),
+            (X, y, single_eval_pos, only_return_standard_out),
+            output_path,
+            input_names=[
+                "X",
+                "y",
+                "single_eval_pos",
+                "only_return_standard_out",
+            ],
+            output_names=["output"],
+            opset_version=17,
+            dynamic_axes=dynamic_axes,
+        )
+
+
+def check_onnx_model(model_path: str) -> None:
+    """Validate the ONNX model.
+
+    Loads the ONNX model and runs a checker to ensure that the model is valid.
+
+    Args:
+        model_path: The path to the ONNX model file.
+    """
+    onnx_model = onnx.load(model_path)  # Load the ONNX model
+    onnx.checker.check_model(onnx_model)  # Check if the model is valid
+
+
+def check_input_names(model_path: str) -> None:
+    """Load the ONNX model to check its input names.
+
+    Args:
+        model_path: The path to the ONNX model file.
+    """
+    onnx.load(model_path)
+    # get input names from graph
+    graph = onnx.load(model_path).graph
+    [input_node.name for input_node in graph.input]
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Export TabPFN models to ONNX format",
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="model",
+        help=(
+            "Base output path for the ONNX models (will append _classifier.onnx and "
+            "_regressor.onnx)"
+        ),
+    )
+
+    args = parser.parse_args()
+
+    # Export both models with appropriate suffixes
+    classifier_path = f"{args.output}_classifier.onnx"
+    regressor_path = f"{args.output}_regressor.onnx"
+
+    export_model(classifier_path, "classifier")
+    check_onnx_model(classifier_path)
+    check_input_names(classifier_path)
+
+    export_model(regressor_path, "regressor")
+    check_onnx_model(regressor_path)
+    check_input_names(regressor_path)
diff --git a/src/tabpfn/model/memory.py b/src/tabpfn/model/memory.py
index 9fd624b69..12215bf79 100644
--- a/src/tabpfn/model/memory.py
+++ b/src/tabpfn/model/memory.py
@@ -362,6 +362,7 @@ def reset_peak_memory_if_required(
     dtype_byte_size: int,
     safety_factor: float = 5.0,
     n_train_samples: int | None = None,
+    use_onnx: bool = False,
 ) -> None:
     """Reset the peak memory if required.
@@ -381,7 +382,11 @@ def reset_peak_memory_if_required(
         safety_factor (float): The safety factor to apply.
         n_train_samples (int): The number of training samples
             (to be used only for cache_kv mode)
+        use_onnx (bool): Whether we're using an ONNX compiled model.
     """
+    if use_onnx:
+        # TODO: Implement memory estimation for ONNX
+        return
     save_peak_mem_is_num = isinstance(
         save_peak_mem,
         (float, int),
diff --git a/src/tabpfn/regressor.py b/src/tabpfn/regressor.py
index b405d8385..a8ba7cd6e 100644
--- a/src/tabpfn/regressor.py
+++ b/src/tabpfn/regressor.py
@@ -36,6 +36,7 @@
     create_inference_engine,
     determine_precision,
     initialize_tabpfn_model,
+    load_onnx_model,
 )
 from tabpfn.config import ModelInterfaceConfig
 from tabpfn.model.bar_distribution import FullSupportBarDistribution
@@ -149,6 +150,9 @@ class TabPFNRegressor(RegressorMixin, BaseEstimator):
     _USABLE_OUTPUT_TYPES = _OUTPUT_TYPES + _OUTPUT_TYPES_COMPOSITE
     """The output types supported by the model."""
 
+    use_onnx: bool
+    """Whether to use ONNX for inference."""
+
     def __init__(  # noqa: PLR0913
         self,
         *,
@@ -169,6 +173,7 @@ def __init__(  # noqa: PLR0913
         random_state: int | np.random.RandomState | np.random.Generator | None = 0,
         n_jobs: int = -1,
         inference_config: dict | ModelInterfaceConfig | None = None,
+        use_onnx: bool = False,
     ) -> None:
         """A TabPFN interface for regression.
 
@@ -346,6 +351,9 @@ def __init__(  # noqa: PLR0913
                 - If `dict`, the key-value pairs are used to update the default
                   `ModelInterfaceConfig`. Raises an error if an unknown key is passed.
                 - If `ModelInterfaceConfig`, the object is used as the configuration.
+
+            use_onnx:
+                Whether to use an ONNX compiled model.
         """
         super().__init__()
         self.n_estimators = n_estimators
@@ -367,6 +375,7 @@ def __init__(  # noqa: PLR0913
         self.random_state = random_state
         self.n_jobs = n_jobs
         self.inference_config = inference_config
+        self.use_onnx = use_onnx
 
     # TODO: We can remove this from scikit-learn lower bound of 1.6
     def _more_tags(self) -> dict[str, Any]:
@@ -393,12 +402,23 @@ def fit(self, X: XType, y: YType) -> Self:
         static_seed, rng = infer_random_state(self.random_state)
 
         # Load the model and config
-        self.model_, self.config_, self.bardist_ = initialize_tabpfn_model(
-            model_path=self.model_path,
-            which="regressor",
-            fit_mode=self.fit_mode,
-            static_seed=static_seed,
-        )
+        if self.use_onnx:
+            self.model_ = load_onnx_model("model_regressor.onnx")
+            # Initialize bardist_ for ONNX mode
+            # TODO: faster way to do this
+            _, self.config_, self.bardist_ = initialize_tabpfn_model(
+                model_path=self.model_path,
+                which="regressor",
+                fit_mode=self.fit_mode,
+                static_seed=static_seed,
+            )
+        else:
+            self.model_, self.config_, self.bardist_ = initialize_tabpfn_model(
+                model_path=self.model_path,
+                which="regressor",
+                fit_mode=self.fit_mode,
+                static_seed=static_seed,
+            )
 
         # Determine device and precision
         self.device_ = infer_device_and_type(self.device)
@@ -515,6 +535,7 @@ def fit(self, X: XType, y: YType) -> Self:
             forced_inference_dtype_=self.forced_inference_dtype_,
             memory_saving_mode=self.memory_saving_mode,
             use_autocast_=self.use_autocast_,
+            use_onnx=self.use_onnx,
         )
 
         return self

From 720dc1650d5f8db94199d973b6183a5ccb35da8a Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Tue, 11 Feb 2025 14:51:01 +0100
Subject: [PATCH 04/20] allow to move to gpu

---
 src/tabpfn/misc/onnx_wrapper.py | 38 ++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/onnx_wrapper.py
index addd2d131..1e00758b5 100644
--- a/src/tabpfn/misc/onnx_wrapper.py
+++ b/src/tabpfn/misc/onnx_wrapper.py
@@ -27,26 +27,43 @@ def __init__(self, model_path: str):
         Args:
             model_path: Path to the ONNX model file.
         """
+        self.model_path = model_path
+        self.providers = ["CPUExecutionProvider"]
         self.session = ort.InferenceSession(
             model_path,
-            providers=["CPUExecutionProvider"],  # TODO: Add GPU support
+            providers=self.providers,
         )
 
     def to(
         self,
-        device: torch.device,  # noqa: ARG002
+        device: torch.device,
     ) -> ONNXModelWrapper:
         """Moves the model to the specified device.
 
-        This is a no-op for the ONNX model wrapper. GPU support is not implemented.
-
         Args:
-            device: The target device (unused).
+            device: The target device (cuda or cpu).
 
         Returns:
             self
         """
-        # TODO: Add GPU support by changing provider
+        if device.type == "cuda":
+            # Check if CUDA is available in ONNX Runtime
+            cuda_provider = "CUDAExecutionProvider"
+            if cuda_provider in ort.get_available_providers():
+                self.providers = [cuda_provider, "CPUExecutionProvider"]
+                # Reinitialize session with CUDA provider
+                self.session = ort.InferenceSession(
+                    self.model_path,
+                    providers=self.providers,
+                )
+            else:
+                pass
+        else:
+            self.providers = ["CPUExecutionProvider"]
+            self.session = ort.InferenceSession(
+                self.model_path,
+                providers=self.providers,
+            )
         return self
 
     def type(
@@ -105,8 +122,6 @@ def __call__(
         Returns:
             A torch tensor containing the model output.
-
-        Note that only_return_standard_out is not used in the ONNX runtime.
         """
         # Convert inputs to numpy
         X_np = X.cpu().numpy() if isinstance(X, torch.Tensor) else X
@@ -125,8 +140,11 @@ def __call__(
         # Run inference
         outputs = self.session.run(None, onnx_inputs)
 
-        # Convert back to a torch tensor
-        return torch.from_numpy(outputs[0])
+        # Convert back to torch tensor and move to the appropriate device
+        output_tensor = torch.from_numpy(outputs[0])
+        if "CUDAExecutionProvider" in self.providers:
+            output_tensor = output_tensor.cuda()
+        return output_tensor

From 523c0510586546e8e0223a38b2a5033ef8bada18 Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Mon, 10 Mar 2025 14:25:44 +0000
Subject: [PATCH 05/20] only init onnx session once

---
 src/tabpfn/base.py              |  4 +++-
 src/tabpfn/classifier.py        | 14 +++++------
 src/tabpfn/misc/onnx_wrapper.py | 42 ++++++++++++++++++++-------------
 src/tabpfn/regressor.py         | 14 +++++------
 4 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/src/tabpfn/base.py b/src/tabpfn/base.py
index c57fca5cf..cd3d96d0f 100644
--- a/src/tabpfn/base.py
+++ b/src/tabpfn/base.py
@@ -114,11 +114,13 @@ def initialize_tabpfn_model(
 
 def load_onnx_model(
     model_path: str | Path,
+    device: torch.device,
 ) -> ONNXModelWrapper:
     """Load a TabPFN model in ONNX format.
 
     Args:
         model_path: Path to the ONNX model file.
+        device: The device to run the model on.
 
     Returns:
         The loaded ONNX model wrapped in a PyTorch-compatible interface.
@@ -139,7 +141,7 @@ def load_onnx_model(
     if not model_path.exists():
         raise FileNotFoundError(f"ONNX model not found at: {model_path}")
 
-    return ONNXModelWrapper(str(model_path))
+    return ONNXModelWrapper(str(model_path), device)
 
 
 def determine_precision(
diff --git a/src/tabpfn/classifier.py b/src/tabpfn/classifier.py
index c17d130cd..9682a2379 100644
--- a/src/tabpfn/classifier.py
+++ b/src/tabpfn/classifier.py
@@ -389,9 +389,15 @@ def fit(self, X: XType, y: YType) -> Self:
         """
         static_seed, rng = infer_random_state(self.random_state)
 
+        # Determine device and precision
+        self.device_ = infer_device_and_type(self.device)
+        (self.use_autocast_, self.forced_inference_dtype_, byte_size) = (
+            determine_precision(self.inference_precision, self.device_)
+        )
+
         # Load the model and config
         if self.use_onnx:
-            self.model_ = load_onnx_model("model_classifier.onnx")
+            self.model_ = load_onnx_model("model_classifier.onnx", self.device_)
         else:
             self.model_, self.config_, _ = initialize_tabpfn_model(
                 model_path=self.model_path,
@@ -400,12 +406,6 @@ def fit(self, X: XType, y: YType) -> Self:
                 static_seed=static_seed,
             )
 
-        # Determine device and precision
-        self.device_ = infer_device_and_type(self.device)
-        (self.use_autocast_, self.forced_inference_dtype_, byte_size) = (
-            determine_precision(self.inference_precision, self.device_)
-        )
-
         # Build the interface_config
         self.interface_config_ = ModelInterfaceConfig.from_user_input(
             inference_config=self.inference_config,
diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/onnx_wrapper.py
index 1e00758b5..32cb380e6 100644
--- a/src/tabpfn/misc/onnx_wrapper.py
+++ b/src/tabpfn/misc/onnx_wrapper.py
@@ -21,14 +21,21 @@ class ONNXModelWrapper:
     """Wrap ONNX model to match the PyTorch model interface."""
 
-    def __init__(self, model_path: str):
+    def __init__(self, model_path: str, device: torch.device):
         """Initialize the ONNX model wrapper.
 
         Args:
             model_path: Path to the ONNX model file.
+            device: The device to run the model on.
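+
+            A minimal construction sketch (the file name here is an assumption;
+            any exported TabPFN ONNX model works):
+
+                >>> wrapper = ONNXModelWrapper("model_classifier.onnx", torch.device("cpu"))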
         """
         self.model_path = model_path
-        self.providers = ["CPUExecutionProvider"]
+        self.device = device
+        if device.type == "cuda":
+            self.providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
+        elif device.type == "cpu":
+            self.providers = ["CPUExecutionProvider"]
+        else:
+            raise ValueError(f"Invalid device: {device}")
         self.session = ort.InferenceSession(
             model_path,
             providers=self.providers,
         )
@@ -46,24 +53,27 @@ def to(
         Returns:
             self
         """
-        if device.type == "cuda":
-            # Check if CUDA is available in ONNX Runtime
-            cuda_provider = "CUDAExecutionProvider"
-            if cuda_provider in ort.get_available_providers():
-                self.providers = [cuda_provider, "CPUExecutionProvider"]
-                # Reinitialize session with CUDA provider
+        # Only recreate session if device type has changed
+        if device.type != self.device.type:
+            if device.type == "cuda":
+                # Check if CUDA is available in ONNX Runtime
+                cuda_provider = "CUDAExecutionProvider"
+                if cuda_provider in ort.get_available_providers():
+                    self.providers = [cuda_provider, "CPUExecutionProvider"]
+                    # Reinitialize session with CUDA provider
+                    self.session = ort.InferenceSession(
+                        self.model_path,
+                        providers=self.providers,
+                    )
+                # If CUDA is not available, keep current session
+            else:
+                self.providers = ["CPUExecutionProvider"]
                 self.session = ort.InferenceSession(
                     self.model_path,
                     providers=self.providers,
                 )
-            else:
-                pass
-        else:
-            self.providers = ["CPUExecutionProvider"]
-            self.session = ort.InferenceSession(
-                self.model_path,
-                providers=self.providers,
-            )
+            # Update the device
+            self.device = device
         return self
 
     def type(
diff --git a/src/tabpfn/regressor.py b/src/tabpfn/regressor.py
index a8ba7cd6e..7b81cafc4 100644
--- a/src/tabpfn/regressor.py
+++ b/src/tabpfn/regressor.py
@@ -401,9 +401,15 @@ def fit(self, X: XType, y: YType) -> Self:
         """
         static_seed, rng = infer_random_state(self.random_state)
 
+        # Determine device and precision
+        self.device_ = infer_device_and_type(self.device)
+        (self.use_autocast_, self.forced_inference_dtype_, byte_size) = (
+            determine_precision(self.inference_precision, self.device_)
+        )
+
         # Load the model and config
         if self.use_onnx:
-            self.model_ = load_onnx_model("model_regressor.onnx")
+            self.model_ = load_onnx_model("model_regressor.onnx", self.device_)
             # Initialize bardist_ for ONNX mode
             # TODO: faster way to do this
             _, self.config_, self.bardist_ = initialize_tabpfn_model(
                 model_path=self.model_path,
                 which="regressor",
                 fit_mode=self.fit_mode,
                 static_seed=static_seed,
             )
         else:
             self.model_, self.config_, self.bardist_ = initialize_tabpfn_model(
                 model_path=self.model_path,
                 which="regressor",
                 fit_mode=self.fit_mode,
                 static_seed=static_seed,
             )
 
-        # Determine device and precision
-        self.device_ = infer_device_and_type(self.device)
-        (self.use_autocast_, self.forced_inference_dtype_, byte_size) = (
-            determine_precision(self.inference_precision, self.device_)
-        )
-
         # Build the interface_config
         self.interface_config_ = ModelInterfaceConfig.from_user_input(
             inference_config=self.inference_config,

From 82e37c03c74f7b87d746b7fe6285f4fe4102b5f2 Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Mon, 10 Mar 2025 18:24:53 +0000
Subject: [PATCH 06/20] a few improvements

---
 src/tabpfn/misc/onnx_wrapper.py | 9 +++++----
 src/tabpfn/model/encoders.py    | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/onnx_wrapper.py
index 32cb380e6..568817a4d 100644
--- a/src/tabpfn/misc/onnx_wrapper.py
+++ b/src/tabpfn/misc/onnx_wrapper.py
@@ -249,7 +249,7 @@ def export_model(
         # Define dynamic axes for variable input sizes
         dynamic_axes = {
-            "X": {0: "num_datapoints", 1: "batch_size", 2: "num_features"},
+            "X": {0: "num_datapoints", 2: "num_features"},
             "y": {0: "num_labels"},
             "single_eval_pos": {},
             "only_return_standard_out": {},
@@ -291,9 +291,10 @@ def check_input_names(model_path: str) -> None:
         model_path: The path to the ONNX model file.
     """
     onnx.load(model_path)
-    # get input names from graph
-    graph = onnx.load(model_path).graph
-    [input_node.name for input_node in graph.input]
+
+    # Print input names
+
+    # Print output names
 
 
 if __name__ == "__main__":
diff --git a/src/tabpfn/model/encoders.py b/src/tabpfn/model/encoders.py
index 6a3de8138..4d3c08ff9 100644
--- a/src/tabpfn/model/encoders.py
+++ b/src/tabpfn/model/encoders.py
@@ -90,7 +90,7 @@ def normalize_data(
     mean = torch_nanmean(data, axis=0)  # type: ignore
     std = torch_nanstd(data, axis=0) + 1e-20
 
-    if len(data) == 1 or normalize_positions == 1:
+    if data.shape[0] == 1 or normalize_positions == 1:
         std[:] = 1.0
 
     if std_only:

From 3c2ecbca7c6a3183998b21210259f1f02cf1885c Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Fri, 21 Mar 2025 13:52:32 +0100
Subject: [PATCH 07/20] improve test when generating new onnx model

---
 src/tabpfn/misc/onnx_wrapper.py | 93 +++++++++++++++++++++++++++++++--
 1 file changed, 90 insertions(+), 3 deletions(-)

diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/onnx_wrapper.py
index 568817a4d..e3a29a502 100644
--- a/src/tabpfn/misc/onnx_wrapper.py
+++ b/src/tabpfn/misc/onnx_wrapper.py
@@ -250,7 +250,7 @@ def export_model(
         # Define dynamic axes for variable input sizes
         dynamic_axes = {
             "X": {0: "num_datapoints", 2: "num_features"},
-            "y": {0: "num_labels"},
+            "y": {0: "num_datapoints"},
             "single_eval_pos": {},
             "only_return_standard_out": {},
         }
@@ -292,11 +292,92 @@ def check_input_names(model_path: str) -> None:
     """
     onnx.load(model_path)
 
-    # Print output names
+
+def test_models(
+    model_path_classifier: str,
+    model_path_regressor: str,
+) -> None:
+    """Test both TabPFNClassifier and TabPFNRegressor with and without ONNX.
+
+    This function validates that both the original PyTorch models and the
+    exported ONNX models work correctly on simple datasets.
+
+    Args:
+        model_path_classifier: Path to the exported ONNX classifier model.
+        model_path_regressor: Path to the exported ONNX regressor model.
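+
+    A usage sketch (assuming the default file names produced by this module's
+    __main__ block):
+
+        >>> test_models("model_classifier.onnx", "model_regressor.onnx")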
+    """
+    from sklearn.datasets import load_diabetes, load_iris
+    from sklearn.metrics import accuracy_score, mean_squared_error
+    from sklearn.model_selection import train_test_split
+
+    from tabpfn import TabPFNClassifier, TabPFNRegressor
+
+    # Test classifier
+    def _test_classifier(use_onnx: bool = False) -> float:
+        # Load dataset
+        X, y = load_iris(return_X_y=True)
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=0.2, random_state=42
+        )
+
+        # Create and fit model
+        if use_onnx:
+            model = TabPFNClassifier(n_estimators=1, use_onnx=True)
+        else:
+            model = TabPFNClassifier(n_estimators=1, use_onnx=False)
+
+        model.fit(X_train, y_train)
+
+        # Make predictions
+        y_pred = model.predict(X_test)
+        return accuracy_score(y_test, y_pred)
+
+    # Test regressor
+    def _test_regressor(use_onnx: bool = False) -> float:
+        # Load dataset
+        X, y = load_diabetes(return_X_y=True)
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=0.2, random_state=42
+        )
+
+        # Create and fit model
+        if use_onnx:
+            model = TabPFNRegressor(n_estimators=1, use_onnx=True)
+        else:
+            model = TabPFNRegressor(n_estimators=1, use_onnx=False)
+
+        model.fit(X_train, y_train)
+
+        # Make predictions (mean)
+        y_pred_mean = model.predict(X_test)
+        return mean_squared_error(y_test, y_pred_mean)
+
+    # Test with PyTorch backend
+    clf_acc_torch = _test_classifier(use_onnx=False)
+    reg_mse_torch = _test_regressor(use_onnx=False)
+
+    # Test with ONNX backend
+    try:
+        clf_acc_onnx = _test_classifier(use_onnx=True)
+        reg_mse_onnx = _test_regressor(use_onnx=True)
+
+        # Compare results
+
+        # Check if results are similar
+        accuracy_diff = abs(clf_acc_torch - clf_acc_onnx)
+        mse_ratio = reg_mse_torch / max(reg_mse_onnx, 1e-10)
+
+        if accuracy_diff > 0.1 or mse_ratio < 0.5 or mse_ratio > 2.0:
+            pass
+        else:
+            pass
+
+    except Exception:
+        pass
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="Export TabPFN models to ONNX format",
     )
     parser.add_argument(
         "--output",
         type=str,
         default="model",
         help=(
             "Base output path for the ONNX models (will append _classifier.onnx and "
             "_regressor.onnx)"
         ),
     )
 
     args = parser.parse_args()
 
     # Export both models with appropriate suffixes
     classifier_path = f"{args.output}_classifier.onnx"
     regressor_path = f"{args.output}_regressor.onnx"
 
     export_model(classifier_path, "classifier")
     check_onnx_model(classifier_path)
     check_input_names(classifier_path)
 
     export_model(regressor_path, "regressor")
     check_onnx_model(regressor_path)
     check_input_names(regressor_path)
+
+    # Run tests if requested
+    if args.output == "model":
+        test_models(classifier_path, regressor_path)
+    else:
+        pass

From dd542cd24469358f45892eb86cc5954907aa2544 Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Fri, 21 Mar 2025 14:11:40 +0100
Subject: [PATCH 08/20] improve test when generating new onnx model fix

---
 src/tabpfn/misc/onnx_wrapper.py | 118 ++++++++++++++++++++------------
 1 file changed, 75 insertions(+), 43 deletions(-)

diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/onnx_wrapper.py
index e3a29a502..a6d77b62a 100644
--- a/src/tabpfn/misc/onnx_wrapper.py
+++ b/src/tabpfn/misc/onnx_wrapper.py
@@ -222,7 +222,7 @@ def export_model(
         # Create sample input tensors
         X = torch.randn(
-            (X.shape[0] * 2, 1, X.shape[1] + 1),
+            (X.shape[0] * 4, 1, X.shape[1] + 1),
             generator=torch.Generator().manual_seed(42),
         )
         # make the first feature categorical
@@ -230,12 +230,12 @@ def export_model(
         if model_type == "classifier":
             y = (
-                torch.rand(y.shape, generator=torch.Generator().manual_seed(42))
+                torch.rand((y.shape[0] * 3,), generator=torch.Generator().manual_seed(42))
                 .round()
                 .to(torch.float32)
             )
         else:
-            y = torch.rand(y.shape, generator=torch.Generator().manual_seed(42))
+            y = torch.rand((y.shape[0] * 3,), generator=torch.Generator().manual_seed(42))
 
         single_eval_pos = torch.tensor(
             y.shape[0],
@@ -290,8 +290,13 @@ def check_input_names(model_path: str) -> None:
     Args:
         model_path: The path to the ONNX model file.
     """
-    onnx.load(model_path)
+    model = onnx.load(model_path)
+    print("--------------------------------")
+    print("----INPUTS----")
+    print(model.graph.input)
+    print("----OUTPUTS----")
+    print(model.graph.output)
+    print("--------------------------------")
 
-    # Print output names
 
 def test_models(
     model_path_classifier: str,
     model_path_regressor: str,
 ) -> None:
     """Test both TabPFNClassifier and TabPFNRegressor with and without ONNX.
 
     This function validates that both the original PyTorch models and the
     exported ONNX models work correctly on simple datasets.
 
     Args:
         model_path_classifier: Path to the exported ONNX classifier model.
         model_path_regressor: Path to the exported ONNX regressor model.
     """
+    import numpy as np
+    from sklearn.datasets import load_iris, load_diabetes
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import accuracy_score, mean_squared_error
+    from tabpfn import TabPFNClassifier, TabPFNRegressor
 
     # Test classifier
     def _test_classifier(use_onnx: bool = False) -> float:
+        print(f"\n{'='*20} Testing TabPFNClassifier (use_onnx={use_onnx}) {'='*20}")
+
         # Load dataset
         X, y = load_iris(return_X_y=True)
-        X_train, X_test, y_train, y_test = train_test_split(
-            X, y, test_size=0.2, random_state=42
-        )
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
         # Create and fit model
         if use_onnx:
-            model = TabPFNClassifier(n_estimators=1, use_onnx=True)
+            model = TabPFNClassifier(n_estimators=2, use_onnx=True)
         else:
-            model = TabPFNClassifier(n_estimators=1, use_onnx=False)
+            model = TabPFNClassifier(n_estimators=2, use_onnx=False)
 
         model.fit(X_train, y_train)
 
         # Make predictions
         y_pred = model.predict(X_test)
-        return accuracy_score(y_test, y_pred)
+        accuracy = accuracy_score(y_test, y_pred)
+
+        print(f"Accuracy: {accuracy:.4f}")
+
+        # Test predict_proba
+        proba = model.predict_proba(X_test)
+        print(f"Probability shape: {proba.shape}")
+
+        return accuracy
 
     # Test regressor
     def _test_regressor(use_onnx: bool = False) -> float:
+        print(f"\n{'='*20} Testing TabPFNRegressor (use_onnx={use_onnx}) {'='*20}")
+
         # Load dataset
         X, y = load_diabetes(return_X_y=True)
-        X_train, X_test, y_train, y_test = train_test_split(
-            X, y, test_size=0.2, random_state=42
-        )
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
         # Create and fit model
         if use_onnx:
-            model = TabPFNRegressor(n_estimators=1, use_onnx=True)
+            model = TabPFNRegressor(n_estimators=2, use_onnx=True)
         else:
-            model = TabPFNRegressor(n_estimators=1, use_onnx=False)
+            model = TabPFNRegressor(n_estimators=2, use_onnx=False)
 
         model.fit(X_train, y_train)
 
         # Make predictions (mean)
         y_pred_mean = model.predict(X_test)
-        return mean_squared_error(y_test, y_pred_mean)
+        mse_mean = mean_squared_error(y_test, y_pred_mean)
+        print(f"MSE (mean): {mse_mean:.4f}")
+
+        # Make predictions (median)
+        y_pred_median = model.predict(X_test, output_type="median")
+        mse_median = mean_squared_error(y_test, y_pred_median)
+        print(f"MSE (median): {mse_median:.4f}")
+
+        # Test quantiles
+        quantiles = model.predict(X_test, output_type="quantiles", quantiles=[0.1, 0.5, 0.9])
+        print(f"Quantile predictions shape (0.1): {quantiles[0].shape}")
+
+        return mse_mean
+
+    print("Testing TabPFN models with PyTorch and ONNX backends")
 
     # Test with PyTorch backend
     clf_acc_torch = _test_classifier(use_onnx=False)
     reg_mse_torch = _test_regressor(use_onnx=False)
 
     # Test with ONNX backend
     try:
         clf_acc_onnx = _test_classifier(use_onnx=True)
         reg_mse_onnx = _test_regressor(use_onnx=True)
 
         # Compare results
+        print("\n" + "="*60)
+        print(f"Classifier accuracy - PyTorch: {clf_acc_torch:.4f}, ONNX: {clf_acc_onnx:.4f}")
+        print(f"Regressor MSE - PyTorch: {reg_mse_torch:.4f}, ONNX: {reg_mse_onnx:.4f}")
 
         # Check if results are similar
         accuracy_diff = abs(clf_acc_torch - clf_acc_onnx)
         mse_ratio = reg_mse_torch / max(reg_mse_onnx, 1e-10)
 
         if accuracy_diff > 0.1 or mse_ratio < 0.5 or mse_ratio > 2.0:
-            pass
+            print("\nWARNING: Large difference between PyTorch and ONNX model results!")
         else:
-            pass
+            print("\nSUCCESS: PyTorch and ONNX models produce similar results.")
 
-    except Exception:
-        pass
+    except Exception as e:
+        print("\n" + "="*60)
+        print(f"Error testing ONNX models: {e}")
+        print("Make sure ONNX models are correctly exported.")
 
 
 if __name__ == "__main__":
     export_model(regressor_path, "regressor")
     check_onnx_model(regressor_path)
     check_input_names(regressor_path)
 
     # Run tests if requested
     if args.output == "model":
         test_models(classifier_path, regressor_path)
     else:
-        pass
+        print("using custom output path, the model won't be tested for performance as part of sklearn wrappers")

From 55cdbcb58becb5af88e7ab84b7d586666d2459c5 Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Mon, 24 Mar 2025 12:39:17 +0100
Subject: [PATCH 09/20] make onnx export work by removing predict, and fetch
 the onnx model from the right cache

---
 src/tabpfn/base.py                            |  21 +-
 src/tabpfn/classifier.py                      |   7 +-
 src/tabpfn/inference.py                       |  37 ++-
 .../{onnx_wrapper.py => compile_to_onnx.py}   | 237 +++++++++---------
 src/tabpfn/regressor.py                       |   7 +-
 src/tabpfn/utils.py                           |  66 +++--
 6 files changed, 225 insertions(+), 150 deletions(-)
 rename src/tabpfn/misc/{onnx_wrapper.py => compile_to_onnx.py} (72%)

diff --git a/src/tabpfn/base.py b/src/tabpfn/base.py
index cd3d96d0f..d79d6288b 100644
--- a/src/tabpfn/base.py
+++ b/src/tabpfn/base.py
@@ -26,6 +26,7 @@
     InferenceEngineOnDemand,
 )
 from tabpfn.utils import (
+    get_model_path,
     infer_fp16_inference_mode,
     load_model_criterion_config,
 )
@@ -33,7 +34,7 @@
 if TYPE_CHECKING:
     import numpy as np
 
-    from tabpfn.misc.onnx_wrapper import ONNXModelWrapper
+    from tabpfn.misc.compile_to_onnx import ONNXModelWrapper
     from tabpfn.model.bar_distribution import FullSupportBarDistribution
     from tabpfn.model.config import InferenceConfig
     from tabpfn.model.transformer import PerFeatureTransformer
@@ -79,8 +80,6 @@ def initialize_tabpfn_model(
     """
     # Handle auto model_path
     download = True
-    if isinstance(model_path, str) and model_path == "auto":
-        model_path = None  # type: ignore
 
     # Load model with potential caching
     if which == "classifier":
@@ -114,12 +113,16 @@ def initialize_tabpfn_model(
 
 def load_onnx_model(
     model_path: str | Path,
+    which: Literal["classifier", "regressor"],
+    version: Literal["v2"],
     device: torch.device,
 ) -> ONNXModelWrapper:
     """Load a TabPFN model in ONNX format.
 
     Args:
         model_path: Path to the ONNX model file.
+        which: Which TabPFN model to load.
+        version: The version of the model.
         device: The device to run the model on.
 
     Returns:
         The loaded ONNX model wrapped in a PyTorch-compatible interface.
 
     Raises:
         ImportError: If onnxruntime is not installed.
         FileNotFoundError: If the model file doesn't exist.
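+
+    Example (a sketch; the concrete file location is resolved by
+    ``get_model_path``, so the arguments shown are assumptions):
+
+        >>> model = load_onnx_model(
+        ...     "auto", which="classifier", version="v2", device=torch.device("cpu")
+        ... )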
     """
+    model_path = get_model_path(model_path, which, version, use_onnx=True)
     try:
         from tabpfn.misc.compile_to_onnx import ONNXModelWrapper
     except ImportError as err:
         raise ImportError(
             "onnxruntime is required to load ONNX models. "
             "Install it with: pip install onnxruntime",
         ) from err
 
     model_path = Path(model_path)
     if not model_path.exists():
-        raise FileNotFoundError(f"ONNX model not found at: {model_path}")
+        raise FileNotFoundError(
+            f"ONNX model not found at: {model_path}, "
+            "please compile the model by running "
+            "`from tabpfn.misc.compile_to_onnx import compile_onnx_models; "
+            "compile_onnx_models()`",
+        )
 
     return ONNXModelWrapper(str(model_path), device)
 
 
 def determine_precision(
 def create_inference_engine(  # noqa: PLR0913
     *,
     X_train: np.ndarray,
     y_train: np.ndarray,
-    model: PerFeatureTransformer,
+    model: PerFeatureTransformer | ONNXModelWrapper,
     ensemble_configs: Any,
     cat_ix: list[int],
     fit_mode: Literal["low_memory", "fit_preprocessors", "fit_with_cache"],
diff --git a/src/tabpfn/classifier.py b/src/tabpfn/classifier.py
index 9682a2379..02b6d4495 100644
--- a/src/tabpfn/classifier.py
+++ b/src/tabpfn/classifier.py
@@ -397,7 +397,12 @@ def fit(self, X: XType, y: YType) -> Self:
         # Load the model and config
         if self.use_onnx:
-            self.model_ = load_onnx_model("model_classifier.onnx", self.device_)
+            self.model_ = load_onnx_model(
+                self.model_path,
+                which="classifier",
+                version="v2",
+                device=self.device_,
+            )
         else:
             self.model_, self.config_, _ = initialize_tabpfn_model(
                 model_path=self.model_path,
diff --git a/src/tabpfn/inference.py b/src/tabpfn/inference.py
index f7614a96b..f94819e61 100644
--- a/src/tabpfn/inference.py
+++ b/src/tabpfn/inference.py
@@ -19,6 +19,7 @@
 from tabpfn.preprocessing import fit_preprocessing
 
 if TYPE_CHECKING:
+    from tabpfn.misc.compile_to_onnx import ONNXModelWrapper
     from tabpfn.model.preprocessing import SequentialFeatureTransformer
     from tabpfn.model.transformer import PerFeatureTransformer
     from tabpfn.preprocessing import EnsembleConfig
@@ -62,6 +63,7 @@ def iter_outputs(
         *,
         device: torch.device,
         autocast: bool,
+        only_return_standard_out: bool = True,
     ) -> Iterator[tuple[torch.Tensor, EnsembleConfig]]:
         """Iterate over the outputs of the model.
 
@@ -71,6 +73,7 @@ def iter_outputs(
             X: The input data to make predictions on.
             device: The device to run the model on.
             autocast: Whether to use torch.autocast during inference.
+            only_return_standard_out: Whether to only return the standard output.
         """
         ...
 
@@ -90,9 +93,11 @@ class InferenceEngineOnDemand(InferenceEngine):
     cat_ix: list[int]
     static_seed: int
     n_workers: int
-    model: PerFeatureTransformer
+    model: PerFeatureTransformer | ONNXModelWrapper
     force_inference_dtype: torch.dtype | None
+    use_onnx: bool = False
 
+    # ruff: noqa: PLR0913
     @classmethod
     def prepare(
         cls,
@@ -100,13 +105,14 @@ def prepare(
         y_train: np.ndarray,
         *,
         cat_ix: list[int],
-        model: PerFeatureTransformer,
+        model: PerFeatureTransformer | ONNXModelWrapper,
         ensemble_configs: Sequence[EnsembleConfig],
         rng: np.random.Generator,
         n_workers: int,
         dtype_byte_size: int,
         force_inference_dtype: torch.dtype | None,
         save_peak_mem: bool | Literal["auto"] | float | int,
+        use_onnx: bool = False,
     ) -> InferenceEngineOnDemand:
         """Prepare the inference engine.
 
@@ -121,6 +127,7 @@ def prepare(
             dtype_byte_size: The byte size of the dtype.
             force_inference_dtype: The dtype to force inference to.
             save_peak_mem: Whether to save peak memory usage.
+            use_onnx: Whether to use ONNX models instead of PyTorch models.
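+
+        Returns:
+            The prepared inference engine.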
         """
         # We save it as a static seed to be reproducible across predicts
         static_seed = rng.integers(0, int(np.iinfo(np.int32).max))
@@ -135,6 +142,7 @@ def prepare(
             dtype_byte_size=dtype_byte_size,
             force_inference_dtype=force_inference_dtype,
             save_peak_mem=save_peak_mem,
+            use_onnx=use_onnx,
         )
 
     @override
@@ -178,6 +186,7 @@ def iter_outputs(
             dtype_byte_size=self.dtype_byte_size,
             device=device,
             safety_factor=1.2,  # TODO(Arjun): make customizable
+            use_onnx=self.use_onnx,
         )
 
         if self.force_inference_dtype is not None:
@@ -221,18 +230,18 @@ class InferenceEngineCachePreprocessing(InferenceEngine):
     cat_ixs: Sequence[list[int]]
     ensemble_configs: Sequence[EnsembleConfig]
     preprocessors: Sequence[SequentialFeatureTransformer]
-    model: PerFeatureTransformer
+    model: PerFeatureTransformer | ONNXModelWrapper
     force_inference_dtype: torch.dtype | None
     use_onnx: bool = False
 
     @classmethod
-    def prepare(  # noqa: PLR0913
+    def prepare(
         cls,
         X_train: np.ndarray,
         y_train: np.ndarray,
         *,
         cat_ix: list[int],
-        model: PerFeatureTransformer,
+        model: PerFeatureTransformer | ONNXModelWrapper,
         ensemble_configs: Sequence[EnsembleConfig],
         n_workers: int,
         rng: np.random.Generator,
@@ -254,7 +263,7 @@ def prepare(
             dtype_byte_size: The byte size of the dtype.
             force_inference_dtype: The dtype to force inference to.
             save_peak_mem: Whether to save peak memory usage.
-            use_onnx: Whether to use ONNX for inference.
+            use_onnx: Whether to use ONNX models instead of PyTorch models.
 
         Returns:
             The prepared inference engine.
@@ -359,12 +368,13 @@ class InferenceEngineCacheKV(InferenceEngine):
     preprocessors: list[SequentialFeatureTransformer]
     configs: list[EnsembleConfig]
     cat_ixs: list[list[int]]
-    models: list[PerFeatureTransformer]
+    models: list[PerFeatureTransformer | ONNXModelWrapper]
     n_train_samples: list[int]
     force_inference_dtype: torch.dtype | None
+    use_onnx: bool = False
 
     @classmethod
-    def prepare(  # noqa: PLR0913
+    def prepare(
         cls,
         X_train: np.ndarray,
         y_train: np.ndarray,
         *,
         cat_ix: list[int],
         ensemble_configs: Sequence[EnsembleConfig],
         n_workers: int,
-        model: PerFeatureTransformer,
+        model: PerFeatureTransformer | ONNXModelWrapper,
         device: torch.device,
         rng: np.random.Generator,
         dtype_byte_size: int,
@@ -380,6 +390,7 @@ def prepare(
         save_peak_mem: bool | Literal["auto"] | float | int,
         autocast: bool,
         only_return_standard_out: bool = True,
+        use_onnx: bool = False,
     ) -> InferenceEngineCacheKV:
         """Prepare the inference engine.
 
@@ -397,6 +408,7 @@ def prepare(
             save_peak_mem: Whether to save peak memory usage.
             autocast: Whether to use torch.autocast during inference.
             only_return_standard_out: Whether to only return the standard output
+            use_onnx: Whether to use ONNX models instead of PyTorch models.
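+
+        Returns:
+            The prepared inference engine.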
         """
         itr = fit_preprocessing(
             configs=ensemble_configs,
             X_train=X_train,
             y_train=y_train,
             random_state=rng,
             cat_ix=cat_ix,
             n_workers=n_workers,
             parallel_mode="as-ready",
         )
-        models: list[PerFeatureTransformer] = []
+        models: list[PerFeatureTransformer | ONNXModelWrapper] = []
         preprocessors: list[SequentialFeatureTransformer] = []
         correct_order_configs: list[EnsembleConfig] = []
         cat_ixs: list[list[int]] = []
@@ -435,7 +447,6 @@ def prepare(
             ens_model.forward(
                 *(None, X, y),
                 only_return_standard_out=only_return_standard_out,
-                categorical_inds=preprocessor_cat_ix,
                 single_eval_pos=len(X),
             )
@@ -453,6 +464,7 @@ def prepare(
             dtype_byte_size=dtype_byte_size,
             force_inference_dtype=force_inference_dtype,
             save_peak_mem=save_peak_mem,
+            use_onnx=use_onnx,
         )
 
     @override
@@ -464,7 +476,7 @@ def iter_outputs(
         autocast: bool,
         only_return_standard_out: bool = True,
     ) -> Iterator[tuple[torch.Tensor | dict, EnsembleConfig]]:
-        for preprocessor, model, config, cat_ix, X_train_len in zip(
+        for preprocessor, model, config, _cat_ix, X_train_len in zip(
             self.preprocessors,
             self.models,
             self.configs,
@@ -484,6 +496,7 @@ def iter_outputs(
             dtype_byte_size=self.dtype_byte_size,
             safety_factor=1.2,  # TODO(Arjun): make customizable
             n_train_samples=X_train_len,
+            use_onnx=self.use_onnx,
         )
 
             model = model.to(device)  # noqa: PLW2901
diff --git a/src/tabpfn/misc/onnx_wrapper.py b/src/tabpfn/misc/compile_to_onnx.py
similarity index 72%
rename from src/tabpfn/misc/onnx_wrapper.py
rename to src/tabpfn/misc/compile_to_onnx.py
index a6d77b62a..ef3e16b8a 100644
--- a/src/tabpfn/misc/onnx_wrapper.py
+++ b/src/tabpfn/misc/compile_to_onnx.py
@@ -6,7 +6,8 @@
 
 from __future__ import annotations
 
-import argparse
+import os
+import sys
 
 import numpy as np
 import onnx
 import onnxruntime as ort
 import sklearn.datasets
 import torch
 from torch import nn
 
 from tabpfn import TabPFNClassifier, TabPFNRegressor
+from tabpfn.utils import _user_cache_dir
@@ -120,7 +122,7 @@ def __call__(
         *,
         single_eval_pos: int | None = None,
         only_return_standard_out: bool = False,  # noqa: ARG002
-    ) -> torch.Tensor:
+    ) -> dict[str, torch.Tensor]:
         """Run inference using the ONNX model.
 
         if "CUDAExecutionProvider" in self.providers:
             output_tensor = output_tensor.cuda()
         return output_tensor
+
+    def forward(
+        self,
+        style: torch.Tensor | None,
+        X: torch.Tensor,
+        y: torch.Tensor | None,
+        *,
+        single_eval_pos: int | None = None,
+        only_return_standard_out: bool = False,
+    ) -> dict[str, torch.Tensor]:
+        """Forward pass that delegates to __call__.
+
+        Args:
+            style: Unused tensor placeholder.
+            X: Input tensor.
+            y: Target tensor.
+            single_eval_pos: Position to evaluate at. Defaults to -1 if not provided.
+            only_return_standard_out: Flag to return only the standard output.
+
+        Returns:
+            A torch tensor containing the model output.
+        """
+        return self.__call__(
+            style,
+            X,
+            y,
+            single_eval_pos=single_eval_pos,
+            only_return_standard_out=only_return_standard_out,
+        )
 
 
 class ModelWrapper(nn.Module):
-    """A wrapper class to embed an ONNX model within the PyTorch nn.Module interface."""
+    """A wrapper class exposing the PyTorch model through the nn.Module interface.
+    Only used for exporting the model to ONNX format.
+    """
 
-    def __init__(self, original_model):
+    def __init__(self, original_model: nn.Module):
         """Initialize the ModelWrapper.
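+
+        The wrapped object is the underlying PyTorch TabPFN model; wrapping it
+        exposes the purely positional ``forward`` signature that
+        ``torch.onnx.export`` traces.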
Args:
@@ -169,7 +202,11 @@ def __init__(self, original_model):
        super().__init__()
        self.model = original_model

-    def forward(self, X, y, single_eval_pos, only_return_standard_out):
+    def forward(self, X: torch.Tensor,
+            y: torch.Tensor,
+            single_eval_pos: torch.Tensor,
+            only_return_standard_out: torch.Tensor,
+    ) -> dict[str, torch.Tensor]:
        """Perform a forward pass.

        Args:
@@ -218,11 +255,11 @@ def export_model(
        model = TabPFNRegressor(n_estimators=1, device="cpu", random_state=42)
        model.fit(X, y)

-    model.predict(X)
+    # NOTE: Calling model.predict(X) at this point would break the export process.

    # Create sample input tensors
    X = torch.randn(
-        (X.shape[0] * 4, 1, X.shape[1] + 1),
+        (X.shape[0] * 2, 1, X.shape[1] + 1),
        generator=torch.Generator().manual_seed(42),
    )
    # make the first feature categorical
@@ -230,12 +267,12 @@ def export_model(

    if model_type == "classifier":
        y = (
-            torch.rand((y.shape[0] * 3,), generator=torch.Generator().manual_seed(42))
+            torch.rand(y.shape, generator=torch.Generator().manual_seed(42))
            .round()
            .to(torch.float32)
        )
    else:
-        y = torch.rand((y.shape[0] * 3,), generator=torch.Generator().manual_seed(42))
+        y = torch.rand(y.shape, generator=torch.Generator().manual_seed(42))

    single_eval_pos = torch.tensor(
        y.shape[0],
@@ -290,145 +327,118 @@ def check_input_names(model_path: str) -> None:
    Args:
        model_path: The path to the ONNX model file.
    """
-    model = onnx.load(model_path)
-    print("--------------------------------")
-    print("----INPUTS----")
-    print(model.graph.input)
-    print("----OUTPUTS----")
-    print(model.graph.output)
-    print("--------------------------------")
+    onnx.load(model_path)  # loading fails loudly if the model graph is malformed


-def test_models(
-    model_path_classifier: str,
-    model_path_regressor: str,
-) -> None:
+def test_models() -> None:
    """Test both TabPFNClassifier and TabPFNRegressor with and without ONNX.
-
-    This function validates that both the original PyTorch models and the
+
+    This function validates that both the original PyTorch models and the
    exported ONNX models work correctly on simple datasets.
-
-    Args:
-        model_path_classifier: Path to the exported ONNX classifier model.
-        model_path_regressor: Path to the exported ONNX regressor model.
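The export call itself follows the standard `torch.onnx.export` pattern, mirrored by the interface tests later in this series; roughly, with `wrapped`, `X`, and `y` being the `ModelWrapper` around a fitted model's `model_` attribute and the sample tensors built above:

```python
import torch

# Sketch of the export step; `wrapped`, `X`, and `y` are assumed to exist.
dynamic_axes = {
    "X": {0: "num_datapoints", 1: "batch_size", 2: "num_features"},
    "y": {0: "num_labels"},
}
torch.onnx.export(
    wrapped.eval(),
    (X, y, y.shape[0], True),
    "tabpfn-v2-regressor.onnx",
    input_names=["X", "y", "single_eval_pos", "only_return_standard_out"],
    output_names=["output"],
    opset_version=17,  # torch>=2.1 supports opset 17
    dynamic_axes=dynamic_axes,
)
```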
""" - import numpy as np - from sklearn.datasets import load_iris, load_diabetes - from sklearn.model_selection import train_test_split + from sklearn.datasets import load_diabetes, load_iris from sklearn.metrics import accuracy_score, mean_squared_error + from sklearn.model_selection import train_test_split + from tabpfn import TabPFNClassifier, TabPFNRegressor - + # Test classifier - def _test_classifier(use_onnx: bool = False) -> float: - print(f"\n{'='*20} Testing TabPFNClassifier (use_onnx={use_onnx}) {'='*20}") - + def _test_classifier(*, use_onnx: bool = False) -> float: # Load dataset X, y = load_iris(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - + X_train, X_test, y_train, y_test = train_test_split( + X, + y, + test_size=0.2, + random_state=42, + ) + # Create and fit model if use_onnx: model = TabPFNClassifier(n_estimators=2, use_onnx=True) else: model = TabPFNClassifier(n_estimators=2, use_onnx=False) - + model.fit(X_train, y_train) - + # Make predictions y_pred = model.predict(X_test) - accuracy = accuracy_score(y_test, y_pred) - - print(f"Accuracy: {accuracy:.4f}") - - # Test predict_proba - proba = model.predict_proba(X_test) - print(f"Probability shape: {proba.shape}") - - return accuracy - + return accuracy_score(y_test, y_pred) + # Test regressor - def _test_regressor(use_onnx: bool = False) -> float: - print(f"\n{'='*20} Testing TabPFNRegressor (use_onnx={use_onnx}) {'='*20}") - + def _test_regressor(*, use_onnx: bool = False) -> float: # Load dataset X, y = load_diabetes(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - + X_train, X_test, y_train, y_test = train_test_split( + X, + y, + test_size=0.2, + random_state=42, + ) + # Create and fit model if use_onnx: model = TabPFNRegressor(n_estimators=2, use_onnx=True) else: model = TabPFNRegressor(n_estimators=2, use_onnx=False) - + model.fit(X_train, y_train) - + # Make predictions (mean) y_pred_mean = model.predict(X_test) - mse_mean = mean_squared_error(y_test, y_pred_mean) - print(f"MSE (mean): {mse_mean:.4f}") - - # Make predictions (median) - y_pred_median = model.predict(X_test, output_type="median") - mse_median = mean_squared_error(y_test, y_pred_median) - print(f"MSE (median): {mse_median:.4f}") - - # Test quantiles - quantiles = model.predict(X_test, output_type="quantiles", quantiles=[0.1, 0.5, 0.9]) - print(f"Quantile predictions shape (0.1): {quantiles[0].shape}") - - return mse_mean - - print("Testing TabPFN models with PyTorch and ONNX backends") - + y_pred_full = model.predict(X_test, output_type="full") + assert len(y_pred_full.keys()) > 2 + return mean_squared_error(y_test, y_pred_mean) + # Test with PyTorch backend clf_acc_torch = _test_classifier(use_onnx=False) reg_mse_torch = _test_regressor(use_onnx=False) - + # Test with ONNX backend - try: - clf_acc_onnx = _test_classifier(use_onnx=True) - reg_mse_onnx = _test_regressor(use_onnx=True) - - # Compare results - print("\n" + "="*60) - print(f"Classifier accuracy - PyTorch: {clf_acc_torch:.4f}, ONNX: {clf_acc_onnx:.4f}") - print(f"Regressor MSE - PyTorch: {reg_mse_torch:.4f}, ONNX: {reg_mse_onnx:.4f}") - - # Check if results are similar - accuracy_diff = abs(clf_acc_torch - clf_acc_onnx) - mse_ratio = reg_mse_torch / max(reg_mse_onnx, 1e-10) - - if accuracy_diff > 0.1 or mse_ratio < 0.5 or mse_ratio > 2.0: - print("\nWARNING: Large difference between PyTorch and ONNX model results!") - else: - print("\nSUCCESS: PyTorch and ONNX 
models produce similar results.") - - except Exception as e: - print("\n" + "="*60) - print(f"Error testing ONNX models: {e}") - print("Make sure ONNX models are correctly exported.") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Export TabPFN models to ONNX format", - ) - parser.add_argument( - "--output", - type=str, - default="model", - help=( - "Base output path for the ONNX models (will append _classifier.onnx and " - "_regressor.onnx)" - ), - ) - - args = parser.parse_args() + clf_acc_onnx = _test_classifier(use_onnx=True) + reg_mse_onnx = _test_regressor(use_onnx=True) + + # Compare results + + # Check if results are similar + accuracy_diff = abs(clf_acc_torch - clf_acc_onnx) + mse_ratio = reg_mse_torch / max(reg_mse_onnx, 1e-10) + + if accuracy_diff > 0.1 or mse_ratio < 0.5 or mse_ratio > 2.0: + raise ValueError( + "FAILED: the performance of the ONNX model is not " + "similar to the PyTorch model. \n" + f"Accuracy PyTorch: {clf_acc_torch}, Accuracy ONNX: {clf_acc_onnx}, \n" + f"MSE PyTorch: {reg_mse_torch}, MSE ONNX: {reg_mse_onnx}" + ) + else: + print("SUCCESS: the performance of the ONNX model is " + "similar to the PyTorch model. \n" + f"Accuracy PyTorch: {clf_acc_torch}, Accuracy ONNX: {clf_acc_onnx}, \n" + f"MSE PyTorch: {reg_mse_torch}, MSE ONNX: {reg_mse_onnx}") + + + +def compile_onnx_models(suffix: str = ""): + """Compile the ONNX models. + + Args: + suffix: The suffix to append to the file names of the ONNX models. + """ + USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") + if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": + cache_dir = USER_TABPFN_CACHE_DIR_LOCATION + else: + cache_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") # Export both models with appropriate suffixes - classifier_path = f"{args.output}_classifier.onnx" - regressor_path = f"{args.output}_regressor.onnx" + classifier_path = f"{cache_dir}/tabpfn-v2-classifier{suffix}.onnx" + regressor_path = f"{cache_dir}/tabpfn-v2-regressor{suffix}.onnx" export_model(classifier_path, "classifier") check_onnx_model(classifier_path) @@ -437,9 +447,8 @@ def _test_regressor(use_onnx: bool = False) -> float: export_model(regressor_path, "regressor") check_onnx_model(regressor_path) check_input_names(regressor_path) - - # Run tests if requested - if args.output == "model": - test_models(classifier_path, regressor_path) + + if not len(suffix): + test_models() else: - print("using custom output path, the model won't be tested for performance as part of sklearn wrappers") + print("model name suffix is not empty, skipping test") diff --git a/src/tabpfn/regressor.py b/src/tabpfn/regressor.py index 7b81cafc4..51cd715b9 100644 --- a/src/tabpfn/regressor.py +++ b/src/tabpfn/regressor.py @@ -409,7 +409,12 @@ def fit(self, X: XType, y: YType) -> Self: # Load the model and config if self.use_onnx: - self.model_ = load_onnx_model("model_regressor.onnx", self.device_) + self.model_ = load_onnx_model( + self.model_path, + which="regressor", + version="v2", + device=self.device_, + ) # Initialize bardist_ for ONNX mode # TODO: faster way to do this _, self.config_, self.bardist_ = initialize_tabpfn_model( diff --git a/src/tabpfn/utils.py b/src/tabpfn/utils.py index 298507a5b..0c18cf244 100644 --- a/src/tabpfn/utils.py +++ b/src/tabpfn/utils.py @@ -286,6 +286,53 @@ def _user_cache_dir(platform: str, appname: str = "tabpfn") -> Path: return use_instead_path +def get_cache_dir() -> Path: + """Get the cache directory for the TabPFN model. 
+ + Returns: + The cache directory for the TabPFN model. + """ + USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") + if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": + cache_dir = Path(USER_TABPFN_CACHE_DIR_LOCATION) + else: + cache_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") + return cache_dir + + +def get_model_path( + model_path: str | Path | None, + which: Literal["classifier", "regressor"], + version: Literal["v2"], + *, + use_onnx: bool = False, +) -> Path: + """Get the model path for the given task. + + Args: + model_path: The path to the model. + which: The task to get the model path for. + version: The version of the model. + use_onnx: Whether to use ONNX models instead of PyTorch models. + + Returns: + The model path. + """ + if isinstance(model_path, str) and model_path == "auto": + model_path = None # type: ignore + if model_path is None: + USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") + if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": + model_dir = Path(USER_TABPFN_CACHE_DIR_LOCATION) + else: + model_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") + if use_onnx: + model_name = f"tabpfn-{version}-{which}.onnx" + else: + model_name = f"tabpfn-{version}-{which}.ckpt" + return model_dir / model_name + + @overload def load_model_criterion_config( model_path: str | Path | None, @@ -348,22 +395,9 @@ def load_model_criterion_config( Returns: The model, criterion, and config. """ - if model_path is None: - USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") - if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": - model_dir = Path(USER_TABPFN_CACHE_DIR_LOCATION) - else: - model_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") - - model_name = f"tabpfn-{version}-{which}.ckpt" - model_path = model_dir / model_name - else: - if not isinstance(model_path, (str, Path)): - raise ValueError(f"Invalid model_path: {model_path}") - - model_path = Path(model_path) - model_dir = model_path.parent - model_name = model_path.name + model_path = get_model_path(model_path, which, version) + model_dir = model_path.parent + model_name = model_path.name model_dir.mkdir(parents=True, exist_ok=True) if not model_path.exists(): From 71f65579c23b9034a0dd9b361e84730d0981d40d Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 15:10:54 +0100 Subject: [PATCH 10/20] fix tests --- tests/test_classifier_interface.py | 3 --- tests/test_regressor_interface.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/tests/test_classifier_interface.py b/tests/test_classifier_interface.py index 90969d306..05c711065 100644 --- a/tests/test_classifier_interface.py +++ b/tests/test_classifier_interface.py @@ -235,7 +235,6 @@ def forward( y, single_eval_pos, only_return_standard_out, - categorical_inds, ): return self.model( None, @@ -243,7 +242,6 @@ def forward( y, single_eval_pos=single_eval_pos, only_return_standard_out=only_return_standard_out, - categorical_inds=categorical_inds, ) @@ -281,7 +279,6 @@ def test_onnx_exportable_cpu(X_y: tuple[np.ndarray, np.ndarray]) -> None: "y", "single_eval_pos", "only_return_standard_out", - "categorical_inds", ], output_names=["output"], opset_version=17, # using 17 since we use torch>=2.1 diff --git a/tests/test_regressor_interface.py b/tests/test_regressor_interface.py index 20e285376..5a1a7ee13 100644 --- a/tests/test_regressor_interface.py +++ b/tests/test_regressor_interface.py @@ -232,7 +232,6 @@ def forward( y, single_eval_pos, 
only_return_standard_out,
-            categorical_inds,
        ):
            return self.model(
                None,
@@ -240,7 +239,6 @@ def forward(
                y,
                single_eval_pos=single_eval_pos,
                only_return_standard_out=only_return_standard_out,
-                categorical_inds=categorical_inds,
            )


@@ -277,7 +275,6 @@ def test_onnx_exportable_cpu(X_y: tuple[np.ndarray, np.ndarray]) -> None:
                "y",
                "single_eval_pos",
                "only_return_standard_out",
-                "categorical_inds",
            ],
            output_names=["output"],
            opset_version=17,  # using 17 since we use torch>=2.1

From a47b254ca82f17670aa9dbbf7534813b11a0d549 Mon Sep 17 00:00:00 2001
From: LeoGrin
Date: Mon, 24 Mar 2025 16:00:22 +0100
Subject: [PATCH 11/20] finish merge + mypy

---
 src/tabpfn/base.py                 |  9 ++--
 src/tabpfn/misc/compile_to_onnx.py | 46 +++++++++----------
 src/tabpfn/model/loading.py        | 73 +++++++++++++++++-------------
 tests/test_classifier_interface.py |  2 +-
 tests/test_regressor_interface.py  |  2 +-
 5 files changed, 69 insertions(+), 63 deletions(-)

diff --git a/src/tabpfn/base.py b/src/tabpfn/base.py
index c491c3aa1..8ca7b52e4 100644
--- a/src/tabpfn/base.py
+++ b/src/tabpfn/base.py
@@ -27,9 +27,11 @@
    InferenceEngineCachePreprocessing,
    InferenceEngineOnDemand,
 )
-from tabpfn.model.loading import load_model_criterion_config
+from tabpfn.model.loading import (
+    load_model_criterion_config,
+    resolve_model_path,
+)
 from tabpfn.utils import (
-    get_model_path,
    infer_fp16_inference_mode,
 )

@@ -135,7 +137,6 @@ def load_onnx_model(
        ImportError: If onnxruntime is not installed.
        FileNotFoundError: If the model file doesn't exist.
    """
-    model_path = get_model_path(model_path, which, version, use_onnx=True)
    try:
        from tabpfn.misc.compile_to_onnx import ONNXModelWrapper
    except ImportError as err:
@@ -144,7 +145,7 @@ def load_onnx_model(
            "Install it with: pip install onnxruntime",
        ) from err

-    model_path = Path(model_path)
+    model_path, _, _ = resolve_model_path(model_path, which, version, use_onnx=True)
    if not model_path.exists():
        raise FileNotFoundError(
            f"ONNX model not found at: {model_path}, "
diff --git a/src/tabpfn/misc/compile_to_onnx.py b/src/tabpfn/misc/compile_to_onnx.py
index ef3e16b8a..a1225de16 100644
--- a/src/tabpfn/misc/compile_to_onnx.py
+++ b/src/tabpfn/misc/compile_to_onnx.py
@@ -1,3 +1,4 @@
+# ruff: noqa: T201
 """Module providing wrappers to use ONNX models with a PyTorch-like interface.

 This module defines wrappers for ONNX models as well as helper functions to export
@@ -6,9 +7,6 @@

 from __future__ import annotations

-import os
-import sys
-
 import numpy as np
 import onnx
 import onnxruntime as ort
@@ -17,7 +15,7 @@
 from torch import nn

 from tabpfn import TabPFNClassifier, TabPFNRegressor
-from tabpfn.utils import _user_cache_dir
+from tabpfn.model.loading import resolve_model_path


 class ONNXModelWrapper:
@@ -157,7 +155,7 @@ def __call__(
        if "CUDAExecutionProvider" in self.providers:
            output_tensor = output_tensor.cuda()
        return output_tensor
-    
+
    def forward(
@@ -202,10 +200,12 @@ def __init__(self, original_model: nn.Module):
        super().__init__()
        self.model = original_model

-    def forward(self, X: torch.Tensor,
-            y: torch.Tensor,
-            single_eval_pos: torch.Tensor,
-            only_return_standard_out: torch.Tensor,
+    def forward(
+        self,
+        X: torch.Tensor,
+        y: torch.Tensor,
+        single_eval_pos: torch.Tensor,
+        only_return_standard_out: torch.Tensor,
    ) -> dict[str, torch.Tensor]:
        """Perform a forward pass.
@@ -416,29 +416,25 @@ def _test_regressor(*, use_onnx: bool = False) -> float: f"Accuracy PyTorch: {clf_acc_torch}, Accuracy ONNX: {clf_acc_onnx}, \n" f"MSE PyTorch: {reg_mse_torch}, MSE ONNX: {reg_mse_onnx}" ) - else: - print("SUCCESS: the performance of the ONNX model is " - "similar to the PyTorch model. \n" - f"Accuracy PyTorch: {clf_acc_torch}, Accuracy ONNX: {clf_acc_onnx}, \n" - f"MSE PyTorch: {reg_mse_torch}, MSE ONNX: {reg_mse_onnx}") - + print( + "SUCCESS: the performance of the ONNX model is " + "similar to the PyTorch model. \n" + f"Accuracy PyTorch: {clf_acc_torch}, Accuracy ONNX: {clf_acc_onnx}, \n" + f"MSE PyTorch: {reg_mse_torch}, MSE ONNX: {reg_mse_onnx}" + ) -def compile_onnx_models(suffix: str = ""): +def compile_onnx_models(suffix: str = "") -> None: """Compile the ONNX models. Args: suffix: The suffix to append to the file names of the ONNX models. """ - USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") - if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": - cache_dir = USER_TABPFN_CACHE_DIR_LOCATION - else: - cache_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") - - # Export both models with appropriate suffixes - classifier_path = f"{cache_dir}/tabpfn-v2-classifier{suffix}.onnx" - regressor_path = f"{cache_dir}/tabpfn-v2-regressor{suffix}.onnx" + classifier_path, _, _ = resolve_model_path(None, "classifier", "v2", use_onnx=True) + regressor_path, _, _ = resolve_model_path(None, "regressor", "v2", use_onnx=True) + # add suffix to the file names + classifier_path = str(classifier_path) + suffix + regressor_path = str(regressor_path) + suffix export_model(classifier_path, "classifier") check_onnx_model(classifier_path) diff --git a/src/tabpfn/model/loading.py b/src/tabpfn/model/loading.py index 7253e8c25..85fb9aefe 100644 --- a/src/tabpfn/model/loading.py +++ b/src/tabpfn/model/loading.py @@ -101,7 +101,7 @@ def _get_model_source(version: ModelVersion, model_type: ModelType) -> ModelSour ) -def _suppress_hf_token_warning(): +def _suppress_hf_token_warning() -> None: """Suppress warning about missing HuggingFace token.""" import warnings @@ -279,15 +279,16 @@ def download_model( def download_all_models(to: Path) -> None: """Download all v2 classifier and regressor models into a local directory.""" to.mkdir(parents=True, exist_ok=True) + for model_source, model_type in [ - (ModelSource.get_classifier_v2(), "classifier"), - (ModelSource.get_regressor_v2(), "regressor"), + (ModelSource.get_classifier_v2(), ModelType.CLASSIFIER), + (ModelSource.get_regressor_v2(), ModelType.REGRESSOR), ]: for ckpt_name in model_source.filenames: download_model( to=to / ckpt_name, version="v2", - which=model_type, + which=model_type.value, model_name=ckpt_name, ) @@ -340,6 +341,41 @@ def _user_cache_dir(platform: str, appname: str = "tabpfn") -> Path: return use_instead_path +def resolve_model_path( + model_path: None | str | Path, + which: Literal["regressor", "classifier"], + version: Literal["v2"] = "v2", + *, + use_onnx: bool = False, +) -> tuple[Path, Path, str]: + if isinstance(model_path, str) and model_path == "auto": + model_path = None + + if model_path is None: + USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") + if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": + model_dir = Path(USER_TABPFN_CACHE_DIR_LOCATION) + else: + model_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") + if use_onnx: + model_name = f"tabpfn-{version}-{which}.onnx" + else: + model_name = f"tabpfn-{version}-{which}.ckpt" + 
model_path = model_dir / model_name + else: + if not isinstance(model_path, (str, Path)): + raise ValueError(f"Invalid model_path: {model_path}") + + model_path = Path(model_path) + model_dir = model_path.parent + if use_onnx and not model_path.name.endswith(".onnx"): + model_name = model_path.name.replace(".ckpt", ".onnx") + else: + model_name = model_path.name + + return model_path, model_dir, model_name + + @overload def load_model_criterion_config( model_path: str | Path | None, @@ -370,31 +406,6 @@ def load_model_criterion_config( ) -> tuple[PerFeatureTransformer, FullSupportBarDistribution, InferenceConfig]: ... -def resolve_model_path( - model_path: None | str | Path, - which: Literal["regressor", "classifier"], - version: Literal["v2"] = "v2", -) -> tuple[Path, Path, str, str]: - if model_path is None: - USER_TABPFN_CACHE_DIR_LOCATION = os.environ.get("TABPFN_MODEL_CACHE_DIR", "") - if USER_TABPFN_CACHE_DIR_LOCATION.strip() != "": - model_dir = Path(USER_TABPFN_CACHE_DIR_LOCATION) - else: - model_dir = _user_cache_dir(platform=sys.platform, appname="tabpfn") - - model_name = f"tabpfn-{version}-{which}.ckpt" - model_path = model_dir / model_name - else: - if not isinstance(model_path, (str, Path)): - raise ValueError(f"Invalid model_path: {model_path}") - - model_path = Path(model_path) - model_dir = model_path.parent - model_name = model_path.name - - return model_path, model_dir, model_name, which - - def load_model_criterion_config( model_path: None | str | Path, *, @@ -427,9 +438,7 @@ def load_model_criterion_config( Returns: The model, criterion, and config. """ - (model_path, model_dir, model_name, which) = resolve_model_path( - model_path, which, version - ) + model_path, model_dir, model_name = resolve_model_path(model_path, which, version) model_dir.mkdir(parents=True, exist_ok=True) if not model_path.exists(): diff --git a/tests/test_classifier_interface.py b/tests/test_classifier_interface.py index a1f76ed88..0c4ae1cd3 100644 --- a/tests/test_classifier_interface.py +++ b/tests/test_classifier_interface.py @@ -288,7 +288,7 @@ def test_onnx_exportable_cpu(X_y: tuple[np.ndarray, np.ndarray]) -> None: } torch.onnx.export( ModelWrapper(classifier.model_).eval(), - (X, y, y.shape[0], True, []), + (X, y, y.shape[0], True), io.BytesIO(), input_names=[ "X", diff --git a/tests/test_regressor_interface.py b/tests/test_regressor_interface.py index 87d423b1e..6e117bcd7 100644 --- a/tests/test_regressor_interface.py +++ b/tests/test_regressor_interface.py @@ -282,7 +282,7 @@ def test_onnx_exportable_cpu(X_y: tuple[np.ndarray, np.ndarray]) -> None: } torch.onnx.export( ModelWrapper(regressor.model_).eval(), - (X, y, y.shape[0], True, []), + (X, y, y.shape[0], True), io.BytesIO(), input_names=[ "X", From 4c31a9a52bb80831134a0c5736b91a7ef20f5f0f Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 17:15:53 +0100 Subject: [PATCH 12/20] improve onnx export tests --- src/tabpfn/base.py | 3 +- src/tabpfn/misc/compile_to_onnx.py | 7 +- src/tabpfn/model/loading.py | 3 +- tests/test_classifier_interface.py | 46 ------------ tests/test_export_onnx.py | 113 +++++++++++++++++++++++++++++ tests/test_regressor_interface.py | 44 ----------- 6 files changed, 121 insertions(+), 95 deletions(-) create mode 100644 tests/test_export_onnx.py diff --git a/src/tabpfn/base.py b/src/tabpfn/base.py index 8ca7b52e4..058157575 100644 --- a/src/tabpfn/base.py +++ b/src/tabpfn/base.py @@ -151,7 +151,8 @@ def load_onnx_model( f"ONNX model not found at: {model_path}, " "please compile the model by running " 
"`from tabpfn.misc.compile_to_onnx import compile_onnx_models; " - "compile_onnx_models()`", + "compile_onnx_models()`" + "or change `model_path`.", ) return ONNXModelWrapper(str(model_path), device) diff --git a/src/tabpfn/misc/compile_to_onnx.py b/src/tabpfn/misc/compile_to_onnx.py index a1225de16..0a8204845 100644 --- a/src/tabpfn/misc/compile_to_onnx.py +++ b/src/tabpfn/misc/compile_to_onnx.py @@ -424,11 +424,12 @@ def _test_regressor(*, use_onnx: bool = False) -> float: ) -def compile_onnx_models(suffix: str = "") -> None: +def compile_onnx_models(suffix: str = "", *, skip_test: bool = False) -> None: """Compile the ONNX models. Args: suffix: The suffix to append to the file names of the ONNX models. + skip_test: Whether to skip the performance test of the ONNX models. """ classifier_path, _, _ = resolve_model_path(None, "classifier", "v2", use_onnx=True) regressor_path, _, _ = resolve_model_path(None, "regressor", "v2", use_onnx=True) @@ -444,7 +445,7 @@ def compile_onnx_models(suffix: str = "") -> None: check_onnx_model(regressor_path) check_input_names(regressor_path) - if not len(suffix): + if not len(suffix) and not skip_test: test_models() - else: + elif not skip_test: print("model name suffix is not empty, skipping test") diff --git a/src/tabpfn/model/loading.py b/src/tabpfn/model/loading.py index 85fb9aefe..bd5a9b140 100644 --- a/src/tabpfn/model/loading.py +++ b/src/tabpfn/model/loading.py @@ -369,7 +369,8 @@ def resolve_model_path( model_path = Path(model_path) model_dir = model_path.parent if use_onnx and not model_path.name.endswith(".onnx"): - model_name = model_path.name.replace(".ckpt", ".onnx") + # More general approach - replace any extension with .onnx + model_name = model_path.stem + ".onnx" else: model_name = model_path.name diff --git a/tests/test_classifier_interface.py b/tests/test_classifier_interface.py index 0c4ae1cd3..076376984 100644 --- a/tests/test_classifier_interface.py +++ b/tests/test_classifier_interface.py @@ -1,8 +1,5 @@ from __future__ import annotations -import io -import os -import sys import typing from itertools import product from typing import Callable, Literal @@ -259,49 +256,6 @@ def forward( ) -@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") -def test_onnx_exportable_cpu(X_y: tuple[np.ndarray, np.ndarray]) -> None: - if os.name == "nt": - pytest.skip("onnx export is not tested on windows") - if sys.version_info >= (3, 13): - pytest.xfail("onnx is not yet supported on Python 3.13") - X, y = X_y - with torch.no_grad(): - classifier = TabPFNClassifier(n_estimators=1, device="cpu", random_state=42) - # load the model so we can access it via classifier.model_ - classifier.fit(X, y) - # this is necessary if cuda is available - classifier.predict(X) - # replicate the above call with random tensors of same shape - X = torch.randn( - (X.shape[0] * 2, 1, X.shape[1] + 1), - generator=torch.Generator().manual_seed(42), - ) - y = ( - torch.rand(y.shape, generator=torch.Generator().manual_seed(42)) - .round() - .to(torch.float32) - ) - dynamic_axes = { - "X": {0: "num_datapoints", 1: "batch_size", 2: "num_features"}, - "y": {0: "num_labels"}, - } - torch.onnx.export( - ModelWrapper(classifier.model_).eval(), - (X, y, y.shape[0], True), - io.BytesIO(), - input_names=[ - "X", - "y", - "single_eval_pos", - "only_return_standard_out", - ], - output_names=["output"], - opset_version=17, # using 17 since we use torch>=2.1 - dynamic_axes=dynamic_axes, - ) - - @pytest.mark.parametrize("data_source", ["train", "test"]) def 
test_get_embeddings(X_y: tuple[np.ndarray, np.ndarray], data_source: str) -> None: """Test that get_embeddings returns valid embeddings for a fitted model.""" diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py new file mode 100644 index 000000000..ed826df35 --- /dev/null +++ b/tests/test_export_onnx.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import os +import sys + +import numpy as np +import pytest + +from tabpfn import TabPFNClassifier, TabPFNRegressor +from tabpfn.misc.compile_to_onnx import compile_onnx_models + + +@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") +def test_onnx_missing_model_error(): + """Test that appropriate error is raised when trying to + use ONNX with a missing model. Here we specify a model path + that does not exist to simulate the case where the model + has not been compiled. + """ + if os.name == "nt": + pytest.skip("ONNX export is not tested on Windows") + if sys.version_info >= (3, 13): + pytest.xfail("ONNX is not yet supported on Python 3.13") + + try: + import onnx # noqa: F401 + import onnxruntime # noqa: F401 + except ImportError: + pytest.skip("ONNX or ONNX Runtime not available") + + # Generate synthetic data + rng = np.random.default_rng() + X = rng.standard_normal((50, 10)).astype(np.float32) + y = rng.integers(0, 2, size=50) + + # Try to use ONNX backend when model doesn't exist + classifier = TabPFNClassifier( + device="cpu", use_onnx=True, model_path="/fake_dir/tabpfn_classifier_v2.ckpt" + ) + + # Expect a FileNotFoundError with a specific message + with pytest.raises( + FileNotFoundError, + match=( + r"ONNX model not found at:.*please compile the model by " + r"running.*compile_onnx_models\(\)" + ), + ): + classifier.fit(X, y) + + +@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") +def test_onnx_export_and_inference(): + """Test that TabPFN models can be exported to ONNX + and produce correct predictions. 
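The error exercised above also documents its own fix; a short sketch of the recovery path a user would follow (assuming `onnx` and `onnxruntime` are installed):

```python
from tabpfn import TabPFNClassifier
from tabpfn.misc.compile_to_onnx import compile_onnx_models

# One-time export into the cache directory; skip_test=True avoids the
# built-in PyTorch-vs-ONNX comparison run.
compile_onnx_models(skip_test=True)

clf = TabPFNClassifier(device="cpu", use_onnx=True)  # now resolves the .onnx file
```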
+ """ + if os.name == "nt": + pytest.skip("ONNX export is not tested on Windows") + if sys.version_info >= (3, 13): + pytest.xfail("ONNX is not yet supported on Python 3.13") + + try: + import onnx # noqa: F401 + import onnxruntime # noqa: F401 + except ImportError: + pytest.skip("ONNX or ONNX Runtime not available") + + # Compile the model to ONNX format (using default output directory) + compile_onnx_models(skip_test=True) + + # Generate synthetic data for testing + n_samples = 100 + n_features = 10 + rng = np.random.default_rng() + X = rng.standard_normal((n_samples, n_features)).astype(np.float32) + y = rng.integers(0, 2, size=n_samples) + + # Split into train/test + train_size = 80 + X_train, X_test = X[:train_size], X[train_size:] + y_train, _y_test = y[:train_size], y[train_size:] + + # Test with PyTorch backend + classifier_torch = TabPFNClassifier(device="cpu", use_onnx=False) + classifier_torch.fit(X_train, y_train) + + # Get predictions with PyTorch backend + torch_probs = classifier_torch.predict_proba(X_test) + torch_preds = classifier_torch.predict(X_test) + + # Test with ONNX backend + classifier_onnx = TabPFNClassifier(device="cpu", use_onnx=True) + classifier_onnx.fit(X_train, y_train) + + # Get predictions with ONNX backend + onnx_probs = classifier_onnx.predict_proba(X_test) + onnx_preds = classifier_onnx.predict(X_test) + + # Check that the predictions roughly match + np.testing.assert_allclose(torch_probs, onnx_probs, rtol=1e-2, atol=1e-2) + np.testing.assert_array_equal(torch_preds, onnx_preds) + + # same for regressor + regressor_torch = TabPFNRegressor(device="cpu", use_onnx=False) + regressor_torch.fit(X_train, y_train) + regressor_onnx = TabPFNRegressor(device="cpu", use_onnx=True) + regressor_onnx.fit(X_train, y_train) + + torch_preds = regressor_torch.predict(X_test) + onnx_preds = regressor_onnx.predict(X_test) + + # Check that the predictions roughly match + np.testing.assert_allclose(torch_preds, onnx_preds, rtol=1e-2, atol=1e-2) diff --git a/tests/test_regressor_interface.py b/tests/test_regressor_interface.py index 6e117bcd7..c7733da47 100644 --- a/tests/test_regressor_interface.py +++ b/tests/test_regressor_interface.py @@ -1,8 +1,6 @@ from __future__ import annotations -import io import os -import sys import typing from itertools import product from typing import Callable, Literal @@ -254,48 +252,6 @@ def forward( ) -# WARNING: unstable for scipy<1.11.0 -@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") -def test_onnx_exportable_cpu(X_y: tuple[np.ndarray, np.ndarray]) -> None: - if os.name == "nt": - pytest.skip("onnx export is not tested on windows") - if sys.version_info >= (3, 13): - pytest.xfail("onnx is not yet supported on Python 3.13") - X, y = X_y - with torch.no_grad(): - regressor = TabPFNRegressor(n_estimators=1, device="cpu", random_state=43) - # load the model so we can access it via classifier.model_ - regressor.fit(X, y) - # this is necessary if cuda is available - regressor.predict(X) - # replicate the above call with random tensors of same shape - X = torch.randn( - (X.shape[0] * 2, 1, X.shape[1] + 1), - generator=torch.Generator().manual_seed(42), - ) - y = (torch.randn(y.shape, generator=torch.Generator().manual_seed(42)) > 0).to( - torch.float32, - ) - dynamic_axes = { - "X": {0: "num_datapoints", 1: "batch_size", 2: "num_features"}, - "y": {0: "num_labels"}, - } - torch.onnx.export( - ModelWrapper(regressor.model_).eval(), - (X, y, y.shape[0], True), - io.BytesIO(), - input_names=[ - "X", - "y", - "single_eval_pos", - 
"only_return_standard_out", - ], - output_names=["output"], - opset_version=17, # using 17 since we use torch>=2.1 - dynamic_axes=dynamic_axes, - ) - - @pytest.mark.parametrize("data_source", ["train", "test"]) def test_get_embeddings(X_y: tuple[np.ndarray, np.ndarray], data_source: str) -> None: """Test that get_embeddings returns valid embeddings for a fitted model.""" From dd84e0a50e8bd741389043a940da0f5c2b0ad308 Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 17:19:43 +0100 Subject: [PATCH 13/20] add onnxruntime requirements to dev and ci --- .github/workflows/pull_request.yml | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index d8114c456..29ebdb218 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -80,7 +80,7 @@ jobs: uv pip install --system pytest psutil # onnx is not supported on python 3.13 yet https://github.com/onnx/onnx/issues/6339 if [[ "${{ matrix.python-version }}" != "3.13" ]]; then - uv pip install --system onnx + uv pip install --system onnx onnxruntime fi shell: bash diff --git a/pyproject.toml b/pyproject.toml index 90dd591e4..cacebfb2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ dev = [ # Test "pytest", "onnx", # required for onnx export tests + "onnxruntime", "psutil", # required for testing internal memory tool on windows # Docs "mkdocs", From 1bb3a64b7b9190865e1435686c2dacedd9508c2f Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 17:23:20 +0100 Subject: [PATCH 14/20] skip test on python3.13 --- tests/test_export_onnx.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py index ed826df35..ea90f5cc4 100644 --- a/tests/test_export_onnx.py +++ b/tests/test_export_onnx.py @@ -7,7 +7,6 @@ import pytest from tabpfn import TabPFNClassifier, TabPFNRegressor -from tabpfn.misc.compile_to_onnx import compile_onnx_models @pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") @@ -65,6 +64,8 @@ def test_onnx_export_and_inference(): except ImportError: pytest.skip("ONNX or ONNX Runtime not available") + from tabpfn.misc.compile_to_onnx import compile_onnx_models + # Compile the model to ONNX format (using default output directory) compile_onnx_models(skip_test=True) From 968087ba2ce2fbb4ac1cd1053f4dfc996f281957 Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 22:14:38 +0100 Subject: [PATCH 15/20] use the same onnx session if you fit twice --- src/tabpfn/base.py | 17 ++++++--------- src/tabpfn/classifier.py | 32 +++++++++++++++++++++------ src/tabpfn/misc/compile_to_onnx.py | 8 ++++--- src/tabpfn/model/loading.py | 8 +++---- src/tabpfn/regressor.py | 35 +++++++++++++++++++++++------- tests/test_export_onnx.py | 5 ++++- 6 files changed, 72 insertions(+), 33 deletions(-) diff --git a/src/tabpfn/base.py b/src/tabpfn/base.py index 058157575..94314f673 100644 --- a/src/tabpfn/base.py +++ b/src/tabpfn/base.py @@ -29,7 +29,6 @@ ) from tabpfn.model.loading import ( load_model_criterion_config, - resolve_model_path, ) from tabpfn.utils import ( infer_fp16_inference_mode, @@ -47,7 +46,7 @@ @overload def initialize_tabpfn_model( - model_path: str | Path | Literal["auto"], + model_path: Path, which: Literal["regressor"], fit_mode: Literal["low_memory", "fit_preprocessors", "fit_with_cache"], static_seed: int, @@ -56,7 +55,7 @@ def initialize_tabpfn_model( @overload def initialize_tabpfn_model( - 
model_path: str | Path | Literal["auto"],
+    model_path: Path,
     which: Literal["classifier"],
     fit_mode: Literal["low_memory", "fit_preprocessors", "fit_with_cache"],
     static_seed: int,
@@ -64,7 +63,7 @@ def initialize_tabpfn_model(


 def initialize_tabpfn_model(
-    model_path: str | Path | Literal["auto"],
+    model_path: Path,
     which: Literal["classifier", "regressor"],
     fit_mode: Literal["low_memory", "fit_preprocessors", "fit_with_cache"],
     static_seed: int,
@@ -117,9 +116,7 @@ def initialize_tabpfn_model(


 def load_onnx_model(
-    model_path: str | Path,
-    which: Literal["classifier", "regressor"],
-    version: Literal["v2"],
+    model_path: Path,
     device: torch.device,
 ) -> ONNXModelWrapper:
     """Load a TabPFN model in ONNX format.
@@ -142,10 +139,10 @@ def load_onnx_model(
     except ImportError as err:
         raise ImportError(
             "onnxruntime is required to load ONNX models. "
-            "Install it with: pip install onnxruntime",
+            "Install it with: pip install onnxruntime-gpu "
+            "or pip install onnxruntime",
         ) from err

-    model_path, _, _ = resolve_model_path(model_path, which, version, use_onnx=True)
     if not model_path.exists():
         raise FileNotFoundError(
             f"ONNX model not found at: {model_path}, "
@@ -155,7 +152,7 @@ def load_onnx_model(
     )

-    return ONNXModelWrapper(str(model_path), device)
+    return ONNXModelWrapper(model_path, device)


 def determine_precision(
diff --git a/src/tabpfn/classifier.py b/src/tabpfn/classifier.py
index 6c363d563..cdfa806ab 100644
--- a/src/tabpfn/classifier.py
+++ b/src/tabpfn/classifier.py
@@ -44,6 +44,7 @@
     XType,
     YType,
 )
+from tabpfn.model.loading import resolve_model_path
 from tabpfn.preprocessing import (
     ClassifierEnsembleConfig,
     EnsembleConfig,
@@ -69,7 +70,9 @@
     from torch.types import _dtype

     from tabpfn.inference import InferenceEngine
+    from tabpfn.misc.compile_to_onnx import ONNXModelWrapper
     from tabpfn.model.config import InferenceConfig
+    from tabpfn.model.transformer import PerFeatureTransformer

 try:
     from sklearn.base import Tags
@@ -132,6 +135,9 @@ class TabPFNClassifier(ClassifierMixin, BaseEstimator):
     preprocessor_: ColumnTransformer
     """The column transformer used to preprocess the input data to be numeric."""

+    model_: PerFeatureTransformer | ONNXModelWrapper
+    """The loaded model used for inference."""
+
     def __init__(  # noqa: PLR0913
         self,
         *,
@@ -399,7 +405,7 @@ def fit(self, X: XType, y: YType) -> Self:
             determine_precision(self.inference_precision, self.device_)
         )

+        model_path, _, _ = resolve_model_path(
+            self.model_path,
+            which="classifier",
+            version="v2",
+            use_onnx=self.use_onnx,
+        )
         # Load the model and config
         if self.use_onnx:
-            self.model_ = load_onnx_model(
-                self.model_path,
-                which="classifier",
-                version="v2",
-                device=self.device_,
-            )
+            # if the model was already loaded with the same config,
+            # use the same ONNX session
+            if hasattr(self, "model_") and (model_path, self.device_) != (
+                self.model_.model_path,
+                self.model_.device,
+            ):
+                print("Using same ONNX session as last fit call")  # noqa: T201
+            else:
+                self.model_ = load_onnx_model(
+                    model_path,
+                    device=self.device_,
+                )
         else:
             self.model_, self.config_, _ = initialize_tabpfn_model(
-                model_path=self.model_path,
+                model_path=model_path,
                 which="classifier",
                 fit_mode=self.fit_mode,
                 static_seed=static_seed,
diff --git a/src/tabpfn/misc/compile_to_onnx.py b/src/tabpfn/misc/compile_to_onnx.py
index 0a8204845..4ee9ee627 100644
--- a/src/tabpfn/misc/compile_to_onnx.py
+++ b/src/tabpfn/misc/compile_to_onnx.py
@@ -7,6 +7,8 @@

 from __future__ import annotations

+from pathlib import Path
+
import numpy as np import onnx import onnxruntime as ort @@ -21,7 +23,7 @@ class ONNXModelWrapper: """Wrap ONNX model to match the PyTorch model interface.""" - def __init__(self, model_path: str, device: torch.device): + def __init__(self, model_path: Path, device: torch.device): """Initialize the ONNX model wrapper. Args: @@ -434,8 +436,8 @@ def compile_onnx_models(suffix: str = "", *, skip_test: bool = False) -> None: classifier_path, _, _ = resolve_model_path(None, "classifier", "v2", use_onnx=True) regressor_path, _, _ = resolve_model_path(None, "regressor", "v2", use_onnx=True) # add suffix to the file names - classifier_path = str(classifier_path) + suffix - regressor_path = str(regressor_path) + suffix + classifier_path = classifier_path.stem + suffix + ".onnx" + regressor_path = regressor_path.stem + suffix + ".onnx" export_model(classifier_path, "classifier") check_onnx_model(classifier_path) diff --git a/src/tabpfn/model/loading.py b/src/tabpfn/model/loading.py index bd5a9b140..ba0052975 100644 --- a/src/tabpfn/model/loading.py +++ b/src/tabpfn/model/loading.py @@ -379,7 +379,7 @@ def resolve_model_path( @overload def load_model_criterion_config( - model_path: str | Path | None, + model_path: Path, *, check_bar_distribution_criterion: Literal[False], cache_trainset_representation: bool, @@ -396,7 +396,7 @@ def load_model_criterion_config( @overload def load_model_criterion_config( - model_path: str | Path | None, + model_path: Path, *, check_bar_distribution_criterion: Literal[True], cache_trainset_representation: bool, @@ -408,7 +408,7 @@ def load_model_criterion_config( def load_model_criterion_config( - model_path: None | str | Path, + model_path: Path, *, check_bar_distribution_criterion: bool, cache_trainset_representation: bool, @@ -439,7 +439,7 @@ def load_model_criterion_config( Returns: The model, criterion, and config. 
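For intuition, the path resolution used above behaves roughly as follows (illustrative expectations; the cache directory itself varies per platform and per `TABPFN_MODEL_CACHE_DIR`):

```python
from tabpfn.model.loading import resolve_model_path

# Default lookup: cache dir plus the canonical file name.
path, model_dir, name = resolve_model_path(None, "classifier", "v2", use_onnx=True)
assert name == "tabpfn-v2-classifier.onnx"

# Explicit checkpoint path: the returned name swaps the extension for .onnx.
path, model_dir, name = resolve_model_path("/tmp/my.ckpt", "regressor", use_onnx=True)
assert name == "my.onnx"
```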
""" - model_path, model_dir, model_name = resolve_model_path(model_path, which, version) + model_dir, model_name = model_path.parent, model_path.name model_dir.mkdir(parents=True, exist_ok=True) if not model_path.exists(): diff --git a/src/tabpfn/regressor.py b/src/tabpfn/regressor.py index 43092fffb..5421bcc85 100644 --- a/src/tabpfn/regressor.py +++ b/src/tabpfn/regressor.py @@ -43,6 +43,7 @@ ) from tabpfn.config import ModelInterfaceConfig from tabpfn.model.bar_distribution import FullSupportBarDistribution +from tabpfn.model.loading import resolve_model_path from tabpfn.model.preprocessing import ( ReshapeFeatureDistributionsStep, ) @@ -80,7 +81,9 @@ from tabpfn.inference import ( InferenceEngine, ) + from tabpfn.misc.compile_to_onnx import ONNXModelWrapper from tabpfn.model.config import InferenceConfig + from tabpfn.model.transformer import PerFeatureTransformer try: from sklearn.base import Tags @@ -160,6 +163,9 @@ class TabPFNRegressor(RegressorMixin, BaseEstimator): preprocessor_: ColumnTransformer """The column transformer used to preprocess the input data to be numeric.""" + model_: PerFeatureTransformer | ONNXModelWrapper + """The loaded model used for inference.""" + # TODO: consider moving the following to constants.py _OUTPUT_TYPES_BASIC = ("mean", "median", "mode") """The basic output types supported by the model.""" @@ -430,25 +436,38 @@ def fit(self, X: XType, y: YType) -> Self: determine_precision(self.inference_precision, self.device_) ) + model_path, _, _ = resolve_model_path( + self.model_path, + which="regressor", + version="v2", + use_onnx=self.use_onnx, + ) + # Load the model and config if self.use_onnx: - self.model_ = load_onnx_model( - self.model_path, - which="regressor", - version="v2", - device=self.device_, - ) + # if the model was already loaded with the same config, + # use the same ONNX session + if hasattr(self, "model_") and (model_path, self.device_) == ( + self.model_.model_path, + self.model_.device, + ): + print("Using same ONNX session as last fit call") # noqa: T201 + else: + self.model_ = load_onnx_model( + model_path, + device=self.device_, + ) # Initialize bardist_ for ONNX mode # TODO: faster way to do this _, self.config_, self.bardist_ = initialize_tabpfn_model( - model_path=self.model_path, + model_path=model_path.with_stem(model_path.stem).with_suffix(".ckpt"), which="regressor", fit_mode=self.fit_mode, static_seed=static_seed, ) else: self.model_, self.config_, self.bardist_ = initialize_tabpfn_model( - model_path=self.model_path, + model_path=model_path, which="regressor", fit_mode=self.fit_mode, static_seed=static_seed, diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py index ea90f5cc4..cf8a424fa 100644 --- a/tests/test_export_onnx.py +++ b/tests/test_export_onnx.py @@ -99,7 +99,6 @@ def test_onnx_export_and_inference(): # Check that the predictions roughly match np.testing.assert_allclose(torch_probs, onnx_probs, rtol=1e-2, atol=1e-2) - np.testing.assert_array_equal(torch_preds, onnx_preds) # same for regressor regressor_torch = TabPFNRegressor(device="cpu", use_onnx=False) @@ -112,3 +111,7 @@ def test_onnx_export_and_inference(): # Check that the predictions roughly match np.testing.assert_allclose(torch_preds, onnx_preds, rtol=1e-2, atol=1e-2) + + +# TODO: test deterministic +# TODO: test that fitting twice works as intended From 3f13d243c04ecd6ae02af10ba3c862fdf969881d Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 22:51:46 +0100 Subject: [PATCH 16/20] fix bug + add tests --- src/tabpfn/classifier.py | 
2 +- tests/test_export_onnx.py | 168 +++++++++++++++++++++++++++++++++++++- 2 files changed, 167 insertions(+), 3 deletions(-) diff --git a/src/tabpfn/classifier.py b/src/tabpfn/classifier.py index cdfa806ab..5bd4f5212 100644 --- a/src/tabpfn/classifier.py +++ b/src/tabpfn/classifier.py @@ -415,7 +415,7 @@ def fit(self, X: XType, y: YType) -> Self: if self.use_onnx: # if the model was already loaded with the same config # use the same ONNX session - if hasattr(self, "model_") and (model_path, self.device_) != ( + if hasattr(self, "model_") and (model_path, self.device_) == ( self.model_.model_path, self.model_.device, ): diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py index cf8a424fa..f884d9d8c 100644 --- a/tests/test_export_onnx.py +++ b/tests/test_export_onnx.py @@ -2,9 +2,11 @@ import os import sys +from typing import Literal import numpy as np import pytest +import torch from tabpfn import TabPFNClassifier, TabPFNRegressor @@ -113,5 +115,167 @@ def test_onnx_export_and_inference(): np.testing.assert_allclose(torch_preds, onnx_preds, rtol=1e-2, atol=1e-2) -# TODO: test deterministic -# TODO: test that fitting twice works as intended +@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") +@pytest.mark.parametrize("which", ["classifier", "regressor"]) +def test_onnx_session_reuse(which: Literal["classifier", "regressor"]): + """Test that the ONNX session is reused when fitting a model multiple times + with the same model path and device. + """ + if os.name == "nt": + pytest.skip("ONNX export is not tested on Windows") + if sys.version_info >= (3, 13): + pytest.xfail("ONNX is not yet supported on Python 3.13") + + try: + import onnx # noqa: F401 + import onnxruntime # noqa: F401 + except ImportError: + pytest.skip("ONNX or ONNX Runtime not available") + + # Generate synthetic data + rng = np.random.default_rng(42) + X1 = rng.standard_normal((50, 10)).astype(np.float32) + y1 = rng.integers(0, 2, size=50) + + X2 = rng.standard_normal((40, 10)).astype(np.float32) + y2 = rng.integers(0, 2, size=40) + + # Create a classifier with ONNX backend + if which == "classifier": + sklearn_model = TabPFNClassifier(device="cpu", use_onnx=True) + else: + sklearn_model = TabPFNRegressor(device="cpu", use_onnx=True) + + # First fit + sklearn_model.fit(X1, y1) + + # Get reference to the first model + first_model = sklearn_model.model_ + + # Mock print function to check if message is displayed + import builtins + + original_print = builtins.print + printed_messages = [] + + def mock_print(*args, **kwargs): + message = " ".join(str(arg) for arg in args) + printed_messages.append(message) + original_print(*args, **kwargs) + + # Replace print with our mock + builtins.print = mock_print + + try: + # Second fit with same configuration + sklearn_model.fit(X2, y2) + + # Assert that the model object is the same (session reused) + assert sklearn_model.model_ is first_model + + # Check that the print message appears + assert any( + "Using same ONNX session as last fit call" in msg + for msg in printed_messages + ) + + # Now test with a different device to force new session + if torch.cuda.is_available(): + # Change device to force new session + sklearn_model.device = "cuda" + sklearn_model.fit(X1, y1) + + # Should be a different model object now + assert sklearn_model.model_ is not first_model + + # Restore device + sklearn_model.device = "cpu" + sklearn_model.fit(X1, y1) + + # Should be a new model again + assert sklearn_model.model_ is not first_model + finally: + # Restore original print 
function + builtins.print = original_print + + +@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") +@pytest.mark.parametrize("which", ["classifier", "regressor"]) +def test_onnx_deterministic(which: Literal["classifier", "regressor"]): + """Test that TabPFN models using ONNX are deterministic when using the same seed.""" + if os.name == "nt": + pytest.skip("ONNX export is not tested on Windows") + if sys.version_info >= (3, 13): + pytest.xfail("ONNX is not yet supported on Python 3.13") + + try: + import onnx # noqa: F401 + import onnxruntime # noqa: F401 + except ImportError: + pytest.skip("ONNX or ONNX Runtime not available") + + from tabpfn.misc.compile_to_onnx import compile_onnx_models + + # Compile the model to ONNX format if needed + compile_onnx_models(skip_test=True) + + # Generate synthetic data + rng = np.random.default_rng(42) + X_train = rng.standard_normal((50, 10)).astype(np.float32) + + if which == "classifier": + y_train = rng.integers(0, 3, size=50) # 3 classes + X_test = rng.standard_normal((20, 10)).astype(np.float32) + + # First model with fixed seed + model1 = TabPFNClassifier(device="cpu", use_onnx=True, random_state=123) + model1.fit(X_train, y_train) + pred1 = model1.predict(X_test) + proba1 = model1.predict_proba(X_test) + + # Second model with same seed + model2 = TabPFNClassifier(device="cpu", use_onnx=True, random_state=123) + model2.fit(X_train, y_train) + pred2 = model2.predict(X_test) + proba2 = model2.predict_proba(X_test) + + # Predictions should be identical + np.testing.assert_array_equal(pred1, pred2) + np.testing.assert_array_equal(proba1, proba2) + + # Third model with different seed + model3 = TabPFNClassifier(device="cpu", use_onnx=True, random_state=456) + model3.fit(X_train, y_train) + pred3 = model3.predict(X_test) + proba3 = model3.predict_proba(X_test) + + # Predictions should be different (with high probability) + # We use assert_raises to verify they're different + with pytest.raises(AssertionError): + np.testing.assert_array_equal(proba1, proba3) + + else: # regressor + y_train = rng.standard_normal(50) + X_test = rng.standard_normal((20, 10)).astype(np.float32) + + # First model with fixed seed + model1 = TabPFNRegressor(device="cpu", use_onnx=True, random_state=123) + model1.fit(X_train, y_train) + pred1 = model1.predict(X_test) + + # Second model with same seed + model2 = TabPFNRegressor(device="cpu", use_onnx=True, random_state=123) + model2.fit(X_train, y_train) + pred2 = model2.predict(X_test) + + # Predictions should be identical + np.testing.assert_array_equal(pred1, pred2) + + # Third model with different seed + model3 = TabPFNRegressor(device="cpu", use_onnx=True, random_state=456) + model3.fit(X_train, y_train) + pred3 = model3.predict(X_test) + + # Predictions should be different (with high probability) + with pytest.raises(AssertionError): + np.testing.assert_array_equal(pred1, pred3) From d6d906d4ca84d1f74c7a51721c657c013695ba94 Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Mon, 24 Mar 2025 23:22:11 +0100 Subject: [PATCH 17/20] fail if device is CUDA but CUDAExecutionProvider not available --- src/tabpfn/misc/compile_to_onnx.py | 28 ++++++++++++------ tests/test_export_onnx.py | 46 ++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/src/tabpfn/misc/compile_to_onnx.py b/src/tabpfn/misc/compile_to_onnx.py index 4ee9ee627..1137bf57c 100644 --- a/src/tabpfn/misc/compile_to_onnx.py +++ b/src/tabpfn/misc/compile_to_onnx.py @@ -20,6 +20,17 @@ from tabpfn.model.loading import 
resolve_model_path +def _check_cuda_provider(device: torch.device) -> None: + if ( + device.type == "cuda" + and "CUDAExecutionProvider" not in ort.get_available_providers() + ): + raise ValueError( + "Device is cuda but CUDAExecutionProvider is not available in ONNX. " + "Check that you installed onnxruntime-gpu and have a GPU." + ) + + class ONNXModelWrapper: """Wrap ONNX model to match the PyTorch model interface.""" @@ -32,6 +43,7 @@ def __init__(self, model_path: Path, device: torch.device): """ self.model_path = model_path self.device = device + _check_cuda_provider(self.device) if device.type == "cuda": self.providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] elif device.type == "cpu": @@ -56,18 +68,16 @@ def to( self """ # Only recreate session if device type has changed + _check_cuda_provider(device) if device.type != self.device.type: if device.type == "cuda": - # Check if CUDA is available in ONNX Runtime cuda_provider = "CUDAExecutionProvider" - if cuda_provider in ort.get_available_providers(): - self.providers = [cuda_provider, "CPUExecutionProvider"] - # Reinitialize session with CUDA provider - self.session = ort.InferenceSession( - self.model_path, - providers=self.providers, - ) - # If CUDA is not available, keep current session + self.providers = [cuda_provider, "CPUExecutionProvider"] + # Reinitialize session with CUDA provider + self.session = ort.InferenceSession( + self.model_path, + providers=self.providers, + ) else: self.providers = ["CPUExecutionProvider"] self.session = ort.InferenceSession( diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py index f884d9d8c..559cf566b 100644 --- a/tests/test_export_onnx.py +++ b/tests/test_export_onnx.py @@ -279,3 +279,49 @@ def test_onnx_deterministic(which: Literal["classifier", "regressor"]): # Predictions should be different (with high probability) with pytest.raises(AssertionError): np.testing.assert_array_equal(pred1, pred3) + + +@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") +@pytest.mark.parametrize("model_class", [TabPFNClassifier, TabPFNRegressor]) +def test_cuda_provider_missing_error(model_class): + """Test that TabPFN models raise the correct error when trying to use CUDA + without CUDAExecutionProvider available in ONNX Runtime. 
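A user-side preflight for the same condition is one call to the onnxruntime API; for example:

```python
import onnxruntime as ort

# Typically ["CUDAExecutionProvider", "CPUExecutionProvider"] with
# onnxruntime-gpu installed, and ["CPUExecutionProvider", ...] otherwise.
if "CUDAExecutionProvider" not in ort.get_available_providers():
    print("onnxruntime-gpu is not installed (or no GPU); use device='cpu'.")
```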
+ """ + if os.name == "nt": + pytest.skip("ONNX export is not tested on Windows") + if sys.version_info >= (3, 13): + pytest.xfail("ONNX is not yet supported on Python 3.13") + + try: + import onnxruntime as ort + except ImportError: + pytest.skip("ONNX Runtime not available") + + # Generate synthetic data + rng = np.random.default_rng(42) + X = rng.standard_normal((20, 5)).astype(np.float32) + y = ( + rng.integers(0, 2, size=20) + if model_class == TabPFNClassifier + else rng.standard_normal(20) + ) + + # Mock ort.get_available_providers to return only CPUExecutionProvider + original_get_providers = ort.get_available_providers + + try: + # Replace providers with only CPU + ort.get_available_providers = lambda: ["CPUExecutionProvider"] + + # Create model with CUDA device and ONNX enabled + model = model_class(device="cuda", use_onnx=True) + + # The error should be raised during fit + with pytest.raises( + ValueError, + match="Device is cuda but CUDAExecutionProvider is not available in ONNX", + ): + model.fit(X, y) + finally: + # Restore original function + ort.get_available_providers = original_get_providers From e1ada196fa248c62dca764b1a0abc55cc70d4370 Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Tue, 25 Mar 2025 10:31:49 +0100 Subject: [PATCH 18/20] new py3.11 ci tests and skip onnx tests on 3.9 --- .github/workflows/pull_request.yml | 19 +++++++-- tests/test_export_onnx.py | 64 +++++++----------------------- 2 files changed, 29 insertions(+), 54 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 29ebdb218..1653c6e81 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -36,6 +36,15 @@ jobs: - os: windows-latest python-version: "3.9" dependency-set: minimum + - os: ubuntu-latest + python-version: "3.11" + dependency-set: direct-install + - os: macos-latest + python-version: "3.11" + dependency-set: direct-install + - os: windows-latest + python-version: "3.11" + dependency-set: direct-install - os: ubuntu-latest python-version: "3.13" dependency-set: maximum @@ -73,10 +82,12 @@ jobs: - name: Install dependencies run: | - uv pip install --system --no-deps . - # onnx is required for onnx export tests - # we don't install all dev dependencies here for speed - uv pip install --system -r requirements.txt + if [[ "${{ matrix.dependency-set }}" == "direct-install" ]]; then + uv pip install --system . + else + uv pip install --system --no-deps . + uv pip install --system -r requirements.txt + fi uv pip install --system pytest psutil # onnx is not supported on python 3.13 yet https://github.com/onnx/onnx/issues/6339 if [[ "${{ matrix.python-version }}" != "3.13" ]]; then diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py index 559cf566b..51f47f155 100644 --- a/tests/test_export_onnx.py +++ b/tests/test_export_onnx.py @@ -11,24 +11,29 @@ from tabpfn import TabPFNClassifier, TabPFNRegressor -@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") -def test_onnx_missing_model_error(): - """Test that appropriate error is raised when trying to - use ONNX with a missing model. Here we specify a model path - that does not exist to simulate the case where the model - has not been compiled. 
- """ +# Common fixture to handle all the skip conditions for ONNX tests +@pytest.fixture(autouse=True, scope="module") +def check_onnx_compatible(): if os.name == "nt": pytest.skip("ONNX export is not tested on Windows") if sys.version_info >= (3, 13): pytest.xfail("ONNX is not yet supported on Python 3.13") - + if sys.version_info <= (3, 9): + pytest.skip("our onnx export doesn't work on python 3.9") try: import onnx # noqa: F401 import onnxruntime # noqa: F401 except ImportError: pytest.skip("ONNX or ONNX Runtime not available") + +@pytest.mark.filterwarnings("ignore::torch.jit.TracerWarning") +def test_onnx_missing_model_error(): + """Test that appropriate error is raised when trying to + use ONNX with a missing model. Here we specify a model path + that does not exist to simulate the case where the model + has not been compiled. + """ # Generate synthetic data rng = np.random.default_rng() X = rng.standard_normal((50, 10)).astype(np.float32) @@ -55,17 +60,6 @@ def test_onnx_export_and_inference(): """Test that TabPFN models can be exported to ONNX and produce correct predictions. """ - if os.name == "nt": - pytest.skip("ONNX export is not tested on Windows") - if sys.version_info >= (3, 13): - pytest.xfail("ONNX is not yet supported on Python 3.13") - - try: - import onnx # noqa: F401 - import onnxruntime # noqa: F401 - except ImportError: - pytest.skip("ONNX or ONNX Runtime not available") - from tabpfn.misc.compile_to_onnx import compile_onnx_models # Compile the model to ONNX format (using default output directory) @@ -121,17 +115,6 @@ def test_onnx_session_reuse(which: Literal["classifier", "regressor"]): """Test that the ONNX session is reused when fitting a model multiple times with the same model path and device. """ - if os.name == "nt": - pytest.skip("ONNX export is not tested on Windows") - if sys.version_info >= (3, 13): - pytest.xfail("ONNX is not yet supported on Python 3.13") - - try: - import onnx # noqa: F401 - import onnxruntime # noqa: F401 - except ImportError: - pytest.skip("ONNX or ONNX Runtime not available") - # Generate synthetic data rng = np.random.default_rng(42) X1 = rng.standard_normal((50, 10)).astype(np.float32) @@ -203,17 +186,6 @@ def mock_print(*args, **kwargs): @pytest.mark.parametrize("which", ["classifier", "regressor"]) def test_onnx_deterministic(which: Literal["classifier", "regressor"]): """Test that TabPFN models using ONNX are deterministic when using the same seed.""" - if os.name == "nt": - pytest.skip("ONNX export is not tested on Windows") - if sys.version_info >= (3, 13): - pytest.xfail("ONNX is not yet supported on Python 3.13") - - try: - import onnx # noqa: F401 - import onnxruntime # noqa: F401 - except ImportError: - pytest.skip("ONNX or ONNX Runtime not available") - from tabpfn.misc.compile_to_onnx import compile_onnx_models # Compile the model to ONNX format if needed @@ -287,15 +259,7 @@ def test_cuda_provider_missing_error(model_class): """Test that TabPFN models raise the correct error when trying to use CUDA without CUDAExecutionProvider available in ONNX Runtime. 
""" - if os.name == "nt": - pytest.skip("ONNX export is not tested on Windows") - if sys.version_info >= (3, 13): - pytest.xfail("ONNX is not yet supported on Python 3.13") - - try: - import onnxruntime as ort - except ImportError: - pytest.skip("ONNX Runtime not available") + import onnxruntime as ort # Generate synthetic data rng = np.random.default_rng(42) From 9acec27abd5f7c580346112a2e51f975346bd0db Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Tue, 25 Mar 2025 10:53:16 +0100 Subject: [PATCH 19/20] fix tests --- src/tabpfn/misc/compile_to_onnx.py | 15 +++++++++------ src/tabpfn/regressor.py | 2 +- tests/test_export_onnx.py | 3 +-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/tabpfn/misc/compile_to_onnx.py b/src/tabpfn/misc/compile_to_onnx.py index 1137bf57c..89e902069 100644 --- a/src/tabpfn/misc/compile_to_onnx.py +++ b/src/tabpfn/misc/compile_to_onnx.py @@ -240,7 +240,7 @@ def forward( def export_model( - output_path: str, + output_path: Path, model_type: str = "classifier", ) -> None: """Export the TabPFN model to the ONNX format. @@ -321,7 +321,7 @@ def export_model( ) -def check_onnx_model(model_path: str) -> None: +def check_onnx_model(model_path: Path) -> None: """Validate the ONNX model. Loads the ONNX model and runs a checker to ensure that the model is valid. @@ -333,7 +333,7 @@ def check_onnx_model(model_path: str) -> None: onnx.checker.check_model(onnx_model) # Check if the model is valid -def check_input_names(model_path: str) -> None: +def check_input_names(model_path: Path) -> None: """Load the ONNX model to check its input names. Args: @@ -445,9 +445,12 @@ def compile_onnx_models(suffix: str = "", *, skip_test: bool = False) -> None: """ classifier_path, _, _ = resolve_model_path(None, "classifier", "v2", use_onnx=True) regressor_path, _, _ = resolve_model_path(None, "regressor", "v2", use_onnx=True) - # add suffix to the file names - classifier_path = classifier_path.stem + suffix + ".onnx" - regressor_path = regressor_path.stem + suffix + ".onnx" + + # Add suffix before the .onnx extension + stem = classifier_path.stem + classifier_path = classifier_path.with_name(f"{stem}{suffix}").with_suffix(".onnx") + stem = regressor_path.stem + regressor_path = regressor_path.with_name(f"{stem}{suffix}").with_suffix(".onnx") export_model(classifier_path, "classifier") check_onnx_model(classifier_path) diff --git a/src/tabpfn/regressor.py b/src/tabpfn/regressor.py index 5421bcc85..b3ae7ffa3 100644 --- a/src/tabpfn/regressor.py +++ b/src/tabpfn/regressor.py @@ -460,7 +460,7 @@ def fit(self, X: XType, y: YType) -> Self: # Initialize bardist_ for ONNX mode # TODO: faster way to do this _, self.config_, self.bardist_ = initialize_tabpfn_model( - model_path=model_path.with_stem(model_path.stem).with_suffix(".ckpt"), + model_path=model_path.with_suffix(".ckpt"), which="regressor", fit_mode=self.fit_mode, static_seed=static_seed, diff --git a/tests/test_export_onnx.py b/tests/test_export_onnx.py index 51f47f155..057ca3033 100644 --- a/tests/test_export_onnx.py +++ b/tests/test_export_onnx.py @@ -11,14 +11,13 @@ from tabpfn import TabPFNClassifier, TabPFNRegressor -# Common fixture to handle all the skip conditions for ONNX tests @pytest.fixture(autouse=True, scope="module") def check_onnx_compatible(): if os.name == "nt": pytest.skip("ONNX export is not tested on Windows") if sys.version_info >= (3, 13): pytest.xfail("ONNX is not yet supported on Python 3.13") - if sys.version_info <= (3, 9): + if sys.version_info < (3, 10): pytest.skip("our onnx export doesn't 
work on python 3.9") try: import onnx # noqa: F401 From 0f1848deb157d5d042e64cdb5c37c22c6384f151 Mon Sep 17 00:00:00 2001 From: LeoGrin Date: Tue, 25 Mar 2025 10:59:47 +0100 Subject: [PATCH 20/20] fail nicely if someones try to export to onnx on python3.9 --- src/tabpfn/misc/compile_to_onnx.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/tabpfn/misc/compile_to_onnx.py b/src/tabpfn/misc/compile_to_onnx.py index 89e902069..4cac826a6 100644 --- a/src/tabpfn/misc/compile_to_onnx.py +++ b/src/tabpfn/misc/compile_to_onnx.py @@ -7,6 +7,7 @@ from __future__ import annotations +import sys from pathlib import Path import numpy as np @@ -31,6 +32,20 @@ def _check_cuda_provider(device: torch.device) -> None: ) +def _check_onnx_setup() -> None: + try: + import onnx # noqa: F401 + except ImportError: + raise ImportError( + "ONNX is not installed. " "Please install it using `pip install onnx`." + ) from None + if sys.version_info < (3, 10): + raise ValueError( + "TabPFN ONNX export is not yet supported on Python 3.9. " + "Please upgrade to Python 3.10 or higher." + ) from None + + class ONNXModelWrapper: """Wrap ONNX model to match the PyTorch model interface.""" @@ -443,6 +458,8 @@ def compile_onnx_models(suffix: str = "", *, skip_test: bool = False) -> None: suffix: The suffix to append to the file names of the ONNX models. skip_test: Whether to skip the performance test of the ONNX models. """ + _check_onnx_setup() + classifier_path, _, _ = resolve_model_path(None, "classifier", "v2", use_onnx=True) regressor_path, _, _ = resolve_model_path(None, "regressor", "v2", use_onnx=True)
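Note on the pathlib change in PATCH 19: a minimal standalone sketch of what the
new suffix logic in compile_onnx_models does. The base file name and the
"_test" suffix below are hypothetical; resolve_model_path supplies the real
paths.

    from pathlib import Path

    # Mirror of the patched logic: splice the suffix between the stem and the
    # .onnx extension while keeping the parent directory intact.
    path = Path("/cache/tabpfn-v2-classifier.onnx")  # hypothetical location
    suffix = "_test"
    stem = path.stem  # "tabpfn-v2-classifier"
    path = path.with_name(f"{stem}{suffix}").with_suffix(".onnx")
    assert path == Path("/cache/tabpfn-v2-classifier_test.onnx")
    # The old string concatenation (path.stem + suffix + ".onnx") returned a
    # bare str and silently dropped the directory, which is what broke the
    # tests. Caveat: a suffix containing a dot would confuse with_suffix.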
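And a hedged sketch of what the guard added in PATCH 20 means for callers of
compile_onnx_models. The import path matches the one used in the tests above;
the behaviour on Python 3.9 is inferred from the added check, not verified
here.

    import sys

    from tabpfn.misc.compile_to_onnx import compile_onnx_models

    if sys.version_info >= (3, 10):
        # Exports both the classifier and regressor models to ONNX.
        compile_onnx_models(suffix="_test", skip_test=True)
    else:
        # On Python 3.9 the call now fails fast in _check_onnx_setup with a
        # clear ValueError instead of failing later inside the export.
        print("TabPFN ONNX export requires Python 3.10+")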
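Side note on the mocking pattern in test_cuda_provider_missing_error
(PATCH 17): pytest's monkeypatch fixture restores the patched attribute
automatically, so the manual try/finally is not strictly needed. A hedged
equivalent, assuming the same provider-check code path:

    import numpy as np
    import onnxruntime as ort
    import pytest

    from tabpfn import TabPFNClassifier

    def test_cuda_provider_missing_error_monkeypatch(monkeypatch):
        # monkeypatch undoes this setattr after the test finishes.
        monkeypatch.setattr(
            ort, "get_available_providers", lambda: ["CPUExecutionProvider"]
        )
        rng = np.random.default_rng(42)
        X = rng.standard_normal((20, 5)).astype(np.float32)
        y = rng.integers(0, 2, size=20)
        model = TabPFNClassifier(device="cuda", use_onnx=True)
        with pytest.raises(
            ValueError, match="CUDAExecutionProvider is not available"
        ):
            model.fit(X, y)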