
Commit 79dafa3

Allow using an ONNX model inside the sklearn interface

1 parent a537f40 commit 79dafa3

9 files changed: +388 −14 lines


src/tabpfn/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 from importlib.metadata import version
 
 from tabpfn.classifier import TabPFNClassifier
-from tabpfn.debug_versions import display_debug_info
+from tabpfn.misc.debug_versions import display_debug_info
 from tabpfn.regressor import TabPFNRegressor
 
 try:

src/tabpfn/base.py

Lines changed: 36 additions & 0 deletions

@@ -33,6 +33,7 @@
 if TYPE_CHECKING:
     import numpy as np
 
+    from tabpfn.misc.onnx_wrapper import ONNXModelWrapper
     from tabpfn.model.bar_distribution import FullSupportBarDistribution
     from tabpfn.model.config import InferenceConfig
     from tabpfn.model.transformer import PerFeatureTransformer
@@ -111,6 +112,36 @@ def initialize_tabpfn_model(
     return model, config_, bar_distribution
 
 
+def load_onnx_model(
+    model_path: str | Path,
+) -> ONNXModelWrapper:
+    """Load a TabPFN model in ONNX format.
+
+    Args:
+        model_path: Path to the ONNX model file.
+
+    Returns:
+        The loaded ONNX model wrapped in a PyTorch-compatible interface.
+
+    Raises:
+        ImportError: If onnxruntime is not installed.
+        FileNotFoundError: If the model file doesn't exist.
+    """
+    try:
+        from tabpfn.misc.onnx_wrapper import ONNXModelWrapper
+    except ImportError as err:
+        raise ImportError(
+            "onnxruntime is required to load ONNX models. "
+            "Install it with: pip install onnxruntime",
+        ) from err
+
+    model_path = Path(model_path)
+    if not model_path.exists():
+        raise FileNotFoundError(f"ONNX model not found at: {model_path}")
+
+    return ONNXModelWrapper(str(model_path))
+
+
 def determine_precision(
     inference_precision: torch.dtype | Literal["autocast", "auto"],
     device_: torch.device,
@@ -168,6 +199,7 @@ def create_inference_engine(  # noqa: PLR0913
     forced_inference_dtype_: torch.dtype | None,
     memory_saving_mode: bool | Literal["auto"] | float | int,
     use_autocast_: bool,
+    use_onnx: bool = False,
 ) -> InferenceEngine:
     """Creates the appropriate TabPFN inference engine based on `fit_mode`.
 
@@ -190,6 +222,7 @@ def create_inference_engine(  # noqa: PLR0913
         forced_inference_dtype_: If not None, the forced dtype for inference.
         memory_saving_mode: GPU/CPU memory saving settings.
         use_autocast_: Whether we use torch.autocast for inference.
+        use_onnx: Whether to use ONNX runtime for model inference.
     """
     engine: (
         InferenceEngineOnDemand
@@ -208,6 +241,7 @@ def create_inference_engine(  # noqa: PLR0913
             dtype_byte_size=byte_size,
             force_inference_dtype=forced_inference_dtype_,
             save_peak_mem=memory_saving_mode,
+            use_onnx=use_onnx,
         )
     elif fit_mode == "fit_preprocessors":
         engine = InferenceEngineCachePreprocessing.prepare(
@@ -221,6 +255,7 @@ def create_inference_engine(  # noqa: PLR0913
             dtype_byte_size=byte_size,
             force_inference_dtype=forced_inference_dtype_,
             save_peak_mem=memory_saving_mode,
+            use_onnx=use_onnx,
         )
     elif fit_mode == "fit_with_cache":
         engine = InferenceEngineCacheKV.prepare(
@@ -236,6 +271,7 @@ def create_inference_engine(  # noqa: PLR0913
             force_inference_dtype=forced_inference_dtype_,
             save_peak_mem=memory_saving_mode,
             autocast=use_autocast_,
+            use_onnx=use_onnx,
         )
     else:
         raise ValueError(f"Invalid fit_mode: {fit_mode}")
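The ONNXModelWrapper imported above lives in tabpfn/misc/onnx_wrapper.py, which is not part of the hunks shown in this commit. As a rough sketch of what a "PyTorch-compatible interface" over an onnxruntime session could look like — the class name and module path come from the diff, while the single-input/single-output layout and the __call__ signature are illustrative guesses:

# Hypothetical sketch of tabpfn/misc/onnx_wrapper.py; only the class name and
# module path appear in this commit. Everything else is an assumption.
import numpy as np
import onnxruntime as ort
import torch


class ONNXModelWrapper:
    """Expose an onnxruntime session behind a torch-style callable."""

    def __init__(self, model_path: str) -> None:
        self.session = ort.InferenceSession(model_path)
        # Assumes the graph has a single input; the real TabPFN graph may not.
        self.input_name = self.session.get_inputs()[0].name

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        # onnxruntime consumes and produces numpy arrays, so convert at the edges.
        (out,) = self.session.run(
            None,
            {self.input_name: x.detach().cpu().numpy().astype(np.float32)},
        )
        return torch.from_numpy(out)

Note that load_onnx_model performs its import lazily inside a try/except, so onnxruntime stays an optional dependency: users who never set use_onnx=True need not install it.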

src/tabpfn/classifier.py

Lines changed: 16 additions & 6 deletions

@@ -32,6 +32,7 @@
     create_inference_engine,
     determine_precision,
     initialize_tabpfn_model,
+    load_onnx_model,
 )
 from tabpfn.config import ModelInterfaceConfig
 from tabpfn.constants import (
@@ -148,6 +149,7 @@ def __init__(  # noqa: PLR0913
         random_state: int | np.random.RandomState | np.random.Generator | None = 0,
         n_jobs: int = -1,
         inference_config: dict | ModelInterfaceConfig | None = None,
+        use_onnx: bool = False,
     ) -> None:
         """A TabPFN interface for classification.
 
@@ -337,6 +339,9 @@ def __init__(  # noqa: PLR0913
                 - If `dict`, the key-value pairs are used to update the default
                   `ModelInterfaceConfig`. Raises an error if an unknown key is passed.
                 - If `ModelInterfaceConfig`, the object is used as the configuration.
+
+            use_onnx:
+                Whether to use an ONNX compiled model.
         """
         super().__init__()
         self.n_estimators = n_estimators
@@ -359,6 +364,7 @@ def __init__(  # noqa: PLR0913
         self.random_state = random_state
         self.n_jobs = n_jobs
         self.inference_config = inference_config
+        self.use_onnx = use_onnx
 
     # TODO: We can remove this from scikit-learn lower bound of 1.6
     def _more_tags(self) -> dict[str, Any]:
@@ -383,12 +389,15 @@ def fit(self, X: XType, y: YType) -> Self:
         static_seed, rng = infer_random_state(self.random_state)
 
         # Load the model and config
-        self.model_, self.config_, _ = initialize_tabpfn_model(
-            model_path=self.model_path,
-            which="classifier",
-            fit_mode=self.fit_mode,
-            static_seed=static_seed,
-        )
+        if self.use_onnx:
+            self.model_ = load_onnx_model("model_classifier.onnx")
+        else:
+            self.model_, self.config_, _ = initialize_tabpfn_model(
+                model_path=self.model_path,
+                which="classifier",
+                fit_mode=self.fit_mode,
+                static_seed=static_seed,
+            )
 
         # Determine device and precision
         self.device_ = infer_device_and_type(self.device)
@@ -500,6 +509,7 @@ def fit(self, X: XType, y: YType) -> Self:
             forced_inference_dtype_=self.forced_inference_dtype_,
             memory_saving_mode=self.memory_saving_mode,
             use_autocast_=self.use_autocast_,
+            use_onnx=self.use_onnx,
         )
 
         return self
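Taken together with base.py, the flag is usable straight from the sklearn-style API. A minimal usage sketch, assuming onnxruntime is installed and an exported model_classifier.onnx sits in the current working directory (the path is hardcoded in fit above):

# Usage sketch for the new flag; requires onnxruntime and an exported
# model_classifier.onnx in the working directory (path hardcoded in fit).
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

from tabpfn import TabPFNClassifier

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = TabPFNClassifier(use_onnx=True)  # new parameter added in this commit
clf.fit(X_train, y_train)
print(clf.predict(X_test))

One caveat visible in the diff: on the ONNX path, fit skips initialize_tabpfn_model, so self.config_ is never set and the model_path argument is ignored in favor of the hardcoded "model_classifier.onnx".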

src/tabpfn/inference.py

Lines changed: 6 additions & 1 deletion

@@ -219,9 +219,10 @@ class InferenceEngineCachePreprocessing(InferenceEngine):
     preprocessors: Sequence[SequentialFeatureTransformer]
     model: PerFeatureTransformer
     force_inference_dtype: torch.dtype | None
+    use_onnx: bool = False
 
     @classmethod
-    def prepare(
+    def prepare(  # noqa: PLR0913
         cls,
         X_train: np.ndarray,
         y_train: np.ndarray,
@@ -234,6 +235,7 @@ def prepare(
         dtype_byte_size: int,
         force_inference_dtype: torch.dtype | None,
         save_peak_mem: bool | Literal["auto"] | float | int,
+        use_onnx: bool = False,
     ) -> InferenceEngineCachePreprocessing:
         """Prepare the inference engine.
 
@@ -248,6 +250,7 @@ def prepare(
             dtype_byte_size: The byte size of the dtype.
             force_inference_dtype: The dtype to force inference to.
             save_peak_mem: Whether to save peak memory usage.
+            use_onnx: Whether to use ONNX for inference.
 
         Returns:
             The prepared inference engine.
@@ -272,6 +275,7 @@ def prepare(
             dtype_byte_size=dtype_byte_size,
             force_inference_dtype=force_inference_dtype,
             save_peak_mem=save_peak_mem,
+            use_onnx=use_onnx,
         )
 
     @override
@@ -315,6 +319,7 @@ def iter_outputs(
             device=device,
             dtype_byte_size=self.dtype_byte_size,
             safety_factor=1.2,  # TODO(Arjun): make customizable
+            use_onnx=self.use_onnx,
         )
 
         style = None
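Because InferenceEngineCachePreprocessing is a dataclass, the new use_onnx field gets a default of False and prepare() threads it through with the same default, so existing call sites keep working unchanged. A distilled, self-contained sketch of that pattern (the class and fields below are illustrative stand-ins, not TabPFN's real signatures):

# Pattern sketch: adding a defaulted feature flag to a dataclass-based engine
# without breaking existing callers. Names are illustrative, not TabPFN's.
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class Engine:
    save_peak_mem: bool = True
    use_onnx: bool = False  # new field, defaulted for backward compatibility

    @classmethod
    def prepare(cls, *, save_peak_mem: bool = True, use_onnx: bool = False) -> Engine:
        return cls(save_peak_mem=save_peak_mem, use_onnx=use_onnx)


print(Engine.prepare().use_onnx)               # False: old call sites unchanged
print(Engine.prepare(use_onnx=True).use_onnx)  # True: new opt-in path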

src/tabpfn/misc/__init__.py

Whitespace-only changes.
One further file was renamed without changes.
