import argparse

import numpy as np
import openml
import torch
from openml.config import set_root_cache_directory
from openml.tasks import TaskType
from sklearn.metrics import balanced_accuracy_score, roc_auc_score, r2_score
from sklearn.preprocessing import LabelEncoder

from nanotabpfn.interface import NanoTabPFNRegressor, NanoTabPFNClassifier

TOY_TASKS_REGRESSION = [
    362443,  # diabetes
]

TOY_TASKS_CLASSIFICATION = [
    59,    # iris
    2382,  # wine
    9946,  # breast_cancer
]
@torch.no_grad()
def get_openml_predictions(
    *,
    model: NanoTabPFNRegressor | NanoTabPFNClassifier,
    tasks: list[int] | str = "tabarena-v0.1",
    max_n_features: int = 500,
    max_n_instances: int = 10_000,
    classification: bool | None = None,
    cache_directory: str | None = None,
):
    """
    Evaluates a model on a set of OpenML tasks and returns predictions.

    Retrieves datasets from OpenML, applies preprocessing, and evaluates the given model on each task.
    Returns true targets, predicted labels, and predicted probabilities for each dataset.

    Args:
        model (NanoTabPFNRegressor | NanoTabPFNClassifier): A scikit-learn compatible regressor or classifier to be evaluated.
        tasks (list[int] | str, optional): A list of OpenML task IDs or the name of an OpenML benchmark suite.
        max_n_features (int, optional): Maximum number of features allowed for a task. Tasks exceeding this limit are skipped.
        max_n_instances (int, optional): Maximum number of instances allowed for a task. Tasks exceeding this limit are skipped.
        classification (bool | None, optional): Whether the model is a classifier (True) or a regressor (False). If None, it is inferred from the model type.
        cache_directory (str | None, optional): Directory to save OpenML data. If None, the default cache path is used.

    Returns:
        dict: A dictionary where keys are dataset names and values are tuples of (true targets, predicted labels, predicted probabilities).

    A minimal usage sketch is given in the comment block directly after this function.
    """
    if classification is None:
        classification = isinstance(model, NanoTabPFNClassifier)

    if cache_directory is not None:
        set_root_cache_directory(cache_directory)

    if isinstance(tasks, str):
        benchmark_suite = openml.study.get_suite(tasks)
        task_ids = benchmark_suite.tasks
    else:
        task_ids = tasks

    dataset_predictions = {}

    for task_id in task_ids:
        task = openml.tasks.get_task(task_id, download_splits=False)

        if classification and task.task_type_id != TaskType.SUPERVISED_CLASSIFICATION:
            continue  # skip non-classification tasks when evaluating a classifier
        if not classification and task.task_type_id != TaskType.SUPERVISED_REGRESSION:
            continue  # skip non-regression tasks when evaluating a regressor

        dataset = task.get_dataset(download_data=False)

        n_features = dataset.qualities["NumberOfFeatures"]
        n_instances = dataset.qualities["NumberOfInstances"]
        if n_features > max_n_features or n_instances > max_n_instances:
            continue  # skip task, too big

        _, folds, _ = task.get_split_dimensions()
        tabarena_light = True
        if tabarena_light:
            folds = 1  # the code supports multiple folds, but tabarena_light only has one
        repeat = 0  # the code only supports one repeat
        targets = []
        predictions = []
        probabilities = []
        for fold in range(folds):
            X, y, categorical_indicator, attribute_names = dataset.get_data(
                target=task.target_name, dataset_format="dataframe"
            )
            train_indices, test_indices = task.get_train_test_split_indices(
                fold=fold, repeat=repeat
            )
            X_train = X.iloc[train_indices].to_numpy()
            y_train = y.iloc[train_indices].to_numpy()
            X_test = X.iloc[test_indices].to_numpy()
            y_test = y.iloc[test_indices].to_numpy()

            if classification:
                # encode string class labels as integers for the classifier and the metrics
                label_encoder = LabelEncoder()
                y_train = label_encoder.fit_transform(y_train)
                y_test = label_encoder.transform(y_test)
            targets.append(y_test)

            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            predictions.append(y_pred)
            if classification:
                y_proba = model.predict_proba(X_test)
                if y_proba.shape[1] == 2:  # binary classification: keep positive-class probability only
                    y_proba = y_proba[:, 1]
                probabilities.append(y_proba)

        y_pred = np.concatenate(predictions, axis=0)
        targets = np.concatenate(targets, axis=0)
        probabilities = np.concatenate(probabilities, axis=0) if len(probabilities) > 0 else None
        dataset_predictions[str(dataset.name)] = (targets, y_pred, probabilities)
    return dataset_predictions
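
# A minimal usage sketch, assuming that passing model=None loads the default pretrained
# weights (as the -checkpoint help text in the __main__ block below states):
#
#     classifier = NanoTabPFNClassifier(model=None)
#     results = get_openml_predictions(model=classifier, tasks=TOY_TASKS_CLASSIFICATION)
#     for name, (y_true, y_pred, y_proba) in results.items():
#         print(name, balanced_accuracy_score(y_true, y_pred))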


120+ if __name__ == "__main__" :
121+ parser = argparse .ArgumentParser ()
122+ parser .add_argument ("-model_type" , type = str , choices = ["regression" , "classification" ], required = True ,
123+ help = "Whether to use the regressor or classifier model" )
124+ parser .add_argument ("-checkpoint" , type = str , default = None ,
125+ help = "Path to load the model weights from. If None, default weights are used." )
126+ parser .add_argument ("-dist_path" , type = str , default = None ,
127+ help = "Path to load the bucket edges for the support bar distribution from. Only needed for regression." )
128+ parser .add_argument ("-tasks" , type = str , default = "tabarena-v0.1" ,
129+ choices = ["tabarena-v0.1" , "toy_tasks" ], help = "Which OpenML tasks to evaluate on." )
130+ parser .add_argument ("-cache_directory" , type = str , default = None ,
131+ help = "Directory to save OpenML data. If None, default cache path is used." )
132+ parser .add_argument ("-max_n_features" , type = int , default = 500 ,
133+ help = "Maximum number of features allowed for a task. Tasks exceeding this limit are skipped." )
134+ parser .add_argument ("-max_n_instances" , type = int , default = 10_000 ,
135+ help = "Maximum number of instances allowed for a task. Tasks exceeding this limit are skipped." )
136+ args = parser .parse_args ()
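
    # Example invocations (the script filename evaluate_openml.py and the
    # bucket_edges.pt path are hypothetical; adjust both to your setup):
    #   python evaluate_openml.py -model_type classification -tasks toy_tasks
    #   python evaluate_openml.py -model_type regression -dist_path bucket_edges.pt -tasks toy_tasks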

    if args.model_type == "classification":
        model = NanoTabPFNClassifier(model=args.checkpoint)
    else:
        model = NanoTabPFNRegressor(model=args.checkpoint, dist=args.dist_path)
    model.model.eval()

    if args.tasks == "toy_tasks" and args.model_type == "regression":
        tasks = TOY_TASKS_REGRESSION
    elif args.tasks == "toy_tasks" and args.model_type == "classification":
        tasks = TOY_TASKS_CLASSIFICATION
    else:
        tasks = args.tasks

    predictions = get_openml_predictions(
        model=model, tasks=tasks, max_n_features=args.max_n_features, max_n_instances=args.max_n_instances,
        classification=(args.model_type == "classification"), cache_directory=args.cache_directory
    )

    for dataset_name, (y_true, y_pred, y_proba) in predictions.items():
        if args.model_type == "classification":
            acc = balanced_accuracy_score(y_true, y_pred)
            auc = roc_auc_score(y_true, y_proba, multi_class='ovr')
            print(f"Dataset: {dataset_name} | ROC AUC: {auc:.4f} | Balanced Accuracy: {acc:.4f}")
        else:
            r2 = r2_score(y_true, y_pred)
            print(f"Dataset: {dataset_name} | R2: {r2:.4f}")