diff --git a/.gitignore b/.gitignore index f91d209dc..febccef61 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,5 @@ notebook_test_stats.csv **/*.quarto_ipynb **/.jupyter_cache + +scripts/* diff --git a/CHANGELOG.md b/CHANGELOG.md index cd2151ae2..34eced53b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ and this project adheres to [Pragmatic Versioning](https://github.com/experiment - Configurable `noise_constraint` support for GP-based surrogates (`SingleTaskGP`, `MixedSingleTaskGP`, `TanimotoGP`, `MultiTaskGP`, and `RobustSingleTaskGP`) and corresponding linear/polynomial wrappers. - Optional `initial_value` field on the `GreaterThan`, `LessThan`, and `Positive` prior constraint data models (already present on `Interval`), letting users opt-in to a warm-start of the constrained gpytorch parameter at construction time. - Generalized NChooseK constraint support in DoE: `min_count > 0` is now supported, non-zero lower bounds (`lb > 0`) are allowed for NChooseK features, overlapping NChooseK constraints (shared features) are handled via incremental pairwise merge with consistency filtering, and `nchoosek_constraints_as_bounds` generates deactivation patterns for all activity levels `k ∈ [min_count, max_count]`. +- `PairwiseGPSurrogate`, a Gaussian process surrogate that learns a latent utility function from pairwise preference/comparison data, wrapping BoTorch's `PairwiseGP`. The pairwise likelihood is selectable via `likelihood="probit"` (default) or `"logit"`. +- `SmoothedBoxPrior` prior, and a concrete instantiable `Interval` prior constraint. - Aggregation of duplicated experiments in the `cross_validate` method of trainable surrogates to avoid data leakage, controlled via the `aggregate` boolean flag, default `False`. ### Changed diff --git a/_quarto.yml b/_quarto.yml index 3a3478c4c..16fd1c03b 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -222,6 +222,7 @@ website: - docs/tutorials/advanced_examples/transfer_learning_bo.qmd - docs/tutorials/advanced_examples/octane_number.qmd - docs/tutorials/advanced_examples/llm_molecular.qmd + - docs/tutorials/advanced_examples/pairwise_gp.qmd - section: "Benchmarks" contents: - docs/tutorials/benchmarks/index.qmd diff --git a/bofire/data_models/priors/api.py b/bofire/data_models/priors/api.py index 7bdec0ffd..2c0db0772 100644 --- a/bofire/data_models/priors/api.py +++ b/bofire/data_models/priors/api.py @@ -23,6 +23,7 @@ NormalPrior, ) from bofire.data_models.priors.prior import Prior +from bofire.data_models.priors.smoothedbox import SmoothedBoxPrior from bofire.data_models.unions import tagged_union @@ -32,11 +33,13 @@ LKJPrior, LogNormalPrior, DimensionalityScaledLogNormalPrior, + SmoothedBoxPrior, ] AnyPrior = tagged_union(*_PRIOR_TYPES) _PRIOR_CONSTRAINT_TYPES: list[type] = [ + Interval, NonTransformedInterval, LogTransformedInterval, Positive, @@ -49,7 +52,9 @@ # these are priors that are generally applicable # and do not depend on problem specific extra parameters -AnyGeneralPrior = tagged_union(GammaPrior, NormalPrior, LKJPrior, LogNormalPrior) +AnyGeneralPrior = tagged_union( + GammaPrior, NormalPrior, LKJPrior, LogNormalPrior, SmoothedBoxPrior +) # default priors of interest # botorch defaults @@ -72,7 +77,7 @@ sd_prior=GammaPrior(concentration=2.0, rate=0.15), ) -# prior for RobustSingleTaskGPSurrogate +# priors for RobustSingleTaskGPSurrogate ROBUSTGP_LENGTHSCALE_CONSTRAINT = partial( NonTransformedInterval, lower_bound=0.05, @@ -87,6 +92,34 @@ initial_value=0.1, ) + +# Priors for PairwiseGPSurrogate based on botorch defaults +PAIRWISEGP_LENGTHSCALE_PRIOR = partial( + GammaPrior, + concentration=2.4, + rate=2.7, +) + +PAIRWISEGP_LENGTHSCALE_CONSTRAINT = partial( + GreaterThan, + lower_bound=1e-4, + initial_value=0.5185, # mode of the lengthscale GammaPrior(2.4, 2.7) +) + +PAIRWISEGP_OUTPUTSCALE_PRIOR = partial( + SmoothedBoxPrior, + lower_bound=0.01, + upper_bound=100, + sigma=0.01, +) + +PAIRWISEGP_OUTPUTSCALE_CONSTRAINT = partial( + Interval, + lower_bound=5e-3, + upper_bound=200, + initial_value=1, +) + # Hvarfner priors HVARFNER_NOISE_PRIOR = partial(LogNormalPrior, loc=-4, scale=1) HVARFNER_LENGTHSCALE_PRIOR = DimensionalityScaledLogNormalPrior diff --git a/bofire/data_models/priors/interval.py b/bofire/data_models/priors/interval.py index 7fafc8699..3295b24fb 100644 --- a/bofire/data_models/priors/interval.py +++ b/bofire/data_models/priors/interval.py @@ -1,4 +1,4 @@ -from typing import Any, Literal, Optional +from typing import Literal, Optional from pydantic import PositiveFloat, model_validator @@ -6,7 +6,7 @@ class Interval(PriorConstraint): - """Abstract Interval class. + """Interval constraint on a GP hyperparameter. It is used to define interval constraints on GP hyperparameters. @@ -19,7 +19,7 @@ class Interval(PriorConstraint): the raw parameter at its default (no warm-start). """ - type: Any + type: Literal["Interval"] = "Interval" lower_bound: PositiveFloat upper_bound: PositiveFloat initial_value: Optional[PositiveFloat] = None diff --git a/bofire/data_models/priors/smoothedbox.py b/bofire/data_models/priors/smoothedbox.py new file mode 100644 index 000000000..34189f58e --- /dev/null +++ b/bofire/data_models/priors/smoothedbox.py @@ -0,0 +1,37 @@ +from typing import Literal + +from pydantic import PositiveFloat, model_validator + +from bofire.data_models.priors.prior import Prior + + +class SmoothedBoxPrior(Prior): + """A smoothed approximation of a uniform prior. + + .. math:: + + \begin{equation*} + B = {x: a_i <= x_i <= b_i} + d(x, B) = min_{x' in B} |x - x'| + pdf(x) \\sim exp(- d(x, B)**2 / sqrt(2 * sigma^2)) + \\end{equation*} + + Attributes: + lower_bound: lower bound of the uniform prior + upper_bound: upper bound of the uniform prior + sigma: related to pdf(x) + + """ + + type: Literal["SmoothedBoxPrior"] = "SmoothedBoxPrior" + lower_bound: float + upper_bound: float + sigma: PositiveFloat = 0.01 + + @model_validator(mode="after") + def validate_bounds(self): + if self.lower_bound >= self.upper_bound: + raise ValueError( + "The lower bound must be less than the upper bound for an interval." + ) + return self diff --git a/bofire/data_models/surrogates/api.py b/bofire/data_models/surrogates/api.py index 062c1b18a..892d5b8fb 100644 --- a/bofire/data_models/surrogates/api.py +++ b/bofire/data_models/surrogates/api.py @@ -31,6 +31,7 @@ MultiTaskGPHyperconfig, MultiTaskGPSurrogate, ) +from bofire.data_models.surrogates.pairwise_gp import PairwiseGPSurrogate from bofire.data_models.surrogates.polynomial import PolynomialSurrogate from bofire.data_models.surrogates.random_forest import RandomForestSurrogate from bofire.data_models.surrogates.robust_single_task_gp import ( @@ -72,6 +73,7 @@ PiecewiseLinearGPSurrogate, AdditiveMapSaasSingleTaskGPSurrogate, EnsembleMapSaasSingleTaskGPSurrogate, + PairwiseGPSurrogate, ) AnyTrainableSurrogate = tagged_union( diff --git a/bofire/data_models/surrogates/botorch.py b/bofire/data_models/surrogates/botorch.py index e36d99cc4..2f0e129f8 100644 --- a/bofire/data_models/surrogates/botorch.py +++ b/bofire/data_models/surrogates/botorch.py @@ -1,7 +1,8 @@ from typing import Type -from pydantic import Field, field_validator +from pydantic import Field, field_validator, model_validator +from bofire.data_models.domain.api import EngineeredFeatures from bofire.data_models.domain.features import Inputs from bofire.data_models.enum import CategoricalEncodingEnum from bofire.data_models.features.api import ( @@ -37,6 +38,9 @@ class BotorchSurrogate(Surrogate): categorical_encodings: InputTransformSpecs = Field( default_factory=dict, validate_default=True ) + engineered_features: EngineeredFeatures = Field( + default_factory=lambda: EngineeredFeatures() + ) @field_validator("input_preprocessing_specs") @classmethod @@ -99,3 +103,8 @@ def validate_categorical_encodings(cls, v, info): v = cls._generate_default_categorical_encodings(inputs, v) inputs._validate_transform_specs(v) return v + + @model_validator(mode="after") + def validate_engineered_features(self): + self.engineered_features.validate_inputs(self.inputs) + return self diff --git a/bofire/data_models/surrogates/deterministic.py b/bofire/data_models/surrogates/deterministic.py index 31cee404e..02abb8b03 100644 --- a/bofire/data_models/surrogates/deterministic.py +++ b/bofire/data_models/surrogates/deterministic.py @@ -2,7 +2,6 @@ from pydantic import Field, field_validator, model_validator -from bofire.data_models.domain.api import EngineeredFeatures from bofire.data_models.features.api import ( AnyOutput, CategoricalInput, @@ -67,9 +66,6 @@ def validate_mapping(self): class LinearDeterministicSurrogate(BotorchSurrogate): type: Literal["LinearDeterministicSurrogate"] = "LinearDeterministicSurrogate" - engineered_features: EngineeredFeatures = Field( - default_factory=lambda: EngineeredFeatures() - ) coefficients: Annotated[Dict[str, float], Field(min_length=1)] intercept: float @@ -93,7 +89,7 @@ def validate_input_types(self): @field_validator("engineered_features") @classmethod - def validate_engineered_features(cls, engineered_features, info): + def validate_linear_engineered_features(cls, engineered_features, info): for feat in engineered_features.get(): if feat.n_transformed_inputs != 1: raise ValueError( diff --git a/bofire/data_models/surrogates/pairwise_gp.py b/bofire/data_models/surrogates/pairwise_gp.py new file mode 100644 index 000000000..3ba3c2a66 --- /dev/null +++ b/bofire/data_models/surrogates/pairwise_gp.py @@ -0,0 +1,68 @@ +from typing import Literal, Type + +from pydantic import Field, model_validator + +from bofire.data_models.features.api import AnyOutput, ContinuousOutput +from bofire.data_models.kernels.api import AnyKernel, RBFKernel, ScaleKernel +from bofire.data_models.priors.api import ( + PAIRWISEGP_LENGTHSCALE_CONSTRAINT, + PAIRWISEGP_LENGTHSCALE_PRIOR, + PAIRWISEGP_OUTPUTSCALE_CONSTRAINT, + PAIRWISEGP_OUTPUTSCALE_PRIOR, +) +from bofire.data_models.surrogates.botorch import BotorchSurrogate +from bofire.data_models.surrogates.scaler import AnyScaler, Normalize +from bofire.data_models.surrogates.trainable import TrainableSurrogate + + +class PairwiseGPSurrogate(BotorchSurrogate, TrainableSurrogate): + """Pairwise Gaussian Process surrogate built on top of BoTorch's PairwiseGP. + + Fits a latent utility function from binary winner/loser pair labels. The + `preferences` DataFrame references rows of the standard BoFire `experiments` + DataFrame by `labcode`; the single output feature represents the latent + utility inferred from those comparisons. + + Attributes: + likelihood: The pairwise likelihood linking latent-utility differences + to preference probabilities -- ``"probit"`` (Gaussian comparison + noise, BoTorch's default) or ``"logit"`` (logistic noise, i.e. the + Bradley-Terry model). + """ + + type: Literal["PairwiseGPSurrogate"] = "PairwiseGPSurrogate" + + kernel: AnyKernel = Field( + default_factory=lambda: ScaleKernel( + base_kernel=RBFKernel( + ard=True, + lengthscale_prior=PAIRWISEGP_LENGTHSCALE_PRIOR(), + lengthscale_constraint=PAIRWISEGP_LENGTHSCALE_CONSTRAINT(), + ), + outputscale_prior=PAIRWISEGP_OUTPUTSCALE_PRIOR(), + outputscale_constraint=PAIRWISEGP_OUTPUTSCALE_CONSTRAINT(), + ) + ) + scaler: AnyScaler = Field(default_factory=Normalize) + likelihood: Literal["probit", "logit"] = "probit" + + @classmethod + def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool: + return isinstance(my_type, type(ContinuousOutput)) + + @model_validator(mode="after") + def validate_single_output(self): + if len(self.outputs) != 1: + raise ValueError( + "PairwiseGPSurrogate supports exactly one output (the latent utility)." + ) + return self + + @model_validator(mode="after") + def validate_scalekernel(self): + if not isinstance(self.kernel, ScaleKernel): + raise ValueError( + "PairwiseGPSurrogate.kernel must be a ScaleKernel " + "(BoTorch's PairwiseGP requires the covariance module to be a ScaleKernel)." + ) + return self diff --git a/bofire/data_models/surrogates/trainable.py b/bofire/data_models/surrogates/trainable.py index 631b0ddcd..1fd722beb 100644 --- a/bofire/data_models/surrogates/trainable.py +++ b/bofire/data_models/surrogates/trainable.py @@ -1,10 +1,10 @@ from typing import Annotated, Any, Literal, Optional import pandas as pd -from pydantic import Field, field_validator, model_validator +from pydantic import Field, field_validator from bofire.data_models.base import BaseModel -from bofire.data_models.domain.api import Domain, EngineeredFeatures, Inputs, Outputs +from bofire.data_models.domain.api import Domain, Inputs, Outputs from bofire.data_models.enum import RegressionMetricsEnum, UQRegressionMetricsEnum from bofire.data_models.features.api import ContinuousOutput from bofire.data_models.objectives.api import MaximizeObjective, MinimizeObjective @@ -76,16 +76,6 @@ def _update_hyperparameters(surrogate_data, hyperparameters: pd.Series): class TrainableSurrogate(BaseModel): hyperconfig: Optional[Hyperconfig] = None - engineered_features: EngineeredFeatures = Field( - default_factory=lambda: EngineeredFeatures() - ) - - @model_validator(mode="after") - def validate_aggregations(self): - self.engineered_features.validate_inputs( - self.inputs # ty: ignore[unresolved-attribute] - ) - return self def update_hyperparameters(self, hyperparameters: pd.Series): if self.hyperconfig is not None: diff --git a/bofire/priors/mapper.py b/bofire/priors/mapper.py index f0b2a5eb9..d2f3d8e91 100644 --- a/bofire/priors/mapper.py +++ b/bofire/priors/mapper.py @@ -103,6 +103,25 @@ def map_DimensionalityScaledLogNormalPrior( ) +def map_SmoothedBoxPrior( + data_model: data_models.SmoothedBoxPrior, + **kwargs, +) -> gpytorch.priors.smoothed_box_prior.SmoothedBoxPrior: + return gpytorch.priors.smoothed_box_prior.SmoothedBoxPrior( + a=data_model.lower_bound, b=data_model.upper_bound, sigma=data_model.sigma + ) + + +def map_Interval( + data_model: data_models.Interval, +) -> gpytorch.constraints.Interval: + return gpytorch.constraints.Interval( + lower_bound=data_model.lower_bound, + upper_bound=data_model.upper_bound, + initial_value=data_model.initial_value, + ) + + def map_NonTransformedInterval( data_model: data_models.NonTransformedInterval, ) -> NonTransformedInterval: @@ -155,6 +174,8 @@ def map_LessThan( data_models.LKJPrior: map_LKJPrior, data_models.LogNormalPrior: map_LogNormalPrior, data_models.DimensionalityScaledLogNormalPrior: map_DimensionalityScaledLogNormalPrior, + data_models.SmoothedBoxPrior: map_SmoothedBoxPrior, + data_models.Interval: map_Interval, data_models.NonTransformedInterval: map_NonTransformedInterval, data_models.LogTransformedInterval: map_LogTransformedInterval, data_models.Positive: map_Positive, diff --git a/bofire/surrogates/api.py b/bofire/surrogates/api.py index dd3b149df..100c2f2f0 100644 --- a/bofire/surrogates/api.py +++ b/bofire/surrogates/api.py @@ -12,6 +12,8 @@ RegressionMLPEnsemble, ) from bofire.surrogates.multi_task_gp import MultiTaskGPSurrogate +from bofire.surrogates.pairwise_gp import PairwiseGPSurrogate +from bofire.surrogates.pairwise_trainable import PairwiseTrainableSurrogate from bofire.surrogates.random_forest import RandomForestSurrogate from bofire.surrogates.shape import PiecewiseLinearGPSurrogate from bofire.surrogates.single_task_gp import SingleTaskGPSurrogate diff --git a/bofire/surrogates/botorch.py b/bofire/surrogates/botorch.py index 83dc18187..28bf250b8 100644 --- a/bofire/surrogates/botorch.py +++ b/bofire/surrogates/botorch.py @@ -40,6 +40,7 @@ def __init__( self.categorical_encodings: InputTransformSpecs = ( data_model.categorical_encodings ) + self.engineered_features = data_model.engineered_features super().__init__(data_model=data_model, **kwargs) def _predict(self, transformed_X: pd.DataFrame): @@ -101,24 +102,6 @@ def loads(self, data: str): buffer = io.BytesIO(base64.b64decode(data.encode())) self.model = torch.load(buffer, weights_only=False) - -class TrainableBotorchSurrogate(BotorchSurrogate, TrainableSurrogate): - def __init__( - self, - data_model: TrainableDataModel, - input_transform: Optional[Union[InputTransform, None]] = None, - **kwargs, - ): - self.scaler = data_model.scaler - self.output_scaler = data_model.output_scaler - self.engineered_features = data_model.engineered_features - self._input_transform: Union[InputTransform, None] = input_transform - super().__init__(data_model=data_model, **kwargs) - - @property - def re_init_kwargs(self) -> dict: - return {"input_transform": self._input_transform} - def get_feature_indices( self, feature_keys: List[str], @@ -126,6 +109,10 @@ def get_feature_indices( """Returns the indices of the specified features (both original and engineered) after applying all input transforms. + Used as the ``features_to_idx_mapper`` when mapping feature-specific + kernels, so a kernel restricted to a subset of feature keys can resolve + them to tensor column indices. + Args: feature_keys: The feature keys to get the indices for. @@ -160,6 +147,23 @@ def get_feature_indices( ) return indices + +class TrainableBotorchSurrogate(BotorchSurrogate, TrainableSurrogate): + def __init__( + self, + data_model: TrainableDataModel, + input_transform: Optional[Union[InputTransform, None]] = None, + **kwargs, + ): + self.scaler = data_model.scaler + self.output_scaler = data_model.output_scaler + self._input_transform: Union[InputTransform, None] = input_transform + super().__init__(data_model=data_model, **kwargs) + + @property + def re_init_kwargs(self) -> dict: + return {"input_transform": self._input_transform} + def _fit(self, X: pd.DataFrame, Y: pd.DataFrame, **kwargs): if self._input_transform is None: self._input_transform = get_input_transform( diff --git a/bofire/surrogates/deterministic.py b/bofire/surrogates/deterministic.py index 3871534c2..217134279 100644 --- a/bofire/surrogates/deterministic.py +++ b/bofire/surrogates/deterministic.py @@ -22,7 +22,6 @@ def __init__( ): self.intercept = data_model.intercept self.coefficients = data_model.coefficients - self.engineered_features = data_model.engineered_features super().__init__(data_model=data_model, **kwargs) self.model = AffineDeterministicModel( b=data_model.intercept, diff --git a/bofire/surrogates/mapper.py b/bofire/surrogates/mapper.py index 8270b070e..895f4bd74 100644 --- a/bofire/surrogates/mapper.py +++ b/bofire/surrogates/mapper.py @@ -18,6 +18,7 @@ ) from bofire.surrogates.mlp import ClassificationMLPEnsemble, RegressionMLPEnsemble from bofire.surrogates.multi_task_gp import MultiTaskGPSurrogate +from bofire.surrogates.pairwise_gp import PairwiseGPSurrogate from bofire.surrogates.random_forest import RandomForestSurrogate from bofire.surrogates.robust_single_task_gp import RobustSingleTaskGPSurrogate from bofire.surrogates.shape import PiecewiseLinearGPSurrogate @@ -99,6 +100,7 @@ def map_MixedSingleTaskGPSurrogate( data_models.CategoricalDeterministicSurrogate: CategoricalDeterministicSurrogate, data_models.AdditiveMapSaasSingleTaskGPSurrogate: AdditiveMapSaasSingleTaskGPSurrogate, data_models.EnsembleMapSaasSingleTaskGPSurrogate: EnsembleMapSaasSingleTaskGPSurrogate, + data_models.PairwiseGPSurrogate: PairwiseGPSurrogate, } diff --git a/bofire/surrogates/pairwise_gp.py b/bofire/surrogates/pairwise_gp.py new file mode 100644 index 000000000..fd18a992d --- /dev/null +++ b/bofire/surrogates/pairwise_gp.py @@ -0,0 +1,73 @@ +from typing import Dict, Optional + +import botorch +import torch +from botorch.fit import fit_gpytorch_mll +from botorch.models.likelihoods.pairwise import ( + PairwiseLogitLikelihood, + PairwiseProbitLikelihood, +) +from botorch.models.pairwise_gp import PairwiseLaplaceMarginalLogLikelihood +from botorch.models.transforms.input import InputTransform + +import bofire.kernels.api as kernels +from bofire.data_models.surrogates.api import PairwiseGPSurrogate as DataModel +from bofire.surrogates.botorch import BotorchSurrogate +from bofire.surrogates.pairwise_trainable import PairwiseTrainableSurrogate + + +# maps the serializable likelihood name to the BoTorch PairwiseLikelihood class +PAIRWISE_LIKELIHOODS = { + "probit": PairwiseProbitLikelihood, + "logit": PairwiseLogitLikelihood, +} + + +class PairwiseGPSurrogate(BotorchSurrogate, PairwiseTrainableSurrogate): + def __init__( + self, + data_model: DataModel, + **kwargs, + ): + self.kernel = data_model.kernel + self.scaler = data_model.scaler + self.likelihood = data_model.likelihood + super().__init__(data_model=data_model, **kwargs) + + model: Optional[botorch.models.PairwiseGP] = None + training_specs: Dict = {} + + def _fit_pairwise( + self, + datapoints: torch.Tensor, + comparisons: torch.Tensor, + input_transform: Optional[InputTransform] = None, + **kwargs, + ): + if input_transform is not None: + n_dim = input_transform(datapoints).shape[-1] + else: + n_dim = datapoints.shape[-1] + + self.model = botorch.models.PairwiseGP( + datapoints=datapoints, + comparisons=comparisons, + likelihood=PAIRWISE_LIKELIHOODS[self.likelihood](), + covar_module=kernels.map( + self.kernel, + batch_shape=torch.Size(), + active_dims=list(range(n_dim)), + features_to_idx_mapper=self.get_feature_indices, + ), + input_transform=input_transform, + ) + + mll = PairwiseLaplaceMarginalLogLikelihood( + likelihood=self.model.likelihood, + model=self.model, + ) + fit_gpytorch_mll(mll, options=self.training_specs, max_attempts=50) + + # `_predict` is inherited from BotorchSurrogate: it calls + # `posterior(X, observation_noise=True)`, and PairwiseGP ignores + # `observation_noise` (verified in scripts/pairwise_gp_checks.py). diff --git a/bofire/surrogates/pairwise_trainable.py b/bofire/surrogates/pairwise_trainable.py new file mode 100644 index 000000000..801cbc2d1 --- /dev/null +++ b/bofire/surrogates/pairwise_trainable.py @@ -0,0 +1,170 @@ +import warnings +from abc import ABC, abstractmethod +from typing import Callable, Dict, Optional + +import numpy as np +import pandas as pd +import torch +from botorch.models.transforms.input import InputTransform + +from bofire.data_models.domain.api import EngineeredFeatures +from bofire.data_models.domain.features import Inputs, Outputs +from bofire.data_models.surrogates.scaler import AnyScaler +from bofire.data_models.types import InputTransformSpecs +from bofire.surrogates.utils import get_input_transform +from bofire.utils.torch_tools import tkwargs + + +class PairwiseTrainableSurrogate(ABC): + """Mixin for surrogates that train on pairwise preference data. + + Structurally parallel to :class:`bofire.surrogates.trainable.TrainableSurrogate` + but with a different fit signature: ``fit(experiments, preferences)`` instead + of ``fit(experiments)``. The ``preferences`` DataFrame carries the label + signal; ``experiments`` provides the candidate designs referenced by + ``labcode``. + """ + + # These attributes are provided by Surrogate / BotorchSurrogate via + # multiple inheritance. + inputs: Inputs + outputs: Outputs + predict: Callable[..., pd.DataFrame] + input_preprocessing_specs: InputTransformSpecs + categorical_encodings: InputTransformSpecs + scaler: AnyScaler + engineered_features: EngineeredFeatures + + PREFERENCE_COLUMNS = ("labcode_A", "labcode_B", "preference") + + def fit( + self, + experiments: pd.DataFrame, + preferences: pd.DataFrame, + options: Optional[Dict] = None, + ): + """Fit the pairwise surrogate to preference data. + + Args: + experiments: DataFrame with input columns plus a ``labcode`` column. + Output columns are ignored if present. + preferences: DataFrame with exactly the columns ``labcode_A``, + ``labcode_B``, ``preference``. ``preference`` is a float in + ``[-1, 1]``; sign determines the winner (``>0`` = A wins, + ``<0`` = B wins), magnitude is currently discarded, ``==0`` + rows are dropped as ties. + options: Additional keyword arguments forwarded to ``_fit_pairwise``. + """ + # validate experiment inputs (skip outputs validation: pairwise GP has + # no observed Y values in the experiments DataFrame). + experiments = self.inputs.validate_experiments(experiments, strict=False) + + if "labcode" not in experiments.columns: + raise ValueError( + "PairwiseGPSurrogate requires a 'labcode' column on experiments " + "to reference preferences." + ) + + if experiments["labcode"].duplicated().any(): + dups = ( + experiments.loc[ + experiments["labcode"].duplicated(keep=False), "labcode" + ] + .unique() + .tolist() + ) + raise ValueError( + f"Duplicate labcodes in experiments: {sorted(dups)}. " + "PairwiseGPSurrogate requires unique labcodes." + ) + + expected_cols = set(self.PREFERENCE_COLUMNS) + missing = expected_cols - set(preferences.columns) + if missing: + raise ValueError( + f"`preferences` is missing required columns: {sorted(missing)}. " + f"Expected at least {sorted(expected_cols)}." + ) + + valid_labcodes = set(experiments["labcode"].tolist()) + ref_labcodes = set(preferences["labcode_A"].tolist()) | set( + preferences["labcode_B"].tolist() + ) + unknown = ref_labcodes - valid_labcodes + if unknown: + raise ValueError( + "`preferences` references labcodes not present in experiments: " + f"{sorted(unknown)}." + ) + + # sign conversion: drop ties (preference == 0) + pref_values = preferences["preference"].astype(float) + tie_mask = pref_values == 0.0 + n_ties = int(tie_mask.sum()) + if n_ties > 0: + warnings.warn( + f"Dropping {n_ties} pair(s) with preference == 0 (ties).", + stacklevel=2, + ) + preferences = preferences.loc[~tie_mask].reset_index(drop=True) + + if len(preferences) == 0: + raise ValueError("No valid pairs remain after dropping ties.") + + # build idx_map: labcode -> position in datapoints tensor + idx_map = { + labcode: i for i, labcode in enumerate(experiments["labcode"].tolist()) + } + + # winner/loser indices from sign of preference + pref_signs = preferences["preference"].astype(float).to_numpy() + labcode_A = preferences["labcode_A"].to_numpy() + labcode_B = preferences["labcode_B"].to_numpy() + winners = np.where(pref_signs > 0, labcode_A, labcode_B) + losers = np.where(pref_signs > 0, labcode_B, labcode_A) + winner_idx = np.array([idx_map[w] for w in winners], dtype=np.int64) + loser_idx = np.array([idx_map[loser] for loser in losers], dtype=np.int64) + comparisons = torch.from_numpy(np.stack([winner_idx, loser_idx], axis=1)).to( + dtype=torch.long + ) + + # datapoints tensor (float64), pre-transformed via BoFire's + # categorical preprocessing; BoTorch's input_transform is applied + # internally by the model. + X = experiments[self.inputs.get_keys()] + transformed_X = self.inputs.transform(X, self.input_preprocessing_specs) + datapoints = torch.from_numpy(transformed_X.values).to(**tkwargs) + + input_transform = get_input_transform( + inputs=self.inputs, + engineered_features=self.engineered_features, + scaler_type=self.scaler, + categorical_encodings=self.categorical_encodings, + X=X, + ) + + options = options or {} + self._fit_pairwise( + datapoints=datapoints, + comparisons=comparisons, + input_transform=input_transform, + **options, + ) + + @abstractmethod + def _fit_pairwise( + self, + datapoints: torch.Tensor, + comparisons: torch.Tensor, + input_transform: Optional[InputTransform] = None, + **kwargs, + ): + """Fit the underlying pairwise model. + + Args: + datapoints: Unique candidate features, shape ``(n, d)``, float64. + comparisons: Long tensor of shape ``(m, 2)`` where each row is + ``[winner_idx, loser_idx]`` into ``datapoints``. + input_transform: Optional BoTorch input transform to attach to + the underlying model. + """ diff --git a/docs/tutorials/advanced_examples/pairwise_gp.qmd b/docs/tutorials/advanced_examples/pairwise_gp.qmd new file mode 100644 index 000000000..b8a32bc00 --- /dev/null +++ b/docs/tutorials/advanced_examples/pairwise_gp.qmd @@ -0,0 +1,191 @@ +--- +title: Preference Learning with the Pairwise GP +jupyter: python3 +--- + +Not every objective can be read off a number line. Sometimes the only signal +available is a *comparison*: a taste panel says cake A is nicer than cake B, a +chemist judges one crystallisation cleaner than another, a user clicks one +layout over another. The quantity actually being compared — a latent *utility* — +is never observed directly. + +BoFire's `PairwiseGPSurrogate` learns that latent utility from pairwise +comparison data. It wraps BoTorch's `PairwiseGP`, a Gaussian process whose +likelihood models the probability that one design is preferred over another. + +This tutorial fits a `PairwiseGPSurrogate` to synthetic preference data and +checks that it recovers the hidden utility. + +## Imports + +```{python} +import warnings + +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt +from scipy.stats import kendalltau + +import bofire.surrogates.api as surrogates +from bofire.data_models.domain.api import Inputs, Outputs +from bofire.data_models.features.api import ContinuousInput, ContinuousOutput +from bofire.data_models.surrogates.api import PairwiseGPSurrogate + +warnings.filterwarnings("ignore") +rng = np.random.default_rng(42) +``` + +## A synthetic preference problem + +We work on a two-dimensional input space. The *latent utility* — the thing an +expert implicitly scores when they pick a winner — is a smooth bump that peaks +near `(0.7, 0.3)`. In a real preference experiment this function is unknown; we +define it here only so we can check what the surrogate learned. + +```{python} +def latent_utility(X: np.ndarray) -> np.ndarray: + """Ground-truth utility. Unobserved in a real preference experiment.""" + peak = np.array([0.7, 0.3]) + return -np.sum((X - peak) ** 2, axis=-1) +``` + +We draw a handful of candidate designs and give each a unique `labcode` — the +identifier BoFire uses to link a design to the comparisons it appears in. + +```{python} +n_candidates = 40 +X = rng.random((n_candidates, 2)) +labcodes = [f"design_{i:02d}" for i in range(n_candidates)] + +experiments = pd.DataFrame(X, columns=["x_1", "x_2"]) +experiments["labcode"] = labcodes +experiments.head() +``` + +Now we simulate an expert comparing random pairs of designs. The expert prefers +whichever design has the higher latent utility, but their judgement is noisy, so +they occasionally pick the worse one. + +```{python} +def make_comparisons(n_comparisons: int) -> pd.DataFrame: + rows = [] + for _ in range(n_comparisons): + i, j = rng.choice(n_candidates, size=2, replace=False) + # noisy utilities -> the expert occasionally prefers the worse design + u_i = latent_utility(X[i]) + rng.normal(scale=0.05) + u_j = latent_utility(X[j]) + rng.normal(scale=0.05) + winner, loser = (i, j) if u_i > u_j else (j, i) + # record the winner in slot A or B at random: the *sign* of + # `preference` carries the label, not which slot the winner sits in + if rng.random() < 0.5: + rows.append((labcodes[winner], labcodes[loser], 1.0)) # A preferred + else: + rows.append((labcodes[loser], labcodes[winner], -1.0)) # B preferred + return pd.DataFrame(rows, columns=["labcode_A", "labcode_B", "preference"]) + + +preferences = make_comparisons(n_comparisons=120) +preferences.head() +``` + +## Setting up the surrogate + +A `PairwiseGPSurrogate` needs an input space and exactly one output — the latent +utility it will infer. + +```{python} +inputs = Inputs( + features=[ + ContinuousInput(key="x_1", bounds=(0.0, 1.0)), + ContinuousInput(key="x_2", bounds=(0.0, 1.0)), + ] +) +outputs = Outputs(features=[ContinuousOutput(key="utility")]) + +surrogate_data = PairwiseGPSurrogate(inputs=inputs, outputs=outputs) +surrogate = surrogates.map(surrogate_data) +``` + +Unlike a standard surrogate, `fit` takes *two* DataFrames — the designs and the +comparisons between them: + +```{python} +surrogate.fit(experiments, preferences) +surrogate.is_fitted +``` + +## Inspecting the learned utility + +`predict` returns the posterior mean (`utility_pred`) and standard deviation +(`utility_sd`) of the latent utility on new designs. Pairwise data pins down the +*ranking* of designs, not the absolute utility values — the GP recovers utility +only up to an arbitrary scale and offset — so we score it with the Kendall-Tau +rank correlation against the true utility. + +```{python} +test_X = rng.random((500, 2)) +test_df = pd.DataFrame(test_X, columns=["x_1", "x_2"]) +predictions = surrogate.predict(test_df) + +true_utility = latent_utility(test_X) +tau = kendalltau(predictions["utility_pred"], true_utility).correlation +print(f"Kendall-Tau rank correlation vs. true utility: {tau:.3f}") +``` + +A correlation close to 1 means the surrogate ranks unseen designs almost exactly +as the latent utility would. We can see this directly by plotting the learned +posterior mean next to the ground truth. + +```{python} +fig, axes = plt.subplots(1, 2, figsize=(11, 4.5)) + +grid = np.linspace(0, 1, 60) +gx, gy = np.meshgrid(grid, grid) +grid_df = pd.DataFrame({"x_1": gx.ravel(), "x_2": gy.ravel()}) + +true_grid = latent_utility(grid_df.to_numpy()).reshape(gx.shape) +pred_grid = surrogate.predict(grid_df)["utility_pred"].to_numpy().reshape(gx.shape) + +for ax, surface, title in [ + (axes[0], true_grid, "True latent utility"), + (axes[1], pred_grid, "Learned posterior mean"), +]: + contour = ax.contourf(gx, gy, surface, levels=20, cmap="viridis") + ax.scatter( + X[:, 0], X[:, 1], c="white", edgecolors="black", s=25, + label="compared designs", + ) + ax.set(xlabel="x_1", ylabel="x_2", title=title) + fig.colorbar(contour, ax=ax) +axes[0].legend(loc="upper left") +plt.tight_layout() +plt.show() +``` + +Both surfaces peak in the same region: from comparisons alone, the surrogate has +located where utility is highest. The absolute contour values differ — pairwise +learning identifies the latent utility only up to a monotone transform — but the +*ranking*, which is what matters for optimization, is recovered. + +## The preference encoding + +The `preferences` DataFrame must have exactly these three columns: + +| column | meaning | +|---|---| +| `labcode_A` | first design in the comparison (must appear in `experiments`) | +| `labcode_B` | second design in the comparison (must appear in `experiments`) | +| `preference` | sign marks the winner: `> 0` → A preferred, `< 0` → B preferred | + +Only the **sign** of `preference` is used — its magnitude is currently ignored. +A `preference` of exactly `0` denotes a tie; tied rows are dropped with a +warning. Because the slot (A vs. B) is just bookkeeping, swapping `labcode_A` +with `labcode_B` and flipping the sign of `preference` describes the very same +comparison and yields the same fit. + +## Where to go next + +`PairwiseGPSurrogate` is the modelling building block for preference-based +Bayesian optimization: combine it with an acquisition function such as BoTorch's +*Expected Utility of the Best Option* (EUBO) to decide which pair of designs to +put in front of the expert next. diff --git a/tests/bofire/data_models/specs/kernels.py b/tests/bofire/data_models/specs/kernels.py index d28cf7cbb..d404a1aa7 100644 --- a/tests/bofire/data_models/specs/kernels.py +++ b/tests/bofire/data_models/specs/kernels.py @@ -5,6 +5,7 @@ GammaPrior, LogNormalPrior, NonTransformedInterval, + SmoothedBoxPrior, ) from tests.bofire.data_models.specs.prior_constraints import specs as prior_constraints from tests.bofire.data_models.specs.priors import specs as priors @@ -283,6 +284,14 @@ .model_dump(), }, ) +specs.add_valid( + kernels.ScaleKernel, + lambda: { + "base_kernel": specs.valid(kernels.RBFKernel).obj().model_dump(), + "outputscale_prior": priors.valid(SmoothedBoxPrior).obj().model_dump(), + "outputscale_constraint": None, + }, +) specs.add_valid( kernels.AdditiveKernel, lambda: { diff --git a/tests/bofire/data_models/specs/prior_constraints.py b/tests/bofire/data_models/specs/prior_constraints.py index c8dc2c3e6..5ca63e7d9 100644 --- a/tests/bofire/data_models/specs/prior_constraints.py +++ b/tests/bofire/data_models/specs/prior_constraints.py @@ -89,3 +89,43 @@ error=ValueError, message="The initial value must be less than or equal to the upper bound.", ) + +specs.add_valid( + priors.Interval, + lambda: { + "lower_bound": 0.005, + "upper_bound": 200.0, + "initial_value": 1.0, + }, +) + +specs.add_valid( + priors.Interval, + lambda: { + "lower_bound": 0.005, + "upper_bound": 200.0, + "initial_value": None, + }, +) + +specs.add_invalid( + priors.Interval, + lambda: { + "lower_bound": 2.0, + "upper_bound": 1.0, + "initial_value": 1.5, + }, + error=ValueError, + message="The lower bound must be less than the upper bound for an interval.", +) + +specs.add_invalid( + priors.Interval, + lambda: { + "lower_bound": 1.0, + "upper_bound": 10.0, + "initial_value": 11.0, + }, + error=ValueError, + message="The initial value must be within the bounds of the interval.", +) diff --git a/tests/bofire/data_models/specs/priors.py b/tests/bofire/data_models/specs/priors.py index b65088bda..8989497a5 100644 --- a/tests/bofire/data_models/specs/priors.py +++ b/tests/bofire/data_models/specs/priors.py @@ -105,3 +105,31 @@ "scale_scaling": random.random(), }, ) + + +specs.add_valid( + priors.SmoothedBoxPrior, + lambda: { + "lower_bound": 0.01, + "upper_bound": 100.0, + "sigma": 0.01, + }, +) + +specs.add_invalid( + priors.SmoothedBoxPrior, + lambda: {"lower_bound": 4.0, "upper_bound": 1.0, "sigma": 0.01}, + error=ValueError, + message="The lower bound must be less than the upper bound for an interval.", +) + +for sigma in [-1.0, 0.0]: + specs.add_invalid( + priors.SmoothedBoxPrior, + lambda sigma=sigma: { + "lower_bound": 0.01, + "upper_bound": 100.0, + "sigma": sigma, + }, + error=ValidationError, + ) diff --git a/tests/bofire/data_models/specs/surrogates.py b/tests/bofire/data_models/specs/surrogates.py index 97443acdf..66491967d 100644 --- a/tests/bofire/data_models/specs/surrogates.py +++ b/tests/bofire/data_models/specs/surrogates.py @@ -15,12 +15,17 @@ HammingDistanceKernel, InfiniteWidthBNNKernel, MaternKernel, + RBFKernel, ScaleKernel, TanimotoKernel, WassersteinKernel, ) from bofire.data_models.molfeatures.api import Fingerprints from bofire.data_models.priors.api import ( + PAIRWISEGP_LENGTHSCALE_CONSTRAINT, + PAIRWISEGP_LENGTHSCALE_PRIOR, + PAIRWISEGP_OUTPUTSCALE_CONSTRAINT, + PAIRWISEGP_OUTPUTSCALE_PRIOR, ROBUSTGP_LENGTHSCALE_CONSTRAINT, ROBUSTGP_OUTPUTSCALE_CONSTRAINT, THREESIX_LENGTHSCALE_PRIOR, @@ -731,6 +736,7 @@ ).model_dump(), "input_preprocessing_specs": {"x_cat": CategoricalEncodingEnum.ORDINAL}, "categorical_encodings": {"x_cat": CategoricalEncodingEnum.ORDINAL}, + "engineered_features": EngineeredFeatures().model_dump(), "mapping": {"a": 0.1, "b": 0.2, "c": 1.0}, "dump": None, }, @@ -1180,3 +1186,97 @@ error=ValueError, message="Feature keys do not match input keys.", ) + +specs.add_valid( + models.PairwiseGPSurrogate, + lambda: { + "inputs": Inputs( + features=[ + ContinuousInput(key="a", bounds=[0, 1]), + ContinuousInput(key="b", bounds=[0, 1]), + ], + ).model_dump(), + "outputs": Outputs( + features=[features.valid(ContinuousOutput).obj()], + ).model_dump(), + "kernel": ScaleKernel( + base_kernel=RBFKernel( + ard=True, + lengthscale_prior=PAIRWISEGP_LENGTHSCALE_PRIOR(), + lengthscale_constraint=PAIRWISEGP_LENGTHSCALE_CONSTRAINT(), + ), + outputscale_prior=PAIRWISEGP_OUTPUTSCALE_PRIOR(), + outputscale_constraint=PAIRWISEGP_OUTPUTSCALE_CONSTRAINT(), + ).model_dump(), + "scaler": Normalize().model_dump(), + "likelihood": "probit", + "engineered_features": EngineeredFeatures().model_dump(), + "hyperconfig": None, + "input_preprocessing_specs": {}, + "categorical_encodings": {}, + "dump": None, + }, +) + +specs.add_valid( + models.PairwiseGPSurrogate, + lambda: { + "inputs": Inputs( + features=[ + ContinuousInput(key="a", bounds=[0, 1]), + ContinuousInput(key="b", bounds=[0, 1]), + ], + ).model_dump(), + "outputs": Outputs( + features=[features.valid(ContinuousOutput).obj()], + ).model_dump(), + "kernel": ScaleKernel( + base_kernel=RBFKernel( + ard=True, + lengthscale_prior=PAIRWISEGP_LENGTHSCALE_PRIOR(), + lengthscale_constraint=PAIRWISEGP_LENGTHSCALE_CONSTRAINT(), + ), + outputscale_prior=PAIRWISEGP_OUTPUTSCALE_PRIOR(), + outputscale_constraint=PAIRWISEGP_OUTPUTSCALE_CONSTRAINT(), + ).model_dump(), + "scaler": Normalize().model_dump(), + "likelihood": "logit", + "engineered_features": EngineeredFeatures().model_dump(), + "hyperconfig": None, + "input_preprocessing_specs": {}, + "categorical_encodings": {}, + "dump": None, + }, +) + +specs.add_invalid( + models.PairwiseGPSurrogate, + lambda: { + "inputs": Inputs( + features=[ContinuousInput(key="a", bounds=[0, 1])], + ).model_dump(), + "outputs": Outputs( + features=[ + ContinuousOutput(key="y1"), + ContinuousOutput(key="y2"), + ], + ).model_dump(), + }, + error=ValueError, + message="PairwiseGPSurrogate supports exactly one output", +) + +specs.add_invalid( + models.PairwiseGPSurrogate, + lambda: { + "inputs": Inputs( + features=[ContinuousInput(key="a", bounds=[0, 1])], + ).model_dump(), + "outputs": Outputs( + features=[features.valid(ContinuousOutput).obj()], + ).model_dump(), + "kernel": RBFKernel(ard=True).model_dump(), + }, + error=ValueError, + message="PairwiseGPSurrogate.kernel must be a ScaleKernel", +) diff --git a/tests/bofire/surrogates/test_pairwise_gp.py b/tests/bofire/surrogates/test_pairwise_gp.py new file mode 100644 index 000000000..bcce2b02d --- /dev/null +++ b/tests/bofire/surrogates/test_pairwise_gp.py @@ -0,0 +1,301 @@ +import numpy as np +import pandas as pd +import pytest +from botorch.models.likelihoods.pairwise import ( + PairwiseLogitLikelihood, + PairwiseProbitLikelihood, +) +from botorch.models.pairwise_gp import PairwiseGP +from pandas.testing import assert_frame_equal +from scipy.stats import kendalltau + +import bofire.surrogates.api as surrogates +from bofire.data_models.domain.api import Inputs, Outputs +from bofire.data_models.features.api import ( + CategoricalInput, + ContinuousInput, + ContinuousOutput, +) +from bofire.data_models.kernels.api import ( + AdditiveKernel, + HammingDistanceKernel, + RBFKernel, + ScaleKernel, +) +from bofire.data_models.surrogates.api import PairwiseGPSurrogate + + +DIM = 3 + + +def _utility(X: np.ndarray) -> np.ndarray: + """Latent ground-truth utility: weighted sum of the inputs.""" + return X @ np.sqrt(np.arange(1, X.shape[1] + 1)) + + +def _make_domain(): + inputs = Inputs( + features=[ + ContinuousInput(key=f"x_{i + 1}", bounds=(0.0, 1.0)) for i in range(DIM) + ] + ) + outputs = Outputs(features=[ContinuousOutput(key="utility")]) + return inputs, outputs + + +def _make_data(n_points: int = 30, n_comparisons: int = 80, seed: int = 0): + """Build (experiments, preferences) plus the true utility for scoring. + + Each comparison places the winner in slot A or B at random, so the *sign* + of `preference` (not its slot) carries the label. + """ + rng = np.random.default_rng(seed) + X = rng.random((n_points, DIM)) + utility = _utility(X) + labcodes = [f"cand_{i}" for i in range(n_points)] + experiments = pd.DataFrame(X, columns=[f"x_{i + 1}" for i in range(DIM)]) + experiments["labcode"] = labcodes + + rows = [] + for _ in range(n_comparisons): + i, j = rng.choice(n_points, size=2, replace=False) + winner, loser = (i, j) if utility[i] > utility[j] else (j, i) + if rng.random() < 0.5: + rows.append((labcodes[winner], labcodes[loser], 1.0)) # A preferred + else: + rows.append((labcodes[loser], labcodes[winner], -1.0)) # B preferred + preferences = pd.DataFrame(rows, columns=["labcode_A", "labcode_B", "preference"]) + return experiments, preferences, utility + + +def test_pairwise_gp_fit_and_predict(): + inputs, outputs = _make_domain() + experiments, preferences, utility = _make_data() + + surrogate = surrogates.map(PairwiseGPSurrogate(inputs=inputs, outputs=outputs)) + + # predicting before fitting raises + with pytest.raises(ValueError, match="not fitted"): + surrogate.predict(experiments[[f"x_{i + 1}" for i in range(DIM)]]) + + surrogate.fit(experiments, preferences) + assert surrogate.is_fitted + assert isinstance(surrogate.model, PairwiseGP) + + preds = surrogate.predict(experiments[[f"x_{i + 1}" for i in range(DIM)]]) + assert list(preds.columns) == ["utility_pred", "utility_sd"] + assert len(preds) == len(experiments) + assert preds["utility_sd"].ge(0).all() + assert np.isfinite(preds.to_numpy()).all() + + # the surrogate should recover the latent utility ranking + corr = kendalltau(preds["utility_pred"].to_numpy(), utility).correlation + assert corr > 0.8 + + +def test_pairwise_gp_sign_convention_is_consistent(): + """Flipping every A/B slot and every preference sign must not change the fit.""" + inputs, outputs = _make_domain() + experiments, preferences, _ = _make_data() + + flipped = pd.DataFrame( + { + "labcode_A": preferences["labcode_B"], + "labcode_B": preferences["labcode_A"], + "preference": -preferences["preference"], + } + ) + + test_X = experiments[[f"x_{i + 1}" for i in range(DIM)]] + + surrogate = surrogates.map(PairwiseGPSurrogate(inputs=inputs, outputs=outputs)) + surrogate.fit(experiments, preferences) + preds = surrogate.predict(test_X) + + surrogate_flipped = surrogates.map( + PairwiseGPSurrogate(inputs=inputs, outputs=outputs) + ) + surrogate_flipped.fit(experiments, flipped) + preds_flipped = surrogate_flipped.predict(test_X) + + assert_frame_equal(preds, preds_flipped) + + +def test_pairwise_gp_dump_and_load(): + inputs, outputs = _make_domain() + experiments, preferences, _ = _make_data() + test_X = experiments[[f"x_{i + 1}" for i in range(DIM)]] + + surrogate = surrogates.map(PairwiseGPSurrogate(inputs=inputs, outputs=outputs)) + surrogate.fit(experiments, preferences) + preds = surrogate.predict(test_X) + dump = surrogate.dumps() + + reloaded = surrogates.map(PairwiseGPSurrogate(inputs=inputs, outputs=outputs)) + reloaded.loads(dump) + assert reloaded.is_fitted + assert_frame_equal(preds, reloaded.predict(test_X)) + + +def test_pairwise_gp_requires_labcode_column(): + inputs, outputs = _make_domain() + experiments, preferences, _ = _make_data() + surrogate = surrogates.map(PairwiseGPSurrogate(inputs=inputs, outputs=outputs)) + with pytest.raises(ValueError, match="labcode"): + surrogate.fit(experiments.drop(columns=["labcode"]), preferences) + + +def test_pairwise_gp_rejects_duplicate_labcodes(): + inputs, outputs = _make_domain() + experiments, preferences, _ = _make_data() + experiments.loc[1, "labcode"] = experiments.loc[0, "labcode"] + surrogate = surrogates.map(PairwiseGPSurrogate(inputs=inputs, outputs=outputs)) + with pytest.raises(ValueError, match="Duplicate labcodes"): + surrogate.fit(experiments, preferences) + + +def test_pairwise_gp_rejects_missing_preference_columns(): + inputs, outputs = _make_domain() + experiments, preferences, _ = _make_data() + surrogate = surrogates.map(PairwiseGPSurrogate(inputs=inputs, outputs=outputs)) + with pytest.raises(ValueError, match="missing required columns"): + surrogate.fit(experiments, preferences.drop(columns=["preference"])) + + +def test_pairwise_gp_rejects_unknown_labcode(): + inputs, outputs = _make_domain() + experiments, preferences, _ = _make_data() + preferences.loc[0, "labcode_A"] = "does_not_exist" + surrogate = surrogates.map(PairwiseGPSurrogate(inputs=inputs, outputs=outputs)) + with pytest.raises(ValueError, match="not present in experiments"): + surrogate.fit(experiments, preferences) + + +def test_pairwise_gp_drops_ties_with_warning(): + inputs, outputs = _make_domain() + experiments, preferences, _ = _make_data() + preferences.loc[0, "preference"] = 0.0 + surrogate = surrogates.map(PairwiseGPSurrogate(inputs=inputs, outputs=outputs)) + with pytest.warns(UserWarning, match="ties"): + surrogate.fit(experiments, preferences) + assert surrogate.is_fitted + + +def test_pairwise_gp_all_ties_raises(): + inputs, outputs = _make_domain() + experiments, preferences, _ = _make_data() + preferences["preference"] = 0.0 + surrogate = surrogates.map(PairwiseGPSurrogate(inputs=inputs, outputs=outputs)) + with pytest.warns(UserWarning, match="ties"): + with pytest.raises(ValueError, match="No valid pairs"): + surrogate.fit(experiments, preferences) + + +def test_pairwise_gp_data_model_validation(): + inputs, _ = _make_domain() + + # exactly one output is required + with pytest.raises(ValueError, match="exactly one output"): + PairwiseGPSurrogate( + inputs=inputs, + outputs=Outputs( + features=[ContinuousOutput(key="y1"), ContinuousOutput(key="y2")] + ), + ) + + # the kernel must be a ScaleKernel + with pytest.raises(ValueError, match="must be a ScaleKernel"): + PairwiseGPSurrogate( + inputs=inputs, + outputs=Outputs(features=[ContinuousOutput(key="utility")]), + kernel=RBFKernel(ard=True), + ) + + +@pytest.mark.parametrize( + "likelihood, expected_cls", + [ + ("probit", PairwiseProbitLikelihood), + ("logit", PairwiseLogitLikelihood), + ], +) +def test_pairwise_gp_likelihood(likelihood, expected_cls): + """Both pairwise likelihoods fit and map to the right BoTorch class.""" + inputs, outputs = _make_domain() + experiments, preferences, utility = _make_data() + + surrogate = surrogates.map( + PairwiseGPSurrogate(inputs=inputs, outputs=outputs, likelihood=likelihood) + ) + surrogate.fit(experiments, preferences) + + assert isinstance(surrogate.model.likelihood, expected_cls) + + preds = surrogate.predict(experiments[[f"x_{i + 1}" for i in range(DIM)]]) + corr = kendalltau(preds["utility_pred"].to_numpy(), utility).correlation + assert corr > 0.8 + + +def test_pairwise_gp_likelihood_default_is_probit(): + inputs, outputs = _make_domain() + assert PairwiseGPSurrogate(inputs=inputs, outputs=outputs).likelihood == "probit" + + +def test_pairwise_gp_feature_specific_kernels(): + """A kernel built from feature-specific sub-kernels over a mixed + continuous/categorical space fits and predicts. + + Exercises the `features_to_idx_mapper` wiring: each sub-kernel is restricted + to a subset of feature keys, which must resolve to the right columns of the + (categorically encoded) datapoints tensor. + """ + rng = np.random.default_rng(0) + n = 30 + cats = ["a", "b", "c"] + cat_offset = {"a": 0.0, "b": 0.6, "c": 1.2} + cat_vals = rng.choice(cats, size=n) + X = rng.random((n, 2)) + utility = X @ np.array([1.0, 2.0]) + np.array([cat_offset[c] for c in cat_vals]) + labcodes = [f"cand_{i}" for i in range(n)] + + experiments = pd.DataFrame(X, columns=["x_1", "x_2"]) + experiments["cat"] = cat_vals + experiments["labcode"] = labcodes + + rows = [] + for _ in range(80): + i, j = rng.choice(n, size=2, replace=False) + winner, loser = (i, j) if utility[i] > utility[j] else (j, i) + rows.append((labcodes[winner], labcodes[loser], 1.0)) + preferences = pd.DataFrame(rows, columns=["labcode_A", "labcode_B", "preference"]) + + inputs = Inputs( + features=[ + ContinuousInput(key="x_1", bounds=(0.0, 1.0)), + ContinuousInput(key="x_2", bounds=(0.0, 1.0)), + CategoricalInput(key="cat", categories=cats), + ] + ) + outputs = Outputs(features=[ContinuousOutput(key="utility")]) + # an RBF restricted to the continuous features + a Hamming kernel on the + # categorical -- a kernel that is *not* the same over all inputs + kernel = ScaleKernel( + base_kernel=AdditiveKernel( + kernels=[ + RBFKernel(ard=True, features=["x_1", "x_2"]), + HammingDistanceKernel(features=["cat"]), + ] + ) + ) + + surrogate = surrogates.map( + PairwiseGPSurrogate(inputs=inputs, outputs=outputs, kernel=kernel) + ) + surrogate.fit(experiments, preferences) + assert surrogate.is_fitted + + preds = surrogate.predict(experiments[["x_1", "x_2", "cat"]]) + assert list(preds.columns) == ["utility_pred", "utility_sd"] + assert np.isfinite(preds.to_numpy()).all() + corr = kendalltau(preds["utility_pred"].to_numpy(), utility).correlation + assert corr > 0.6