experimental-design · jduerholt · May 26, 2026 · May 20, 2026 · May 20, 2026 · May 20, 2026
diff --git a/.gitignore b/.gitignore
@@ -162,3 +162,5 @@ notebook_test_stats.csv
 **/*.quarto_ipynb
 
 **/.jupyter_cache
+
+scripts/*
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,9 @@ and this project adheres to [Pragmatic Versioning](https://github.com/experiment
 - `plot_gp_slice_plotly` now supports fixed input features that can be a mix of `ContinuousInput` and `CategoricalInput` (with string categorical fixed values).
 - Configurable `noise_constraint` support for GP-based surrogates (`SingleTaskGP`, `MixedSingleTaskGP`, `TanimotoGP`, and `MultiTaskGP`) and corresponding linear/polynomial wrappers.
 - Generalized NChooseK constraint support in DoE: `min_count > 0` is now supported, non-zero lower bounds (`lb > 0`) are allowed for NChooseK features, overlapping NChooseK constraints (shared features) are handled via incremental pairwise merge with consistency filtering, and `nchoosek_constraints_as_bounds` generates deactivation patterns for all activity levels `k ∈ [min_count, max_count]`.
+- `PairwiseGPSurrogate`, a Gaussian process surrogate that learns a latent utility function from pairwise preference/comparison data, wrapping BoTorch's `PairwiseGP`.
+- `SmoothedBoxPrior` prior, and a concrete instantiable `Interval` prior constraint.
+- Optional `initial_value` on the `GreaterThan` prior constraint.
 
 ### Changed
 

diff --git a/_quarto.yml b/_quarto.yml
@@ -222,6 +222,7 @@ website:
             - docs/tutorials/advanced_examples/transfer_learning_bo.qmd
             - docs/tutorials/advanced_examples/octane_number.qmd
             - docs/tutorials/advanced_examples/llm_molecular.qmd
+            - docs/tutorials/advanced_examples/pairwise_gp.qmd
         - section: "Benchmarks"
           contents:
             - docs/tutorials/benchmarks/index.qmd

diff --git a/bofire/data_models/priors/api.py b/bofire/data_models/priors/api.py
@@ -23,6 +23,7 @@
     NormalPrior,
 )
 from bofire.data_models.priors.prior import Prior
+from bofire.data_models.priors.smoothedbox import SmoothedBoxPrior
 from bofire.data_models.unions import tagged_union
 
 
@@ -32,11 +33,13 @@
     LKJPrior,
     LogNormalPrior,
     DimensionalityScaledLogNormalPrior,
+    SmoothedBoxPrior,
 ]
 
 AnyPrior = tagged_union(*_PRIOR_TYPES)
 
 _PRIOR_CONSTRAINT_TYPES: list[type] = [
+    Interval,
     NonTransformedInterval,
     LogTransformedInterval,
     Positive,
@@ -49,7 +52,9 @@
 
 # these are priors that are generally applicable
 # and do not depend on problem specific extra parameters
-AnyGeneralPrior = tagged_union(GammaPrior, NormalPrior, LKJPrior, LogNormalPrior)
+AnyGeneralPrior = tagged_union(
+    GammaPrior, NormalPrior, LKJPrior, LogNormalPrior, SmoothedBoxPrior
+)
 
 # default priors of interest
 # botorch defaults
@@ -72,7 +77,7 @@
     sd_prior=GammaPrior(concentration=2.0, rate=0.15),
 )
 
-# prior for RobustSingleTaskGPSurrogate
+# priors and constraints for RobustSingleTaskGPSurrogate
 ROBUSTGP_LENGTHSCALE_CONSTRAINT = partial(
     NonTransformedInterval,
     lower_bound=0.05,
@@ -87,6 +92,34 @@
     initial_value=0.1,
 )
 
+
+# Priors and constraints for PairwiseGPSurrogate, based on botorch defaults
+PAIRWISEGP_LENGTHSCALE_PRIOR = partial(
+    GammaPrior,
+    concentration=2.4,
+    rate=2.7,
+)
+
+PAIRWISEGP_LENGTHSCALE_CONSTRAINT = partial(
+    GreaterThan,
+    lower_bound=1e-4,
+    initial_value=0.5185,  # (2.4 - 1) / 2.7, the mode of GammaPrior(2.4, 2.7)
+)
+
+PAIRWISEGP_OUTPUTSCALE_PRIOR = partial(
+    SmoothedBoxPrior,
+    lower_bound=0.01,
+    upper_bound=100,
+    sigma=0.01,
+)
+
+PAIRWISEGP_OUTPUTSCALE_CONSTRAINT = partial(
+    Interval,
+    lower_bound=5e-3,  # hard bounds enclose the prior's box [0.01, 100]
+    upper_bound=200,
+    initial_value=1,
+)
+
 # Hvarfner priors
 HVARFNER_NOISE_PRIOR = partial(LogNormalPrior, loc=-4, scale=1)
 HVARFNER_LENGTHSCALE_PRIOR = DimensionalityScaledLogNormalPrior

diff --git a/bofire/data_models/priors/constraint.py b/bofire/data_models/priors/constraint.py
@@ -1,4 +1,4 @@
-from typing import Any, Literal
+from typing import Any, Literal, Optional
 
 from bofire.data_models.base import BaseModel
 
@@ -24,10 +24,15 @@ class GreaterThan(PriorConstraint):
 
     Attributes:
         type (Literal): A string literal to identify the class type.
+        lower_bound: The value the parameter is constrained to be greater than.
+        initial_value: Optional value the parameter is initialized to. Required
+            for numerical stability when the constraint is used untransformed
+            (e.g. as a kernel lengthscale constraint).
     """
 
     type: Literal["GreaterThan"] = "GreaterThan"
     lower_bound: float
+    initial_value: Optional[float] = None
 
 
 class LessThan(PriorConstraint):

diff --git a/bofire/data_models/priors/interval.py b/bofire/data_models/priors/interval.py
@@ -1,12 +1,12 @@
-from typing import Any, Literal
+from typing import Literal
 
 from pydantic import PositiveFloat, model_validator
 
 from bofire.data_models.priors.constraint import PriorConstraint
 
 
 class Interval(PriorConstraint):
-    """Abstract Interval class.
+    """Interval constraint on a GP hyperparameter.
 
     It is used to define interval constraints on GP hyperparameters.
 
@@ -16,7 +16,7 @@ class Interval(PriorConstraint):
         initial_value: The initial value within the interval.
     """
 
-    type: Any
+    type: Literal["Interval"] = "Interval"
     lower_bound: PositiveFloat
     upper_bound: PositiveFloat
     initial_value: PositiveFloat

diff --git a/bofire/data_models/priors/smoothedbox.py b/bofire/data_models/priors/smoothedbox.py
@@ -0,0 +1,37 @@
+from typing import Literal
+
+from pydantic import PositiveFloat, model_validator
+
+from bofire.data_models.priors.prior import Prior
+
+
+class SmoothedBoxPrior(Prior):
+    """A smoothed approximation of a uniform prior.
+
+    .. math::
+
+        \\begin{equation*}
+            B = {x: a_i <= x_i <= b_i}
+            d(x, B) = min_{x' in B} |x - x'|
+            pdf(x) \\sim exp(- d(x, B)**2 / sqrt(2 * sigma^2))
+        \\end{equation*}
+
+    Attributes:
+        lower_bound: lower bound `a` of the box; must be below `upper_bound`
+        upper_bound: upper bound `b` of the box
+        sigma: positive scale of the density decay outside the box, see pdf(x)
+    """
+
+    type: Literal["SmoothedBoxPrior"] = "SmoothedBoxPrior"
+    lower_bound: float
+    upper_bound: float
+    sigma: PositiveFloat = 0.01
+
+    @model_validator(mode="after")
+    def validate_bounds(self):
+        """Reject empty or inverted boxes (`lower_bound >= upper_bound`)."""
+        if self.lower_bound >= self.upper_bound:
+            raise ValueError(
+                "The lower bound must be less than the upper bound for an interval."
+            )
+        return self
diff --git a/bofire/data_models/surrogates/api.py b/bofire/data_models/surrogates/api.py
@@ -31,6 +31,7 @@
     MultiTaskGPHyperconfig,
     MultiTaskGPSurrogate,
 )
+from bofire.data_models.surrogates.pairwise_gp import PairwiseGPSurrogate
 from bofire.data_models.surrogates.polynomial import PolynomialSurrogate
 from bofire.data_models.surrogates.random_forest import RandomForestSurrogate
 from bofire.data_models.surrogates.robust_single_task_gp import (
@@ -72,6 +73,7 @@
     PiecewiseLinearGPSurrogate,
     AdditiveMapSaasSingleTaskGPSurrogate,
     EnsembleMapSaasSingleTaskGPSurrogate,
+    PairwiseGPSurrogate,
 )
 
 AnyTrainableSurrogate = tagged_union(

diff --git a/bofire/data_models/surrogates/pairwise_gp.py b/bofire/data_models/surrogates/pairwise_gp.py
@@ -0,0 +1,73 @@
+from typing import Literal, Type
+
+from pydantic import Field, model_validator
+
+from bofire.data_models.features.api import AnyOutput, ContinuousOutput
+from bofire.data_models.kernels.api import AnyKernel, RBFKernel, ScaleKernel
+from bofire.data_models.priors.api import (
+    PAIRWISEGP_LENGTHSCALE_CONSTRAINT,
+    PAIRWISEGP_LENGTHSCALE_PRIOR,
+    PAIRWISEGP_OUTPUTSCALE_CONSTRAINT,
+    PAIRWISEGP_OUTPUTSCALE_PRIOR,
+)
+from bofire.data_models.surrogates.botorch import BotorchSurrogate
+from bofire.data_models.surrogates.scaler import AnyScaler, Normalize
+from bofire.data_models.surrogates.trainable import TrainableSurrogate
+
+
+class PairwiseGPSurrogate(BotorchSurrogate, TrainableSurrogate):
+    """Pairwise Gaussian Process surrogate built on top of BoTorch's PairwiseGP.
+
+    Fits a latent utility function from binary winner/loser pair labels. The
+    `preferences` DataFrame references rows of the standard BoFire `experiments`
+    DataFrame by `labcode`; the single output feature represents the latent
+    utility inferred from those comparisons.
+    """
+
+    type: Literal["PairwiseGPSurrogate"] = "PairwiseGPSurrogate"
+
+    kernel: AnyKernel = Field(
+        default_factory=lambda: ScaleKernel(
+            base_kernel=RBFKernel(
+                ard=True,
+                lengthscale_prior=PAIRWISEGP_LENGTHSCALE_PRIOR(),
+                lengthscale_constraint=PAIRWISEGP_LENGTHSCALE_CONSTRAINT(),
+            ),
+            outputscale_prior=PAIRWISEGP_OUTPUTSCALE_PRIOR(),
+            outputscale_constraint=PAIRWISEGP_OUTPUTSCALE_CONSTRAINT(),
+        )
+    )
+    scaler: AnyScaler = Field(default_factory=Normalize)
+
+    @classmethod
+    def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool:
+        """Return True only if `my_type` is `ContinuousOutput` or a subclass of it."""
+        # isinstance(my_type, type(ContinuousOutput)) would only test the metaclass.
+        return isinstance(my_type, type) and issubclass(my_type, ContinuousOutput)
+
+    @model_validator(mode="after")
+    def validate_single_output(self):
+        if len(self.outputs) != 1:
+            raise ValueError(
+                "PairwiseGPSurrogate supports exactly one output (the latent utility)."
+            )
+        return self
+
+    @model_validator(mode="after")
+    def validate_scalekernel(self):
+        if not isinstance(self.kernel, ScaleKernel):
+            raise ValueError(
+                "PairwiseGPSurrogate.kernel must be a ScaleKernel "
+                "(BoTorch's PairwiseGP requires the covariance module to be a ScaleKernel)."
+            )
+        return self
+
+    @model_validator(mode="after")
+    def validate_scaler_features(self):
+        if self.scaler and len(self.scaler.features) > 0:
+            missing = list(set(self.scaler.features) - set(self.inputs.get_keys()))
+            if missing:
+                raise ValueError(
+                    f"The following features are missing in inputs: {missing}"
+                )
+        return self
diff --git a/bofire/priors/mapper.py b/bofire/priors/mapper.py
@@ -103,6 +103,25 @@ def map_DimensionalityScaledLogNormalPrior(
     )
 
 
+def map_SmoothedBoxPrior(
+    data_model: data_models.SmoothedBoxPrior,
+    **kwargs,
+) -> gpytorch.priors.smoothed_box_prior.SmoothedBoxPrior:
+    """Map the data model to gpytorch's SmoothedBoxPrior; `kwargs` are ignored."""
+    lo, hi, sigma = data_model.lower_bound, data_model.upper_bound, data_model.sigma
+    return gpytorch.priors.smoothed_box_prior.SmoothedBoxPrior(a=lo, b=hi, sigma=sigma)
+
+
+def map_Interval(data_model: data_models.Interval) -> gpytorch.constraints.Interval:
+    """Translate an `Interval` data model into gpytorch's `Interval` constraint."""
+    lower, upper = data_model.lower_bound, data_model.upper_bound
+    return gpytorch.constraints.Interval(
+        lower_bound=lower,
+        upper_bound=upper,
+        initial_value=data_model.initial_value,
+    )
+
+
 def map_NonTransformedInterval(
     data_model: data_models.NonTransformedInterval,
 ) -> NonTransformedInterval:
@@ -132,7 +151,11 @@ def map_Positive(
 def map_GreaterThan(
     data_model: data_models.GreaterThan,
 ) -> GreaterThan:
-    return GreaterThan(lower_bound=data_model.lower_bound, transform=None)
+    return GreaterThan(
+        lower_bound=data_model.lower_bound,
+        transform=None,  # constraint is applied without a parameter transform
+        initial_value=data_model.initial_value,  # optional; None if not set
+    )
 
 
 def map_LessThan(
@@ -147,6 +170,8 @@ def map_LessThan(
     data_models.LKJPrior: map_LKJPrior,
     data_models.LogNormalPrior: map_LogNormalPrior,
     data_models.DimensionalityScaledLogNormalPrior: map_DimensionalityScaledLogNormalPrior,
+    data_models.SmoothedBoxPrior: map_SmoothedBoxPrior,
+    data_models.Interval: map_Interval,
     data_models.NonTransformedInterval: map_NonTransformedInterval,
     data_models.LogTransformedInterval: map_LogTransformedInterval,
     data_models.Positive: map_Positive,

diff --git a/bofire/surrogates/api.py b/bofire/surrogates/api.py
@@ -12,6 +12,8 @@
     RegressionMLPEnsemble,
 )
 from bofire.surrogates.multi_task_gp import MultiTaskGPSurrogate
+from bofire.surrogates.pairwise_gp import PairwiseGPSurrogate
+from bofire.surrogates.pairwise_trainable import PairwiseTrainableSurrogate
 from bofire.surrogates.random_forest import RandomForestSurrogate
 from bofire.surrogates.shape import PiecewiseLinearGPSurrogate
 from bofire.surrogates.single_task_gp import SingleTaskGPSurrogate

diff --git a/bofire/surrogates/mapper.py b/bofire/surrogates/mapper.py
@@ -18,6 +18,7 @@
 )
 from bofire.surrogates.mlp import ClassificationMLPEnsemble, RegressionMLPEnsemble
 from bofire.surrogates.multi_task_gp import MultiTaskGPSurrogate
+from bofire.surrogates.pairwise_gp import PairwiseGPSurrogate
 from bofire.surrogates.random_forest import RandomForestSurrogate
 from bofire.surrogates.robust_single_task_gp import RobustSingleTaskGPSurrogate
 from bofire.surrogates.shape import PiecewiseLinearGPSurrogate
@@ -99,6 +100,7 @@ def map_MixedSingleTaskGPSurrogate(
     data_models.CategoricalDeterministicSurrogate: CategoricalDeterministicSurrogate,
     data_models.AdditiveMapSaasSingleTaskGPSurrogate: AdditiveMapSaasSingleTaskGPSurrogate,
     data_models.EnsembleMapSaasSingleTaskGPSurrogate: EnsembleMapSaasSingleTaskGPSurrogate,
+    data_models.PairwiseGPSurrogate: PairwiseGPSurrogate,
 }
Original file line number	Diff line number	Diff line change
Expand Up		@@ -162,3 +162,5 @@ notebook_test_stats.csv
		*/.quarto_ipynb

		**/.jupyter_cache

		scripts/*