Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
457ab02
vanilla claude stuff
jduerholt May 20, 2026
6821f42
fix(surrogates): noise_prior and noise_constraint on SingleTaskGPSurr…
R-M-Lee May 20, 2026
af1404c
roundtrip for bug
jduerholt May 20, 2026
7e7b003
generalize
jduerholt May 20, 2026
2aad969
add priors, some refactor
Jimbo994 May 20, 2026
f9a01e1
fixes and addittions
Jimbo994 May 20, 2026
2f6541e
add tests
Jimbo994 May 20, 2026
65b7152
update mutltiask gp to botorch defaults
jduerholt May 20, 2026
45fb402
Merge remote-tracking branch 'origin/main' into feature/pairwise
Jimbo994 May 20, 2026
73481ac
added docs
Jimbo994 May 20, 2026
efa3a6c
add to changelog
Jimbo994 May 20, 2026
45d5278
fix hypeopt
jduerholt May 20, 2026
27a5538
some changes
jduerholt May 21, 2026
92c648d
cleaning up the api
jduerholt May 21, 2026
bdad654
make test less flaky
jduerholt May 21, 2026
7d81545
fix seruialization
jduerholt May 21, 2026
6bcab72
changelog updated
jduerholt May 21, 2026
2b381c0
Merge branch 'hotfix/noiseprior' into feature/pairwise
Jimbo994 May 21, 2026
2b11a81
Address PR #768 review comments (#4, #6, #7, #9)
Jimbo994 May 22, 2026
bb4d1fd
Wire features_to_idx_mapper for PairwiseGPSurrogate (#5)
Jimbo994 May 22, 2026
5485c0c
Merge remote-tracking branch 'origin/main' into feature/pairwise
Jimbo994 May 22, 2026
b478663
Hoist engineered_features to BotorchSurrogate; share get_feature_indi…
Jimbo994 May 22, 2026
897abd0
rename validators
Jimbo994 May 26, 2026
e912190
Merge remote-tracking branch 'origin/main' into feature/pairwise
Jimbo994 May 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,5 @@ notebook_test_stats.csv
**/*.quarto_ipynb

**/.jupyter_cache

scripts/*
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ and this project adheres to [Pragmatic Versioning](https://github.com/experiment
- `plot_gp_slice_plotly` now supports fixed input features that can be a mix of `ContinuousInput` and `CategoricalInput` (with string categorical fixed values).
- Configurable `noise_constraint` support for GP-based surrogates (`SingleTaskGP`, `MixedSingleTaskGP`, `TanimotoGP`, and `MultiTaskGP`) and corresponding linear/polynomial wrappers.
- Generalized NChooseK constraint support in DoE: `min_count > 0` is now supported, non-zero lower bounds (`lb > 0`) are allowed for NChooseK features, overlapping NChooseK constraints (shared features) are handled via incremental pairwise merge with consistency filtering, and `nchoosek_constraints_as_bounds` generates deactivation patterns for all activity levels `k ∈ [min_count, max_count]`.
- `PairwiseGPSurrogate`, a Gaussian process surrogate that learns a latent utility function from pairwise preference/comparison data, wrapping BoTorch's `PairwiseGP`.
- `SmoothedBoxPrior` prior, and a concrete instantiable `Interval` prior constraint.
- Optional `initial_value` on the `GreaterThan` prior constraint.

### Changed

Expand Down
1 change: 1 addition & 0 deletions _quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ website:
- docs/tutorials/advanced_examples/transfer_learning_bo.qmd
- docs/tutorials/advanced_examples/octane_number.qmd
- docs/tutorials/advanced_examples/llm_molecular.qmd
- docs/tutorials/advanced_examples/pairwise_gp.qmd
- section: "Benchmarks"
contents:
- docs/tutorials/benchmarks/index.qmd
Expand Down
37 changes: 35 additions & 2 deletions bofire/data_models/priors/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
NormalPrior,
)
from bofire.data_models.priors.prior import Prior
from bofire.data_models.priors.smoothedbox import SmoothedBoxPrior
from bofire.data_models.unions import tagged_union


Expand All @@ -32,11 +33,13 @@
LKJPrior,
LogNormalPrior,
DimensionalityScaledLogNormalPrior,
SmoothedBoxPrior,
]

AnyPrior = tagged_union(*_PRIOR_TYPES)

_PRIOR_CONSTRAINT_TYPES: list[type] = [
Interval,
NonTransformedInterval,
LogTransformedInterval,
Positive,
Expand All @@ -49,7 +52,9 @@

# these are priors that are generally applicable
# and do not depend on problem specific extra parameters
AnyGeneralPrior = tagged_union(GammaPrior, NormalPrior, LKJPrior, LogNormalPrior)
AnyGeneralPrior = tagged_union(
GammaPrior, NormalPrior, LKJPrior, LogNormalPrior, SmoothedBoxPrior
)

# default priors of interest
# botorch defaults
Expand All @@ -72,7 +77,7 @@
sd_prior=GammaPrior(concentration=2.0, rate=0.15),
)

# prior for RobustSingleTaskGPSurrogate
# priors for RobustSingleTaskGPSurrogate
ROBUSTGP_LENGTHSCALE_CONSTRAINT = partial(
NonTransformedInterval,
lower_bound=0.05,
Expand All @@ -87,6 +92,34 @@
initial_value=0.1,
)


# Priors for PairwiseGPSurrogate based on botorch defaults
# Each constant is a `partial`, so it has to be called (e.g.
# `PAIRWISEGP_LENGTHSCALE_PRIOR()`) to obtain a fresh data model instance.

# Gamma prior on the kernel lengthscale.
PAIRWISEGP_LENGTHSCALE_PRIOR = partial(
GammaPrior,
concentration=2.4,
rate=2.7,
)

# Lower-bounds the lengthscale at 1e-4 and starts it at the mode of the
# lengthscale prior: (concentration - 1) / rate = 1.4 / 2.7 ~= 0.5185.
PAIRWISEGP_LENGTHSCALE_CONSTRAINT = partial(
GreaterThan,
lower_bound=1e-4,
initial_value=0.5185, # mode of the lengthscale GammaPrior(2.4, 2.7)
)

# Smoothed box prior on the outputscale over the box [0.01, 100].
PAIRWISEGP_OUTPUTSCALE_PRIOR = partial(
SmoothedBoxPrior,
lower_bound=0.01,
upper_bound=100,
sigma=0.01,
)

# Interval constraint on the outputscale; its bounds [5e-3, 200] enclose the
# box of the outputscale prior above, and the parameter starts at 1.
PAIRWISEGP_OUTPUTSCALE_CONSTRAINT = partial(
Interval,
lower_bound=5e-3,
upper_bound=200,
initial_value=1,
)

# Hvarfner priors
HVARFNER_NOISE_PRIOR = partial(LogNormalPrior, loc=-4, scale=1)
HVARFNER_LENGTHSCALE_PRIOR = DimensionalityScaledLogNormalPrior
Expand Down
7 changes: 6 additions & 1 deletion bofire/data_models/priors/constraint.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Literal
from typing import Any, Literal, Optional

from bofire.data_models.base import BaseModel

Expand All @@ -24,10 +24,15 @@ class GreaterThan(PriorConstraint):

Attributes:
type (Literal): A string literal to identify the class type.
lower_bound: The value the parameter is constrained to be greater than.
initial_value: Value the parameter is initialized to. The field may be
omitted, but it should be set when the constraint is used untransformed
(e.g. as a kernel lengthscale constraint), where an explicit starting
value is needed for numerical stability.
"""

type: Literal["GreaterThan"] = "GreaterThan"
lower_bound: float
initial_value: Optional[float] = None


class LessThan(PriorConstraint):
Expand Down
6 changes: 3 additions & 3 deletions bofire/data_models/priors/interval.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Any, Literal
from typing import Literal

from pydantic import PositiveFloat, model_validator

from bofire.data_models.priors.constraint import PriorConstraint


class Interval(PriorConstraint):
"""Abstract Interval class.
"""Interval constraint on a GP hyperparameter.

It is used to define interval constraints on GP hyperparameters.

Expand All @@ -16,7 +16,7 @@ class Interval(PriorConstraint):
initial_value: The initial value within the interval.
"""

type: Any
type: Literal["Interval"] = "Interval"
Comment thread
jduerholt marked this conversation as resolved.
lower_bound: PositiveFloat
upper_bound: PositiveFloat
initial_value: PositiveFloat
Expand Down
37 changes: 37 additions & 0 deletions bofire/data_models/priors/smoothedbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from typing import Literal

from pydantic import PositiveFloat, model_validator

from bofire.data_models.priors.prior import Prior


class SmoothedBoxPrior(Prior):
    r"""A smoothed approximation of a uniform prior.

    The docstring is a raw string so that the LaTeX commands below keep their
    backslashes (in a regular string ``\b`` would be read as a backspace).

    .. math::

        \begin{equation*}
            B = {x: a_i <= x_i <= b_i}
            d(x, B) = min_{x' in B} |x - x'|
            pdf(x) \sim exp(- d(x, B)**2 / sqrt(2 * sigma^2))
        \end{equation*}

    Attributes:
        type: A string literal to identify the class type.
        lower_bound: lower bound of the uniform prior
        upper_bound: upper bound of the uniform prior
        sigma: scale that controls how quickly the density decays with the
            distance ``d(x, B)`` from the box in the pdf above; must be
            positive.

    """

    type: Literal["SmoothedBoxPrior"] = "SmoothedBoxPrior"
    lower_bound: float
    upper_bound: float
    sigma: PositiveFloat = 0.01

    @model_validator(mode="after")
    def validate_bounds(self):
        """Ensure the box is non-degenerate.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If ``lower_bound`` is not strictly smaller than
                ``upper_bound``.
        """
        if self.lower_bound >= self.upper_bound:
            raise ValueError(
                "The lower bound must be less than the upper bound for an interval."
            )
        return self
2 changes: 2 additions & 0 deletions bofire/data_models/surrogates/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
MultiTaskGPHyperconfig,
MultiTaskGPSurrogate,
)
from bofire.data_models.surrogates.pairwise_gp import PairwiseGPSurrogate
from bofire.data_models.surrogates.polynomial import PolynomialSurrogate
from bofire.data_models.surrogates.random_forest import RandomForestSurrogate
from bofire.data_models.surrogates.robust_single_task_gp import (
Expand Down Expand Up @@ -72,6 +73,7 @@
PiecewiseLinearGPSurrogate,
AdditiveMapSaasSingleTaskGPSurrogate,
EnsembleMapSaasSingleTaskGPSurrogate,
PairwiseGPSurrogate,
)

AnyTrainableSurrogate = tagged_union(
Expand Down
73 changes: 73 additions & 0 deletions bofire/data_models/surrogates/pairwise_gp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from typing import Literal, Type

from pydantic import Field, model_validator

from bofire.data_models.features.api import AnyOutput, ContinuousOutput
from bofire.data_models.kernels.api import AnyKernel, RBFKernel, ScaleKernel
from bofire.data_models.priors.api import (
PAIRWISEGP_LENGTHSCALE_CONSTRAINT,
PAIRWISEGP_LENGTHSCALE_PRIOR,
PAIRWISEGP_OUTPUTSCALE_CONSTRAINT,
PAIRWISEGP_OUTPUTSCALE_PRIOR,
)
from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import AnyScaler, Normalize
from bofire.data_models.surrogates.trainable import TrainableSurrogate


class PairwiseGPSurrogate(BotorchSurrogate, TrainableSurrogate):
"""Pairwise Gaussian Process surrogate built on top of BoTorch's PairwiseGP.

Fits a latent utility function from binary winner/loser pair labels. The
`preferences` DataFrame references rows of the standard BoFire `experiments`
DataFrame by `labcode`; the single output feature represents the latent
utility inferred from those comparisons.
"""

type: Literal["PairwiseGPSurrogate"] = "PairwiseGPSurrogate"

kernel: AnyKernel = Field(
default_factory=lambda: ScaleKernel(
base_kernel=RBFKernel(
ard=True,
lengthscale_prior=PAIRWISEGP_LENGTHSCALE_PRIOR(),
lengthscale_constraint=PAIRWISEGP_LENGTHSCALE_CONSTRAINT(),
),
outputscale_prior=PAIRWISEGP_OUTPUTSCALE_PRIOR(),
outputscale_constraint=PAIRWISEGP_OUTPUTSCALE_CONSTRAINT(),
)
)
scaler: AnyScaler = Field(default_factory=Normalize)

@classmethod
def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool:
    """Tell whether the surrogate can model outputs of the given type.

    Args:
        my_type: The output feature class to check.

    Returns:
        True if ``my_type`` is ``ContinuousOutput`` or a subclass of it,
        False otherwise.
    """
    # `isinstance(my_type, type(ContinuousOutput))` compares against the
    # metaclass and is therefore true for every class sharing that metaclass,
    # not only for continuous outputs. `issubclass` checks the class itself;
    # the `isinstance(..., type)` guard keeps non-class arguments from raising.
    return isinstance(my_type, type) and issubclass(my_type, ContinuousOutput)

@model_validator(mode="after")
Comment thread
jduerholt marked this conversation as resolved.
def validate_single_output(self):
    """Check that exactly one output feature is configured.

    Returns:
        The model instance when it has exactly one output.

    Raises:
        ValueError: If the number of outputs differs from one.
    """
    n_outputs = len(self.outputs)
    if n_outputs == 1:
        return self
    raise ValueError(
        "PairwiseGPSurrogate supports exactly one output (the latent utility)."
    )

@model_validator(mode="after")
Comment thread
jduerholt marked this conversation as resolved.
def validate_scalekernel(self):
    """Check that the configured kernel is a ``ScaleKernel``.

    Returns:
        The model instance when ``kernel`` is a ``ScaleKernel``.

    Raises:
        ValueError: If ``kernel`` is any other kernel type.
    """
    if isinstance(self.kernel, ScaleKernel):
        return self
    raise ValueError(
        "PairwiseGPSurrogate.kernel must be a ScaleKernel "
        "(BoTorch's PairwiseGP requires the covariance module to be a ScaleKernel)."
    )

@model_validator(mode="after")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this? We do not have it anywhere else, or?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think, this is not needed and should also not be enforced here.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes you are right, will remove it.

def validate_scaler_features(self):
    """Check that every feature named by the scaler is an input key.

    Nothing is checked when no scaler is set or the scaler lists no features.

    Returns:
        The model instance when all scaler features are known inputs.

    Raises:
        ValueError: If the scaler references features that are not among the
            input keys.
    """
    # Guard clause: nothing to compare without a scaler feature list.
    if not self.scaler or len(self.scaler.features) == 0:
        return self
    requested = set(self.scaler.features)
    available = set(self.inputs.get_keys())
    missing_features = list(requested - available)
    if missing_features:
        raise ValueError(
            f"The following features are missing in inputs: {missing_features}"
        )
    return self
27 changes: 26 additions & 1 deletion bofire/priors/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,25 @@ def map_DimensionalityScaledLogNormalPrior(
)


def map_SmoothedBoxPrior(
    data_model: data_models.SmoothedBoxPrior,
    **kwargs,
) -> gpytorch.priors.smoothed_box_prior.SmoothedBoxPrior:
    """Build the GPyTorch smoothed box prior described by ``data_model``.

    Args:
        data_model: Data model supplying ``lower_bound``, ``upper_bound`` and
            ``sigma``.
        **kwargs: Accepted but not used by this mapper.

    Returns:
        A GPyTorch ``SmoothedBoxPrior`` with ``a``/``b`` set to the data
        model's lower/upper bound and the same ``sigma``.
    """
    prior_cls = gpytorch.priors.smoothed_box_prior.SmoothedBoxPrior
    box_lower = data_model.lower_bound
    box_upper = data_model.upper_bound
    return prior_cls(a=box_lower, b=box_upper, sigma=data_model.sigma)


def map_Interval(
    data_model: data_models.Interval,
) -> gpytorch.constraints.Interval:
    """Build the GPyTorch interval constraint described by ``data_model``.

    Args:
        data_model: Data model supplying ``lower_bound``, ``upper_bound`` and
            ``initial_value``.

    Returns:
        A GPyTorch ``Interval`` constraint carrying the same three values.
    """
    constraint_cls = gpytorch.constraints.Interval
    settings = {
        "lower_bound": data_model.lower_bound,
        "upper_bound": data_model.upper_bound,
        "initial_value": data_model.initial_value,
    }
    return constraint_cls(**settings)


def map_NonTransformedInterval(
data_model: data_models.NonTransformedInterval,
) -> NonTransformedInterval:
Expand Down Expand Up @@ -132,7 +151,11 @@ def map_Positive(
def map_GreaterThan(
data_model: data_models.GreaterThan,
) -> GreaterThan:
return GreaterThan(lower_bound=data_model.lower_bound, transform=None)
return GreaterThan(
lower_bound=data_model.lower_bound,
transform=None,
initial_value=data_model.initial_value,
)


def map_LessThan(
Expand All @@ -147,6 +170,8 @@ def map_LessThan(
data_models.LKJPrior: map_LKJPrior,
data_models.LogNormalPrior: map_LogNormalPrior,
data_models.DimensionalityScaledLogNormalPrior: map_DimensionalityScaledLogNormalPrior,
data_models.SmoothedBoxPrior: map_SmoothedBoxPrior,
data_models.Interval: map_Interval,
data_models.NonTransformedInterval: map_NonTransformedInterval,
data_models.LogTransformedInterval: map_LogTransformedInterval,
data_models.Positive: map_Positive,
Expand Down
2 changes: 2 additions & 0 deletions bofire/surrogates/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
RegressionMLPEnsemble,
)
from bofire.surrogates.multi_task_gp import MultiTaskGPSurrogate
from bofire.surrogates.pairwise_gp import PairwiseGPSurrogate
from bofire.surrogates.pairwise_trainable import PairwiseTrainableSurrogate
from bofire.surrogates.random_forest import RandomForestSurrogate
from bofire.surrogates.shape import PiecewiseLinearGPSurrogate
from bofire.surrogates.single_task_gp import SingleTaskGPSurrogate
Expand Down
2 changes: 2 additions & 0 deletions bofire/surrogates/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
)
from bofire.surrogates.mlp import ClassificationMLPEnsemble, RegressionMLPEnsemble
from bofire.surrogates.multi_task_gp import MultiTaskGPSurrogate
from bofire.surrogates.pairwise_gp import PairwiseGPSurrogate
from bofire.surrogates.random_forest import RandomForestSurrogate
from bofire.surrogates.robust_single_task_gp import RobustSingleTaskGPSurrogate
from bofire.surrogates.shape import PiecewiseLinearGPSurrogate
Expand Down Expand Up @@ -99,6 +100,7 @@ def map_MixedSingleTaskGPSurrogate(
data_models.CategoricalDeterministicSurrogate: CategoricalDeterministicSurrogate,
data_models.AdditiveMapSaasSingleTaskGPSurrogate: AdditiveMapSaasSingleTaskGPSurrogate,
data_models.EnsembleMapSaasSingleTaskGPSurrogate: EnsembleMapSaasSingleTaskGPSurrogate,
data_models.PairwiseGPSurrogate: PairwiseGPSurrogate,
}


Expand Down
Loading
Loading