Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
457ab02
vanilla claude stuff
jduerholt May 20, 2026
6821f42
fix(surrogates): noise_prior and noise_constraint on SingleTaskGPSurr…
R-M-Lee May 20, 2026
af1404c
roundtrip for bug
jduerholt May 20, 2026
7e7b003
generalize
jduerholt May 20, 2026
2aad969
add priors, some refactor
Jimbo994 May 20, 2026
f9a01e1
fixes and addittions
Jimbo994 May 20, 2026
2f6541e
add tests
Jimbo994 May 20, 2026
65b7152
update mutltiask gp to botorch defaults
jduerholt May 20, 2026
45fb402
Merge remote-tracking branch 'origin/main' into feature/pairwise
Jimbo994 May 20, 2026
73481ac
added docs
Jimbo994 May 20, 2026
efa3a6c
add to changelog
Jimbo994 May 20, 2026
45d5278
fix hypeopt
jduerholt May 20, 2026
27a5538
some changes
jduerholt May 21, 2026
92c648d
cleaning up the api
jduerholt May 21, 2026
bdad654
make test less flaky
jduerholt May 21, 2026
7d81545
fix seruialization
jduerholt May 21, 2026
6bcab72
changelog updated
jduerholt May 21, 2026
2b381c0
Merge branch 'hotfix/noiseprior' into feature/pairwise
Jimbo994 May 21, 2026
2b11a81
Address PR #768 review comments (#4, #6, #7, #9)
Jimbo994 May 22, 2026
bb4d1fd
Wire features_to_idx_mapper for PairwiseGPSurrogate (#5)
Jimbo994 May 22, 2026
5485c0c
Merge remote-tracking branch 'origin/main' into feature/pairwise
Jimbo994 May 22, 2026
b478663
Hoist engineered_features to BotorchSurrogate; share get_feature_indi…
Jimbo994 May 22, 2026
897abd0
rename validators
Jimbo994 May 26, 2026
e912190
Merge remote-tracking branch 'origin/main' into feature/pairwise
Jimbo994 May 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,5 @@ notebook_test_stats.csv
**/*.quarto_ipynb

**/.jupyter_cache

scripts/*
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ and this project adheres to [Pragmatic Versioning](https://github.com/experiment
- Configurable `noise_constraint` support for GP-based surrogates (`SingleTaskGP`, `MixedSingleTaskGP`, `TanimotoGP`, `MultiTaskGP`, and `RobustSingleTaskGP`) and corresponding linear/polynomial wrappers.
- Optional `initial_value` field on the `GreaterThan`, `LessThan`, and `Positive` prior constraint data models (already present on `Interval`), letting users opt-in to a warm-start of the constrained gpytorch parameter at construction time.
- Generalized NChooseK constraint support in DoE: `min_count > 0` is now supported, non-zero lower bounds (`lb > 0`) are allowed for NChooseK features, overlapping NChooseK constraints (shared features) are handled via incremental pairwise merge with consistency filtering, and `nchoosek_constraints_as_bounds` generates deactivation patterns for all activity levels `k ∈ [min_count, max_count]`.
- `PairwiseGPSurrogate`, a Gaussian process surrogate that learns a latent utility function from pairwise preference/comparison data, wrapping BoTorch's `PairwiseGP`. The pairwise likelihood is selectable via `likelihood="probit"` (default) or `"logit"`.
- `SmoothedBoxPrior` prior, and a concrete instantiable `Interval` prior constraint.
- Aggregation of duplicated experiments in the `cross_validate` method of trainable surrogates to avoid data leakage, controlled via the `aggregate` boolean flag, default `False`.

### Changed
Expand Down
1 change: 1 addition & 0 deletions _quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ website:
- docs/tutorials/advanced_examples/transfer_learning_bo.qmd
- docs/tutorials/advanced_examples/octane_number.qmd
- docs/tutorials/advanced_examples/llm_molecular.qmd
- docs/tutorials/advanced_examples/pairwise_gp.qmd
- section: "Benchmarks"
contents:
- docs/tutorials/benchmarks/index.qmd
Expand Down
37 changes: 35 additions & 2 deletions bofire/data_models/priors/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
NormalPrior,
)
from bofire.data_models.priors.prior import Prior
from bofire.data_models.priors.smoothedbox import SmoothedBoxPrior
from bofire.data_models.unions import tagged_union


Expand All @@ -32,11 +33,13 @@
LKJPrior,
LogNormalPrior,
DimensionalityScaledLogNormalPrior,
SmoothedBoxPrior,
]

AnyPrior = tagged_union(*_PRIOR_TYPES)

_PRIOR_CONSTRAINT_TYPES: list[type] = [
Interval,
NonTransformedInterval,
LogTransformedInterval,
Positive,
Expand All @@ -49,7 +52,9 @@

# these are priors that are generally applicable
# and do not depend on problem specific extra parameters
AnyGeneralPrior = tagged_union(GammaPrior, NormalPrior, LKJPrior, LogNormalPrior)
AnyGeneralPrior = tagged_union(
GammaPrior, NormalPrior, LKJPrior, LogNormalPrior, SmoothedBoxPrior
)

# default priors of interest
# botorch defaults
Expand All @@ -72,7 +77,7 @@
sd_prior=GammaPrior(concentration=2.0, rate=0.15),
)

# prior for RobustSingleTaskGPSurrogate
# priors for RobustSingleTaskGPSurrogate
ROBUSTGP_LENGTHSCALE_CONSTRAINT = partial(
NonTransformedInterval,
lower_bound=0.05,
Expand All @@ -87,6 +92,34 @@
initial_value=0.1,
)


# Priors and constraints for PairwiseGPSurrogate, based on botorch defaults.
# Each name is a `partial`, so calling it (e.g. PAIRWISEGP_LENGTHSCALE_PRIOR())
# yields a fresh data-model instance rather than a shared one.

# Gamma prior on the base-kernel lengthscale.
PAIRWISEGP_LENGTHSCALE_PRIOR = partial(
GammaPrior,
concentration=2.4,
rate=2.7,
)

# Lower-bounds the lengthscale and warm-starts it at the mode of the prior
# above: (concentration - 1) / rate = 1.4 / 2.7 ~= 0.5185.
PAIRWISEGP_LENGTHSCALE_CONSTRAINT = partial(
GreaterThan,
lower_bound=1e-4,
initial_value=0.5185, # mode of the lengthscale GammaPrior(2.4, 2.7)
)

# Smoothed uniform ("box") prior on the outputscale over [0.01, 100].
PAIRWISEGP_OUTPUTSCALE_PRIOR = partial(
SmoothedBoxPrior,
lower_bound=0.01,
upper_bound=100,
sigma=0.01,
)

# Hard interval on the outputscale; its bounds are 0.5x the prior's lower bound
# and 2x the prior's upper bound, i.e. slightly wider than the prior's box.
PAIRWISEGP_OUTPUTSCALE_CONSTRAINT = partial(
Interval,
lower_bound=5e-3,
upper_bound=200,
initial_value=1,
)

# Hvarfner priors
HVARFNER_NOISE_PRIOR = partial(LogNormalPrior, loc=-4, scale=1)
HVARFNER_LENGTHSCALE_PRIOR = DimensionalityScaledLogNormalPrior
Expand Down
6 changes: 3 additions & 3 deletions bofire/data_models/priors/interval.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Any, Literal, Optional
from typing import Literal, Optional

from pydantic import PositiveFloat, model_validator

from bofire.data_models.priors.constraint import PriorConstraint


class Interval(PriorConstraint):
"""Abstract Interval class.
"""Interval constraint on a GP hyperparameter.

It is used to define interval constraints on GP hyperparameters.

Expand All @@ -19,7 +19,7 @@ class Interval(PriorConstraint):
the raw parameter at its default (no warm-start).
"""

type: Any
type: Literal["Interval"] = "Interval"
Comment thread
jduerholt marked this conversation as resolved.
lower_bound: PositiveFloat
upper_bound: PositiveFloat
initial_value: Optional[PositiveFloat] = None
Expand Down
37 changes: 37 additions & 0 deletions bofire/data_models/priors/smoothedbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from typing import Literal

from pydantic import PositiveFloat, model_validator

from bofire.data_models.priors.prior import Prior


class SmoothedBoxPrior(Prior):
    r"""A smoothed approximation of a uniform prior.

    The docstring is a raw string so that the LaTeX commands below are kept
    verbatim (in a regular string ``\b`` would be read as a backspace).

    .. math::

        \begin{equation*}
            B = {x: a_i <= x_i <= b_i}
            d(x, B) = min_{x' in B} |x - x'|
            pdf(x) \sim exp(- d(x, B)**2 / sqrt(2 * sigma^2))
        \end{equation*}

    Attributes:
        lower_bound: lower bound of the uniform prior (``a`` in the formula).
        upper_bound: upper bound of the uniform prior (``b`` in the formula).
        sigma: the ``sigma`` of the density above; it controls how quickly the
            density decays with the distance ``d(x, B)`` outside the box.

    """

    type: Literal["SmoothedBoxPrior"] = "SmoothedBoxPrior"
    lower_bound: float
    upper_bound: float
    sigma: PositiveFloat = 0.01

    @model_validator(mode="after")
    def validate_bounds(self):
        """Ensure that ``lower_bound`` is strictly smaller than ``upper_bound``.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If ``lower_bound >= upper_bound``.
        """
        if self.lower_bound >= self.upper_bound:
            raise ValueError(
                "The lower bound must be less than the upper bound for an interval."
            )
        return self
2 changes: 2 additions & 0 deletions bofire/data_models/surrogates/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
MultiTaskGPHyperconfig,
MultiTaskGPSurrogate,
)
from bofire.data_models.surrogates.pairwise_gp import PairwiseGPSurrogate
from bofire.data_models.surrogates.polynomial import PolynomialSurrogate
from bofire.data_models.surrogates.random_forest import RandomForestSurrogate
from bofire.data_models.surrogates.robust_single_task_gp import (
Expand Down Expand Up @@ -72,6 +73,7 @@
PiecewiseLinearGPSurrogate,
AdditiveMapSaasSingleTaskGPSurrogate,
EnsembleMapSaasSingleTaskGPSurrogate,
PairwiseGPSurrogate,
)

AnyTrainableSurrogate = tagged_union(
Expand Down
11 changes: 10 additions & 1 deletion bofire/data_models/surrogates/botorch.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from typing import Type

from pydantic import Field, field_validator
from pydantic import Field, field_validator, model_validator

from bofire.data_models.domain.api import EngineeredFeatures
from bofire.data_models.domain.features import Inputs
from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.api import (
Expand Down Expand Up @@ -37,6 +38,9 @@ class BotorchSurrogate(Surrogate):
categorical_encodings: InputTransformSpecs = Field(
default_factory=dict, validate_default=True
)
engineered_features: EngineeredFeatures = Field(
default_factory=lambda: EngineeredFeatures()
)

@field_validator("input_preprocessing_specs")
@classmethod
Expand Down Expand Up @@ -99,3 +103,8 @@ def validate_categorical_encodings(cls, v, info):
v = cls._generate_default_categorical_encodings(inputs, v)
inputs._validate_transform_specs(v)
return v

# Cross-check the engineered features against this surrogate's inputs by
# delegating to `EngineeredFeatures.validate_inputs`. Registered as an "after"
# model validator and returns the instance unchanged.
@model_validator(mode="after")
def validate_engineered_features(self):
self.engineered_features.validate_inputs(self.inputs)
return self
6 changes: 1 addition & 5 deletions bofire/data_models/surrogates/deterministic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from pydantic import Field, field_validator, model_validator

from bofire.data_models.domain.api import EngineeredFeatures
from bofire.data_models.features.api import (
AnyOutput,
CategoricalInput,
Expand Down Expand Up @@ -67,9 +66,6 @@ def validate_mapping(self):

class LinearDeterministicSurrogate(BotorchSurrogate):
type: Literal["LinearDeterministicSurrogate"] = "LinearDeterministicSurrogate"
engineered_features: EngineeredFeatures = Field(
default_factory=lambda: EngineeredFeatures()
)
coefficients: Annotated[Dict[str, float], Field(min_length=1)]
intercept: float

Expand All @@ -93,7 +89,7 @@ def validate_input_types(self):

@field_validator("engineered_features")
@classmethod
def validate_engineered_features(cls, engineered_features, info):
def validate_linear_engineered_features(cls, engineered_features, info):
for feat in engineered_features.get():
if feat.n_transformed_inputs != 1:
raise ValueError(
Expand Down
68 changes: 68 additions & 0 deletions bofire/data_models/surrogates/pairwise_gp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from typing import Literal, Type

from pydantic import Field, model_validator

from bofire.data_models.features.api import AnyOutput, ContinuousOutput
from bofire.data_models.kernels.api import AnyKernel, RBFKernel, ScaleKernel
from bofire.data_models.priors.api import (
PAIRWISEGP_LENGTHSCALE_CONSTRAINT,
PAIRWISEGP_LENGTHSCALE_PRIOR,
PAIRWISEGP_OUTPUTSCALE_CONSTRAINT,
PAIRWISEGP_OUTPUTSCALE_PRIOR,
)
from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import AnyScaler, Normalize
from bofire.data_models.surrogates.trainable import TrainableSurrogate


class PairwiseGPSurrogate(BotorchSurrogate, TrainableSurrogate):
    """Pairwise Gaussian Process surrogate built on top of BoTorch's PairwiseGP.

    Fits a latent utility function from binary winner/loser pair labels. The
    `preferences` DataFrame references rows of the standard BoFire `experiments`
    DataFrame by `labcode`; the single output feature represents the latent
    utility inferred from those comparisons.

    Attributes:
        kernel: Covariance kernel. Must be a ``ScaleKernel`` (enforced by
            ``validate_scalekernel``). Defaults to a ``ScaleKernel`` wrapping an
            ARD ``RBFKernel`` configured with the ``PAIRWISEGP_*`` priors and
            constraints.
        scaler: Scaler data model; defaults to ``Normalize``.
        likelihood: The pairwise likelihood linking latent-utility differences
            to preference probabilities -- ``"probit"`` (Gaussian comparison
            noise, BoTorch's default) or ``"logit"`` (logistic noise, i.e. the
            Bradley-Terry model).
    """

    type: Literal["PairwiseGPSurrogate"] = "PairwiseGPSurrogate"

    kernel: AnyKernel = Field(
        default_factory=lambda: ScaleKernel(
            base_kernel=RBFKernel(
                ard=True,
                lengthscale_prior=PAIRWISEGP_LENGTHSCALE_PRIOR(),
                lengthscale_constraint=PAIRWISEGP_LENGTHSCALE_CONSTRAINT(),
            ),
            outputscale_prior=PAIRWISEGP_OUTPUTSCALE_PRIOR(),
            outputscale_constraint=PAIRWISEGP_OUTPUTSCALE_CONSTRAINT(),
        )
    )
    scaler: AnyScaler = Field(default_factory=Normalize)
    likelihood: Literal["probit", "logit"] = "probit"

    @classmethod
    def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool:
        """Tell whether an output feature type is supported by this surrogate.

        Args:
            my_type: The output feature class to check.

        Returns:
            True if ``my_type`` is ``ContinuousOutput`` or a subclass of it,
            False otherwise.
        """
        # `isinstance(my_type, type(ContinuousOutput))` would only compare
        # metaclasses and therefore accept every output feature class; the
        # subclass check restricts this to continuous outputs.
        return isinstance(my_type, type) and issubclass(my_type, ContinuousOutput)

    @model_validator(mode="after")
    def validate_single_output(self):
        """Ensure that exactly one output feature is configured.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If the number of outputs is not exactly one.
        """
        if len(self.outputs) != 1:
            raise ValueError(
                "PairwiseGPSurrogate supports exactly one output (the latent utility)."
            )
        return self

    @model_validator(mode="after")
    def validate_scalekernel(self):
        """Ensure that the configured kernel is a ``ScaleKernel``.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If ``kernel`` is not a ``ScaleKernel``.
        """
        if not isinstance(self.kernel, ScaleKernel):
            raise ValueError(
                "PairwiseGPSurrogate.kernel must be a ScaleKernel "
                "(BoTorch's PairwiseGP requires the covariance module to be a ScaleKernel)."
            )
        return self
14 changes: 2 additions & 12 deletions bofire/data_models/surrogates/trainable.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from typing import Annotated, Any, Literal, Optional

import pandas as pd
from pydantic import Field, field_validator, model_validator
from pydantic import Field, field_validator

from bofire.data_models.base import BaseModel
from bofire.data_models.domain.api import Domain, EngineeredFeatures, Inputs, Outputs
from bofire.data_models.domain.api import Domain, Inputs, Outputs
from bofire.data_models.enum import RegressionMetricsEnum, UQRegressionMetricsEnum
from bofire.data_models.features.api import ContinuousOutput
from bofire.data_models.objectives.api import MaximizeObjective, MinimizeObjective
Expand Down Expand Up @@ -76,16 +76,6 @@ def _update_hyperparameters(surrogate_data, hyperparameters: pd.Series):

class TrainableSurrogate(BaseModel):
hyperconfig: Optional[Hyperconfig] = None
engineered_features: EngineeredFeatures = Field(
default_factory=lambda: EngineeredFeatures()
)

@model_validator(mode="after")
def validate_aggregations(self):
self.engineered_features.validate_inputs(
self.inputs # ty: ignore[unresolved-attribute]
)
return self

def update_hyperparameters(self, hyperparameters: pd.Series):
if self.hyperconfig is not None:
Expand Down
21 changes: 21 additions & 0 deletions bofire/priors/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,25 @@ def map_DimensionalityScaledLogNormalPrior(
)


def map_SmoothedBoxPrior(
    data_model: data_models.SmoothedBoxPrior,
    **kwargs,
) -> gpytorch.priors.smoothed_box_prior.SmoothedBoxPrior:
    """Build the gpytorch ``SmoothedBoxPrior`` described by ``data_model``.

    Args:
        data_model: Data model providing ``lower_bound``, ``upper_bound`` and
            ``sigma``.
        **kwargs: Accepted but not used by this mapper.

    Returns:
        A gpytorch ``SmoothedBoxPrior`` with ``a``/``b`` taken from the data
        model's lower/upper bound and the same ``sigma``.
    """
    prior_cls = gpytorch.priors.smoothed_box_prior.SmoothedBoxPrior
    return prior_cls(
        a=data_model.lower_bound,
        b=data_model.upper_bound,
        sigma=data_model.sigma,
    )


def map_Interval(
    data_model: data_models.Interval,
) -> gpytorch.constraints.Interval:
    """Build the gpytorch ``Interval`` constraint described by ``data_model``.

    Args:
        data_model: Data model providing ``lower_bound``, ``upper_bound`` and
            ``initial_value``.

    Returns:
        A gpytorch ``Interval`` constraint carrying the same bounds and
        initial value as the data model.
    """
    settings = {
        "lower_bound": data_model.lower_bound,
        "upper_bound": data_model.upper_bound,
        "initial_value": data_model.initial_value,
    }
    return gpytorch.constraints.Interval(**settings)


def map_NonTransformedInterval(
data_model: data_models.NonTransformedInterval,
) -> NonTransformedInterval:
Expand Down Expand Up @@ -155,6 +174,8 @@ def map_LessThan(
data_models.LKJPrior: map_LKJPrior,
data_models.LogNormalPrior: map_LogNormalPrior,
data_models.DimensionalityScaledLogNormalPrior: map_DimensionalityScaledLogNormalPrior,
data_models.SmoothedBoxPrior: map_SmoothedBoxPrior,
data_models.Interval: map_Interval,
data_models.NonTransformedInterval: map_NonTransformedInterval,
data_models.LogTransformedInterval: map_LogTransformedInterval,
data_models.Positive: map_Positive,
Expand Down
2 changes: 2 additions & 0 deletions bofire/surrogates/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
RegressionMLPEnsemble,
)
from bofire.surrogates.multi_task_gp import MultiTaskGPSurrogate
from bofire.surrogates.pairwise_gp import PairwiseGPSurrogate
from bofire.surrogates.pairwise_trainable import PairwiseTrainableSurrogate
from bofire.surrogates.random_forest import RandomForestSurrogate
from bofire.surrogates.shape import PiecewiseLinearGPSurrogate
from bofire.surrogates.single_task_gp import SingleTaskGPSurrogate
Expand Down
Loading
Loading