Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Botorch MultiTaskGP for transfer learning #484

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `BCUT2D` encoding for `SubstanceParameter`
- Stored benchmarking results now include the Python environment and version
- `qPSTD` acquisition function
- Models with `TaskParameter`s now use BoTorch's `MultiTaskGP` model

### Changed
- Acquisition function indicator `is_mc` has been removed in favor of new indicators
Expand Down
79 changes: 64 additions & 15 deletions baybe/surrogates/gaussian_process/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from attrs.validators import instance_of
from typing_extensions import override

from baybe.parameters import TaskParameter
from baybe.parameters.base import Parameter
from baybe.searchspace.core import SearchSpace
from baybe.surrogates.base import Surrogate
Expand Down Expand Up @@ -111,6 +112,14 @@ class GaussianProcessSurrogate(Surrogate):
_model = field(init=False, default=None, eq=False)
"""The actual model."""

_task_stratified_outtransform: bool = field(default=False)
"""Should task-stratified output transform be used for multi-task model.

This is experimental and may be removed before merging to main.
Also, the StratifiedStandardise would need to be adapted to work
with multi-output models.
"""

@staticmethod
def from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate:
"""Create a Gaussian process surrogate from one of the defined presets."""
Expand Down Expand Up @@ -156,7 +165,20 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None:
input_transform = botorch.models.transforms.Normalize(
train_x.shape[-1], bounds=context.parameter_bounds, indices=numerical_idxs
)
outcome_transform = botorch.models.transforms.Standardize(train_y.shape[-1])

if context.is_multitask and self._task_stratified_outtransform:
# TODO See https://github.com/pytorch/botorch/issues/2739
if train_y.shape[-1] != 1:
raise NotImplementedError(
"Task-stratified output transform currently does not support"
+ "multiple outputs."
)
outcome_transform = botorch.models.transforms.outcome.StratifiedStandardize(
task_values=train_x[..., context.task_idx].unique().to(torch.long),
stratification_idx=context.task_idx,
)
else:
outcome_transform = botorch.models.transforms.Standardize(train_y.shape[-1])

# extract the batch shape of the training data
batch_shape = train_x.shape[:-2]
Expand All @@ -169,37 +191,64 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None:
context.searchspace, train_x, train_y
).to_gpytorch(
ard_num_dims=train_x.shape[-1] - context.n_task_dimensions,
active_dims=numerical_idxs,
batch_shape=batch_shape,
# The active_dims parameter is omitted as it is not needed for both
# - single-task SingleTaskGP: all features are used
# - multi-task MultiTaskGP: the model splits task and non-task features
# before passing them to the covariance kernel
)

# create GP covariance
if not context.is_multitask:
covar_module = base_covar_module
else:
task_covar_module = gpytorch.kernels.IndexKernel(
num_tasks=context.n_tasks,
active_dims=context.task_idx,
rank=context.n_tasks, # TODO: make controllable
)
covar_module = base_covar_module * task_covar_module

# create GP likelihood
noise_prior = _default_noise_factory(context.searchspace, train_x, train_y)
likelihood = gpytorch.likelihoods.GaussianLikelihood(
noise_prior=noise_prior[0].to_gpytorch(), batch_shape=batch_shape
)
likelihood.noise = torch.tensor([noise_prior[1]])

# Whether to use multi- or single-task model
if not context.is_multitask:
model_cls = botorch.models.SingleTaskGP
model_kwargs = {}
else:
model_cls = botorch.models.MultiTaskGP
# TODO
# It is assumed that there is only one task parameter with only
# one active value.
# One active task value is required for MultiTaskGP as else
# one posterior per task would be returned:
# https://github.com/pytorch/botorch/blob/a018a5ffbcbface6229d6c39f7ac6ef9baf5765e/botorch/models/gpytorch.py#L951
# TODO
# The below code implicitly assumes there is single task parameter,
# which is already checked in the SearchSpace.
task_param = [
p
for p in context.searchspace.discrete.parameters
if isinstance(p, TaskParameter)
][0]
if len(task_param.active_values) > 1:
raise NotImplementedError(
"Does not support multiple active task values."
)
model_kwargs = {
"task_feature": context.task_idx,
"output_tasks": [
task_param.comp_df.at[task_param.active_values[0], task_param.name]
],
"rank": context.n_tasks,
"task_covar_prior": None,
"all_tasks": task_param.comp_df[task_param.name].astype(int).to_list(),
}

# construct and fit the Gaussian process
self._model = botorch.models.SingleTaskGP(
self._model = model_cls(
train_x,
train_y,
input_transform=input_transform,
outcome_transform=outcome_transform,
mean_module=mean_module,
covar_module=covar_module,
covar_module=base_covar_module,
likelihood=likelihood,
**model_kwargs,
)

# TODO: This is still a temporary workaround to avoid overfitting seen in
Expand Down
63 changes: 63 additions & 0 deletions tests/test_transfer_learning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Tests for transfer-learning."""

from copy import deepcopy

import pandas as pd
import pytest

from baybe import Campaign
from baybe.objectives import SingleTargetObjective
from baybe.parameters import NumericalContinuousParameter, TaskParameter
from baybe.recommenders import (
BotorchRecommender,
RandomRecommender,
TwoPhaseMetaRecommender,
)
from baybe.searchspace import SearchSpace
from baybe.surrogates import GaussianProcessSurrogate
from baybe.targets import NumericalTarget
from baybe.utils.interval import Interval


@pytest.mark.parametrize(
    "task_stratified_outtransform,observed_test_data",
    [
        (True, True),
        (False, True),
        # NOTE(review): (True, False) is intentionally excluded —
        # StratifiedStandardize does not yet support recommendation without
        # data for the active task (see TODO at the bottom of this file).
        (False, False),
    ],
)
def test_recommendation(task_stratified_outtransform: bool, observed_test_data: bool):
    """Test a single BO iteration with a multi-task GP model.

    Args:
        task_stratified_outtransform: Whether the surrogate should use the
            task-stratified outcome transform.
        observed_test_data: Whether the training data contains measurements
            for the active task ``"A"`` (otherwise only task ``"B"`` data).
    """
    objective = SingleTargetObjective(target=NumericalTarget(name="y", mode="MAX"))
    parameters = [
        NumericalContinuousParameter(name="x", bounds=Interval(0, 10)),
        TaskParameter(name="task", values=("A", "B"), active_values=("A",)),
    ]
    searchspace = SearchSpace.from_product(parameters=parameters)
    lookup = pd.DataFrame(
        {
            "x": [1.0, 2.0, 3.0, 4.0],
            "y": [1.0, 2.0, 3.0, 4.0],
            "task": ["A", "A", "B", "B"] if observed_test_data else ["B"] * 4,
        }
    )
    # The deepcopy additionally exercises that a freshly configured campaign
    # (including the surrogate settings) survives copying — presumably a
    # serializability guard; TODO confirm it is still needed.
    campaign = deepcopy(
        Campaign(
            searchspace=searchspace,
            objective=objective,
            recommender=TwoPhaseMetaRecommender(
                recommender=BotorchRecommender(
                    surrogate_model=GaussianProcessSurrogate(
                        task_stratified_outtransform=task_stratified_outtransform
                    )
                ),
                initial_recommender=RandomRecommender(),
            ),
        )
    )
    campaign.add_measurements(lookup)
    # A successful batch-of-one recommendation is the assertion: the call must
    # not raise for any of the parametrized configurations.
    _ = campaign.recommend(batch_size=1)


# TODO: Once StratifiedStandardize supports recommendation without data for
# the active task, add the missing test case (task_stratified_outtransform=True,
# observed_test_data=False) where no active-task data is added at the start.
Loading