Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Botorch MultiTaskGP for transfer learning #484

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `BCUT2D` encoding for `SubstanceParameter`
- Stored benchmarking results now include the Python environment and version
- `qPSTD` acquisition function
- Models with `TaskParameter`s now use BoTorch's `MultiTaskGP` model

### Changed
- Acquisition function indicator `is_mc` has been removed in favor of new indicators
Expand Down
79 changes: 64 additions & 15 deletions baybe/surrogates/gaussian_process/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from attrs.validators import instance_of
from typing_extensions import override

from baybe.parameters import TaskParameter
from baybe.parameters.base import Parameter
from baybe.searchspace.core import SearchSpace
from baybe.surrogates.base import Surrogate
Expand Down Expand Up @@ -111,6 +112,14 @@ class GaussianProcessSurrogate(Surrogate):
_model = field(init=False, default=None, eq=False)
"""The actual model."""

_task_stratified_outtransform: bool = field(default=False)
"""Should task-stratified output transform be used for multi-task model.

This is experimental and may be removed before merging to main.
Also, the StratifiedStandardise would need to be adapted to work
with multi-output models.
"""

@staticmethod
def from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate:
"""Create a Gaussian process surrogate from one of the defined presets."""
Expand Down Expand Up @@ -156,7 +165,20 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None:
input_transform = botorch.models.transforms.Normalize(
train_x.shape[-1], bounds=context.parameter_bounds, indices=numerical_idxs
)
outcome_transform = botorch.models.transforms.Standardize(train_y.shape[-1])

if context.is_multitask and self._task_stratified_outtransform:
# TODO See https://github.com/pytorch/botorch/issues/2739
if train_y.shape[-1] != 1:
raise NotImplementedError(
"Task-stratified output transform currently does not support"
+ "multiple outputs."
)
outcome_transform = botorch.models.transforms.outcome.StratifiedStandardize(
task_values=train_x[..., context.task_idx].unique().to(torch.long),
stratification_idx=context.task_idx,
)
else:
outcome_transform = botorch.models.transforms.Standardize(train_y.shape[-1])

# extract the batch shape of the training data
batch_shape = train_x.shape[:-2]
Expand All @@ -169,37 +191,64 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None:
context.searchspace, train_x, train_y
).to_gpytorch(
ard_num_dims=train_x.shape[-1] - context.n_task_dimensions,
active_dims=numerical_idxs,
batch_shape=batch_shape,
# The active_dims parameter is omitted as it is not needed for both
# - single-task SingleTaskGP: all features are used
# - multi-task MultiTaskGP: the model splits task and non-task features
# before passing them to the covariance kernel
)

# create GP covariance
if not context.is_multitask:
covar_module = base_covar_module
else:
task_covar_module = gpytorch.kernels.IndexKernel(
num_tasks=context.n_tasks,
active_dims=context.task_idx,
rank=context.n_tasks, # TODO: make controllable
)
covar_module = base_covar_module * task_covar_module

# create GP likelihood
noise_prior = _default_noise_factory(context.searchspace, train_x, train_y)
likelihood = gpytorch.likelihoods.GaussianLikelihood(
noise_prior=noise_prior[0].to_gpytorch(), batch_shape=batch_shape
)
likelihood.noise = torch.tensor([noise_prior[1]])

# Whether to use multi- or single-task model
if not context.is_multitask:
model_cls = botorch.models.SingleTaskGP
model_kwargs = {}
else:
model_cls = botorch.models.MultiTaskGP
# TODO
# It is assumed that there is only one task parameter with only
# one active value.
# One active task value is required for MultiTaskGP as else
# one posterior per task would be returned:
# https://github.com/pytorch/botorch/blob/a018a5ffbcbface6229d6c39f7ac6ef9baf5765e/botorch/models/gpytorch.py#L951
# TODO
# The below code implicitly assumes there is single task parameter,
# which is already checked in the SearchSpace.
task_param = [
p
for p in context.searchspace.discrete.parameters
if isinstance(p, TaskParameter)
][0]
if len(task_param.active_values) > 1:
raise NotImplementedError(
"Does not support multiple active task values."
)
model_kwargs = {
"task_feature": context.task_idx,
"output_tasks": [
task_param.comp_df.at[task_param.active_values[0], task_param.name]
],
"rank": context.n_tasks,
"task_covar_prior": None,
"all_tasks": task_param.comp_df[task_param.name].astype(int).to_list(),
}

# construct and fit the Gaussian process
self._model = botorch.models.SingleTaskGP(
self._model = model_cls(
train_x,
train_y,
input_transform=input_transform,
outcome_transform=outcome_transform,
mean_module=mean_module,
covar_module=covar_module,
covar_module=base_covar_module,
likelihood=likelihood,
**model_kwargs,
)

# TODO: This is still a temporary workaround to avoid overfitting seen in
Expand Down
63 changes: 63 additions & 0 deletions tests/test_transfer_learning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Tests for transfer-learning."""

from copy import deepcopy

import pandas as pd
import pytest

from baybe import Campaign
from baybe.objectives import SingleTargetObjective
from baybe.parameters import NumericalContinuousParameter, TaskParameter
from baybe.recommenders import (
BotorchRecommender,
RandomRecommender,
TwoPhaseMetaRecommender,
)
from baybe.searchspace import SearchSpace
from baybe.surrogates import GaussianProcessSurrogate
from baybe.targets import NumericalTarget
from baybe.utils.interval import Interval


@pytest.mark.parametrize(
    "task_stratified_outtransform,observed_test_data",
    [
        (True, True),
        (False, True),
        # NOTE(review): (True, False) is intentionally excluded —
        # StratifiedStandardize does not yet support recommendation without
        # data for the active task (see TODO at the bottom of this file).
        (False, False),
    ],
)
def test_recommendation(task_stratified_outtransform: bool, observed_test_data: bool):
    """Test a single BO iteration with a multi-task GP model.

    Args:
        task_stratified_outtransform: Whether the surrogate should use the
            task-stratified outcome transform.
        observed_test_data: Whether the training data contains measurements
            for the active task ``"A"`` (otherwise only task ``"B"`` data).
    """
    objective = SingleTargetObjective(target=NumericalTarget(name="y", mode="MAX"))
    parameters = [
        NumericalContinuousParameter(name="x", bounds=Interval(0, 10)),
        TaskParameter(name="task", values=("A", "B"), active_values=("A",)),
    ]
    searchspace = SearchSpace.from_product(parameters=parameters)
    lookup = pd.DataFrame(
        {
            "x": [1.0, 2.0, 3.0, 4.0],
            "y": [1.0, 2.0, 3.0, 4.0],
            "task": ["A", "A", "B", "B"] if observed_test_data else ["B"] * 4,
        }
    )
    # The deepcopy additionally exercises that a freshly configured campaign
    # (including the surrogate settings) survives copying — presumably a
    # serializability guard; TODO confirm it is still needed.
    campaign = deepcopy(
        Campaign(
            searchspace=searchspace,
            objective=objective,
            recommender=TwoPhaseMetaRecommender(
                recommender=BotorchRecommender(
                    surrogate_model=GaussianProcessSurrogate(
                        task_stratified_outtransform=task_stratified_outtransform
                    )
                ),
                initial_recommender=RandomRecommender(),
            ),
        )
    )
    campaign.add_measurements(lookup)
    # A successful batch-of-one recommendation is the assertion: the call must
    # not raise for any of the parametrized configurations.
    _ = campaign.recommend(batch_size=1)


# TODO: Once StratifiedStandardize supports recommendation without data for
# the active task, add the missing test case (task_stratified_outtransform=True,
# observed_test_data=False) where no active-task data is added at the start.
Loading