
Commit 04ae7c4

Author: Sahran Ashoor (committed)
Upstream merge + pathwise test coverage + build + lint
1 parent 9774176 · commit 04ae7c4

File tree

11 files changed: +493 additions, −131 deletions


botorch/models/fully_bayesian_multitask.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
 from botorch.models.transforms.outcome import OutcomeTransform
 from botorch.posteriors.fully_bayesian import GaussianMixturePosterior
 from gpytorch.distributions import MultivariateNormal
-from gpytorch.kernels import MaternKernel
+from gpytorch.kernels import IndexKernel, MaternKernel
 from gpytorch.kernels.kernel import Kernel
 from gpytorch.likelihoods.likelihood import Likelihood
 from gpytorch.means.mean import Mean
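
Note: the new IndexKernel import matches the product-kernel layout the rest of this commit assumes, i.e. a data kernel over the feature columns multiplied by an IndexKernel over the task column. A minimal sketch of that structure (illustrative only; kernel choices and sizes are made up):

from gpytorch.kernels import IndexKernel, MaternKernel, ProductKernel

# Data kernel over columns 0-1, task kernel over the task column 2 (assumed layout).
data_kernel = MaternKernel(nu=2.5, ard_num_dims=2, active_dims=[0, 1])
task_kernel = IndexKernel(num_tasks=3, rank=1, active_dims=[2])

combined = data_kernel * task_kernel  # gpytorch returns a ProductKernel
assert isinstance(combined, ProductKernel)
assert len(combined.kernels) == 2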

botorch/optim/optimize_mixed.py

Lines changed: 4 additions & 0 deletions
@@ -5,7 +5,10 @@
 # LICENSE file in the root directory of this source tree.

 import dataclasses
+import itertools
+import random
 import warnings
+from collections.abc import Sequence
 from typing import Any, Callable

 import torch
@@ -745,6 +748,7 @@ def discrete_step(
 def continuous_step(
     opt_inputs: OptimizeAcqfInputs,
     discrete_dims: Tensor,
+    cat_dims: Tensor,
     current_x: Tensor,
 ) -> tuple[Tensor, Tensor]:
     """Continuous search using L-BFGS-B through optimize_acqf.

botorch/sampling/pathwise/paths.py

Lines changed: 1 addition & 2 deletions
@@ -7,7 +7,7 @@
 from __future__ import annotations

 from abc import ABC, abstractmethod
-from collections.abc import Callable, Iterable, Iterator, Mapping
+from collections.abc import Callable, Iterable, Mapping
 from string import ascii_letters
 from typing import Any

@@ -142,7 +142,6 @@ def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None:
             path.set_ensemble_as_batch(ensemble_as_batch)


-
 class GeneralizedLinearPath(SamplePath):
     r"""A sample path in the form of a generalized linear model."""


botorch/sampling/pathwise/prior_samplers.py

Lines changed: 50 additions & 10 deletions
@@ -149,19 +149,59 @@ def _draw_kernel_feature_paths_MultiTaskGP(
         else model._task_feature
     )

-    # NOTE: May want to use a `ProductKernel` instead in `MultiTaskGP`
-    base_kernel = deepcopy(model.covar_module)
-    base_kernel.active_dims = torch.LongTensor(
-        [index for index in range(train_X.shape[-1]) if index != task_index],
-        device=base_kernel.device,
-    )
-
-    task_kernel = deepcopy(model.task_covar_module)
-    task_kernel.active_dims = torch.tensor([task_index], device=base_kernel.device)
+    # Extract kernels from the product kernel structure
+    # model.covar_module is a ProductKernel
+    # containing data_covar_module * task_covar_module
+    from gpytorch.kernels import ProductKernel
+
+    if isinstance(model.covar_module, ProductKernel):
+        # Get the individual kernels from the product kernel
+        kernels = model.covar_module.kernels
+
+        # Find data and task kernels based on their active_dims
+        data_kernel = None
+        task_kernel = None
+
+        for kernel in kernels:
+            if hasattr(kernel, "active_dims") and kernel.active_dims is not None:
+                if task_index in kernel.active_dims:
+                    task_kernel = deepcopy(kernel)
+                else:
+                    data_kernel = deepcopy(kernel)
+            else:
+                # If no active_dims, it's likely the data kernel
+                data_kernel = deepcopy(kernel)
+                data_kernel.active_dims = torch.LongTensor(
+                    [
+                        index
+                        for index in range(train_X.shape[-1])
+                        if index != task_index
+                    ],
+                    device=data_kernel.device,
+                )
+
+        # If we couldn't find the task kernel, create it based on the structure
+        if task_kernel is None:
+            from gpytorch.kernels import IndexKernel
+
+            task_kernel = IndexKernel(
+                num_tasks=model.num_tasks,
+                rank=model._rank,
+                active_dims=[task_index],
+            ).to(device=model.covar_module.device, dtype=model.covar_module.dtype)
+
+        # Set task kernel active dims correctly
+        task_kernel.active_dims = torch.tensor([task_index], device=task_kernel.device)
+
+        # Use the existing product kernel structure
+        combined_kernel = data_kernel * task_kernel
+    else:
+        # Fallback to using the original covar_module directly
+        combined_kernel = model.covar_module

     return _draw_kernel_feature_paths_fallback(
         mean_module=model.mean_module,
-        covar_module=base_kernel * task_kernel,
+        covar_module=combined_kernel,
         input_transform=get_input_transform(model),
         output_transform=get_output_transform(model),
         num_ambient_inputs=num_ambient_inputs,
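
The core of this hunk is the active_dims-based split of model.covar_module into a data kernel and a task kernel. A self-contained sketch of that detection step, assuming the ProductKernel layout the comment describes (the kernel choices below are placeholders):

from gpytorch.kernels import IndexKernel, ProductKernel, RBFKernel

task_index = 2
covar_module = ProductKernel(
    RBFKernel(active_dims=[0, 1]),                               # data kernel
    IndexKernel(num_tasks=3, rank=1, active_dims=[task_index]),  # task kernel
)

data_kernel, task_kernel = None, None
for kernel in covar_module.kernels:
    if kernel.active_dims is not None and task_index in kernel.active_dims:
        task_kernel = kernel
    else:
        data_kernel = kernel

assert isinstance(task_kernel, IndexKernel) and isinstance(data_kernel, RBFKernel)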

botorch/sampling/pathwise/update_strategies.py

Lines changed: 49 additions & 8 deletions
@@ -172,17 +172,58 @@ def _draw_kernel_feature_paths_MultiTaskGP(
         if model._task_feature < 0
         else model._task_feature
     )
-    base_kernel = deepcopy(model.covar_module)
-    base_kernel.active_dims = torch.LongTensor(
-        [index for index in range(num_inputs) if index != task_index],
-        device=base_kernel.device,
-    )
-    task_kernel = deepcopy(model.task_covar_module)
-    task_kernel.active_dims = torch.LongTensor([task_index], device=base_kernel.device)
+
+    # Extract kernels from the product kernel structure
+    # model.covar_module is a ProductKernel
+    # containing data_covar_module * task_covar_module
+    from gpytorch.kernels import ProductKernel
+
+    if isinstance(model.covar_module, ProductKernel):
+        # Get the individual kernels from the product kernel
+        kernels = model.covar_module.kernels
+
+        # Find data and task kernels based on their active_dims
+        data_kernel = None
+        task_kernel = None
+
+        for kernel in kernels:
+            if hasattr(kernel, "active_dims") and kernel.active_dims is not None:
+                if task_index in kernel.active_dims:
+                    task_kernel = deepcopy(kernel)
+                else:
+                    data_kernel = deepcopy(kernel)
+            else:
+                # If no active_dims, it's likely the data kernel
+                data_kernel = deepcopy(kernel)
+                data_kernel.active_dims = torch.LongTensor(
+                    [index for index in range(num_inputs) if index != task_index],
+                    device=data_kernel.device,
+                )
+
+        # If we couldn't find the task kernel, create it based on the structure
+        if task_kernel is None:
+            from gpytorch.kernels import IndexKernel
+
+            task_kernel = IndexKernel(
+                num_tasks=model.num_tasks,
+                rank=model._rank,
+                active_dims=[task_index],
+            ).to(device=model.covar_module.device, dtype=model.covar_module.dtype)
+
+        # Set task kernel active dims correctly
+        task_kernel.active_dims = torch.LongTensor(
+            [task_index], device=task_kernel.device
+        )
+
+        # Use the existing product kernel structure
+        combined_kernel = data_kernel * task_kernel
+    else:
+        # Fallback to using the original covar_module directly
+        combined_kernel = model.covar_module

     # Return exact update using product kernel
     return _gaussian_update_exact(
-        kernel=base_kernel * task_kernel,
+        kernel=combined_kernel,
         points=points,
         target_values=target_values,
         sample_values=sample_values,
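
update_strategies.py mirrors the prior_samplers.py change; the part worth double-checking is the IndexKernel fallback, which rebuilds a task kernel from model.num_tasks and model._rank when none is found in the product structure. A hedged sketch of that construction with placeholder values:

import torch
from gpytorch.kernels import IndexKernel

# Placeholders for model.num_tasks, model._rank, and the task feature column.
num_tasks, rank, task_index = 3, 1, 2
task_kernel = IndexKernel(
    num_tasks=num_tasks,
    rank=rank,
    active_dims=[task_index],
).to(dtype=torch.float64)

# One raw variance entry per task; the test helper further below relies on this shape too.
assert task_kernel.raw_var.shape[-1] == num_tasks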

test/models/test_fully_bayesian_multitask.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,11 @@
3131
)
3232
from botorch.models import ModelList, ModelListGP
3333
from botorch.models.deterministic import GenericDeterministicModel
34-
from botorch.models.fully_bayesian import MCMC_DIM, MIN_INFERRED_NOISE_LEVEL
34+
from botorch.models.fully_bayesian import (
35+
matern52_kernel,
36+
MCMC_DIM,
37+
MIN_INFERRED_NOISE_LEVEL,
38+
)
3539
from botorch.models.fully_bayesian_multitask import (
3640
MultitaskSaasPyroModel,
3741
SaasFullyBayesianMultiTaskGP,
@@ -46,7 +50,7 @@
4650
)
4751
from botorch.utils.test_helpers import gen_multi_task_dataset
4852
from botorch.utils.testing import BotorchTestCase
49-
from gpytorch.kernels import MaternKernel, ScaleKernel
53+
from gpytorch.kernels import IndexKernel, MaternKernel, ScaleKernel
5054
from gpytorch.likelihoods import FixedNoiseGaussianLikelihood
5155
from gpytorch.likelihoods.gaussian_likelihood import GaussianLikelihood
5256
from gpytorch.means import ConstantMean
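
The tests now import matern52_kernel alongside the gpytorch kernels, presumably to compare sampled kernels against a reference Gram matrix. A usage sketch, assuming the (X, lengthscale) signature used in botorch.models.fully_bayesian:

import torch
from botorch.models.fully_bayesian import matern52_kernel

X = torch.rand(5, 3, dtype=torch.float64)
lengthscale = torch.ones(3, dtype=torch.float64)
K = matern52_kernel(X, lengthscale=lengthscale)  # 5 x 5 Matern-5/2 Gram matrix
assert K.shape == (5, 5)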

test/optim/test_optimize_mixed.py

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.

+import random
 from dataclasses import fields
 from itertools import product
 from typing import Any, Callable
@@ -29,6 +30,7 @@
     continuous_step,
     discrete_step,
     generate_starting_points,
+    get_categorical_neighbors,
     get_nearest_neighbors,
     get_spray_points,
     MAX_DISCRETE_VALUES,
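
get_categorical_neighbors is new in this commit and its exact signature is not shown in this diff, so the toy helper below only illustrates the underlying idea: for a categorical dimension every other category value is a neighbor, unlike the step-wise neighbors used for ordered discrete dimensions.

import torch

def toy_categorical_neighbors(x: torch.Tensor, dim: int, cardinality: int) -> torch.Tensor:
    # Return copies of x with the categorical entry at `dim` swapped to each other value.
    values = [v for v in range(cardinality) if v != int(x[dim])]
    neighbors = x.repeat(len(values), 1)
    neighbors[:, dim] = torch.tensor(values, dtype=x.dtype)
    return neighbors

x = torch.tensor([0.5, 2.0, 0.1])
print(toy_categorical_neighbors(x, dim=1, cardinality=4))
# rows with the middle entry set to 0, 1, and 3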

test/sampling/pathwise/helpers.py

Lines changed: 22 additions & 1 deletion
@@ -84,7 +84,28 @@ def gen_random_inputs(
     tkwargs = {"device": train_X.device, "dtype": train_X.dtype}
     X = torch.rand((*batch_shape, train_X.shape[-1]), **tkwargs)
     if isinstance(model, models.MultiTaskGP):
-        num_tasks = model.task_covar_module.raw_var.shape[-1]
+        # Extract task kernel from the product kernel structure
+        from gpytorch.kernels import ProductKernel
+
+        if isinstance(model.covar_module, ProductKernel):
+            # Find the task kernel based on active_dims
+            task_kernel = None
+            for kernel in model.covar_module.kernels:
+                if (
+                    hasattr(kernel, "active_dims")
+                    and kernel.active_dims is not None
+                ):
+                    if model._task_feature in kernel.active_dims:
+                        task_kernel = kernel
+                        break
+
+            if task_kernel is not None and hasattr(task_kernel, "raw_var"):
+                num_tasks = task_kernel.raw_var.shape[-1]
+            else:
+                num_tasks = model.num_tasks
+        else:
+            num_tasks = model.num_tasks
+
         X[..., model._task_feature] = (
             torch.randint(num_tasks, size=X.shape[:-1], **tkwargs)
             if task_id is None
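
The helper's fallback chain (IndexKernel raw_var, then model.num_tasks) can be exercised in isolation; a sketch under the same ProductKernel assumption, using isinstance for brevity instead of the active_dims check above:

from gpytorch.kernels import IndexKernel, ProductKernel, RBFKernel

covar_module = ProductKernel(
    RBFKernel(active_dims=[0, 1]), IndexKernel(num_tasks=4, rank=1, active_dims=[2])
)
fallback_num_tasks = 4  # stands in for model.num_tasks

task_kernel = next((k for k in covar_module.kernels if isinstance(k, IndexKernel)), None)
num_tasks = task_kernel.raw_var.shape[-1] if task_kernel is not None else fallback_num_tasks
assert num_tasks == 4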
