
Commit a2f2ef5

Merge progress from pathwise-test-coverage branch
1 parent 0321b49 commit a2f2ef5

40 files changed: +5105 / -1824 lines

botorch/acquisition/knowledge_gradient.py

Lines changed: 1 addition & 1 deletion
@@ -223,7 +223,7 @@ def evaluate(self, X: Tensor, bounds: Tensor, **kwargs: Any) -> Tensor:
            kwargs: Additional keyword arguments. This includes the options for
                optimization of the inner problem, i.e. `num_restarts`, `raw_samples`,
                an `options` dictionary to be passed on to the optimization helpers, and
-                a `scipy_options` dictionary to be passed to `scipy.optimize.minimize`.
+                a `scipy_options` dictionary to be passed to `scipy.minimize`.

        Returns:
            A Tensor of shape `b`. For t-batch b, the q-KG value of the design
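
For context on the kwargs this hunk documents, here is a minimal sketch of calling `qKnowledgeGradient.evaluate` and forwarding a `scipy_options` dictionary to the inner-problem solver. The toy data, model, and option values are illustrative assumptions, not part of this commit.

import torch

from botorch.acquisition import qKnowledgeGradient
from botorch.models import SingleTaskGP

# Toy training data (assumed for illustration only).
train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = train_X.sin().sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)

qkg = qKnowledgeGradient(model, num_fantasies=16)
bounds = torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double)

# `evaluate` solves the inner optimization problem; the extra kwargs below are
# the ones named in the docstring above, with `scipy_options` forwarded to the
# scipy solver (the values here are arbitrary).
X = torch.rand(3, 1, 2, dtype=torch.double)  # t-batch of 3 single-point designs
kg_values = qkg.evaluate(
    X,
    bounds=bounds,
    num_restarts=4,
    raw_samples=64,
    scipy_options={"maxiter": 50},
)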

botorch/generation/gen.py

Lines changed: 4 additions & 8 deletions
@@ -56,10 +56,6 @@ def gen_candidates_scipy(

    Optimizes an acquisition function starting from a set of initial candidates
    using `scipy.optimize.minimize` via a numpy converter.
-    We use SLSQP, if constraints are present, and LBFGS-B otherwise.
-    As `scipy.optimize.minimize` does not support optimizating a batch of problems, we
-    treat optimizing a set of candidates as a single optimization problem by
-    summing together their acquisition values.

    Args:
        initial_conditions: Starting points for optimization, with shape

@@ -86,7 +82,7 @@ def gen_candidates_scipy(
            `optimize_acqf()`. The constraints will later be passed to the scipy
            solver.
        options: Options used to control the optimization including "method"
-            and "maxiter". Select method for `scipy.optimize.minimize` using the
+            and "maxiter". Select method for `scipy.minimize` using the
            "method" key. By default uses L-BFGS-B for box-constrained problems
            and SLSQP if inequality or equality constraints are present. If
            `with_grad=False`, then we use a two-point finite difference estimate

@@ -447,13 +443,13 @@ def _process_scipy_result(res: OptimizeResult, options: dict[str, Any]) -> None:
            or "Iteration limit reached" in res.message
        ):
            logger.info(
-                "`scipy.optimize.minimize` exited by reaching the iteration limit of "
+                "`scipy.minimize` exited by reaching the iteration limit of "
                f"`maxiter: {options.get('maxiter')}`."
            )
        elif "EVALUATIONS EXCEEDS LIMIT" in res.message:
            logger.info(
-                "`scipy.optimize.minimize` exited by reaching the function evaluation "
-                f"limit of `maxfun: {options.get('maxfun')}`."
+                "`scipy.minimize` exited by reaching the function evaluation limit of "
+                f"`maxfun: {options.get('maxfun')}`."
            )
        elif "Optimization timed out after" in res.message:
            logger.info(res.message)
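
As a reference for the `options` keys discussed in this docstring ("method", "maxiter"), a minimal sketch of calling `gen_candidates_scipy` directly; the toy model, acquisition function, and option values are assumptions for illustration only.

import torch

from botorch.acquisition import ExpectedImprovement
from botorch.generation.gen import gen_candidates_scipy
from botorch.models import SingleTaskGP

train_X = torch.rand(8, 2, dtype=torch.double)
train_Y = train_X.sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)
ei = ExpectedImprovement(model, best_f=train_Y.max())

# Batch of 5 restart points, q=1, d=2.
ics = torch.rand(5, 1, 2, dtype=torch.double)
candidates, acq_vals = gen_candidates_scipy(
    initial_conditions=ics,
    acquisition_function=ei,
    lower_bounds=torch.zeros(2, dtype=torch.double),
    upper_bounds=torch.ones(2, dtype=torch.double),
    # "method" and "maxiter" are routed to the scipy solver, as documented above.
    options={"method": "L-BFGS-B", "maxiter": 100},
)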

botorch/models/fully_bayesian_multitask.py

Lines changed: 41 additions & 57 deletions
@@ -18,14 +18,12 @@
    reshape_and_detach,
    SaasPyroModel,
)
-from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
from botorch.models.multitask import MultiTaskGP
from botorch.models.transforms.input import InputTransform
from botorch.models.transforms.outcome import OutcomeTransform
from botorch.posteriors.fully_bayesian import GaussianMixturePosterior, MCMC_DIM
-from gpytorch.distributions import MultivariateNormal
+from gpytorch.distributions.multivariate_normal import MultivariateNormal
from gpytorch.kernels import MaternKernel
-from gpytorch.kernels.index_kernel import IndexKernel
from gpytorch.kernels.kernel import Kernel
from gpytorch.likelihoods.likelihood import Likelihood
from gpytorch.means.mean import Mean

@@ -134,7 +132,7 @@ def sample_task_lengthscale(

    def load_mcmc_samples(
        self, mcmc_samples: dict[str, Tensor]
-    ) -> tuple[Mean, Kernel, Likelihood, Kernel]:
+    ) -> tuple[Mean, Kernel, Likelihood, Kernel, Parameter]:
        r"""Load the MCMC samples into the mean_module, covar_module, and likelihood."""
        tkwargs = {"device": self.train_X.device, "dtype": self.train_X.dtype}
        num_mcmc_samples = len(mcmc_samples["mean"])

@@ -144,32 +142,27 @@ def load_mcmc_samples(
            mcmc_samples=mcmc_samples
        )

-        latent_covar_module = MaternKernel(
+        task_covar_module = MaternKernel(
            nu=2.5,
            ard_num_dims=self.task_rank,
            batch_shape=batch_shape,
        ).to(**tkwargs)
-        latent_covar_module.lengthscale = reshape_and_detach(
-            target=latent_covar_module.lengthscale,
+        task_covar_module.lengthscale = reshape_and_detach(
+            target=task_covar_module.lengthscale,
            new_value=mcmc_samples["task_lengthscale"],
        )
-        latent_features = mcmc_samples["latent_features"]
-        task_covar = latent_covar_module(latent_features)
-        task_covar_module = IndexKernel(
-            num_tasks=self.num_tasks,
-            rank=self.task_rank,
-            batch_shape=latent_features.shape[:-2],
+        latent_features = Parameter(
+            torch.rand(
+                batch_shape + torch.Size([self.num_tasks, self.task_rank]),
+                requires_grad=True,
+                **tkwargs,
+            )
        )
-        task_covar_module.covar_factor = Parameter(
-            task_covar.cholesky().to_dense().detach()
+        latent_features = reshape_and_detach(
+            target=latent_features,
+            new_value=mcmc_samples["latent_features"],
        )
-
-        # NOTE: 'var' is implicitly assumed to be zero from the sampling procedure in
-        # the FBMTGP model but not in the regular MTGP. I dont how if the var parameter
-        # affects predictions in practice, but setting it to zero is consistent with the
-        # previous implementation.
-        task_covar_module.var = torch.zeros_like(task_covar_module.var)
-        return mean_module, covar_module, likelihood, task_covar_module
+        return mean_module, covar_module, likelihood, task_covar_module, latent_features


class SaasFullyBayesianMultiTaskGP(MultiTaskGP):

@@ -368,6 +361,7 @@ def load_mcmc_samples(self, mcmc_samples: dict[str, Tensor]) -> None:
            self.covar_module,
            self.likelihood,
            self.task_covar_module,
+            self.latent_features,
        ) = self.pyro_model.load_mcmc_samples(mcmc_samples=mcmc_samples)

    def posterior(

@@ -397,7 +391,30 @@ def posterior(

    def forward(self, X: Tensor) -> MultivariateNormal:
        self._check_if_fitted()
-        return super().forward(X)
+        x_basic, task_idcs = self._split_inputs(X)
+
+        mean_x = self.mean_module(x_basic)
+        covar_x = self.covar_module(x_basic)
+
+        tsub_idcs = task_idcs.squeeze(-1)
+        if tsub_idcs.ndim > 1:
+            tsub_idcs = tsub_idcs.squeeze(-2)
+        latent_features = self.latent_features[:, tsub_idcs, :]
+
+        if X.ndim > 3:
+            # batch eval mode
+            # for X (batch_shape x num_samples x q x d), task_idcs[:,i,:,] are the same
+            # reshape X to (batch_shape x num_samples x q x d)
+            latent_features = latent_features.permute(
+                [-i for i in range(X.ndim - 1, 2, -1)]
+                + [0]
+                + [-i for i in range(2, 0, -1)]
+            )
+
+        # Combine the two in an ICM fashion
+        covar_i = self.task_covar_module(latent_features)
+        covar = covar_x.mul(covar_i)
+        return MultivariateNormal(mean_x, covar)

    def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):
        r"""Custom logic for loading the state dict.

@@ -439,40 +456,7 @@ def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):
            self.covar_module,
            self.likelihood,
            self.task_covar_module,
+            self.latent_features,
        ) = self.pyro_model.load_mcmc_samples(mcmc_samples=mcmc_samples)
        # Load the actual samples from the state dict
        super().load_state_dict(state_dict=state_dict, strict=strict)
-
-    def condition_on_observations(
-        self, X: Tensor, Y: Tensor, **kwargs: Any
-    ) -> BatchedMultiOutputGPyTorchModel:
-        """Conditions on additional observations for a Fully Bayesian model (either
-        identical across models or unique per-model).
-
-        Args:
-            X: A `batch_shape x num_samples x d`-dim Tensor, where `d` is
-                the dimension of the feature space and `batch_shape` is the number of
-                sampled models.
-            Y: A `batch_shape x num_samples x 1`-dim Tensor, where `d` is
-                the dimension of the feature space and `batch_shape` is the number of
-                sampled models.
-
-        Returns:
-            BatchedMultiOutputGPyTorchModel: A fully bayesian model conditioned on
-                given observations. The returned model has `batch_shape` copies of the
-                training data in case of identical observations (and `batch_shape`
-                training datasets otherwise).
-        """
-        if X.ndim == 2 and Y.ndim == 2:
-            # To avoid an error in GPyTorch when inferring the batch dimension, we add
-            # the explicit batch shape here. The result is that the conditioned model
-            # will have 'batch_shape' copies of the training data.
-            X = X.repeat(self.batch_shape + (1, 1))
-            Y = Y.repeat(self.batch_shape + (1, 1))
-
-        elif X.ndim < Y.ndim:
-            # We need to duplicate the training data to enable correct batch
-            # size inference in gpytorch.
-            X = X.repeat(*(Y.shape[:-2] + (1, 1)))
-
-        return super().condition_on_observations(X, Y, **kwargs)
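
The changes above replace the `IndexKernel`-based task covariance with a `MaternKernel` over per-sample `latent_features`, which the new `forward` combines with the data kernel in ICM fashion. A minimal usage sketch follows; the synthetic data, MCMC settings, and shapes are assumptions for illustration, not part of this commit.

import torch

from botorch.fit import fit_fully_bayesian_model_nuts
from botorch.models.fully_bayesian_multitask import SaasFullyBayesianMultiTaskGP

# Two tasks encoded in the last column of train_X (task_feature=-1).
X = torch.rand(20, 2, dtype=torch.double)
tasks = torch.randint(0, 2, (20, 1), dtype=torch.double)
train_X = torch.cat([X, tasks], dim=-1)
train_Y = X.sum(dim=-1, keepdim=True) + 0.1 * tasks

model = SaasFullyBayesianMultiTaskGP(train_X, train_Y, task_feature=-1)
fit_fully_bayesian_model_nuts(
    model, warmup_steps=32, num_samples=32, thinning=4, disable_progbar=True
)

# After fitting, `load_mcmc_samples` has populated `task_covar_module` and the
# per-sample `latent_features` used by the new `forward` to build the ICM covariance.
posterior = model.posterior(torch.rand(4, 2, dtype=torch.double))
print(posterior.mean.shape)  # includes an MCMC batch dimension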

botorch/models/gpytorch.py

Lines changed: 2 additions & 0 deletions
@@ -816,6 +816,7 @@ def _apply_noise(
        self,
        X: Tensor,
        mvn: MultivariateNormal,
+        num_outputs: int,
        observation_noise: bool | Tensor,
    ) -> MultivariateNormal:
        """Adds the observation noise to the posterior.

@@ -947,6 +948,7 @@ def posterior(
        mvn = self._apply_noise(
            X=X_full,
            mvn=mvn,
+            num_outputs=num_outputs,
            observation_noise=observation_noise,
        )
        # If single-output, return the posterior of a single-output model

botorch/optim/core.py

Lines changed: 2 additions & 2 deletions
@@ -78,8 +78,8 @@ def scipy_minimize(
        bounds: A dictionary mapping parameter names to lower and upper bounds.
        callback: A callable taking `parameters` and an OptimizationResult as arguments.
        x0: An optional initialization vector passed to scipy.optimize.minimize.
-        method: Solver type, passed along to scipy.optimize.minimize.
-        options: Dictionary of solver options, passed along to scipy.optimize.minimize.
+        method: Solver type, passed along to scipy.minimize.
+        options: Dictionary of solver options, passed along to scipy.minimize.
        timeout_sec: Timeout in seconds to wait before aborting the optimization loop
            if not converged (will return the best found solution thus far).
8585

botorch/optim/fit.py

Lines changed: 2 additions & 2 deletions
@@ -69,8 +69,8 @@ def fit_gpytorch_mll_scipy(
            Responsible for setting the `grad` attributes of `parameters`. If no closure
            is provided, one will be obtained by calling `get_loss_closure_with_grads`.
        closure_kwargs: Keyword arguments passed to `closure`.
-        method: Solver type, passed along to scipy.optimize.minimize.
-        options: Dictionary of solver options, passed along to scipy.optimize.minimize.
+        method: Solver type, passed along to scipy.minimize.
+        options: Dictionary of solver options, passed along to scipy.minimize.
        callback: Optional callback taking `parameters` and an OptimizationResult as its
            sole arguments.
        timeout_sec: Timeout in seconds after which to terminate the fitting loop
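
A minimal sketch of `fit_gpytorch_mll_scipy` with the `method` and `options` arguments referenced in this docstring; the model, data, and option values are illustrative assumptions.

import torch

from botorch.models import SingleTaskGP
from botorch.optim.fit import fit_gpytorch_mll_scipy
from gpytorch.mlls import ExactMarginalLogLikelihood

train_X = torch.rand(12, 3, dtype=torch.double)
train_Y = train_X.sin().sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)
mll = ExactMarginalLogLikelihood(model.likelihood, model)

# `method` selects the scipy solver; `options` is the scipy options dict.
result = fit_gpytorch_mll_scipy(mll, method="L-BFGS-B", options={"maxiter": 200})
print(result)  # OptimizationResult with the final loss and status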

botorch/optim/optimize.py

Lines changed: 6 additions & 32 deletions
@@ -528,29 +528,7 @@ def optimize_acqf(
    retry_on_optimization_warning: bool = True,
    **ic_gen_kwargs: Any,
) -> tuple[Tensor, Tensor]:
-    r"""Optimize the acquisition function for a single or multiple joint candidates.
-
-    A high-level description (missing exceptions for special setups):
-
-    This function optimizes the acquisition function `acq_function` in two steps:
-
-    i) It will sample `raw_samples` random points using Sobol sampling in the bounds
-    `bounds` and pass on the "best" `num_restarts` many.
-    The default way to find these "best" is via `gen_batch_initial_conditions`
-    (deviating for some acq functions, see `get_ic_generator`),
-    which by default performs Boltzmann sampling on the acquisition function value
-    (The behavior of step (i) can be further controlled by specifying `ic_generator`
-    or `batch_initial_conditions`.)
-
-    ii) A batch of the `num_restarts` points (or joint sets of points)
-    with the highest acquisition values in the previous step are then further
-    optimized. This is by default done by LBFGS-B optimization, if no constraints are
-    present, and SLSQP, if constraints are present (can be changed to
-    other optmizers via `gen_candidates`).
-
-    While the optimization procedure runs on CPU by default for this function,
-    the acq_function can be implemented on GPU and simply move the inputs
-    to GPU internally.
+    r"""Generate a set of candidates via multi-start optimization.

    Args:
        acq_function: An AcquisitionFunction.

@@ -559,13 +537,10 @@ def optimize_acqf(
            +inf, respectively).
        q: The number of candidates.
        num_restarts: The number of starting points for multistart acquisition
-            function optimization. Even though the name suggests this happens
-            sequentually, it is done in parallel (using batched evaluations)
-            for up to `options.batch_limit` candidates (by default completely parallel).
+            function optimization.
        raw_samples: The number of samples for initialization. This is required
            if `batch_initial_conditions` is not specified.
-        options: Options for both optimization, passed to `gen_candidates`,
-            and initialization, passed to the `ic_generator` via the `options` kwarg.
+        options: Options for candidate generation.
        inequality_constraints: A list of tuples (indices, coefficients, rhs),
            with each tuple encoding an inequality constraint of the form
            `\sum_i (X[indices[i]] * coefficients[i]) >= rhs`. `indices` and

@@ -611,11 +586,10 @@ def optimize_acqf(
            acquisition values) given a tensor of initial conditions and an
            acquisition function. Other common inputs include lower and upper bounds
            and a dictionary of options, but refer to the documentation of specific
-            generation functions (e.g., botorch.optim.optimize.gen_candidates_scipy
-            and botorch.generation.gen.gen_candidates_torch) for method-specific
-            inputs. Default: `gen_candidates_scipy`
+            generation functions (e.g gen_candidates_scipy and gen_candidates_torch)
+            for method-specific inputs. Default: `gen_candidates_scipy`
        sequential: If False, uses joint optimization, otherwise uses sequential
-            optimization for optimizing multiple joint candidates (q > 1).
+            optimization.
        ic_generator: Function for generating initial conditions. Not needed when
            `batch_initial_conditions` are provided. Defaults to
            `gen_one_shot_kg_initial_conditions` for `qKnowledgeGradient` acquisition
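
Since the shortened docstring drops the step-by-step description, a minimal end-to-end sketch of the call pattern may help: `raw_samples` Sobol points are screened down to `num_restarts` starting points, which `gen_candidates_scipy` then refines (L-BFGS-B here, since no constraints are passed). The toy model and the particular values are assumptions for illustration.

import torch

from botorch.acquisition import qExpectedImprovement
from botorch.fit import fit_gpytorch_mll
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from gpytorch.mlls import ExactMarginalLogLikelihood

train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = 1 - (train_X - 0.5).pow(2).sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)
fit_gpytorch_mll(ExactMarginalLogLikelihood(model.likelihood, model))

acqf = qExpectedImprovement(model, best_f=train_Y.max())
bounds = torch.stack([torch.zeros(2), torch.ones(2)]).to(torch.double)

# Multi-start optimization of q=2 joint candidates.
candidates, acq_value = optimize_acqf(
    acq_function=acqf,
    bounds=bounds,
    q=2,
    num_restarts=10,
    raw_samples=128,
    options={"maxiter": 200},
)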
