utility for greedily selecting an approximate HV maximizing subset (#2936)

sdaulton · facebook-github-bot · commit 29877b8c6c51 · 2025-07-24T14:06:09.000-07:00
Summary: Pull Request resolved: #2936 This adds a utility for obtaining an approximate HV maximizing set using a sequential greedy algorithm. Reviewed By: bletham Differential Revision: D77696625 fbshipit-source-id: 5f1203d99c5422c5691acf64348e4ce5847f5640
diff --git a/botorch/utils/multi_objective/hypervolume.py b/botorch/utils/multi_objective/hypervolume.py
@@ -21,6 +21,8 @@
 
 from __future__ import annotations
 
+import random
+
 import warnings
 from collections.abc import Callable
 from copy import deepcopy
@@ -833,3 +835,56 @@ def _hypervolumes(self) -> Tensor:
             .to(self.ref_point)  # for m > 2, the partitioning is on the CPU
             .view(self._batch_sample_shape)
         )
+
+
+def get_hypervolume_maximizing_subset(
+    n: int, Y: Tensor, ref_point: Tensor
+) -> tuple[Tensor, Tensor]:
+    """Find an approximately hypervolume-maximizing subset of size `n`.
+
+    Greedily adds the point of `Y` that increases the subset's hypervolume the
+    most (ties go to the lowest index); this has bounded error since hypervolume
+    is submodular. Slots that no point can improve are filled via Python's `random`.
+
+    Args:
+        n: The size of the subset to return.
+        Y: A `n' x m`-dim tensor of outcomes.
+        ref_point: A `m`-dim tensor containing the reference point.
+
+    Returns:
+        A two-element tuple containing
+            - A `n x m`-dim tensor of outcomes, in selection order.
+            - A `n`-dim tensor of indices of the outcomes in the original set.
+
+    Raises:
+        NotImplementedError: If `Y` is not two-dimensional.
+        ValueError: If `Y` has fewer than `n` points.
+    """
+    if Y.ndim != 2:
+        raise NotImplementedError(
+            "Only two dimensions are supported (no additional) batch dims."
+        )
+    elif Y.shape[0] < n:
+        raise ValueError(
+            f"Y has fewer points ({Y.shape[0]}) than the requested subset size ({n})."
+        )
+    picked: list[int] = []
+    remaining_idcs = list(range(Y.shape[0]))  # ascending => stable tie-breaks
+    best_hv, exhausted = 0.0, False
+    for _ in range(n):
+        best_idx = None
+        # A point adding no HV to a subset adds none to its supersets, so once
+        # a round finds no improvement the later rounds skip the HV evaluations.
+        for i in () if exhausted else remaining_idcs:
+            part = DominatedPartitioning(ref_point=ref_point, Y=Y[picked + [i]])
+            hv = part.compute_hypervolume().item()
+            if hv > best_hv:
+                best_idx, best_hv = i, hv
+        if best_idx is None:
+            # The rest is dominated or does not dominate `ref_point`: pick any.
+            exhausted = True
+            best_idx = random.choice(remaining_idcs)
+        remaining_idcs.remove(best_idx)
+        picked.append(best_idx)
+    idcs = torch.tensor(picked, dtype=torch.long, device=Y.device)
+    return Y[idcs], idcs
diff --git a/test/utils/multi_objective/test_hypervolume.py b/test/utils/multi_objective/test_hypervolume.py
@@ -8,7 +8,11 @@
 
 import torch
 from botorch.exceptions.errors import BotorchError, BotorchTensorDimensionError
-from botorch.utils.multi_objective.hypervolume import Hypervolume, infer_reference_point
+from botorch.utils.multi_objective.hypervolume import (
+    get_hypervolume_maximizing_subset,
+    Hypervolume,
+    infer_reference_point,
+)
 from botorch.utils.testing import BotorchTestCase
 
 EPS = 1e-4
@@ -300,3 +304,64 @@ def test_infer_reference_point(self):
                     pareto_Y=Y[:0],
                     max_ref_point=torch.tensor([float("nan"), -1e5], **tkwargs),
                 )
+
+
+class TestGetHypervolumeMaximizingSubset(BotorchTestCase):
+    def test_get_hypervolume_maximizing_subset(self) -> None:
+        # test invalid shapes: `Y` must be exactly two-dimensional
+        ref_point = torch.zeros(2)
+        for invalid_y in (torch.zeros(2), torch.zeros(1, 1, 2)):
+            with self.assertRaisesRegex(
+                NotImplementedError,
+                r"Only two dimensions are supported \(no additional\) batch dims.",
+            ):
+                get_hypervolume_maximizing_subset(n=1, Y=invalid_y, ref_point=ref_point)
+        # test n > Y.shape[0] (independent of the invalid shapes, so checked once)
+        with self.assertRaisesRegex(
+            ValueError,
+            r"Y has fewer points \(1\) than the requested subset size \(2\).",
+        ):
+            get_hypervolume_maximizing_subset(
+                n=2, Y=torch.zeros(1, 2), ref_point=ref_point
+            )
+        for dtype in (torch.float, torch.double):
+            Y = torch.tensor(
+                [
+                    [-13.9599, -24.0326],
+                    [-19.6755, -11.4721],
+                    [-18.7742, -11.9193],
+                    [-16.6614, -12.3283],
+                    [-17.7663, -11.9941],
+                    [-17.4367, -12.2948],
+                    [-19.4244, -11.9158],
+                    [-14.0806, -22.0004],
+                ],
+                dtype=dtype,
+                device=self.device,
+            )
+            ref_point = torch.tensor([-20.0, -20.0], dtype=dtype, device=self.device)
+            # rows 0 and 7 do not dominate `ref_point` (second objective < -20)
+            Y_subset, idcs = get_hypervolume_maximizing_subset(
+                n=3, Y=Y, ref_point=ref_point
+            )
+            self.assertTrue(torch.equal(Y_subset, Y[idcs]))
+            self.assertTrue(
+                torch.equal(
+                    idcs, torch.tensor([3, 4, 1], dtype=torch.long, device=self.device)
+                )
+            )
+            # test without `n` pareto optimal points (dominated point fills the subset)
+            Y = torch.tensor(
+                [[-5.0, -5.0], [-10.0, -10.0]],
+                dtype=dtype,
+                device=self.device,
+            )
+            Y_subset, idcs = get_hypervolume_maximizing_subset(
+                n=2, Y=Y, ref_point=ref_point
+            )
+            self.assertTrue(torch.equal(Y_subset, Y))
+            self.assertTrue(
+                torch.equal(
+                    idcs, torch.tensor([0, 1], dtype=torch.long, device=self.device)
+                )
+            )