Skip to content

Conversation

@codeflash-ai
Copy link

@codeflash-ai codeflash-ai bot commented Oct 29, 2025

📄 8% (0.08x) speedup for ConstrainedLogEHVI.eval_acqf in optuna/_gp/acqf.py

⏱️ Runtime: 723 microseconds → 668 microseconds (best of 56 runs)

📝 Explanation and details

Key Optimizations:

  • In LogEHVI.eval_acqf, batch computes GP mean/variance for all objectives, and builds Y_post in a fully vectorized fashion, avoiding the slow Python loop that repeatedly called .append() and .stack().
  • Ensures all tensors and samples are allocated on the same device/dtype for fast computation, especially important for CUDA acceleration.
  • Where possible, uses copy=False on .to(...) to reduce unnecessary memory allocation.
  • In both constructors and evaluation logic, moves fixed samples and precomputed tensors to the proper device before first use, eliminating repeated transfer overhead.
  • Preserves all comments and signatures, and keeps the original algorithm and design unchanged.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 17 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage 100.0%
🌀 Generated Regression Tests and Runtime
import pytest  # used for our unit tests
import torch
from optuna._gp.acqf import ConstrainedLogEHVI

# --- Minimal mocks for GPRegressor and SearchSpace ---

class DummySearchSpace:
    """Empty placeholder passed as ``search_space`` in these tests.

    Per the original note, ``eval_acqf`` does not use it, so a stub suffices.
    """

class DummyGPRegressor:
    """Stand-in for a GP regressor whose posterior is a constant.

    ``posterior`` returns float64 tensors filled with the ``mean`` and ``var``
    supplied at construction, one entry per leading index of the input.
    """

    def __init__(self, mean, var):
        # Both values are later handed to torch.full as fill values.
        self._mean, self._var = mean, var
        self.length_scales = [1.0]  # arbitrary placeholder
        self._called_with = None  # most recent input given to posterior()

    def posterior(self, x):
        """Record ``x`` and return constant ``(mean, var)`` tensors.

        The outputs have shape ``x.shape[:-1]``; an input with at most one
        dimension therefore yields 0-dim tensors.
        """
        # Keep the input around so a test can inspect it afterwards.
        self._called_with = x
        # Slicing the shape of a 0-/1-D tensor already gives an empty shape,
        # so those inputs need no separate branch.
        batch_shape = x.shape[:-1]
        return tuple(
            torch.full(batch_shape, fill, dtype=torch.float64)
            for fill in (self._mean, self._var)
        )

# --- Imports from the provided code: BaseAcquisitionFunc, LogPI, LogEHVI, ConstrainedLogEHVI ---

# Re-define _EPS here for use in tests.
# NOTE(review): no test visible in this snippet references _EPS — confirm it is still needed.
_EPS = 1e-12

# --- Basic Test Cases ---




def test_constrained_logehvi_basic_sum():
    """Smoke-run ConstrainedLogEHVI.eval_acqf with two objectives and one constraint.

    The original note describes the target as the sum of the constraint and
    objective acquisition functions. No assertion is made on the returned
    value; the test only checks that evaluation completes.
    """
    Y_train = torch.tensor([[1.0, 2.0], [3.0, 1.5]], dtype=torch.float64)

    # Objective models first, then the single constraint model.
    objective_gprs = [
        DummyGPRegressor(mean=1.0, var=1.0),
        DummyGPRegressor(mean=2.0, var=2.0),
    ]
    constraint_gpr = DummyGPRegressor(mean=0.0, var=1.0)

    acqf = ConstrainedLogEHVI(
        gpr_list=objective_gprs,
        search_space=DummySearchSpace(),
        Y_feasible=Y_train,
        n_qmc_samples=5,
        qmc_seed=123,
        constraints_gpr_list=[constraint_gpr],
        constraints_threshold_list=[0.0],
    )

    x = torch.tensor([[1.0, 2.0]], dtype=torch.float64)
    codeflash_output = acqf.eval_acqf(x)  # 261μs -> 233μs (11.9% faster)
    result = codeflash_output

# --- Edge Test Cases ---









def test_constrained_logehvi_multiple_constraints():
    """Smoke-run ConstrainedLogEHVI.eval_acqf with two constraints.

    Inputs are drawn with torch.rand. The returned value is not asserted on;
    the test only checks that evaluation completes.
    """
    Y_train = torch.rand((5, 2), dtype=torch.float64)

    # Two identical objective models.
    gpr_list = []
    for _ in range(2):
        gpr_list.append(DummyGPRegressor(mean=1.0, var=1.0))

    # One constraint model per threshold, in matching order.
    first_constraint = DummyGPRegressor(mean=0.0, var=1.0)
    second_constraint = DummyGPRegressor(mean=2.0, var=2.0)
    constraint_gprs = [first_constraint, second_constraint]
    thresholds = [0.0, 1.0]

    acqf = ConstrainedLogEHVI(
        gpr_list=gpr_list,
        search_space=DummySearchSpace(),
        Y_feasible=Y_train,
        n_qmc_samples=100,
        qmc_seed=123,
        constraints_gpr_list=constraint_gprs,
        constraints_threshold_list=thresholds,
    )

    x = torch.rand((10, 2), dtype=torch.float64)
    codeflash_output = acqf.eval_acqf(x)  # 342μs -> 321μs (6.60% faster)
    result = codeflash_output


#------------------------------------------------
import math

# imports
import pytest  # used for our unit tests
import torch
from optuna._gp.acqf import ConstrainedLogEHVI


# Minimal stub for SearchSpace (since it's not used in eval_acqf logic)
class DummySearchSpace:
    """Attribute-less stub passed where a search space argument is expected."""

# Minimal GPRegressor implementation for testing
class DummyGPRegressor:
    """Deterministic GP regressor stub: the posterior is constant everywhere."""

    def __init__(self, mean, var, length_scales):
        self.length_scales = length_scales
        # Fill values used for every posterior() call.
        self._var = var
        self._mean = mean

    def posterior(self, x):
        """Return float64 ``(mean, var)`` tensors of shape ``x.shape[:-1]``.

        Every entry equals the fixed mean / variance given at construction.
        """
        # For 0-/1-D inputs the sliced shape is already empty, giving 0-dim outputs.
        batch_shape = x.shape[:-1]
        full_kwargs = {"dtype": torch.float64}
        posterior_mean = torch.full(batch_shape, self._mean, **full_kwargs)
        posterior_var = torch.full(batch_shape, self._var, **full_kwargs)
        return posterior_mean, posterior_var
from optuna._gp.acqf import ConstrainedLogEHVI

# ---------------- BASIC TEST CASES ----------------




def test_constrained_logehvi_basic():
    """Smoke-run ConstrainedLogEHVI.eval_acqf with one objective and one constraint.

    The returned value is not asserted on; the test only checks that
    evaluation completes.
    """
    objective_gpr = DummyGPRegressor(mean=1.0, var=1.0, length_scales=[1.0])
    constraint_gpr = DummyGPRegressor(mean=0.5, var=0.25, length_scales=[1.0])
    Y_train = torch.tensor([[1.0]], dtype=torch.float64)

    # Arguments are passed positionally, in the same order as the original call.
    cloghvi = ConstrainedLogEHVI(
        [objective_gpr],
        DummySearchSpace(),
        Y_train,
        2,
        42,
        [constraint_gpr],
        [0.0],
    )

    x = torch.tensor([[0.1]], dtype=torch.float64)
    codeflash_output = cloghvi.eval_acqf(x)
    result = codeflash_output

# ---------------- EDGE TEST CASES ----------------






def test_constrained_logehvi_large_constraints():
    """Smoke-run ConstrainedLogEHVI.eval_acqf with ten constraints.

    Constraint ``i`` has posterior mean ``i`` and threshold ``float(i)``.
    The returned value is not asserted on; the test only checks that
    evaluation completes.
    """
    n_constraints = 10
    objective_gpr = DummyGPRegressor(mean=1.0, var=1.0, length_scales=[1.0])

    constraints_gprs = []
    thresholds = []
    for i in range(n_constraints):
        # The mean is passed as the int loop index, as in the original.
        constraints_gprs.append(DummyGPRegressor(mean=i, var=1.0, length_scales=[1.0]))
        thresholds.append(float(i))

    Y_train = torch.ones((5, 1), dtype=torch.float64)
    # Arguments are passed positionally, in the same order as the original call.
    cloghvi = ConstrainedLogEHVI(
        [objective_gpr],
        DummySearchSpace(),
        Y_train,
        10,
        42,
        constraints_gprs,
        thresholds,
    )

    x = torch.ones((3, 1), dtype=torch.float64)
    codeflash_output = cloghvi.eval_acqf(x)
    result = codeflash_output

To edit these changes, run `git checkout codeflash/optimize-ConstrainedLogEHVI.eval_acqf-mhbg4g8o` and push.

Codeflash

**Key Optimizations:**
- In `LogEHVI.eval_acqf`, batch computes GP mean/variance for all objectives, and builds `Y_post` in a fully vectorized fashion, avoiding the slow Python loop that repeatedly called `.append()` and `.stack()`.
- Ensures all tensors and samples are allocated on the same device/dtype for fast computation, especially important for CUDA acceleration.
- Where possible, uses `copy=False` on `.to(...)` to reduce unnecessary memory allocation.
- In both constructors and evaluation logic, moves fixed samples and precomputed tensors to the proper device before first use, eliminating repeated transfer overhead.
- Preserves all comments and signatures, and keeps the original algorithm and design unchanged.
@codeflash-ai codeflash-ai bot requested a review from mashraf-222 October 29, 2025 03:38
@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label Oct 29, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

⚡️ codeflash Optimization PR opened by Codeflash AI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant