Skip to content

Commit

Permalink
Use dataframe-based lookup callable in user-facing code
Browse files Browse the repository at this point in the history
  • Loading branch information
AdrianSosic committed Dec 16, 2024
1 parent f976b55 commit 4767692
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 57 deletions.
1 change: 0 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `filter_df` utility for filtering dataframe content
- `DiscreteConstraint.get_valid` to conveniently access valid candidates
- Functionality for persisting benchmarking results on S3 from a manual pipeline run
- `label_columns` decorator utility for convenient creation of lookup callables

### Changed
- `SubstanceParameter` encodings are now computed exclusively with the
Expand Down
2 changes: 0 additions & 2 deletions baybe/simulation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,10 @@
"""

from baybe.simulation.core import simulate_experiment
from baybe.simulation.lookup import label_columns
from baybe.simulation.scenarios import simulate_scenarios
from baybe.simulation.transfer_learning import simulate_transfer_learning

__all__ = [
"label_columns",
"simulate_experiment",
"simulate_scenarios",
"simulate_transfer_learning",
Expand Down
41 changes: 1 addition & 40 deletions baybe/simulation/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from __future__ import annotations

import functools
import logging
from collections.abc import Callable, Collection, Sequence
from collections.abc import Callable, Collection
from typing import Literal

import numpy as np
Expand Down Expand Up @@ -139,41 +138,3 @@ def _look_up_targets_from_dataframe(

# Add the lookup values
queries.loc[:, target_names] = np.asarray(all_match_vals)


def label_columns(
    input_labels: Sequence[str], output_labels: Sequence[str]
) -> Callable:
    """Create a decorator for labeling the inputs and outputs of array-based callables.

    The decorator transforms a callable designed to work with unlabelled arrays such
    that it can operate with dataframes instead. The original callable is expected to
    accept and return two-dimensional arrays. When decorated, the callable accepts and
    returns dataframes whose columns are mapped to the corresponding arrays based on the
    specified label sequences.

    Args:
        input_labels: The sequence of input labels mapping the columns of the input
            dataframe to columns of the input array in the specified order.
        output_labels: The sequence of output labels mapping the columns of the output
            dataframe to columns of the output array in the specified order.

    Returns:
        The decorator for the given input and output labels.
    """
    # pandas interprets a tuple passed to ``df[...]`` as ONE (multi-level) column
    # key instead of a collection of labels, which makes tuple-valued label
    # sequences fail with a ``KeyError``. Normalizing to lists makes every
    # non-string sequence type behave like a list. Bare strings are passed through
    # untouched so that their handling stays exactly as before.
    input_cols = input_labels if isinstance(input_labels, str) else list(input_labels)
    output_cols = (
        output_labels if isinstance(output_labels, str) else list(output_labels)
    )

    def decorator(
        fn: Callable[[np.ndarray], np.ndarray],
    ) -> Callable[[pd.DataFrame], pd.DataFrame]:
        """Turn an array-based callable into a dataframe-based callable."""

        @functools.wraps(fn)
        def wrapper(df: pd.DataFrame, /) -> pd.DataFrame:
            """Translate to/from an array-based callable using dataframes."""
            # Select (and order) the input columns as requested by the labels
            array_in = df[input_cols].to_numpy()
            array_out = fn(array_in)
            # Reuse the incoming index so that output rows align with input rows
            return pd.DataFrame(array_out, columns=output_cols, index=df.index)

        return wrapper

    return decorator
13 changes: 9 additions & 4 deletions benchmarks/domains/synthetic_2C1D_1C.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import TYPE_CHECKING

import numpy as np
import pandas as pd
from numpy import pi, sin, sqrt
from pandas import DataFrame

Expand All @@ -13,7 +14,6 @@
from baybe.recommenders import RandomRecommender
from baybe.searchspace import SearchSpace
from baybe.simulation import simulate_scenarios
from baybe.simulation.lookup import label_columns
from baybe.targets import NumericalTarget
from benchmarks.definition import (
Benchmark,
Expand All @@ -25,7 +25,7 @@


def _lookup(arr: np.ndarray, /) -> np.ndarray:
"""Lookup that is used internally in the callable for the benchmark."""
"""Numpy-based lookup callable defining the objective function."""
x, y, z = np.array_split(arr, 3, axis=1)
try:
assert np.all(-2 * pi <= x) and np.all(x <= 2 * pi)
Expand All @@ -42,6 +42,13 @@ def _lookup(arr: np.ndarray, /) -> np.ndarray:
)


def lookup(df: pd.DataFrame, /) -> pd.DataFrame:
    """Dataframe-based lookup callable used as the loop-closing element.

    Extracts the ``x``, ``y`` and ``z`` columns as an array, passes that array to the
    numpy-based ``_lookup`` callable, and wraps the result in a dataframe with a
    single ``target`` column that shares the index of the input.

    Args:
        df: The dataframe providing the ``x``, ``y`` and ``z`` columns.

    Returns:
        A dataframe with the column ``target``, indexed like ``df``.
    """
    inputs = df[["x", "y", "z"]].to_numpy()
    values = _lookup(inputs)
    return pd.DataFrame(values, columns=["target"], index=df.index)


def synthetic_2C1D_1C(settings: ConvergenceExperimentSettings) -> DataFrame:
"""Hybrid synthetic test function.
Expand Down Expand Up @@ -78,8 +85,6 @@ def synthetic_2C1D_1C(settings: ConvergenceExperimentSettings) -> DataFrame:
),
}

lookup = label_columns([p.name for p in parameters], [target.name])(_lookup)

return simulate_scenarios(
scenarios,
lookup,
Expand Down
17 changes: 7 additions & 10 deletions examples/Backtesting/custom_analytical.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@

import os

import numpy as np
import pandas as pd
import seaborn as sns

from baybe import Campaign
from baybe.parameters.numerical import NumericalContinuousParameter
from baybe.recommenders import RandomRecommender
from baybe.searchspace import SearchSpace
from baybe.simulation import label_columns, simulate_scenarios
from baybe.simulation import simulate_scenarios
from baybe.targets import NumericalTarget
from baybe.utils.plotting import create_example_plots

Expand Down Expand Up @@ -44,16 +44,13 @@
target = NumericalTarget(name="Target", mode="MIN")


# Based on the above, we construct the black-box callable to be optimized.
# Using the {func}`~baybe.simulation.lookup.label_columns` decorator, we can easily map
# the columns of the raw input/output arrays to our parameter and target objects, which
# creates the required dataframe-based lookup for the optimization loop:
# Based on the above, we construct the black-box callable to be optimized, which
# provides the lookup mechanism for closing the optimization loop:


@label_columns([p.name for p in parameters], [target.name])
def sum_of_squares(x: np.ndarray, /) -> np.ndarray:
def blackbox(df: pd.DataFrame, /) -> pd.DataFrame:
"""Calculate the sum of squares."""
return (x**2).sum(axis=1, keepdims=True)
return (df[[p.name for p in parameters]] ** 2).sum(axis=1).to_frame(target.name)


# What remains is to construct the search space and objective for the optimization:
Expand Down Expand Up @@ -89,7 +86,7 @@ def sum_of_squares(x: np.ndarray, /) -> np.ndarray:
}
results = simulate_scenarios(
scenarios,
sum_of_squares,
blackbox,
batch_size=BATCH_SIZE,
n_doe_iterations=N_DOE_ITERATIONS,
n_mc_iterations=N_MC_ITERATIONS,
Expand Down

0 comments on commit 4767692

Please sign in to comment.