Merge pull request #93 from stefanradev93/Development

stefanradev93 · web-flow · commit 98d895c024bf · 2023-08-13T18:17:17.000+03:00
Development
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -67,3 +67,10 @@ General Improvements:
 1. Bugfix in ``SetTransformer`` affecting saving and loading when using the version with inducing points.
 2. Bugfix in ``SetTransformer`` when using ``train_offline`` and batches result in unequal shapes.
 3. Improved documentation with examples
+
+1.1.3 Series
+------------
+
+1. Bugfix in ``SimulationMemory`` affecting the use of empty folders for initializing a ``Trainer``
+2. Bugfix in ``Trainer.train_from_presimulation()`` for model comparison tasks
+3. Added a classifier two-sample test function ``c2st`` in ``computational_utilities``
diff --git a/bayesflow/computational_utilities.py b/bayesflow/computational_utilities.py
@@ -22,6 +22,8 @@
 import tensorflow as tf
 from scipy import stats
 from sklearn.calibration import calibration_curve
+from sklearn.model_selection import KFold, cross_val_score
+from sklearn.neural_network import MLPClassifier
 
 from bayesflow.default_settings import MMD_BANDWIDTH_LIST
 from bayesflow.exceptions import ShapeError
@@ -517,3 +519,87 @@ def aggregated_rmse(x_true, x_pred):
     return aggregated_error(
         x_true=x_true, x_pred=x_pred, inner_error_fun=root_mean_squared_error, outer_aggregation_fun=np.mean
     )
+
+
+def c2st(
+    source_samples,
+    target_samples,
+    n_folds=5,
+    scoring="accuracy",
+    normalize=True,
+    seed=123,
+    hidden_units_per_dim=16,
+    aggregate_output=True,
+):
+    """C2ST metric [1] using an sklearn neural network classifier (i.e., MLP).
+    Code adapted from https://github.com/sbi-benchmark/sbibm/blob/main/sbibm/metrics/c2st.py
+
+    [1] Lopez-Paz, D., & Oquab, M. (2016). Revisiting classifier two-sample tests. arXiv:1610.06545.
+
+    Parameters
+    ----------
+    source_samples       : np.ndarray or tf.Tensor
+        Source samples (e.g., approximate posterior samples)
+    target_samples       : np.ndarray or tf.Tensor
+        Target samples (e.g., samples from a reference posterior)
+    n_folds              : int, optional, default: 5
+        Number of folds in k-fold cross-validation for the classifier evaluation
+    scoring              : str, optional, default: "accuracy"
+        Evaluation score of the sklearn MLP classifier
+    normalize            : bool, optional, default: True
+        Whether the data shall be z-standardized relative to source_samples
+    seed                 : int, optional, default: 123
+        RNG seed for the MLP and k-fold CV
+    hidden_units_per_dim : int, optional, default: 16
+        Number of hidden units in the MLP, relative to the input dimensions.
+        Example: source samples are 5D, hidden_units_per_dim=16 -> 80 hidden units per layer
+    aggregate_output     : bool, optional, default: True
+        Whether to return a single value aggregated over all cross-validation runs
+        or all values from all runs. If left at default, the empirical mean will be returned
+
+    Returns
+    -------
+    c2st_score  :  np.ndarray
+        Mean score (0-d float32 array) if ``aggregate_output``, else the per-fold scores, shape (n_folds,)
+
+    Raises
+    ------
+    ShapeError
+        If the inputs are not both 2D or differ in their dimensionality (2nd dim)
+    """
+    x = np.array(source_samples)
+    y = np.array(target_samples)
+
+    if x.ndim != 2 or y.ndim != 2 or x.shape[1] != y.shape[1]:
+        raise ShapeError(
+            "source_samples and target_samples can have different number of observations (1st dim) "
+            "but must both be 2D and have the same dimensionality (2nd dim); "
+            f"found shapes: source_samples {x.shape}, target_samples {y.shape}"
+        )
+    num_dims = x.shape[1]
+
+    if normalize:
+        x_mean = np.mean(x, axis=0)
+        x_std = np.std(x, axis=0)
+        # Constant source dimensions have zero std; scale them by 1 to avoid NaNs from 0 / 0
+        x_std = np.where(x_std > 0, x_std, 1.0)
+        x = (x - x_mean) / x_std
+        y = (y - x_mean) / x_std
+
+    clf = MLPClassifier(
+        activation="relu",
+        hidden_layer_sizes=(hidden_units_per_dim * num_dims, hidden_units_per_dim * num_dims),
+        max_iter=10000,
+        solver="adam",
+        random_state=seed,
+    )
+
+    data = np.concatenate((x, y))
+    target = np.concatenate((np.zeros((x.shape[0],)), np.ones((y.shape[0],))))
+
+    shuffle = KFold(n_splits=n_folds, shuffle=True, random_state=seed)
+    scores = cross_val_score(clf, data, target, cv=shuffle, scoring=scoring)
+
+    if aggregate_output:
+        return np.asarray(np.mean(scores)).astype(np.float32)
+    return scores
diff --git a/bayesflow/helper_classes.py b/bayesflow/helper_classes.py
@@ -744,7 +744,7 @@ def load_from_file(self, file_path):
         memory_path = os.path.join(file_path, f"{SimulationMemory.file_name}.pkl")
 
         # Case memory file exists
-        if os.path.exists(file_path):
+        if os.path.exists(memory_path):
             # Load pickle and fill in attributes
             with open(memory_path, "rb") as f:
                 full_memory_dict = pickle.load(f)
diff --git a/tests/test_computational_utilities.py b/tests/test_computational_utilities.py
@@ -3,8 +3,9 @@
 import pytest
 import numpy as np
 from bayesflow import computational_utilities
-from bayesflow.exceptions import ArgumentError
+from bayesflow.exceptions import ArgumentError, ShapeError
 from bayesflow.trainers import Trainer
+import tensorflow as tf
 
 
 @pytest.mark.parametrize("x_true, x_pred, output",
@@ -93,3 +94,44 @@ def test_aggregated_error(x_true, x_pred, inner_error_fun, outer_aggregation_fun
         outer_aggregation_fun=outer_aggregation_fun
     )
     assert aggregated_error_result == pytest.approx(output)
+
+
+def test_c2st_shape_error():
+    """c2st must raise ShapeError when the two sample sets differ in their 2nd dim."""
+    two_dim, three_dim = np.random.random(size=(5, 2)), np.random.random(size=(5, 3))
+    with pytest.raises(ShapeError):
+        computational_utilities.c2st(two_dim, three_dim)
+
+
+@pytest.mark.parametrize(
+    "source_samples, target_samples",
+    [
+        (np.random.random((5, 2)), np.random.random((5, 2))),
+        (np.random.random((10, 2)), np.random.random((5, 2))),
+        (tf.constant(np.random.random((5, 2))), tf.constant(np.random.random((5, 2)))),
+    ],
+)
+def test_c2st(source_samples, target_samples):
+    """With default arguments, the C2ST score must lie in the closed interval [0, 1]."""
+    assert 0.0 <= computational_utilities.c2st(source_samples, target_samples) <= 1.0
+
+
+@pytest.mark.parametrize(
+    "n_folds, scoring, normalize, seed, hidden_units_per_dim",
+    [
+        (3, "accuracy", False, 42, 5),
+        (7, "f1", True, 12, 10),
+    ],
+)
+def test_c2st_params(n_folds, scoring, normalize, seed, hidden_units_per_dim):
+    """Smoke test: c2st runs without raising for non-default keyword arguments."""
+    c2st_kwargs = dict(
+        source_samples=np.random.random((5, 2)),
+        target_samples=np.random.random((10, 2)),
+        n_folds=n_folds,
+        scoring=scoring,
+        normalize=normalize,
+        seed=seed,
+        hidden_units_per_dim=hidden_units_per_dim,
+    )
+    computational_utilities.c2st(**c2st_kwargs)