
Commit 37922a0: Refactor

rthibaut committed Jul 8, 2024
1 parent 1170f20 commit 37922a0
Showing 7 changed files with 109 additions and 156 deletions.
71 changes: 18 additions & 53 deletions docs/examples/demo.ipynb

Large diffs are not rendered by default.

94 changes: 47 additions & 47 deletions skbel/algorithms/statistics.py
@@ -86,15 +86,15 @@ class KDE:
"""

def __init__(
-        self,
-        *,
-        kernel_type: str = None,
-        bandwidth: float = None,
-        grid_search: bool = True,
-        bandwidth_space: np.array = None,
-        gridsize: int = 200,
-        cut: float = 1,
-        clip: list = None,
+    self,
+    *,
+    kernel_type: str = None,
+    bandwidth: float = None,
+    grid_search: bool = True,
+    bandwidth_space: np.array = None,
+    gridsize: int = 200,
+    cut: float = 1,
+    clip: list = None,
):
"""Initialize the estimator with its parameters.
@@ -130,7 +130,7 @@ def __init__(

@staticmethod
def _define_support_grid(
-        x: np.array, bandwidth: float, cut: float, clip: list, gridsize: int
+    x: np.array, bandwidth: float, cut: float, clip: list, gridsize: int
):
"""Create the grid of evaluation points depending for vector x.
@@ -182,10 +182,10 @@ def _define_support_bivariate(self, x1: np.array, x2: np.array):
return grid1, grid2
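
Aside: the support-grid construction above amounts to widening the data range by cut bandwidths and clipping. A minimal sketch (the function name and the seaborn-style clipping rule are assumptions for illustration, not skbel's exact code):

    import numpy as np

    def support_grid_sketch(x, bandwidth, cut=1, clip=(-np.inf, np.inf), gridsize=200):
        # Widen the data range by `cut` bandwidths on each side, then clip it.
        lo = max(x.min() - bandwidth * cut, clip[0])
        hi = min(x.max() + bandwidth * cut, clip[1])
        return np.linspace(lo, hi, gridsize)  # gridsize evaluation points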

def define_support(
-        self,
-        x1: np.array,
-        x2: np.array = None,
-        cache: bool = True,
+    self,
+    x1: np.array,
+    x2: np.array = None,
+    cache: bool = True,
):
"""Create the evaluation grid for a given data set.
@@ -295,8 +295,8 @@ def __call__(self, x1, x2=None):


def _univariate_density(
-        data_variable: pd.DataFrame,
-        estimate_kws: dict,
+    data_variable: pd.DataFrame,
+    estimate_kws: dict,
):
"""Estimate the density of a single variable.
@@ -324,8 +324,8 @@ def _univariate_density(


def _bivariate_density(
-        data: pd.DataFrame,
-        estimate_kws: dict,
+    data: pd.DataFrame,
+    estimate_kws: dict,
):
"""Estimate bivariate KDE.
@@ -354,13 +354,13 @@ def _bivariate_density(


def kde_params(
-        x: np.array = None,
-        y: np.array = None,
-        bw: float = None,
-        bandwidth_space=None,
-        gridsize: int = 200,
-        cut: float = 1,
-        clip=None,
+    x: np.array = None,
+    y: np.array = None,
+    bw: float = None,
+    bandwidth_space=None,
+    gridsize: int = 200,
+    cut: float = 1,
+    clip=None,
):
"""Computes the kernel density estimate (KDE) of one or two data sets.
@@ -431,12 +431,12 @@ def _pixel_coordinate(line: list, x_1d: np.array, y_1d: np.array, k: int = None)


def _conditional_distribution(
-        kde_array: np.array,
-        x_array: np.array,
-        y_array: np.array,
-        x: float = None,
-        y: float = None,
-        k: int = None,
+    kde_array: np.array,
+    x_array: np.array,
+    y_array: np.array,
+    x: float = None,
+    y: float = None,
+    k: int = None,
):
"""Compute the conditional posterior distribution p(x_array|y_array) given
x or y. Provide only one observation! Either x or y. Perform a cross-
@@ -493,11 +493,11 @@ def _scale_distribution(post: np.array, support: np.array) -> np.array:


def posterior_conditional(
-        X_obs: float = None,
-        Y_obs: float = None,
-        dens: np.array = None,
-        support: np.array = None,
-        k: int = None,
+    X_obs: float = None,
+    Y_obs: float = None,
+    dens: np.array = None,
+    support: np.array = None,
+    k: int = None,
) -> (np.array, np.array):
"""Computes the posterior distribution p(y|x_obs) or p(x|y_obs) by doing a
cross-section of the KDE of (d, h).
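
The cross-section itself reduces to picking the grid column nearest the observed value and renormalizing. A sketch, assuming the first axis of the density grid indexes the y support:

    import numpy as np

    def cross_section_sketch(dens, x_grid, y_grid, x_obs):
        j = np.argmin(np.abs(x_grid - x_obs))  # grid column nearest the observation
        post = dens[:, j].copy()  # slice of the joint KDE at x close to x_obs
        post /= np.trapz(post, y_grid)  # renormalize into a proper pdf over y
        return y_grid, post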
@@ -537,7 +537,7 @@ def posterior_conditional(


def mvn_inference(
-        X: np.array, Y: np.array, X_obs: np.array, **kwargs
+    X: np.array, Y: np.array, X_obs: np.array, **kwargs
) -> (np.array, np.array):
"""Estimates the posterior mean and covariance of the target.
Note that in this implementation, n_samples must be = 1.
@@ -585,7 +585,7 @@ def mvn_inference(
x_ls_predicted = np.matmul(Y, g.T) # noqa
x_modeling_mean_error = np.mean(X - x_ls_predicted, axis=0) # (n_comp_CCA, 1)
x_modeling_error = (
-        X - x_ls_predicted - np.tile(x_modeling_mean_error, (n_training, 1))
+    X - x_ls_predicted - np.tile(x_modeling_mean_error, (n_training, 1))
)
# (n_comp_CCA, n_training)

@@ -611,7 +611,7 @@
y_posterior_covariance = np.linalg.pinv(d11) # (n_comp_CCA, n_comp_CCA)
# Computing the posterior mean is simply a linear operation, given precomputed posterior covariance.
y_posterior_mean = y_posterior_covariance @ (
-        d11 @ y_mean - d12 @ (X_obs[0] - x_modeling_mean_error - y_mean @ g.T)  # noqa
+    d11 @ y_mean - d12 @ (X_obs[0] - x_modeling_mean_error - y_mean @ g.T)  # noqa
) # (n_comp_CCA,)

return y_posterior_mean, y_posterior_covariance
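
For reference, this looks like the textbook linear-Gaussian update written in information form, with d11 playing the role of the posterior precision. A sketch of the equivalent standard formulation, with illustrative variable names:

    import numpy as np

    def gaussian_posterior_sketch(G, y_mean, y_cov, noise_cov, x_obs, noise_mean=0.0):
        # Posterior precision = prior precision + data precision mapped through G.
        y_prec = np.linalg.pinv(y_cov)
        noise_prec = np.linalg.pinv(noise_cov)
        post_prec = y_prec + G.T @ noise_prec @ G
        post_cov = np.linalg.pinv(post_prec)
        # The posterior mean is then a linear function of the corrected observation.
        post_mean = post_cov @ (y_prec @ y_mean + G.T @ noise_prec @ (x_obs - noise_mean))
        return post_mean, post_cov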
@@ -673,7 +673,7 @@ def cdf_number(x):
else:
d = np.abs(x - lower_bound)
if d > 1e-4: # Check that spacing isn't too small
-        samples = np.linspace(lower_bound, x, 2 ** 7 + 1)
+        samples = np.linspace(lower_bound, x, 2**7 + 1)
dx = np.abs(samples[1] - samples[0])
y = np.array([pdf_norm(s) for s in samples])
return romb(y, dx)
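
The 2**7 + 1 sample count is not arbitrary: scipy.integrate.romb requires 2**k + 1 equally spaced samples. A self-contained check with an illustrative integrand (a standard normal pdf):

    import numpy as np
    from scipy.integrate import romb

    xs = np.linspace(0.0, 3.0, 2**7 + 1)  # romb needs 2**k + 1 points
    dx = xs[1] - xs[0]
    pdf_vals = np.exp(-xs**2 / 2) / np.sqrt(2 * np.pi)
    area = romb(pdf_vals, dx)  # about 0.4987, i.e. Phi(3) - Phi(0)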
@@ -695,13 +695,13 @@ def cdf_vector(x):


def it_sampling(
-        pdf,
-        num_samples: int = 1,
-        lower_bd=-np.inf,
-        upper_bd=np.inf,
-        k: int = None,
-        cdf_y: np.array = None,
-        return_cdf: bool = False,
+    pdf,
+    num_samples: int = 1,
+    lower_bd=-np.inf,
+    upper_bd=np.inf,
+    k: int = None,
+    cdf_y: np.array = None,
+    return_cdf: bool = False,
):
"""Sample from an arbitrary, un-normalized PDF.
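it_sampling is inverse-transform sampling: tabulate the CDF of the target pdf, normalize it, and invert it against uniform draws. A minimal sketch that swaps the library's Romberg-based CDF for a plain cumulative sum, so the name and internals are simplifications rather than skbel's code:

    import numpy as np

    def it_sampling_sketch(pdf, num_samples=1, lower_bd=-10.0, upper_bd=10.0, k=2**7 + 1):
        xs = np.linspace(lower_bd, upper_bd, k)
        cdf = np.cumsum([pdf(x) for x in xs])  # crude unnormalized CDF
        cdf = (cdf - cdf[0]) / (cdf[-1] - cdf[0])  # normalize to [0, 1]
        u = np.random.uniform(0.0, 1.0, num_samples)  # uniform draws
        return np.interp(u, cdf, xs)  # invert the CDF by interpolation
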
31 changes: 20 additions & 11 deletions skbel/bnn/models.py
@@ -3,9 +3,9 @@
# EXPERIMENTAL FUNCTIONALITY -- USE AT YOUR OWN RISK

import tensorflow as tf
+import tensorflow_probability as tfp
from sklearn.base import TransformerMixin, MultiOutputMixin, BaseEstimator
from tensorflow import keras as tfk
-import tensorflow_probability as tfp

from skbel.nn_utilities import (
prior_regularize,
@@ -377,23 +377,32 @@ def classic_pnn_model(input_shape, output_dim, n_hidden, num_components, learn_r
"""

# Input block
-inputs = Input(shape=input_shape, name="input")  # Input layer
-x = Dense(n_hidden)(inputs)  # Simple dense layer with n_hidden units
-x = Activation("relu")(x)  # ReLU activation function for non-linearity
+inputs = tfk.layers.Input(shape=input_shape, name="input")  # Input layer
+x = tfk.layers.Dense(n_hidden)(inputs)  # Simple dense layer with n_hidden units
+x = tfk.layers.Activation("relu")(x)  # ReLU activation function for non-linearity

# Output block for Mixture Density Network
-params_size = tfp.layers.MixtureNormal.params_size(num_components, output_dim)  # The number of parameters for
+params_size = tfp.layers.MixtureNormal.params_size(
+    num_components, output_dim
+)  # The number of parameters for
# the mixture model
-output_params = Dense(params_size, activation=None, name="output")(x)  # Dense layer to compute the mixture
+output_params = tfk.layers.Dense(params_size, activation=None, name="output")(
+    x
+)  # Dense layer to compute the mixture
# density network parameters
-outputs = tfp.layers.MixtureNormal(num_components, output_dim)(output_params)  # MixtureNormal layer defining a
+outputs = tfp.layers.MixtureNormal(num_components, output_dim)(
+    output_params
+)  # MixtureNormal layer defining a
# mixture of normal distributions for each output variable

# Create and compile the model
-model_ = Model(inputs=inputs, outputs=outputs)  # Define the model
-optimizer = tf.keras.optimizers.Adam(learning_rate=learn_r)  # Adam optimizer with the specified learning rate
-model_.compile(optimizer=optimizer, loss=neg_log_likelihood)  # Compile the model with the negative
+model_ = tfk.Model(inputs=inputs, outputs=outputs)  # Define the model
+optimizer = tf.keras.optimizers.Adam(
+    learning_rate=learn_r
+)  # Adam optimizer with the specified learning rate
+model_.compile(
+    optimizer=optimizer, loss=neg_log_likelihood
+)  # Compile the model with the negative
# log-likelihood loss function

return model_
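
A hedged usage sketch for classic_pnn_model; the shapes and hyperparameters are invented, and neg_log_likelihood is assumed to be the usual TFP loss, lambda y, rv_y: -rv_y.log_prob(y):

    import numpy as np

    model = classic_pnn_model(
        input_shape=(10,), output_dim=2, n_hidden=64, num_components=3, learn_r=1e-3
    )
    X = np.random.rand(256, 10).astype("float32")
    Y = np.random.rand(256, 2).astype("float32")
    model.fit(X, Y, epochs=5, batch_size=32, verbose=0)

    y_dist = model(X[:5])  # the MixtureNormal head returns a distribution object
    y_mean = y_dist.mean().numpy()  # point predictions from the mixture mean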

10 changes: 5 additions & 5 deletions skbel/learning/bel.py
@@ -390,7 +390,7 @@ def predict(
X_obs=dp.T[comp_n],
dens=dens,
support=support,
-    k=2 ** 7 + 1,
+    k=2**7 + 1,
)
hp[np.abs(hp) < 1e-8] = 0 # Set very small values to 0.
kind = "pdf"
@@ -495,7 +495,7 @@ def random_sample(

# Set the seed for later use
if self.seed is None:
-    self.seed = np.random.randint(2 ** 32 - 1, dtype="uint32")
+    self.seed = np.random.randint(2**32 - 1, dtype="uint32")

if X_obs_f is None:
X_obs_f = self.X_obs_f
@@ -547,7 +547,7 @@ def random_sample(
num_samples=self.n_posts,
lower_bd=pdf.x.min(),
upper_bd=pdf.x.max(),
-    k=2 ** 7 + 1,
+    k=2**7 + 1,
)
elif (
fun["kind"] == "linear"
@@ -571,7 +571,7 @@
num_samples=self.n_posts,
lower_bd=pdf.x.min(),
upper_bd=pdf.x.max(),
-    k=2 ** 7 + 1,
+    k=2**7 + 1,
cdf_y=pv,
)
elif fun["kind"] == "linear":
@@ -648,7 +648,7 @@ def kde_init(self, X_obs_f: np.array, obs_n: int = None):
pdf=pdf,
lower_bd=pdf.x.min(),
upper_bd=pdf.x.max(),
-    k=2 ** 7
+    k=2**7
+ 1, # Number of samples. It is a power of 2 + 1 because Romberg integration will be used
return_cdf=True,
)
10 changes: 7 additions & 3 deletions skbel/nn_utilities/functions.py
@@ -2,8 +2,8 @@

import numpy as np
import tensorflow as tf
-from tensorflow import keras as tfk
import tensorflow_probability as tfp
+from tensorflow import keras as tfk
from tensorflow_probability import distributions as tfd

__all__ = [
@@ -49,9 +49,13 @@ def posterior_mean_field(kernel_size, bias_size=0, dtype=None):
lambda t: tfd.Independent(
tfd.Normal(
loc=t[..., :n],
-    scale=1e-5 + 1e-2 * tf.nn.softplus(c + t[..., n:]),  # softplus ensures positivity and avoids numerical instability
+    scale=1e-5
+    + 1e-2
+    * tf.nn.softplus(
+        c + t[..., n:]
+    ),  # softplus ensures positivity and avoids numerical instability
),
-    reinterpreted_batch_ndims=1, # each weight is independent
+    reinterpreted_batch_ndims=1,  # each weight is independent
) # reinterpreted_batch_ndims=1 means that the last dimension is the event dimension
),
]
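
This is the standard TFP mean-field posterior pattern. A self-contained sketch of the whole function, assuming (as in the TFP tutorials) a VariableLayer holding one trainable loc and one raw scale per weight, with c = softplus^-1(1):

    import tensorflow as tf
    import tensorflow_probability as tfp

    tfd = tfp.distributions

    def posterior_mean_field_sketch(kernel_size, bias_size=0, dtype=None):
        n = kernel_size + bias_size
        c = tf.math.log(tf.math.expm1(1.0))  # softplus^-1(1), so scales start near 1e-2
        return tf.keras.Sequential([
            tfp.layers.VariableLayer(2 * n, dtype=dtype),  # trainable loc and raw scale
            tfp.layers.DistributionLambda(
                lambda t: tfd.Independent(
                    tfd.Normal(
                        loc=t[..., :n],
                        scale=1e-5 + 1e-2 * tf.nn.softplus(c + t[..., n:]),
                    ),
                    reinterpreted_batch_ndims=1,  # all weights form one event
                )
            ),
        ])

A function with this signature is what tfp.layers.DenseVariational expects as its make_posterior_fn.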
2 changes: 1 addition & 1 deletion skbel/probabilities/functions.py
@@ -2,8 +2,8 @@

import numpy as np
import tensorflow as tf
-from tensorflow import keras
import tensorflow_probability as tfp
+from tensorflow import keras

tfd = tfp.distributions

