
Commit 37922a0: Refactor

rthibaut committed Jul 8, 2024
1 parent 1170f20 commit 37922a0
Showing 7 changed files with 109 additions and 156 deletions.
71 changes: 18 additions & 53 deletions docs/examples/demo.ipynb

Large diffs are not rendered by default.

94 changes: 47 additions & 47 deletions skbel/algorithms/statistics.py
@@ -86,15 +86,15 @@ class KDE:
"""

def __init__(
-        self,
-        *,
-        kernel_type: str = None,
-        bandwidth: float = None,
-        grid_search: bool = True,
-        bandwidth_space: np.array = None,
-        gridsize: int = 200,
-        cut: float = 1,
-        clip: list = None,
+    self,
+    *,
+    kernel_type: str = None,
+    bandwidth: float = None,
+    grid_search: bool = True,
+    bandwidth_space: np.array = None,
+    gridsize: int = 200,
+    cut: float = 1,
+    clip: list = None,
):
"""Initialize the estimator with its parameters.
@@ -130,7 +130,7 @@ def __init__(

@staticmethod
def _define_support_grid(
-        x: np.array, bandwidth: float, cut: float, clip: list, gridsize: int
+    x: np.array, bandwidth: float, cut: float, clip: list, gridsize: int
):
"""Create the grid of evaluation points depending for vector x.
@@ -182,10 +182,10 @@ def _define_support_bivariate(self, x1: np.array, x2: np.array):
return grid1, grid2
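
Aside: the support-grid construction above amounts to widening the data range by cut bandwidths and clipping. A minimal sketch (the function name and the seaborn-style clipping rule are assumptions for illustration, not skbel's exact code):

    import numpy as np

    def support_grid_sketch(x, bandwidth, cut=1, clip=(-np.inf, np.inf), gridsize=200):
        # Widen the data range by `cut` bandwidths on each side, then clip it.
        lo = max(x.min() - bandwidth * cut, clip[0])
        hi = min(x.max() + bandwidth * cut, clip[1])
        return np.linspace(lo, hi, gridsize)  # gridsize evaluation points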

def define_support(
-        self,
-        x1: np.array,
-        x2: np.array = None,
-        cache: bool = True,
+    self,
+    x1: np.array,
+    x2: np.array = None,
+    cache: bool = True,
):
"""Create the evaluation grid for a given data set.
@@ -295,8 +295,8 @@ def __call__(self, x1, x2=None):


def _univariate_density(
-        data_variable: pd.DataFrame,
-        estimate_kws: dict,
+    data_variable: pd.DataFrame,
+    estimate_kws: dict,
):
"""Estimate the density of a single variable.
@@ -324,8 +324,8 @@ def _univariate_density(


def _bivariate_density(
-        data: pd.DataFrame,
-        estimate_kws: dict,
+    data: pd.DataFrame,
+    estimate_kws: dict,
):
"""Estimate bivariate KDE.
@@ -354,13 +354,13 @@ def _bivariate_density(


def kde_params(
-        x: np.array = None,
-        y: np.array = None,
-        bw: float = None,
-        bandwidth_space=None,
-        gridsize: int = 200,
-        cut: float = 1,
-        clip=None,
+    x: np.array = None,
+    y: np.array = None,
+    bw: float = None,
+    bandwidth_space=None,
+    gridsize: int = 200,
+    cut: float = 1,
+    clip=None,
):
"""Computes the kernel density estimate (KDE) of one or two data sets.
@@ -431,12 +431,12 @@ def _pixel_coordinate(line: list, x_1d: np.array, y_1d: np.array, k: int = None)


def _conditional_distribution(
-        kde_array: np.array,
-        x_array: np.array,
-        y_array: np.array,
-        x: float = None,
-        y: float = None,
-        k: int = None,
+    kde_array: np.array,
+    x_array: np.array,
+    y_array: np.array,
+    x: float = None,
+    y: float = None,
+    k: int = None,
):
"""Compute the conditional posterior distribution p(x_array|y_array) given
x or y. Provide only one observation! Either x or y. Perform a cross-
@@ -493,11 +493,11 @@ def _scale_distribution(post: np.array, support: np.array) -> np.array:


def posterior_conditional(
-        X_obs: float = None,
-        Y_obs: float = None,
-        dens: np.array = None,
-        support: np.array = None,
-        k: int = None,
+    X_obs: float = None,
+    Y_obs: float = None,
+    dens: np.array = None,
+    support: np.array = None,
+    k: int = None,
) -> (np.array, np.array):
"""Computes the posterior distribution p(y|x_obs) or p(x|y_obs) by doing a
cross-section of the KDE of (d, h).
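
The cross-section itself reduces to picking the grid column nearest the observed value and renormalizing. A sketch, assuming the first axis of the density grid indexes the y support:

    import numpy as np

    def cross_section_sketch(dens, x_grid, y_grid, x_obs):
        j = np.argmin(np.abs(x_grid - x_obs))  # grid column nearest the observation
        post = dens[:, j].copy()  # slice of the joint KDE at x close to x_obs
        post /= np.trapz(post, y_grid)  # renormalize into a proper pdf over y
        return y_grid, post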
@@ -537,7 +537,7 @@ def posterior_conditional(


def mvn_inference(
-        X: np.array, Y: np.array, X_obs: np.array, **kwargs
+    X: np.array, Y: np.array, X_obs: np.array, **kwargs
) -> (np.array, np.array):
"""Estimates the posterior mean and covariance of the target.
Note that in this implementation, n_samples must be = 1.
@@ -585,7 +585,7 @@ def mvn_inference(
x_ls_predicted = np.matmul(Y, g.T) # noqa
x_modeling_mean_error = np.mean(X - x_ls_predicted, axis=0) # (n_comp_CCA, 1)
x_modeling_error = (
-        X - x_ls_predicted - np.tile(x_modeling_mean_error, (n_training, 1))
+    X - x_ls_predicted - np.tile(x_modeling_mean_error, (n_training, 1))
)
# (n_comp_CCA, n_training)

@@ -611,7 +611,7 @@
y_posterior_covariance = np.linalg.pinv(d11) # (n_comp_CCA, n_comp_CCA)
# Computing the posterior mean is simply a linear operation, given precomputed posterior covariance.
y_posterior_mean = y_posterior_covariance @ (
-        d11 @ y_mean - d12 @ (X_obs[0] - x_modeling_mean_error - y_mean @ g.T)  # noqa
+    d11 @ y_mean - d12 @ (X_obs[0] - x_modeling_mean_error - y_mean @ g.T)  # noqa
) # (n_comp_CCA,)

return y_posterior_mean, y_posterior_covariance
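
For reference, this looks like the textbook linear-Gaussian update written in information form, with d11 playing the role of the posterior precision. A sketch of the equivalent standard formulation, with illustrative variable names:

    import numpy as np

    def gaussian_posterior_sketch(G, y_mean, y_cov, noise_cov, x_obs, noise_mean=0.0):
        # Posterior precision = prior precision + data precision mapped through G.
        y_prec = np.linalg.pinv(y_cov)
        noise_prec = np.linalg.pinv(noise_cov)
        post_prec = y_prec + G.T @ noise_prec @ G
        post_cov = np.linalg.pinv(post_prec)
        # The posterior mean is then a linear function of the corrected observation.
        post_mean = post_cov @ (y_prec @ y_mean + G.T @ noise_prec @ (x_obs - noise_mean))
        return post_mean, post_cov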
@@ -673,7 +673,7 @@ def cdf_number(x):
else:
d = np.abs(x - lower_bound)
if d > 1e-4: # Check that spacing isn't too small
-        samples = np.linspace(lower_bound, x, 2 ** 7 + 1)
+        samples = np.linspace(lower_bound, x, 2**7 + 1)
dx = np.abs(samples[1] - samples[0])
y = np.array([pdf_norm(s) for s in samples])
return romb(y, dx)
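
The 2**7 + 1 sample count is not arbitrary: scipy.integrate.romb requires 2**k + 1 equally spaced samples. A self-contained check with an illustrative integrand (a standard normal pdf):

    import numpy as np
    from scipy.integrate import romb

    xs = np.linspace(0.0, 3.0, 2**7 + 1)  # romb needs 2**k + 1 points
    dx = xs[1] - xs[0]
    pdf_vals = np.exp(-xs**2 / 2) / np.sqrt(2 * np.pi)
    area = romb(pdf_vals, dx)  # about 0.4987, i.e. Phi(3) - Phi(0)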
@@ -695,13 +695,13 @@ def cdf_vector(x):


def it_sampling(
-        pdf,
-        num_samples: int = 1,
-        lower_bd=-np.inf,
-        upper_bd=np.inf,
-        k: int = None,
-        cdf_y: np.array = None,
-        return_cdf: bool = False,
+    pdf,
+    num_samples: int = 1,
+    lower_bd=-np.inf,
+    upper_bd=np.inf,
+    k: int = None,
+    cdf_y: np.array = None,
+    return_cdf: bool = False,
):
"""Sample from an arbitrary, un-normalized PDF.
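it_sampling is inverse-transform sampling: tabulate the CDF of the target pdf, normalize it, and invert it against uniform draws. A minimal sketch that swaps the library's Romberg-based CDF for a plain cumulative sum, so the name and internals are simplifications rather than skbel's code:

    import numpy as np

    def it_sampling_sketch(pdf, num_samples=1, lower_bd=-10.0, upper_bd=10.0, k=2**7 + 1):
        xs = np.linspace(lower_bd, upper_bd, k)
        cdf = np.cumsum([pdf(x) for x in xs])  # crude unnormalized CDF
        cdf = (cdf - cdf[0]) / (cdf[-1] - cdf[0])  # normalize to [0, 1]
        u = np.random.uniform(0.0, 1.0, num_samples)  # uniform draws
        return np.interp(u, cdf, xs)  # invert the CDF by interpolation
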
31 changes: 20 additions & 11 deletions skbel/bnn/models.py
@@ -3,9 +3,9 @@
# EXPERIMENTAL FUNCTIONALITY -- USE AT YOUR OWN RISK

import tensorflow as tf
+import tensorflow_probability as tfp
from sklearn.base import TransformerMixin, MultiOutputMixin, BaseEstimator
from tensorflow import keras as tfk
-import tensorflow_probability as tfp

from skbel.nn_utilities import (
prior_regularize,
@@ -377,23 +377,32 @@ def classic_pnn_model(input_shape, output_dim, n_hidden, num_components, learn_r
"""

# Input block
-inputs = Input(shape=input_shape, name="input")  # Input layer
-x = Dense(n_hidden)(inputs)  # Simple dense layer with n_hidden units
-x = Activation("relu")(x)  # ReLU activation function for non-linearity
+inputs = tfk.layers.Input(shape=input_shape, name="input")  # Input layer
+x = tfk.layers.Dense(n_hidden)(inputs)  # Simple dense layer with n_hidden units
+x = tfk.layers.Activation("relu")(x)  # ReLU activation function for non-linearity

# Output block for Mixture Density Network
-params_size = tfp.layers.MixtureNormal.params_size(num_components, output_dim)  # The number of parameters for
+params_size = tfp.layers.MixtureNormal.params_size(
+    num_components, output_dim
+)  # The number of parameters for
# the mixture model
-output_params = Dense(params_size, activation=None, name="output")(x)  # Dense layer to compute the mixture
+output_params = tfk.layers.Dense(params_size, activation=None, name="output")(
+    x
+)  # Dense layer to compute the mixture
# density network parameters
-outputs = tfp.layers.MixtureNormal(num_components, output_dim)(output_params)  # MixtureNormal layer defining a
+outputs = tfp.layers.MixtureNormal(num_components, output_dim)(
+    output_params
+)  # MixtureNormal layer defining a
# mixture of normal distributions for each output variable

# Create and compile the model
-model_ = Model(inputs=inputs, outputs=outputs)  # Define the model
-optimizer = tf.keras.optimizers.Adam(learning_rate=learn_r)  # Adam optimizer with the specified learning rate
-model_.compile(optimizer=optimizer, loss=neg_log_likelihood)  # Compile the model with the negative
+model_ = tfk.Model(inputs=inputs, outputs=outputs)  # Define the model
+optimizer = tf.keras.optimizers.Adam(
+    learning_rate=learn_r
+)  # Adam optimizer with the specified learning rate
+model_.compile(
+    optimizer=optimizer, loss=neg_log_likelihood
+)  # Compile the model with the negative
# log-likelihood loss function

return model_
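
A hedged usage sketch for classic_pnn_model; the shapes and hyperparameters are invented, and neg_log_likelihood is assumed to be the usual TFP loss, lambda y, rv_y: -rv_y.log_prob(y):

    import numpy as np

    model = classic_pnn_model(
        input_shape=(10,), output_dim=2, n_hidden=64, num_components=3, learn_r=1e-3
    )
    X = np.random.rand(256, 10).astype("float32")
    Y = np.random.rand(256, 2).astype("float32")
    model.fit(X, Y, epochs=5, batch_size=32, verbose=0)

    y_dist = model(X[:5])  # the MixtureNormal head returns a distribution object
    y_mean = y_dist.mean().numpy()  # point predictions from the mixture mean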

10 changes: 5 additions & 5 deletions skbel/learning/bel.py
@@ -390,7 +390,7 @@ def predict(
X_obs=dp.T[comp_n],
dens=dens,
support=support,
-    k=2 ** 7 + 1,
+    k=2**7 + 1,
)
hp[np.abs(hp) < 1e-8] = 0 # Set very small values to 0.
kind = "pdf"
@@ -495,7 +495,7 @@ def random_sample(

# Set the seed for later use
if self.seed is None:
-    self.seed = np.random.randint(2 ** 32 - 1, dtype="uint32")
+    self.seed = np.random.randint(2**32 - 1, dtype="uint32")

if X_obs_f is None:
X_obs_f = self.X_obs_f
@@ -547,7 +547,7 @@ def random_sample(
num_samples=self.n_posts,
lower_bd=pdf.x.min(),
upper_bd=pdf.x.max(),
-    k=2 ** 7 + 1,
+    k=2**7 + 1,
)
elif (
fun["kind"] == "linear"
@@ -571,7 +571,7 @@
num_samples=self.n_posts,
lower_bd=pdf.x.min(),
upper_bd=pdf.x.max(),
-    k=2 ** 7 + 1,
+    k=2**7 + 1,
cdf_y=pv,
)
elif fun["kind"] == "linear":
@@ -648,7 +648,7 @@ def kde_init(self, X_obs_f: np.array, obs_n: int = None):
pdf=pdf,
lower_bd=pdf.x.min(),
upper_bd=pdf.x.max(),
-    k=2 ** 7
+    k=2**7
+ 1, # Number of samples. It is a power of 2 + 1 because Romberg integration will be used
return_cdf=True,
)
10 changes: 7 additions & 3 deletions skbel/nn_utilities/functions.py
@@ -2,8 +2,8 @@

import numpy as np
import tensorflow as tf
-from tensorflow import keras as tfk
import tensorflow_probability as tfp
+from tensorflow import keras as tfk
from tensorflow_probability import distributions as tfd

__all__ = [
@@ -49,9 +49,13 @@ def posterior_mean_field(kernel_size, bias_size=0, dtype=None):
lambda t: tfd.Independent(
tfd.Normal(
loc=t[..., :n],
-    scale=1e-5 + 1e-2 * tf.nn.softplus(c + t[..., n:]),  # softplus ensures positivity and avoids numerical instability
+    scale=1e-5
+    + 1e-2
+    * tf.nn.softplus(
+        c + t[..., n:]
+    ),  # softplus ensures positivity and avoids numerical instability
),
-    reinterpreted_batch_ndims=1, # each weight is independent
+    reinterpreted_batch_ndims=1,  # each weight is independent
) # reinterpreted_batch_ndims=1 means that the last dimension is the event dimension
),
]
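
This is the standard TFP mean-field posterior pattern. A self-contained sketch of the whole function, assuming (as in the TFP tutorials) a VariableLayer holding one trainable loc and one raw scale per weight, with c = softplus^-1(1):

    import tensorflow as tf
    import tensorflow_probability as tfp

    tfd = tfp.distributions

    def posterior_mean_field_sketch(kernel_size, bias_size=0, dtype=None):
        n = kernel_size + bias_size
        c = tf.math.log(tf.math.expm1(1.0))  # softplus^-1(1), so scales start near 1e-2
        return tf.keras.Sequential([
            tfp.layers.VariableLayer(2 * n, dtype=dtype),  # trainable loc and raw scale
            tfp.layers.DistributionLambda(
                lambda t: tfd.Independent(
                    tfd.Normal(
                        loc=t[..., :n],
                        scale=1e-5 + 1e-2 * tf.nn.softplus(c + t[..., n:]),
                    ),
                    reinterpreted_batch_ndims=1,  # all weights form one event
                )
            ),
        ])

A function with this signature is what tfp.layers.DenseVariational expects as its make_posterior_fn.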
2 changes: 1 addition & 1 deletion skbel/probabilities/functions.py
@@ -2,8 +2,8 @@

import numpy as np
import tensorflow as tf
-from tensorflow import keras
import tensorflow_probability as tfp
+from tensorflow import keras

tfd = tfp.distributions

