Commit: Minor changes

robinthibaut committed Jun 3, 2021
1 parent 0b2354e commit b28279a
Showing 4 changed files with 104 additions and 85 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -4,7 +4,7 @@

setup(
    name="skbel",
-   version="1.0.4",
+   version="1.0.5",
    packages=my_pckg,
    include_package_data=True,
    url="https://github.com/robinthibaut/skbel",
121 changes: 68 additions & 53 deletions skbel/algorithms/_statistics.py
@@ -14,7 +14,14 @@

from skbel.algorithms.extmath import get_block

-__all__ = ["KDE", "kde_params", "posterior_conditional", "mvn_inference", "it_sampling", "normalize"]
+__all__ = [
+    "KDE",
+    "kde_params",
+    "posterior_conditional",
+    "mvn_inference",
+    "it_sampling",
+    "normalize",
+]
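
Review note: the exploded __all__ list is cosmetic; the exported names are unchanged. A minimal import sketch (assuming these names are re-exported by skbel.algorithms, as the package layout suggests):

from skbel.algorithms import KDE, kde_params, posterior_conditional, it_sampling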


class KDE:
@@ -25,14 +32,14 @@ class KDE:
    """

    def __init__(
-           self,
-           *,
-           bw_method: str = None,
-           bw_adjust: float = 1,
-           gridsize: int = 200,
-           cut: float = 3,
-           clip: list = None,
-           cumulative: bool = False,
+       self,
+       *,
+       bw_method: str = None,
+       bw_adjust: float = 1,
+       gridsize: int = 200,
+       cut: float = 3,
+       clip: list = None,
+       cumulative: bool = False,
    ):
        """Initialize the estimator with its parameters.
@@ -70,7 +77,7 @@ def __init__(

    @staticmethod
    def _define_support_grid(
-           x: np.array, bw: float, cut: float, clip: list, gridsize: int
+       x: np.array, bw: float, cut: float, clip: list, gridsize: int
    ):
        """Create the grid of evaluation points depending for vector x."""
        clip_lo = -np.inf if clip[0] is None else clip[0]
@@ -101,11 +108,11 @@ def _define_support_bivariate(self, x1: np.array, x2: np.array, weights: np.array):
        return grid1, grid2

    def define_support(
-           self,
-           x1: np.array,
-           x2: np.array = None,
-           weights: np.array = None,
-           cache: bool = True,
+       self,
+       x1: np.array,
+       x2: np.array = None,
+       weights: np.array = None,
+       cache: bool = True,
    ):
        """Create the evaluation grid for a given data set."""
        if x2 is None:
@@ -176,8 +183,8 @@ def __call__(self, x1, x2=None, weights=None):


def _univariate_density(
-       data_variable: pd.DataFrame,
-       estimate_kws: dict,
+   data_variable: pd.DataFrame,
+   estimate_kws: dict,
):
    # Initialize the estimator object
    estimator = KDE(**estimate_kws)
@@ -200,8 +207,8 @@ def _univariate_density(


def _bivariate_density(
-       data: pd.DataFrame,
-       estimate_kws: dict,
+   data: pd.DataFrame,
+   estimate_kws: dict,
):
    """
    Estimate bivariate KDE
@@ -237,15 +244,15 @@


def kde_params(
-       x: np.array = None,
-       y: np.array = None,
-       bw: float = None,
-       gridsize: int = 200,
-       cut: float = 3,
-       clip=None,
-       cumulative: bool = False,
-       bw_method: str = "scott",
-       bw_adjust: int = 1,
+   x: np.array = None,
+   y: np.array = None,
+   bw: float = None,
+   gridsize: int = 200,
+   cut: float = 3,
+   clip=None,
+   cumulative: bool = False,
+   bw_method: str = "scott",
+   bw_adjust: int = 1,
):
    """
    Obtain density and support (grid) of the bivariate KDE
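
Review note: a hedged call sketch for kde_params. Argument names come from the signature above; the return order (density, then support grid) is assumed from the docstring, and d_scores / h_scores are hypothetical 1-D arrays:

dens, support = kde_params(x=d_scores, y=h_scores, gridsize=200)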
@@ -314,11 +321,11 @@ def _pixel_coordinate(line: list, x_1d: np.array, y_1d: np.array):


def _conditional_distribution(
-       kde_array: np.array,
-       x_array: np.array,
-       y_array: np.array,
-       x: float = None,
-       y: float = None,
+   kde_array: np.array,
+   x_array: np.array,
+   y_array: np.array,
+   x: float = None,
+   y: float = None,
):
    """
    Compute the conditional posterior distribution p(x_array|y_array) given x or y.
@@ -368,7 +375,7 @@ def _normalize_distribution(post: np.array, support: np.array):


def posterior_conditional(
-       X: np.array, Y: np.array, X_obs: float = None, Y_obs: float = None
+   X: np.array, Y: np.array, X_obs: float = None, Y_obs: float = None
):
    """
    Computes the posterior distribution p(y|x_obs) or p(x|y_obs) by doing a cross section of the KDE of (d, h).
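
Review note: a usage sketch under stated assumptions: X and Y are paired 1-D score arrays, exactly one of X_obs / Y_obs is given, and the function is assumed to return the conditional density with its support; d_scores, h_scores and d_obs are hypothetical:

post, support = posterior_conditional(X=d_scores, Y=h_scores, X_obs=d_obs)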
@@ -412,7 +419,7 @@


def mvn_inference(
-       X: np.array, Y: np.array, X_obs: np.array, **kwargs
+   X: np.array, Y: np.array, X_obs: np.array, **kwargs
) -> (np.array, np.array):
    """
    Estimating posterior mean and covariance of the target.
@@ -458,7 +465,7 @@
    x_ls_predicted = np.matmul(Y, g.T)
    x_modeling_mean_error = np.mean(X - x_ls_predicted, axis=0)  # (n_comp_CCA, 1)
    x_modeling_error = (
-           X - x_ls_predicted - np.tile(x_modeling_mean_error, (n_training, 1))
+       X - x_ls_predicted - np.tile(x_modeling_mean_error, (n_training, 1))
    )
    # (n_comp_CCA, n_training)

@@ -482,7 +489,7 @@
    y_posterior_covariance = np.linalg.pinv(d11)  # (n_comp_CCA, n_comp_CCA)
    # Computing the posterior mean is simply a linear operation, given precomputed posterior covariance.
    y_posterior_mean = y_posterior_covariance @ (
-           d11 @ y_mean - d12 @ (X_obs[0] - x_modeling_mean_error - y_mean @ g.T)
+       d11 @ y_mean - d12 @ (X_obs[0] - x_modeling_mean_error - y_mean @ g.T)
    )  # (n_comp_CCA,)

    return y_posterior_mean, y_posterior_covariance
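
Review note: the two statements above are the standard Gaussian conditioning step: the posterior covariance is the pseudo-inverse of the precision block d11, and the mean is a linear combination built from d11, d12 and the de-biased observation. A hedged call sketch (x_scores, y_scores, x_obs_scores are hypothetical canonical-score arrays):

import numpy as np

# X, Y: shape (n_training, n_comp_CCA); X_obs: shape (1, n_comp_CCA)
post_mean, post_cov = mvn_inference(X=x_scores, Y=y_scores, X_obs=x_obs_scores)
draws = np.random.multivariate_normal(post_mean, post_cov, size=400)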
@@ -520,10 +527,10 @@ def normalize(pdf, lower_bd=-np.inf, upper_bd=np.inf, vectorize=False):
    between lower_bd and upper_bd is close to 1. Maps nicely over iterables.
    """
    if lower_bd >= upper_bd:
-       raise ValueError('Lower bound must be less than upper bound.')
+       raise ValueError("Lower bound must be less than upper bound.")
    quadrature = integrate.quad(pdf, lower_bd, upper_bd, full_output=1)
    if not _convergent(quadrature):
-       raise ValueError('PDF integral likely divergent.')
+       raise ValueError("PDF integral likely divergent.")
    A = quadrature[0]

    def pdf_normed(x):
@@ -533,6 +540,7 @@ def pdf_normed(x):
            return 0

    if vectorize:
+
        def pdf_vectorized(x):
            try:
                return pdf_normed(x)
@@ -569,7 +577,7 @@ def get_cdf(pdf, lower_bd=-np.inf, upper_bd=np.inf):
    pdf_norm = normalize(pdf, lower_bd, upper_bd)

    def cdf_number(x):
-       "Numerical cdf"""
+       "Numerical cdf" ""
        if x < lower_bd:
            return 0.0
        elif x > upper_bd:
@@ -589,8 +597,13 @@ def cdf_vector(x):
def _chebnodes(a, b, n):
    """Chebyshev nodes of rank n on integral [a,b]."""
    if not a < b:
-       raise ValueError('Lower bound must be less than upper bound.')
-   return np.array([1 / 2 * ((a + b) + (b - a) * np.cos((2 * k - 1) * np.pi / (2 * n))) for k in range(1, n + 1)])
+       raise ValueError("Lower bound must be less than upper bound.")
+   return np.array(
+       [
+           1 / 2 * ((a + b) + (b - a) * np.cos((2 * k - 1) * np.pi / (2 * n)))
+           for k in range(1, n + 1)
+       ]
+   )
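
Review note: the rewrapped comprehension is the standard Chebyshev-node formula, x_k = (a + b)/2 + ((b - a)/2) * cos((2k - 1) * pi / (2n)) for k = 1..n, i.e. cosine-spaced points that cluster near the ends of [a, b]. Quick sketch:

nodes = _chebnodes(0.0, 1.0, 4)  # four cosine-spaced points inside (0, 1)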


def adaptive_chebfit(pdf, lower_bd, upper_bd, eps=10 ** (-15)):
@@ -665,9 +678,9 @@ def chebcdf(pdf, lower_bd, upper_bd, eps=10 ** (-15)):
    as an argument, and returns an iterable of floats of the same length.
    """
    if not (np.isfinite(lower_bd) and np.isfinite(upper_bd)):
-       raise ValueError('Bounds must be finite.')
+       raise ValueError("Bounds must be finite.")
    if not lower_bd < upper_bd:
-       raise ValueError('Lower bound must be less than upper bound.')
+       raise ValueError("Lower bound must be less than upper bound.")

    x, coeffs = adaptive_chebfit(pdf, lower_bd, upper_bd, eps)
    int_coeffs = chebint(coeffs)
@@ -681,11 +694,7 @@ def cdf(x_):
    return cdf
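
Review note: chebcdf builds its CDF by integrating a Chebyshev fit of the pdf (adaptive_chebfit + chebint), which is why both bounds must be finite. A minimal sketch with a plain callable, under those assumptions:

import numpy as np

cdf = chebcdf(lambda t: np.exp(-0.5 * t ** 2), lower_bd=-5.0, upper_bd=5.0)
values = cdf(np.linspace(-5.0, 5.0, 11))  # CDF evaluated on a grid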


-def it_sampling(pdf,
-               num_samples,
-               lower_bd=-np.inf,
-               upper_bd=np.inf,
-               chebyshev=False):
+def it_sampling(pdf, num_samples, lower_bd=-np.inf, upper_bd=np.inf, chebyshev=False):
    """Sample from an arbitrary, unnormalized PDF.
    Copyright (c) 2017 Peter Wills
@@ -729,11 +738,15 @@
    if chebyshev:

        if not (np.isfinite(lower_bd) and np.isfinite(upper_bd)):
-           raise ValueError('Bounds must be finite for Chebyshev approximation of CDF.')
+           raise ValueError(
+               "Bounds must be finite for Chebyshev approximation of CDF."
+           )

        cdf = chebcdf(pdf, lower_bd, upper_bd)
        cdf_y = cdf(np.linspace(lower_bd, upper_bd, 200))
-       inverse_cdf = interpolate.interp1d(cdf_y, pdf.x, kind="linear", fill_value="extrapolate")
+       inverse_cdf = interpolate.interp1d(
+           cdf_y, pdf.x, kind="linear", fill_value="extrapolate"
+       )
        simple_samples = inverse_cdf(seeds)

        # Compute empirical mean and standard deviation
@@ -749,6 +762,7 @@
        guess = np.mean(simple_samples)

        for seed in seeds:
+
            def shifted(x):
                return cdf(x) - seed

@@ -759,7 +773,8 @@ def shifted(x):
    else:
        cdf = get_cdf(pdf, lower_bd, upper_bd)
        cdf_y = cdf(np.linspace(lower_bd, upper_bd, 200))
-       inverse_cdf = interpolate.interp1d(cdf_y, pdf.x, kind="linear", fill_value="extrapolate")
+       inverse_cdf = interpolate.interp1d(
+           cdf_y, pdf.x, kind="linear", fill_value="extrapolate"
+       )
        simple_samples = inverse_cdf(seeds)
        return simple_samples
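
Review note on the rewrapped interp1d calls: in both branches the inverse CDF is interpolated against pdf.x, so it_sampling expects its pdf argument to be callable and to carry its own support grid as a .x attribute (the KDE-derived pdfs in BEL.random_sample satisfy this; see pdf.x.min() / pdf.x.max() there). A self-contained sketch with a hypothetical pdf object, keeping the grid at the 200 points the interpolation assumes:

import numpy as np

class BumpPdf:
    """Callable unnormalized pdf exposing its support grid as .x."""

    def __init__(self, lo=-5.0, hi=5.0, n=200):
        self.x = np.linspace(lo, hi, n)  # support grid read by it_sampling

    def __call__(self, t):
        return np.exp(-0.5 * t ** 2)  # unnormalized standard normal

pdf = BumpPdf()
draws = it_sampling(pdf, num_samples=500, lower_bd=pdf.x.min(), upper_bd=pdf.x.max())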

4 changes: 3 additions & 1 deletion skbel/examples/demo_kde_cheby.py
@@ -132,7 +132,9 @@ def bel_training(
    model = init_bel()

    # Set model parameters
-   model.mode = "kde_chebyshev"  # How to compute the posterior conditional distribution
+   model.mode = (
+       "kde_chebyshev"  # How to compute the posterior conditional distribution
+   )
    # Set PC cut
    model.X_n_pc = 50
    model.Y_n_pc = 30
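
Review note: the parenthesized assignment only rewraps the line to black's width; the value is unchanged. The mode strings handled in this commit are "mvn" (the BEL default), "kde", and "kde_chebyshev" (this demo), so an equivalent setting would be, e.g.:

model.mode = "kde"  # or "mvn" / "kde_chebyshev"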
62 changes: 32 additions & 30 deletions skbel/learning/bel.py
@@ -68,19 +68,19 @@ class BEL(TransformerMixin, MultiOutputMixin, BaseEstimator):
    """

    def __init__(
-           self,
-           mode: str = "mvn",
-           copy: bool = True,
-           *,
-           X_pre_processing=None,
-           Y_pre_processing=None,
-           X_post_processing=None,
-           Y_post_processing=None,
-           cca=None,
-           x_pc=None,
-           y_pc=None,
-           x_dim=None,
-           y_dim=None,
+       self,
+       mode: str = "mvn",
+       copy: bool = True,
+       *,
+       X_pre_processing=None,
+       Y_pre_processing=None,
+       X_post_processing=None,
+       Y_post_processing=None,
+       cca=None,
+       x_pc=None,
+       y_pc=None,
+       x_dim=None,
+       y_dim=None,
    ):
"""
:param mode: How to infer the posterior distribution. "mvn" (default) or "kde"
Expand Down Expand Up @@ -329,24 +329,26 @@ def random_sample(self, n_posts: int = None, mode: str = None) -> np.array:
        if self.mode == "kde":
            Y_samples = np.zeros((self._n_posts, self.pdf.shape[0]))
            for i, pdf in enumerate(self.pdf):
-               uniform_samples = it_sampling(pdf=pdf,
-                                             num_samples=self._n_posts,
-                                             lower_bd=pdf.x.min(),
-                                             upper_bd=pdf.x.max(),
-                                             chebyshev=False,
-                                             )
+               uniform_samples = it_sampling(
+                   pdf=pdf,
+                   num_samples=self._n_posts,
+                   lower_bd=pdf.x.min(),
+                   upper_bd=pdf.x.max(),
+                   chebyshev=False,
+               )

                Y_samples[:, i] = uniform_samples

        if self.mode == "kde_chebyshev":
            Y_samples = np.zeros((self._n_posts, self.pdf.shape[0]))
            for i, pdf in enumerate(self.pdf):
-               uniform_samples = it_sampling(pdf=pdf,
-                                             num_samples=self._n_posts,
-                                             lower_bd=pdf.x.min(),
-                                             upper_bd=pdf.x.max(),
-                                             chebyshev=True,
-                                             )
+               uniform_samples = it_sampling(
+                   pdf=pdf,
+                   num_samples=self._n_posts,
+                   lower_bd=pdf.x.min(),
+                   upper_bd=pdf.x.max(),
+                   chebyshev=True,
+               )

                Y_samples[:, i] = uniform_samples
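
Review note: this branch and the "kde" branch above differ only in the chebyshev flag passed to it_sampling. A possible consolidation (a sketch, not part of this commit) using only names already in scope:

use_cheb = self.mode == "kde_chebyshev"
Y_samples = np.zeros((self._n_posts, self.pdf.shape[0]))
for i, pdf in enumerate(self.pdf):
    Y_samples[:, i] = it_sampling(
        pdf=pdf,
        num_samples=self._n_posts,
        lower_bd=pdf.x.min(),
        upper_bd=pdf.x.max(),
        chebyshev=use_cheb,
    )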

@@ -432,8 +434,8 @@ def predict(self, X_obs, mode: str = None) -> (np.array, np.array):
        return self.pdf

    def inverse_transform(
-           self,
-           Y_pred,
+       self,
+       Y_pred,
    ) -> np.array:
        """
        Back-transforms the sampled gaussian distributed posterior Y to their physical space.
@@ -447,15 +449,15 @@ def inverse_transform(
            Y_pred
        )  # Posterior CCA scores
        y_post = (
-               np.matmul(y_post, self.cca.y_loadings_.T) * self.cca.y_std_
-               + self.cca.y_mean_
+           np.matmul(y_post, self.cca.y_loadings_.T) * self.cca.y_std_
+           + self.cca.y_mean_
        )  # Posterior PC scores

        # Back transform PC scores
        nc = self.Y_pc.shape[0]  # Number of components
        dummy = np.zeros((self.n_posts, nc))  # Create a dummy matrix filled with zeros
        dummy[
-               :, : y_post.shape[1]
+           :, : y_post.shape[1]
        ] = y_post  # Fill the dummy matrix with the posterior PC
        y_post = self.Y_pre_processing.inverse_transform(dummy)  # Inverse transform
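
Review note: the dummy-matrix step pads the retained posterior PC scores with zeros up to the full component count, so the dropped components contribute nothing to the back-transform. A toy illustration with scikit-learn's PCA (shapes assumed):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
pca = PCA(n_components=5).fit(rng.normal(size=(100, 5)))

y_post = rng.normal(size=(10, 2))     # scores for the 2 retained components
dummy = np.zeros((10, 5))             # full component count
dummy[:, : y_post.shape[1]] = y_post  # dropped components stay zero
physical = pca.inverse_transform(dummy)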

