diff --git a/bilby/bilby_mcmc/proposals.py b/bilby/bilby_mcmc/proposals.py index 6100d75f8..90db8b92d 100644 --- a/bilby/bilby_mcmc/proposals.py +++ b/bilby/bilby_mcmc/proposals.py @@ -6,7 +6,7 @@ from scipy.spatial.distance import jensenshannon from scipy.stats import gaussian_kde -from ..core.fisher import FisherMatrixPosteriorEstimator +from ..core.fisher_matrix import FisherMatrixPosteriorEstimator from ..core.prior import PriorDict from ..core.sampler.base_sampler import SamplerError from ..core.utils import logger, random, reflect diff --git a/bilby/core/__init__.py b/bilby/core/__init__.py index 968f961d0..5d24080fd 100644 --- a/bilby/core/__init__.py +++ b/bilby/core/__init__.py @@ -1 +1 @@ -from . import grid, likelihood, prior, result, sampler, series, utils, fisher +from . import grid, likelihood, prior, result, sampler, series, utils, fisher_matrix, fisher_sampler diff --git a/bilby/core/fisher.py b/bilby/core/fisher.py deleted file mode 100644 index 11b9af386..000000000 --- a/bilby/core/fisher.py +++ /dev/null @@ -1,174 +0,0 @@ -import numpy as np -import pandas as pd -import scipy.linalg -from scipy.optimize import minimize - - -class FisherMatrixPosteriorEstimator(object): - def __init__(self, likelihood, priors, parameters=None, fd_eps=1e-6, n_prior_samples=100): - """ A class to estimate posteriors using the Fisher Matrix approach - - Parameters - ---------- - likelihood: bilby.core.likelihood.Likelihood - A bilby likelihood object - priors: bilby.core.prior.PriorDict - A bilby prior object - parameters: list - Names of parameters to sample in - fd_eps: float - A parameter to control the size of perturbation used when finite - differencing the likelihood - n_prior_samples: int - The number of prior samples to draw and use to attempt estimatation - of the maximum likelihood sample. - """ - self.likelihood = likelihood - if parameters is None: - self.parameter_names = priors.non_fixed_keys - else: - self.parameter_names = parameters - self.fd_eps = fd_eps - self.n_prior_samples = n_prior_samples - self.N = len(self.parameter_names) - - self.prior_samples = [ - priors.sample_subset(self.parameter_names) for _ in range(n_prior_samples) - ] - self.prior_bounds = [(priors[key].minimum, priors[key].maximum) for key in self.parameter_names] - - self.prior_width_dict = {} - for key in self.parameter_names: - width = priors[key].width - if np.isnan(width): - raise ValueError(f"Prior width is ill-formed for {key}") - self.prior_width_dict[key] = width - - def log_likelihood(self, sample): - self.likelihood.parameters.update(sample) - return self.likelihood.log_likelihood() - - def calculate_iFIM(self, sample): - FIM = self.calculate_FIM(sample) - iFIM = scipy.linalg.inv(FIM) - - # Ensure iFIM is positive definite - min_eig = np.min(np.real(np.linalg.eigvals(iFIM))) - if min_eig < 0: - iFIM -= 10 * min_eig * np.eye(*iFIM.shape) - - return iFIM - - def sample_array(self, sample, n=1): - from .utils.random import rng - - if sample == "maxL": - sample = self.get_maximum_likelihood_sample() - - self.mean = np.array(list(sample.values())) - self.iFIM = self.calculate_iFIM(sample) - return rng.multivariate_normal(self.mean, self.iFIM, n) - - def sample_dataframe(self, sample, n=1): - samples = self.sample_array(sample, n) - return pd.DataFrame(samples, columns=self.parameter_names) - - def calculate_FIM(self, sample): - FIM = np.zeros((self.N, self.N)) - for ii, ii_key in enumerate(self.parameter_names): - for jj, jj_key in enumerate(self.parameter_names): - FIM[ii, jj] = -self.get_second_order_derivative(sample, ii_key, jj_key) - - return FIM - - def get_second_order_derivative(self, sample, ii, jj): - if ii == jj: - return self.get_finite_difference_xx(sample, ii) - else: - return self.get_finite_difference_xy(sample, ii, jj) - - def get_finite_difference_xx(self, sample, ii): - # Sample grid - p = self.shift_sample_x(sample, ii, 1) - m = self.shift_sample_x(sample, ii, -1) - - dx = .5 * (p[ii] - m[ii]) - - loglp = self.log_likelihood(p) - logl = self.log_likelihood(sample) - loglm = self.log_likelihood(m) - - return (loglp - 2 * logl + loglm) / dx ** 2 - - def get_finite_difference_xy(self, sample, ii, jj): - # Sample grid - pp = self.shift_sample_xy(sample, ii, 1, jj, 1) - pm = self.shift_sample_xy(sample, ii, 1, jj, -1) - mp = self.shift_sample_xy(sample, ii, -1, jj, 1) - mm = self.shift_sample_xy(sample, ii, -1, jj, -1) - - dx = .5 * (pp[ii] - mm[ii]) - dy = .5 * (pp[jj] - mm[jj]) - - loglpp = self.log_likelihood(pp) - loglpm = self.log_likelihood(pm) - loglmp = self.log_likelihood(mp) - loglmm = self.log_likelihood(mm) - - return (loglpp - loglpm - loglmp + loglmm) / (4 * dx * dy) - - def shift_sample_x(self, sample, x_key, x_coef): - - vx = sample[x_key] - dvx = self.fd_eps * self.prior_width_dict[x_key] - - shift_sample = sample.copy() - shift_sample[x_key] = vx + x_coef * dvx - - return shift_sample - - def shift_sample_xy(self, sample, x_key, x_coef, y_key, y_coef): - - vx = sample[x_key] - vy = sample[y_key] - - dvx = self.fd_eps * self.prior_width_dict[x_key] - dvy = self.fd_eps * self.prior_width_dict[y_key] - - shift_sample = sample.copy() - shift_sample[x_key] = vx + x_coef * dvx - shift_sample[y_key] = vy + y_coef * dvy - return shift_sample - - def get_maximum_likelihood_sample(self, initial_sample=None): - """ A method to attempt optimization of the maximum likelihood - - This uses a simple scipy optimization approach, starting from a number - of draws from the prior to avoid problems with local optimization. - - Note: this approach works well in small numbers of dimensions when the - posterior is narrow relative to the prior. But, if the number of dimensions - is large or the posterior is wide relative to the prior, the method fails - to find the global maximum in high dimensional problems. - """ - minlogL = np.inf - for i in range(self.n_prior_samples): - initial_sample = self.prior_samples[i] - - x0 = list(initial_sample.values()) - - def neg_log_like(x, self, T=1): - sample = {key: val for key, val in zip(self.parameter_names, x)} - return - 1 / T * self.log_likelihood(sample) - - out = minimize( - neg_log_like, - x0, - args=(self, 1), - bounds=self.prior_bounds, - method="L-BFGS-B", - ) - if out.fun < minlogL: - minout = out - - return {key: val for key, val in zip(self.parameter_names, minout.x)} diff --git a/bilby/core/fisher_matrix.py b/bilby/core/fisher_matrix.py new file mode 100644 index 000000000..7011408f2 --- /dev/null +++ b/bilby/core/fisher_matrix.py @@ -0,0 +1,278 @@ +from packaging import version + +import numpy as np +import pandas as pd +import scipy.linalg +from scipy.optimize import minimize +import tqdm + +from .utils import random, logger +from .prior import PriorDict + + +def array_to_dict(keys, array): + return dict(zip(keys, array)) + + +class FisherMatrixPosteriorEstimator(object): + def __init__( + self, + likelihood, + priors, + parameters=None, + minimization_method="Nelder-Mead", + fd_eps=1e-6, + n_prior_samples=100, + ): + """A class to estimate posteriors using a Fisher Information Matrix approach + + Parameters + ---------- + likelihood: bilby.core.likelihood.Likelihood + A bilby likelihood object + priors: bilby.core.prior.PriorDict + A bilby prior object + parameters: list + Names of parameters to sample in + minimization_method: str (Nelder-Mead) + The method to use in scipy.optimize.minimize + fd_eps: float + A parameter to control the size of perturbation used when finite + differencing the likelihood + n_prior_samples: int + The number of prior samples to draw and use to attempt estimatation + of the maximum likelihood sample. + """ + self.likelihood = likelihood + + if isinstance(priors, PriorDict) is False: + priors = PriorDict(priors) + + if parameters is None: + self.parameter_names = priors.non_fixed_keys + else: + self.parameter_names = parameters + self.minimization_method = minimization_method + self.fd_eps = fd_eps + self.n_prior_samples = n_prior_samples + self.N = len(self.parameter_names) + + # Construct prior samples at initialisation so that the prior is not stored + self.prior_samples = [ + priors.sample_subset(self.parameter_names) for _ in range(n_prior_samples) + ] + self.prior_bounds_min = np.array( + [priors[key].minimum for key in self.parameter_names] + ) + self.prior_bounds_max = np.array( + [priors[key].maximum for key in self.parameter_names] + ) + self.prior_bounds = list(zip(self.prior_bounds_min, self.prior_bounds_max)) + + self.prior_width_dict = {} + for key in self.parameter_names: + width = priors[key].width + if np.isnan(width): + raise ValueError(f"Prior width is ill-formed for {key}") + self.prior_width_dict[key] = width + + def log_likelihood(self, sample): + if isinstance(sample, dict) is False: + if isinstance(sample, pd.DataFrame) and len(sample) == 1: + sample = sample.to_dict() + else: + raise ValueError() + self.likelihood.parameters.update(sample) + return self.likelihood.log_likelihood() + + def calculate_iFIM(self, sample): + FIM = self.calculate_FIM(sample) + + # Force the FIM to be symmetric by averaging off-diagonal estimates + upper_off_diagonal_average = 0.5 * (np.triu(FIM, 1) + np.triu(FIM.T, 1)) + FIM = ( + np.diag(np.diag(FIM)) + + upper_off_diagonal_average + + upper_off_diagonal_average.T + ) + + iFIM = scipy.linalg.inv(FIM) + + # Ensure iFIM is positive definite + min_eig = np.min(np.real(np.linalg.eigvals(iFIM))) + if min_eig < 0: + logger.warning("Scaling the iFIM to ensure it is positive definite") + iFIM -= 10 * min_eig * np.eye(*iFIM.shape) + + return iFIM + + def sample_array(self, sample, n=1): + if sample == "maxL": + sample = self.get_maximum_likelihood_sample() + + self.mean = np.array(list(sample.values())) + self.iFIM = self.calculate_iFIM(sample) + return random.rng.multivariate_normal(self.mean, self.iFIM, n) + + def sample_dataframe(self, sample, n=1): + samples = self.sample_array(sample, n) + return pd.DataFrame(samples, columns=self.parameter_names) + + def calculate_FIM(self, sample): + if version.parse(scipy.__version__) < version.parse("1.15"): + logger.info("Scipy version < 1.15, using fallback") + FIM = np.zeros((self.N, self.N)) + for ii, ii_key in enumerate(self.parameter_names): + for jj, jj_key in enumerate(self.parameter_names): + FIM[ii, jj] = -self.get_second_order_derivative( + sample, ii_key, jj_key + ) + return FIM + else: + import scipy.differentiate as sd + + logger.info( + "Using Scipy differentiate to estimate the Fisher information matrix (FIM)" + ) + point = np.array([sample[key] for key in self.parameter_names]) + res = sd.hessian(self.log_likelihood_from_array, point, initial_step=0.5) + FIM = -res.ddf + logger.debug(f"Estimated FIM:\n{FIM}") + + return FIM + + def log_likelihood_from_array(self, x_array): + def wrapped_logl(x_array): + # Map points outside the bounds to the bounds + idxs = x_array < self.prior_bounds_min + x_array[idxs] = self.prior_bounds_min[idxs] + + idxs = x_array > self.prior_bounds_max + x_array[idxs] = self.prior_bounds_max[idxs] + + return self.log_likelihood(array_to_dict(self.parameter_names, x_array)) + + def wrapped_logl_arb(x_array): + return np.apply_along_axis(wrapped_logl, 0, x_array) + + return wrapped_logl_arb(x_array) + + def get_second_order_derivative(self, sample, ii, jj): + if ii == jj: + return self.get_finite_difference_xx(sample, ii) + else: + return self.get_finite_difference_xy(sample, ii, jj) + + def get_finite_difference_xx(self, sample, ii): + # Sample grid + p = self._shift_sample_x(sample, ii, 1) + m = self._shift_sample_x(sample, ii, -1) + + dx = 0.5 * (p[ii] - m[ii]) + + loglp = self.log_likelihood(p) + logl = self.log_likelihood(sample) + loglm = self.log_likelihood(m) + + return (loglp - 2 * logl + loglm) / dx**2 + + def get_finite_difference_xy(self, sample, ii, jj): + # Sample grid + pp = self._shift_sample_xy(sample, ii, 1, jj, 1) + pm = self._shift_sample_xy(sample, ii, 1, jj, -1) + mp = self._shift_sample_xy(sample, ii, -1, jj, 1) + mm = self._shift_sample_xy(sample, ii, -1, jj, -1) + + dx = 0.5 * (pp[ii] - mm[ii]) + dy = 0.5 * (pp[jj] - mm[jj]) + + loglpp = self.log_likelihood(pp) + loglpm = self.log_likelihood(pm) + loglmp = self.log_likelihood(mp) + loglmm = self.log_likelihood(mm) + + return (loglpp - loglpm - loglmp + loglmm) / (4 * dx * dy) + + def _shift_sample_x(self, sample, x_key, x_coef): + + vx = sample[x_key] + dvx = self.fd_eps * self.prior_width_dict[x_key] + + shift_sample = sample.copy() + shift_sample[x_key] = vx + x_coef * dvx + + return shift_sample + + def _shift_sample_xy(self, sample, x_key, x_coef, y_key, y_coef): + + vx = sample[x_key] + vy = sample[y_key] + + dvx = self.fd_eps * self.prior_width_dict[x_key] + dvy = self.fd_eps * self.prior_width_dict[y_key] + + shift_sample = sample.copy() + shift_sample[x_key] = vx + x_coef * dvx + shift_sample[y_key] = vy + y_coef * dvy + return shift_sample + + def _maximize_likelihood_from_initial_sample(self, initial_sample): + x0 = list(initial_sample.values()) + + def neg_log_like(x): + return -self.log_likelihood_from_array(x) + + out = minimize( + neg_log_like, + x0, + bounds=self.prior_bounds, + method=self.minimization_method, + ) + return out + + def get_maximum_likelihood_sample(self, initial_sample=None): + """A method to attempt optimization of the maximum likelihood + + This uses a simple scipy optimization approach, starting from a number + of draws from the prior to avoid problems with local optimization. + + Note: this approach works well in small numbers of dimensions when the + posterior is narrow relative to the prior. But, if the number of dimensions + is large or the posterior is wide relative to the prior, the method fails + to find the global maximum in high dimensional problems. + """ + + if initial_sample: + logger.info( + f"Maximising the likelihood from initial sample {initial_sample}" + ) + minout = self._maximize_likelihood_from_initial_sample(initial_sample) + else: + logger.info( + f"Maximising the likelihood using {self.n_prior_samples} prior samples" + ) + max_logL = -np.inf + logL_list = [] + successes = 0 + for sample in tqdm.tqdm(self.prior_samples): + out = self._maximize_likelihood_from_initial_sample(sample) + logL = -out.fun + logL_list.append(logL) + if out.success: + successes += 1 + if logL > max_logL: + max_logL = logL + minout = out + + if np.isinf(max_logL): + raise ValueError("Maxisation of the likelihood failed") + + logger.info( + f"Finished with {100 * successes / self.n_prior_samples}% success rate| " + f"Maximum log-likelihood {max_logL}| " + f"(max-mu)/sigma= {(max_logL - np.mean(logL_list)) / np.std(logL_list)} " + ) + + self.minimization_metadata = minout + logger.info(f"Maximum likelihood estimation: {minout.message}") + return {key: val for key, val in zip(self.parameter_names, minout.x)} diff --git a/bilby/core/fisher_sampler.py b/bilby/core/fisher_sampler.py new file mode 100644 index 000000000..5bb56a979 --- /dev/null +++ b/bilby/core/fisher_sampler.py @@ -0,0 +1,453 @@ +import datetime +import sys + +import numpy as np +import pandas as pd +from scipy.stats import multivariate_normal +import tqdm + +from .fisher_matrix import FisherMatrixPosteriorEstimator +from .sampler.base_sampler import Sampler, signal_wrapper, SamplerError +from .result import rejection_sample +from .utils import logger, kish_log_effective_sample_size, safe_save_figure, random + + +class Fisher(Sampler): + """ + A sampler class that estimates the maximum likelihood using scipy, then draws + posterior samples using the Fisher information matrix. + + Parameters + ========== + likelihood: likelihood.Likelihood + A object with a log_l method + priors: bilby.core.prior.PriorDict, dict + Priors to be used in the search. + This has attributes for each parameter to be sampled. + outdir: str, optional + Name of the output directory + label: str, optional + Naming scheme of the output files + rejection_sampling: bool + If true, utilise rejection sampling to reweight from the Fisher matrix + Gaussian posterior approximation to the true posterior. Note: if true, + the number of samples is determined by the efficiency which can be + low if the maximum likelihood is not found or the posterior is highly + non-Gaussian. + nsamples: int + The target number of samples to draw in the posterior + minimization_method: str (Nelder-Mead) + The method to use in scipy.optimize.minimize + fd_eps: float + A parameter to control the size of perturbation used when finite + differencing the likelihood + n_prior_samples: int + The number of prior samples to draw and use to attempt estimatation + of the maximum likelihood sample. + """ + + sampler_name = "fisher" + sampling_seed_key = "seed" + default_kwargs = dict( + resample="importance", + target_nsamples=10000, + batch_nsamples=1000, + prior_nsamples=100, + minimization_method="Nelder-Mead", + fd_eps=1e-6, + plot_diagnostic=False, + mirror_diagnostic_plot=False, + cov_scaling=1, + use_injection_for_maxL=True, + fail_on_error=False + ) + + def __init__( + self, + likelihood, + priors, + outdir="outdir", + label="label", + use_ratio=False, + plot=False, + exit_code=77, + skip_import_verification=True, + **kwargs, + ): + super(Fisher, self).__init__( + likelihood=likelihood, + priors=priors, + outdir=outdir, + label=label, + use_ratio=use_ratio, + plot=plot, + skip_import_verification=skip_import_verification, + exit_code=exit_code, + **kwargs, + ) + + @classmethod + def get_expected_outputs(cls, outdir=None, label=None): + """Get lists of the expected outputs directories and files. + + These are used by :code:`bilby_pipe` when transferring files via HTCondor. + + Parameters + ---------- + outdir : str + The output directory. + label : str + The label for the run. + + Returns + ------- + list + List of file names: empty (resuming not yet implemented) + list + List of directory names. Will always be empty for fisher. + """ + return [], [] + + raise NotImplementedError() + + @signal_wrapper + def run_sampler(self): + + self.start_time = datetime.datetime.now() + cov_scaling = self.kwargs["cov_scaling"] + + fisher_mpe = FisherMatrixPosteriorEstimator( + likelihood=self.likelihood, + priors=self.priors, + minimization_method=self.kwargs["minimization_method"], + n_prior_samples=self.kwargs["prior_nsamples"], + fd_eps=self.kwargs["fd_eps"], + ) + + if self.injection_parameters and self.kwargs["use_injection_for_maxL"]: + sample = { + key: self.injection_parameters[key] + for key in fisher_mpe.parameter_names + } + else: + sample = None + maxL_sample_dict = fisher_mpe.get_maximum_likelihood_sample(sample) + mean = np.array(list(maxL_sample_dict.values())) + iFIM = fisher_mpe.calculate_iFIM(maxL_sample_dict) + cov = cov_scaling * iFIM + + msg = "Generation-distribution:\n " + "\n ".join( + [ + f"{key}: {val:0.5f} +/- {np.sqrt(var):.5f}" + for ((key, val), var) in zip(maxL_sample_dict.items(), np.diag(cov)) + ] + ) + logger.info(msg) + + nsamples = 0 + target_nsamples = self.kwargs["target_nsamples"] + batch_nsamples = self.kwargs["batch_nsamples"] + all_g_samples = [] + all_samples = [] + all_logl = [] + all_weights = [] + resample = self.kwargs["resample"] + logger.info( + f"Starting sampling in batches of {batch_nsamples} to produce {target_nsamples} samples" + ) + pbar = tqdm.tqdm( + total=target_nsamples, + desc=f"{resample.capitalize()} sampling", + file=sys.stdout, + initial=0, + ) + while nsamples < target_nsamples: + g_samples, g_logl, g_logpi, discard_inef = ( + self._draw_samples_from_generating_distribution( + mean, cov, fisher_mpe, batch_nsamples + ) + ) + + _methods = dict( + rejection=self._rejection_sample, importance=self._importance_sample + ) + + if resample in _methods: + weights = self._calculate_weights(g_samples, g_logl, g_logpi, mean, cov) + samples, logl = _methods[resample](g_samples, g_logl, weights) + efficiency = 100 * len(samples) / len(g_samples) + else: + logger.info("No resampling applied") + samples = g_samples + logl = g_logl + weights = np.ones_like(logl) + + #if efficiency < 10: + # cov_scaling *= 0.9 + # cov = cov_scaling * iFIM + + nsamples += len(samples) + pbar.set_postfix( + { + "eff": f"{efficiency:.3f}%", + "de": f"{discard_inef:.1f}%", + "cs": f"{cov_scaling:.2f}", + }, + refresh=False, + ) + if len(samples) > self.ndim: + pbar.update(len(samples)) + all_g_samples.append(g_samples) + all_samples.append(samples) + all_logl.append(logl) + all_weights.append(weights) + else: + pbar.update(0) + + pbar.close() + + g_samples = pd.concat(all_g_samples) + samples = pd.concat(all_samples) + logl = np.concatenate(all_logl) + weights = np.concatenate(all_weights) + efficiency = 100 * len(samples) / len(g_samples) + + logger.info(f"Finished sampling: total efficiency is {efficiency:0.3f}%") + + if self.kwargs["plot_diagnostic"]: + self.create_resample_diagnostic( + samples, g_samples, mean, weights, method=self.kwargs["resample"] + ) + + end_time = datetime.datetime.now() + self.sampling_time = end_time - self.start_time + + if self.use_ratio: + logl -= self.likelihood.noise_log_likelihood() + + self._generate_result( + samples, logl, efficiency=efficiency, nlikelihood=len(g_samples) + ) + + return self.result + + def create_resample_diagnostic(self, samples, raw_samples, mean, weights, method): + import corner + import matplotlib.pyplot as plt + import matplotlib.lines as mpllines + + kwargs = dict( + bins=50, + smooth=0.7, + max_n_ticks=5, + truths=np.concatenate((mean, [1])), + truth_color="C3", + ) + + kwargs["labels"] = [ + label.replace("_", " ") for label in self.search_parameter_keys + ] + kwargs["labels"].append("weights") + + # Create the data array to plot and pass everything to corner + xs = samples[self.search_parameter_keys].values + xs = np.concatenate((xs, np.random.uniform(0, 1, len(xs)).reshape(-1, 1)), axis=1) + rxs = raw_samples[self.search_parameter_keys].values + rxs = np.concatenate((rxs, weights.reshape((-1, 1))), axis=1) + + # Sort by weight (only for plotting) + idxs = np.argsort(weights) + rxs = rxs[idxs] + weights = weights[idxs] + + g_color = "k" + g_ls = "--" + f_color = "C0" + f_ls = "-" + + lines = [] + fig = corner.corner( + rxs, + color=g_color, + contour_kwargs={"linestyles": g_ls, "alpha": 0.8}, + hist_kwargs={"density": True, "ls": g_ls, "alpha": 0.8}, + data_kwargs={"alpha": 1}, + no_fill_contours=True, + alpha=0.8, + plot_density=False, + plot_datapoints=False, + fill_contours=False, + levels=(1 - np.exp(-0.5), 1 - np.exp(-2), 1 - np.exp(-9 / 2.0)), + **kwargs, + ) + lines.append(mpllines.Line2D([0], [0], color=g_color, linestyle=g_ls)) + + if len(xs) > len(samples.keys()): + fig = corner.corner( + xs, + color=f_color, + contour_kwargs={"linestyles": f_ls, "alpha": 0.8}, + contourf_kwargs={"alpha": 0.8}, + hist_kwargs={"density": True, "ls": f_ls, "alpha": 0.8}, + no_fill_contours=True, + fig=fig, + alpha=0.1, + plot_density=True, + plot_datapoints=False, + fill_contours=False, + levels=(1 - np.exp(-0.5), 1 - np.exp(-2), 1 - np.exp(-9 / 2.0)), + range=[1] * self.ndim + [(0, 1)], + **kwargs, + ) + + # Remove the weights from the re-weighted samples + axes = fig.get_axes() + axes[-1].patches[-1].remove() + for ii in range(2, self.ndim + 2): + axes[-ii].collections[-1].remove() + axes[-ii].collections[-1].remove() + + lines.append(mpllines.Line2D([0], [0], color=f_color, linestyle=f_ls)) + else: + logger.info("Too few samples to add to the diagnostic plot") + + axes = fig.get_axes() + axes[0].legend(lines, ["$g(x)$", "$f(x)$"]) + + axes = np.array(axes).reshape((self.ndim + 1, self.ndim + 1)) + + base_alpha = 0.1 + alphas = base_alpha + (1 - base_alpha) * weights + for ii in range(1, self.ndim + 1): + for jj in range(0, self.ndim): + if ii <= jj: + continue + if self.kwargs["mirror_diagnostic_plot"]: + ax = axes[jj, ii] + xsamples = rxs[:, ii] + ysamples = rxs[:, jj] + else: + ax = axes[ii, jj] + xsamples = rxs[:, jj] + ysamples = rxs[:, ii] + sc = ax.scatter( + xsamples, + ysamples, + c=np.log(weights), + alpha=alphas, + edgecolor="none", + vmin=-5, + vmax=0, + ) + cbar = fig.colorbar(sc, ax=axes[0, -1]) + cbar.set_label("Log-weight") + + fig.suptitle(f"Resampling method: {method}") + + filename = f"{self.outdir}/{self.label}_resample_{method}.png" + safe_save_figure(fig=fig, filename=filename, dpi=400) + plt.close(fig) + + return fig + + def _generate_result(self, samples, log_likelihood_evaluations, **run_stats): + """ + Extract the information we need from the output. + + Parameters + ========== + out: dynesty.result.Result + The dynesty output. + """ + + self.result.samples = samples + self.result.log_likelihood_evaluations = log_likelihood_evaluations + + run_stats["sampling_time_s"] = self.sampling_time.total_seconds() + self.result.meta_data["run_statistics"] = run_stats + + def _draw_samples_from_generating_distribution( + self, mean, cov, fisher_mpe, nsamples + ): + samples_array = random.rng.multivariate_normal(mean, cov, nsamples) + samples = pd.DataFrame(samples_array, columns=fisher_mpe.parameter_names) + + logpi = [] + logl = [] + logger.debug("Calculating the likelihood and priors") + + logpi = self.priors.ln_prob(samples, axis=0) + logl = np.full_like(logpi, -np.inf) + + in_prior = ~np.isinf(logpi) + outside_prior_count = np.sum(~in_prior) + discard_inef = 100 * outside_prior_count / len(samples) + + logl[in_prior] = fisher_mpe.log_likelihood_from_array( + samples.values[in_prior].T + ) + + if outside_prior_count == len(samples): + msg = "Sampling has failed: no viable samples left" + if self.kwargs["fail_on_error"]: + raise SamplerError(msg) + else: + logger.info(msg) + else: + logger.debug("No samples outside the prior bounds") + + logpi = np.real(np.array(logpi)) + return samples, logl, logpi, discard_inef + + def _calculate_weights(self, g_samples, g_logl, g_logpi, mean, cov): + g_logl_norm = multivariate_normal.logpdf(g_samples, mean=mean, cov=cov) + + ln_weights = g_logl + g_logpi - g_logl_norm + + # Remove impossible samples + idxs = ~np.isinf(ln_weights) + ln_weights_viable = ln_weights[idxs] + + # Scale + ln_weights -= np.max(ln_weights_viable) + + self.ess = int(np.floor(np.exp(kish_log_effective_sample_size(ln_weights_viable)))) + logger.debug(f"Calculated weights have an effective sample size {self.ess}") + + weights = np.exp(ln_weights) + + return weights + + def _importance_sample(self, g_samples, g_logl, weights): + logger.debug(f"Importance sampling the posterior from {len(g_samples)} samples") + + normalized_weights = weights / np.sum(weights) + idxs = np.random.choice(len(g_samples), size=self.ess, p=normalized_weights) + samples = g_samples.iloc[idxs] + logl = g_logl[idxs] + + if self.ess < self.ndim: + msg = "Effective sample size less than ndim: sampling has failed" + if self.kwargs["fail_on_error"]: + raise SamplerError(msg) + else: + logger.debug(msg) + + return samples, logl + + def _rejection_sample(self, g_samples, g_logl, weights): + logger.debug(f"Rejection sampling the posterior from {len(g_samples)} samples") + + samples, idxs = rejection_sample(g_samples, weights, return_idxs=True) + logl = g_logl[idxs] + + nsamples = len(samples) + + if nsamples < self.ndim: + msg = "Number of samples less than ndim: sampling has failed" + if self.kwargs["fail_on_error"]: + raise SamplerError(msg) + else: + logger.debug(msg) + + return samples, logl diff --git a/bilby/core/result.py b/bilby/core/result.py index 3569bc1da..8abc42078 100644 --- a/bilby/core/result.py +++ b/bilby/core/result.py @@ -264,7 +264,7 @@ def eval_pool(this_logl): return ln_weights, new_log_likelihood_array, new_log_prior_array, old_log_likelihood_array, old_log_prior_array -def rejection_sample(posterior, weights): +def rejection_sample(posterior, weights, return_idxs=False): """ Perform rejection sampling on a posterior using weights Parameters @@ -273,6 +273,8 @@ def rejection_sample(posterior, weights): The dataframe or array containing posterior samples weights: np.ndarray An array of weights + return_idxs: bool + If true, also return the boolean indexes Returns ======= @@ -281,7 +283,10 @@ def rejection_sample(posterior, weights): """ keep = weights > random.rng.uniform(0, max(weights), weights.shape) - return posterior[keep] + if return_idxs: + return posterior[keep], keep + else: + return posterior[keep] def reweight(result, label=None, new_likelihood=None, new_prior=None, diff --git a/bilby/gw/conversion.py b/bilby/gw/conversion.py index 2a50d7b5a..f5293d39f 100644 --- a/bilby/gw/conversion.py +++ b/bilby/gw/conversion.py @@ -204,7 +204,6 @@ def convert_to_lal_binary_black_hole_parameters(parameters): added_keys: list keys which are added to parameters during function call """ - converted_parameters = parameters.copy() original_keys = list(converted_parameters.keys()) if 'luminosity_distance' not in original_keys: @@ -236,10 +235,13 @@ def convert_to_lal_binary_black_hole_parameters(parameters): converted_parameters[f"chi_{idx}"] ** 2 + converted_parameters[f"chi_{idx}_in_plane"] ** 2 ) ** 0.5 - converted_parameters[f"cos_tilt_{idx}"] = ( - converted_parameters[f"chi_{idx}"] - / converted_parameters[f"a_{idx}"] - ) + if converted_parameters[f"a_{idx}"] == 0: + converted_parameters[f"cos_tilt_{idx}"] = 0 + else: + converted_parameters[f"cos_tilt_{idx}"] = ( + converted_parameters[f"chi_{idx}"] + / converted_parameters[f"a_{idx}"] + ) elif "a_{}".format(idx) not in original_keys: converted_parameters['a_{}'.format(idx)] = abs( converted_parameters[key]) @@ -276,6 +278,10 @@ def convert_to_lal_binary_black_hole_parameters(parameters): - np.sign(np.cos(converted_parameters["theta_jn"])) * converted_parameters["psi"], 2 * np.pi) + + if "psi_mod_pib2" in original_keys: + converted_parameters["psi"] = np.mod(converted_parameters["psi_mod_pib2"], np.pi / 2) + added_keys = [key for key in converted_parameters.keys() if key not in original_keys] @@ -2540,7 +2546,8 @@ def generate_sky_frame_parameters(samples, likelihood): new_samples.append(likelihood.get_sky_frame_parameters()) new_samples = DataFrame(new_samples) for key in new_samples: - samples[key] = new_samples[key] + # Use values to avoid index-sorting (new-samples are automatically sorted properly) + samples[key] = new_samples[key].values def fill_sample(args): diff --git a/bilby/gw/detector/interferometer.py b/bilby/gw/detector/interferometer.py index ac0c14886..d6ecf5d9b 100644 --- a/bilby/gw/detector/interferometer.py +++ b/bilby/gw/detector/interferometer.py @@ -309,6 +309,9 @@ def get_detector_response(self, waveform_polarizations, parameters, frequencies= else: mask = np.ones(len(frequencies), dtype=bool) + if "psi" not in parameters: + parameters = generate_all_bbh_parameters(parameters) + signal = {} for mode in waveform_polarizations.keys(): det_response = self.antenna_response( diff --git a/examples/core_examples/linear_regression_with_Fisher.py b/examples/core_examples/fisher_example.py similarity index 51% rename from examples/core_examples/linear_regression_with_Fisher.py rename to examples/core_examples/fisher_example.py index ae7f74dd7..7f51e078c 100644 --- a/examples/core_examples/linear_regression_with_Fisher.py +++ b/examples/core_examples/fisher_example.py @@ -1,23 +1,19 @@ #!/usr/bin/env python """ -An example of how to use bilby to perform parameter estimation for -non-gravitational wave data. In this case, fitting a linear function to -data with background Gaussian noise. We then compare the result to posteriors -estimated using the Fisher Information Matrix approximation. +This example, building on the Gaussian example, demonstrates the use of the Fisher +tools within Bilby. """ -import copy - import bilby +import numpy as np from bilby.core.utils.random import rng, seed -# sets seed of bilby's generator "rng" to "123" +# Sets seed of bilby's generator "rng" to "123" to ensure reproducibility seed(123) -import numpy as np - # A few simple setup steps outdir = "outdir" +bilby.utils.check_directory_exists_and_if_not_mkdir(outdir) # First, we define our "signal model", in this case a simple linear function @@ -48,27 +44,44 @@ def model(time, m, c): priors = dict() priors["m"] = bilby.core.prior.Uniform(0, 5, "m") priors["c"] = bilby.core.prior.Uniform(-2, 2, "c") -priors = bilby.core.prior.PriorDict(priors) -# And run sampler -result = bilby.run_sampler( +# Run the "fisher" sampler +result_fisher = bilby.run_sampler( likelihood=likelihood, priors=priors, - sampler="dynesty", - nlive=1000, + sampler="fisher", injection_parameters=injection_parameters, outdir=outdir, - label="Nested Sampling", + label="example_fisher", + nsamples=10000, + clean=True, + plot=True, ) -# Finally plot a corner plot: all outputs are stored in outdir -result.plot_corner() - -fim = bilby.core.fisher.FisherMatrixPosteriorEstimator(likelihood, priors) -result_fim = copy.deepcopy(result) -result_fim.posterior = fim.sample_dataframe("maxL", 10000) -result_fim.label = "Fisher" +result_dynesty = bilby.run_sampler( + likelihood=likelihood, + priors=priors, + sampler="dynesty", + nlive=500, + injection_parameters=injection_parameters, + outdir=outdir, + label="example_dynesty", +) -bilby.core.result.plot_multiple( - [result, result_fim], parameters=injection_parameters, truth_color="k" +# Plot a corner plot: all outputs are stored in outdir +bilby.result.plot_multiple( + [result_fisher, result_dynesty], + filename=f"{outdir}/comparison_fisher_dynesty.png", + labels=[ + f"Fisher ({result_fisher.meta_data['run_statistics']['sampling_time_s']:0.2f} s)", + f"Dynesty ({result_dynesty.meta_data['run_statistics']['sampling_time_s']:0.2f} s)", + ], + parameters=injection_parameters, + truth_color="C3", ) + +# Note that the `fisher` tools can also be accessed directly, e.g: +fisher = bilby.core.fisher_matrix.FisherMatrixPosteriorEstimator(likelihood, priors) +samples = fisher.sample_dataframe( + "maxL", 1000 +) # Draw a set of samples as a pandas dataframe diff --git a/setup.py b/setup.py index c9c00e2e6..85c67d24b 100644 --- a/setup.py +++ b/setup.py @@ -90,6 +90,7 @@ def readfile(filename): "bilby.dynesty=bilby.core.sampler.dynesty:Dynesty", "bilby.dynamic_dynesty=bilby.core.sampler.dynamic_dynesty:DynamicDynesty", "bilby.emcee=bilby.core.sampler.emcee:Emcee", + "bilby.fisher=bilby.core.fisher_sampler:Fisher", "bilby.kombine=bilby.core.sampler.kombine:Kombine", "bilby.nessai=bilby.core.sampler.nessai:Nessai", "bilby.nestle=bilby.core.sampler.nestle:Nestle",