Skip to content
Open
59 changes: 57 additions & 2 deletions src/smefit/covmat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
Based on
https://github.com/NNPDF/nnpdf/tree/master/validphys2/src/validphys/covmats_utils.py
https://github.com/NNPDF/nnpdf/tree/master/validphys2/src/validphys/covmats.py

"""

import jax.lax as lax
import jax.numpy as jnp
import numpy as np
import pandas as pd
import scipy.linalg as la
from scipy.linalg import block_diag

from smefit.log import logging

INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR")
_logger = logging.getLogger(__name__)


def construct_covmat(stat_errors: np.array, sys_errors: pd.DataFrame):
Expand Down Expand Up @@ -91,7 +96,6 @@ def covmat_from_systematics(stat_errors: list, sys_errors: list):
)
)
special_corrs.append(dataset_sys_errors.loc[:, ~is_intra_dataset_error])

# concat systematics across datasets
special_sys = pd.concat(special_corrs, axis=0, sort=False)
# non-overlapping systematics are set to NaN by concat, fill with 0 instead.
Expand All @@ -100,3 +104,54 @@ def covmat_from_systematics(stat_errors: list, sys_errors: list):
diag = la.block_diag(*block_diags)
covmat = diag + special_sys.to_numpy() @ special_sys.to_numpy().T
return covmat


#################################################################
###Algorithm to invert covmat avoiding large condition number####
#################################################################


def compute_blocks_inverse(covmat, tol=1e-25):
"""
Decomposes the covmatrix into diagonal blocks and inverts them.
Parameters
----------
covmat: covariance matrix
tol: what is considered numerically zero in the covmat
"""
n = covmat.shape[0]
mask = jnp.abs(covmat) > tol
col_indices = jnp.arange(n)
# For each row, find the index of the furthest non-zero column.
max_col_per_row = jnp.max(jnp.where(mask, col_indices, 0), axis=1)
max_row_per_col = jnp.max(jnp.where(mask.T, col_indices, 0), axis=1)
combined_reach = jnp.maximum(max_col_per_row, max_row_per_col)
# covmat should be symmetric, however checking both row and columns
# increase safety in case same asymmetry is present

running_max = lax.cummax(combined_reach, axis=0)
# normally running_max==combined_reach already, however interdataset
# correlation can spoil this creating larger blocks connecting elements
# that are far away in the covmat.
# In this case I raise a warning that the dataset should be reordered to
# have full optimization in the decomposition and inversion algo
if jnp.any(running_max != combined_reach):
_logger.warning(
"In the runcard some intercorrelated datasets are not written next to each other."
+ "To increase efficiency and stability in covmat inversion\n please consider reordering"
+ " your runcard datasets entries."
)

# A block boundary exists where the furthest reach so far is <= the current index.
boundaries = jnp.where(running_max <= col_indices)[0]
sizes = np.diff(boundaries, prepend=-1).tolist()
block_cond = []
block_inverses = []
start = 0
for s in sizes:
end = start + s
block = covmat[start:end, start:end]
block_cond.append(jnp.linalg.cond(block))
block_inverses.append(jnp.linalg.inv(block))
start = end
return block_diag(*block_inverses), max(block_cond)
27 changes: 15 additions & 12 deletions src/smefit/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@
import scipy.linalg as la
import yaml

from smefit.covmat import (
compute_blocks_inverse,
construct_covmat,
covmat_from_systematics,
)

from .basis_rotation import rotate_to_fit_basis
from .covmat import construct_covmat, covmat_from_systematics
from .log import logging

_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -51,8 +56,7 @@ def check_missing_operators(loaded_corrections, coeff_config):
)


def check_condition_number(fit_covmat, critical_cond=15.5):
condition_number = np.linalg.cond(fit_covmat)
def check_condition_number(condition_number, critical_cond=15.5):
digits = round(np.log10(condition_number), 2)
if digits > critical_cond:
_logger.warning(
Expand All @@ -68,15 +72,15 @@ def check_covmat_positivity(fit_covmat):
_logger.warning(f"Covariance matrix not positive")


def check_covmat_invertibility(fit_covmat, inv_covmat, threshold=1e-3):
if (
jnp.max(np.abs(inv_covmat @ fit_covmat - np.eye(fit_covmat.shape[0])))
> threshold
):
def check_covmat_invertibility(fit_covmat, inv_covmat, threshold=1e-2):
diff = np.abs(inv_covmat @ fit_covmat - np.eye(fit_covmat.shape[0]))
max_deviation = jnp.max(diff)
if max_deviation > threshold:
_logger.warning(
"Failed inverting fit covariance matrix. "
+ "Check matrix condition number and using float64 "
)
return max_deviation


class Loader:
Expand Down Expand Up @@ -664,7 +668,7 @@ def load_datasets(
cutoff_scale: float, optional
kinematic cutoff scale
"""

datasets.sort(key=lambda d: d["name"].lower())
exp_data = []
sm_theory = []
sys_error_t0 = []
Expand All @@ -676,7 +680,6 @@ def load_datasets(
lumi_exp = []
exp_name = []
th_cov = []

Loader.commondata_path = pathlib.Path(commondata_path)
Loader.theory_path = pathlib.Path(theory_path or commondata_path)

Expand Down Expand Up @@ -774,9 +777,9 @@ def load_datasets(
fit_covmat = exp_covmat

# Check fit_covmat condition number
check_condition_number(fit_covmat)
check_covmat_positivity(fit_covmat)
inv_cov_mat = np.linalg.inv(fit_covmat)
inv_cov_mat, cond_number = compute_blocks_inverse(fit_covmat)
check_condition_number(cond_number)
check_covmat_invertibility(fit_covmat, inv_cov_mat)
# Make one large datatuple containing all data, SM theory, corrections, etc.
return DataTuple(
Expand Down
2 changes: 1 addition & 1 deletion src/smefit/rge/rge.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@ def load_rge_matrix(
dictionary with the operators generated by the RGE, to be kept in the fit
"""
# Sort the coefficient list alphabetically
datasets.sort(key=lambda d: d["name"].lower())
coeff_list = sorted(coeff_list)
init_scale = rge_dict.get("init_scale", 1e3)
obs_scale = rge_dict.get("obs_scale", 91.1876)
Expand All @@ -623,7 +624,6 @@ def load_rge_matrix(
path_to_rge_mat = rge_dict.get("rg_matrix", False)
if path_to_rge_mat:
rge_cache = load_precomputed_rge_matrix(path_to_rge_mat, rge_settings)

# Load scales
if isinstance(obs_scale, (float, int)):
scales = [float(obs_scale)]
Expand Down
Loading