Skip to content
Open
60 changes: 58 additions & 2 deletions src/smefit/covmat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
Based on
https://github.com/NNPDF/nnpdf/tree/master/validphys2/src/validphys/covmats_utils.py
https://github.com/NNPDF/nnpdf/tree/master/validphys2/src/validphys/covmats.py

"""

import jax.lax as lax
import jax.numpy as jnp
import numpy as np
import pandas as pd
import scipy.linalg as la
from scipy.linalg import block_diag

from .log import logging
Comment thread
SimoneTentori marked this conversation as resolved.
Outdated

INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR")
_logger = logging.getLogger(__name__)


def construct_covmat(stat_errors: np.array, sys_errors: pd.DataFrame):
Expand Down Expand Up @@ -91,7 +96,6 @@ def covmat_from_systematics(stat_errors: list, sys_errors: list):
)
)
special_corrs.append(dataset_sys_errors.loc[:, ~is_intra_dataset_error])

# concat systematics across datasets
special_sys = pd.concat(special_corrs, axis=0, sort=False)
# non-overlapping systematics are set to NaN by concat, fill with 0 instead.
Expand All @@ -100,3 +104,55 @@ def covmat_from_systematics(stat_errors: list, sys_errors: list):
diag = la.block_diag(*block_diags)
covmat = diag + special_sys.to_numpy() @ special_sys.to_numpy().T
return covmat


#################################################################
###Algorithm to invert covmat avoiding large condition number####
#################################################################


def compute_blocks_inverse(covmat, tol=1e-25):
"""
Decomposes the covmatrix into diagonal blocks and inverts them.
Parameters
----------
covmat: covariance matrix
tol: what is considered numerically zero in the covmat
"""
n = covmat.shape[0]
mask = jnp.abs(covmat) > tol
col_indices = jnp.arange(n)
# For each row, find the index of the furthest non-zero column.
max_col_per_row = jnp.max(jnp.where(mask, col_indices, 0), axis=1)
max_row_per_col = jnp.max(jnp.where(mask.T, col_indices, 0), axis=1)
combined_reach = jnp.maximum(max_col_per_row, max_row_per_col)
# covmat should be symmetric, however checking both row and columns
# increase safety in case same asymmetry is present

running_max = lax.cummax(combined_reach, axis=0)
# normally running_max==combined_reach already, however interdataset
# correlation can spoil this creating larger blocks connecting elements
# that are far away in the covmat.
# In this case I raise a warning that the dataset should be reordered to
# have full optimization in the decomposition and inversion algo
if False in (running_max == combined_reach):
Comment thread
SimoneTentori marked this conversation as resolved.
Outdated
print(jnp.where(running_max != combined_reach))
Comment thread
SimoneTentori marked this conversation as resolved.
Outdated
_logger.warning(
"In the runcard some intercorrelated datasets are not written next to each other."
+ "To increase efficiency and stability in covmat inversion\n please consider reordering"
+ " your runcard datasets entries."
)

# A block boundary exists where the furthest reach so far is <= the current index.
boundaries = jnp.where(running_max <= col_indices)[0]
sizes = np.diff(boundaries, prepend=-1).tolist()
block_cond = []
block_inverses = []
start = 0
for s in sizes:
end = start + s
block = covmat[start:end, start:end]
block_cond.append(jnp.linalg.cond(block))
block_inverses.append(np.linalg.inv(block))
Comment thread
SimoneTentori marked this conversation as resolved.
Outdated
start = end
return block_diag(*block_inverses), max(block_cond)
21 changes: 10 additions & 11 deletions src/smefit/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import yaml

from .basis_rotation import rotate_to_fit_basis
from .covmat import construct_covmat, covmat_from_systematics
from .covmat import compute_blocks_inverse, construct_covmat, covmat_from_systematics
Comment thread
SimoneTentori marked this conversation as resolved.
Outdated
from .log import logging

_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -51,8 +51,7 @@ def check_missing_operators(loaded_corrections, coeff_config):
)


def check_condition_number(fit_covmat, critical_cond=15.5):
condition_number = np.linalg.cond(fit_covmat)
def check_condition_number(condition_number, critical_cond=15.5):
digits = round(np.log10(condition_number), 2)
if digits > critical_cond:
_logger.warning(
Expand All @@ -68,15 +67,15 @@ def check_covmat_positivity(fit_covmat):
_logger.warning(f"Covariance matrix not positive")


def check_covmat_invertibility(fit_covmat, inv_covmat, threshold=1e-3):
if (
jnp.max(np.abs(inv_covmat @ fit_covmat - np.eye(fit_covmat.shape[0])))
> threshold
):
def check_covmat_invertibility(fit_covmat, inv_covmat, threshold=1e-2):
diff = np.abs(inv_covmat @ fit_covmat - np.eye(fit_covmat.shape[0]))
max_deviation = jnp.max(diff)
if max_deviation > threshold:
_logger.warning(
"Failed inverting fit covariance matrix. "
+ "Check matrix condition number and using float64 "
)
return max_deviation


class Loader:
Expand Down Expand Up @@ -676,7 +675,6 @@ def load_datasets(
lumi_exp = []
exp_name = []
th_cov = []

Loader.commondata_path = pathlib.Path(commondata_path)
Loader.theory_path = pathlib.Path(theory_path or commondata_path)

Expand Down Expand Up @@ -774,9 +772,10 @@ def load_datasets(
fit_covmat = exp_covmat

# Check fit_covmat condition number
check_condition_number(fit_covmat)
# check_condition_number(fit_covmat)
Comment thread
SimoneTentori marked this conversation as resolved.
Outdated
check_covmat_positivity(fit_covmat)
inv_cov_mat = np.linalg.inv(fit_covmat)
inv_cov_mat, cond_number = compute_blocks_inverse(fit_covmat)
check_condition_number(cond_number)
check_covmat_invertibility(fit_covmat, inv_cov_mat)
# Make one large datatuple containing all data, SM theory, corrections, etc.
return DataTuple(
Expand Down