From 435bf3b5d36e2ab545b3ef2a9509e3ead8642d66 Mon Sep 17 00:00:00 2001 From: Simone Tentori Date: Wed, 25 Feb 2026 18:45:34 +0100 Subject: [PATCH 01/10] Factorized covmat --- src/smefit/covmat.py | 60 ++++++++++++++++++++++++++++++++++++++++++-- src/smefit/loader.py | 21 ++++++++-------- 2 files changed, 68 insertions(+), 13 deletions(-) diff --git a/src/smefit/covmat.py b/src/smefit/covmat.py index 09453174..b8750fae 100644 --- a/src/smefit/covmat.py +++ b/src/smefit/covmat.py @@ -4,14 +4,19 @@ Based on https://github.com/NNPDF/nnpdf/tree/master/validphys2/src/validphys/covmats_utils.py https://github.com/NNPDF/nnpdf/tree/master/validphys2/src/validphys/covmats.py - """ +import jax.lax as lax +import jax.numpy as jnp import numpy as np import pandas as pd import scipy.linalg as la +from scipy.linalg import block_diag + +from .log import logging INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR") +_logger = logging.getLogger(__name__) def construct_covmat(stat_errors: np.array, sys_errors: pd.DataFrame): @@ -91,7 +96,6 @@ def covmat_from_systematics(stat_errors: list, sys_errors: list): ) ) special_corrs.append(dataset_sys_errors.loc[:, ~is_intra_dataset_error]) - # concat systematics across datasets special_sys = pd.concat(special_corrs, axis=0, sort=False) # non-overlapping systematics are set to NaN by concat, fill with 0 instead. @@ -100,3 +104,55 @@ def covmat_from_systematics(stat_errors: list, sys_errors: list): diag = la.block_diag(*block_diags) covmat = diag + special_sys.to_numpy() @ special_sys.to_numpy().T return covmat + + +################################################################# +###Algorithm to invert covmat avoiding large condition number#### +################################################################# + + +def compute_blocks_inverse(covmat, tol=1e-25): + """ + Decomposes the covmatrix into diagonal blocks and inverts them. + Parameters + ---------- + covmat: covariance matrix + tol: what is considered numerically zero in the covmat + """ + n = covmat.shape[0] + mask = jnp.abs(covmat) > tol + col_indices = jnp.arange(n) + # For each row, find the index of the furthest non-zero column. + max_col_per_row = jnp.max(jnp.where(mask, col_indices, 0), axis=1) + max_row_per_col = jnp.max(jnp.where(mask.T, col_indices, 0), axis=1) + combined_reach = jnp.maximum(max_col_per_row, max_row_per_col) + # covmat should be symmetric, however checking both row and columns + # increase safety in case same asymmetry is present + + running_max = lax.cummax(combined_reach, axis=0) + # normally running_max==combined_reach already, however interdataset + # correlation can spoil this creating larger blocks connecting elements + # that are far away in the covmat. + # In this case I raise a warning that the dataset should be reordered to + # have full optimization in the decomposition and inversion algo + if False in (running_max == combined_reach): + print(jnp.where(running_max != combined_reach)) + _logger.warning( + "In the runcard some intercorrelated datasets are not written next to each other." + + "To increase efficiency and stability in covmat inversion\n please consider reordering" + + " your runcard datasets entries." + ) + + # A block boundary exists where the furthest reach so far is <= the current index. + boundaries = jnp.where(running_max <= col_indices)[0] + sizes = np.diff(boundaries, prepend=-1).tolist() + block_cond = [] + block_inverses = [] + start = 0 + for s in sizes: + end = start + s + block = covmat[start:end, start:end] + block_cond.append(jnp.linalg.cond(block)) + block_inverses.append(np.linalg.inv(block)) + start = end + return block_diag(*block_inverses), max(block_cond) diff --git a/src/smefit/loader.py b/src/smefit/loader.py index c90087f7..46a50627 100644 --- a/src/smefit/loader.py +++ b/src/smefit/loader.py @@ -10,7 +10,7 @@ import yaml from .basis_rotation import rotate_to_fit_basis -from .covmat import construct_covmat, covmat_from_systematics +from .covmat import compute_blocks_inverse, construct_covmat, covmat_from_systematics from .log import logging _logger = logging.getLogger(__name__) @@ -51,8 +51,7 @@ def check_missing_operators(loaded_corrections, coeff_config): ) -def check_condition_number(fit_covmat, critical_cond=15.5): - condition_number = np.linalg.cond(fit_covmat) +def check_condition_number(condition_number, critical_cond=15.5): digits = round(np.log10(condition_number), 2) if digits > critical_cond: _logger.warning( @@ -68,15 +67,15 @@ def check_covmat_positivity(fit_covmat): _logger.warning(f"Covariance matrix not positive") -def check_covmat_invertibility(fit_covmat, inv_covmat, threshold=1e-3): - if ( - jnp.max(np.abs(inv_covmat @ fit_covmat - np.eye(fit_covmat.shape[0]))) - > threshold - ): +def check_covmat_invertibility(fit_covmat, inv_covmat, threshold=1e-2): + diff = np.abs(inv_covmat @ fit_covmat - np.eye(fit_covmat.shape[0])) + max_deviation = jnp.max(diff) + if max_deviation > threshold: _logger.warning( "Failed inverting fit covariance matrix. " + "Check matrix condition number and using float64 " ) + return max_deviation class Loader: @@ -676,7 +675,6 @@ def load_datasets( lumi_exp = [] exp_name = [] th_cov = [] - Loader.commondata_path = pathlib.Path(commondata_path) Loader.theory_path = pathlib.Path(theory_path or commondata_path) @@ -774,9 +772,10 @@ def load_datasets( fit_covmat = exp_covmat # Check fit_covmat condition number - check_condition_number(fit_covmat) + # check_condition_number(fit_covmat) check_covmat_positivity(fit_covmat) - inv_cov_mat = np.linalg.inv(fit_covmat) + inv_cov_mat, cond_number = compute_blocks_inverse(fit_covmat) + check_condition_number(cond_number) check_covmat_invertibility(fit_covmat, inv_cov_mat) # Make one large datatuple containing all data, SM theory, corrections, etc. return DataTuple( From fd91dc430c72354c80ccc7705ae7ef14d4824b9f Mon Sep 17 00:00:00 2001 From: Simone Tentori <42617332+SimoneTentori@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:00:18 +0100 Subject: [PATCH 02/10] Update src/smefit/covmat.py Co-authored-by: Luca Mantani --- src/smefit/covmat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smefit/covmat.py b/src/smefit/covmat.py index b8750fae..4594e57a 100644 --- a/src/smefit/covmat.py +++ b/src/smefit/covmat.py @@ -13,7 +13,7 @@ import scipy.linalg as la from scipy.linalg import block_diag -from .log import logging +from smefit.log import logging INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR") _logger = logging.getLogger(__name__) From 6833b10937897fee56c75af950e6c962b1740388 Mon Sep 17 00:00:00 2001 From: Simone Tentori <42617332+SimoneTentori@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:00:32 +0100 Subject: [PATCH 03/10] Update src/smefit/loader.py Co-authored-by: Luca Mantani --- src/smefit/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smefit/loader.py b/src/smefit/loader.py index 46a50627..28b7f098 100644 --- a/src/smefit/loader.py +++ b/src/smefit/loader.py @@ -10,7 +10,7 @@ import yaml from .basis_rotation import rotate_to_fit_basis -from .covmat import compute_blocks_inverse, construct_covmat, covmat_from_systematics +from smefit.covmat import compute_blocks_inverse, construct_covmat, covmat_from_systematics from .log import logging _logger = logging.getLogger(__name__) From 871a371bd6fe125a66d6036079c4c6886417448b Mon Sep 17 00:00:00 2001 From: Simone Tentori <42617332+SimoneTentori@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:00:49 +0100 Subject: [PATCH 04/10] Update src/smefit/loader.py Co-authored-by: Luca Mantani --- src/smefit/loader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/smefit/loader.py b/src/smefit/loader.py index 28b7f098..ea280baf 100644 --- a/src/smefit/loader.py +++ b/src/smefit/loader.py @@ -772,7 +772,6 @@ def load_datasets( fit_covmat = exp_covmat # Check fit_covmat condition number - # check_condition_number(fit_covmat) check_covmat_positivity(fit_covmat) inv_cov_mat, cond_number = compute_blocks_inverse(fit_covmat) check_condition_number(cond_number) From a341ad8abb8e8bffbe6e54d4d5f833fac57d29a7 Mon Sep 17 00:00:00 2001 From: Simone Tentori <42617332+SimoneTentori@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:01:04 +0100 Subject: [PATCH 05/10] Update src/smefit/covmat.py Co-authored-by: Luca Mantani --- src/smefit/covmat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smefit/covmat.py b/src/smefit/covmat.py index 4594e57a..64c38c68 100644 --- a/src/smefit/covmat.py +++ b/src/smefit/covmat.py @@ -153,6 +153,6 @@ def compute_blocks_inverse(covmat, tol=1e-25): end = start + s block = covmat[start:end, start:end] block_cond.append(jnp.linalg.cond(block)) - block_inverses.append(np.linalg.inv(block)) + block_inverses.append(jnp.linalg.inv(block)) start = end return block_diag(*block_inverses), max(block_cond) From 43dda85f62375eaf7c28d829d10f599f0bc15e38 Mon Sep 17 00:00:00 2001 From: Simone Tentori <42617332+SimoneTentori@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:01:43 +0100 Subject: [PATCH 06/10] Update src/smefit/covmat.py Co-authored-by: Luca Mantani --- src/smefit/covmat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smefit/covmat.py b/src/smefit/covmat.py index 64c38c68..da64a007 100644 --- a/src/smefit/covmat.py +++ b/src/smefit/covmat.py @@ -135,7 +135,7 @@ def compute_blocks_inverse(covmat, tol=1e-25): # that are far away in the covmat. # In this case I raise a warning that the dataset should be reordered to # have full optimization in the decomposition and inversion algo - if False in (running_max == combined_reach): + if jnp.any(running_max != combined_reach): print(jnp.where(running_max != combined_reach)) _logger.warning( "In the runcard some intercorrelated datasets are not written next to each other." From 1069703d644dd20bc72f5a85d7017180645707c7 Mon Sep 17 00:00:00 2001 From: Simone Tentori <42617332+SimoneTentori@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:01:54 +0100 Subject: [PATCH 07/10] Update src/smefit/covmat.py Co-authored-by: Luca Mantani --- src/smefit/covmat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/smefit/covmat.py b/src/smefit/covmat.py index da64a007..684517fe 100644 --- a/src/smefit/covmat.py +++ b/src/smefit/covmat.py @@ -136,7 +136,6 @@ def compute_blocks_inverse(covmat, tol=1e-25): # In this case I raise a warning that the dataset should be reordered to # have full optimization in the decomposition and inversion algo if jnp.any(running_max != combined_reach): - print(jnp.where(running_max != combined_reach)) _logger.warning( "In the runcard some intercorrelated datasets are not written next to each other." + "To increase efficiency and stability in covmat inversion\n please consider reordering" From a1409392f75ff1cb3e324fee57d82df5b7290dd0 Mon Sep 17 00:00:00 2001 From: Simone Tentori Date: Thu, 26 Feb 2026 15:06:47 +0100 Subject: [PATCH 08/10] Fixed precommit --- src/smefit/loader.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/smefit/loader.py b/src/smefit/loader.py index ea280baf..526259c6 100644 --- a/src/smefit/loader.py +++ b/src/smefit/loader.py @@ -9,8 +9,13 @@ import scipy.linalg as la import yaml +from smefit.covmat import ( + compute_blocks_inverse, + construct_covmat, + covmat_from_systematics, +) + from .basis_rotation import rotate_to_fit_basis -from smefit.covmat import compute_blocks_inverse, construct_covmat, covmat_from_systematics from .log import logging _logger = logging.getLogger(__name__) From b841e6b6b8b23b29eddb08aa2128e88bfcb3bc37 Mon Sep 17 00:00:00 2001 From: Simone Tentori Date: Tue, 21 Apr 2026 12:45:24 +0200 Subject: [PATCH 09/10] Adding reorder of dataset --- src/smefit/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smefit/loader.py b/src/smefit/loader.py index 526259c6..e0b3c824 100644 --- a/src/smefit/loader.py +++ b/src/smefit/loader.py @@ -668,7 +668,7 @@ def load_datasets( cutoff_scale: float, optional kinematic cutoff scale """ - + datasets.sort(key=lambda d: d["name"].lower()) exp_data = [] sm_theory = [] sys_error_t0 = [] From df2fd001af7d48920e31db872be297c34996910d Mon Sep 17 00:00:00 2001 From: Simone Tentori Date: Tue, 5 May 2026 11:26:34 +0200 Subject: [PATCH 10/10] fix ordering in RGE loading --- src/smefit/rge/rge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smefit/rge/rge.py b/src/smefit/rge/rge.py index 7b6612a7..5b9abe45 100644 --- a/src/smefit/rge/rge.py +++ b/src/smefit/rge/rge.py @@ -602,6 +602,7 @@ def load_rge_matrix( dictionary with the operators generated by the RGE, to be kept in the fit """ # Sort the coefficient list alphabetically + datasets.sort(key=lambda d: d["name"].lower()) coeff_list = sorted(coeff_list) init_scale = rge_dict.get("init_scale", 1e3) obs_scale = rge_dict.get("obs_scale", 91.1876) @@ -623,7 +624,6 @@ def load_rge_matrix( path_to_rge_mat = rge_dict.get("rg_matrix", False) if path_to_rge_mat: rge_cache = load_precomputed_rge_matrix(path_to_rge_mat, rge_settings) - # Load scales if isinstance(obs_scale, (float, int)): scales = [float(obs_scale)]