Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion src/smefit/analyze/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ def __init__(self, report_path, result_path, report_config):
for fit in self.fits:
self.dataset_fits.append([data["name"] for data in fit.config["datasets"]])

# Get scales for each fit
self.data_scales = []
for fit in self.fits:
self.data_scales.append(fit.load_data_scales())

# Loads useful information about data
self.data_info = self._load_grouped_data_info(report_config["data_info"])
# Loads coefficients grouped with latex name
Expand Down Expand Up @@ -144,7 +149,9 @@ def _append_section(self, title, links=None, figs=None, tables=None):

def summary(self):
"""Summary Table runner."""
summary = SummaryWriter(self.fits, self.data_info, self.coeff_info)
summary = SummaryWriter(
self.fits, self.data_info, self.coeff_info, self.data_scales
)
section_title = "Summary"
coeff_tab = "coefficient_summary"
data_tab = "dataset_summary"
Expand All @@ -159,6 +166,13 @@ def summary(self):
tables=summary.fit_settings(),
)

figs_list = []
for fit in self.fits:
_logger.info(f"Plotting scales for: {fit.name}")
figs_list.append(f"scales_{fit.name}")
summary.plot_data_scales(path=f"{self.report}")
self._append_section("Scales", figs=figs_list)

def chi2(self, table=True, plot_experiment=None, plot_distribution=None):
r""":math:`\chi^2` table and plots runner.

Expand Down
137 changes: 136 additions & 1 deletion src/smefit/analyze/summary.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import BoundaryNorm

from ..coefficients import Coefficient
from .latex_tools import latex_packages, multicolum_table_header
Expand Down Expand Up @@ -49,13 +52,15 @@ class SummaryWriter:

"""

def __init__(self, fits, data_groups, coeff_config):
def __init__(self, fits, data_groups, coeff_config, data_scales):
self.fits = fits
self.data_info = data_groups
self.coeff_info = coeff_config
self.nfits = len(self.fits)
# Get names of datasets for each fit
self.dataset_fits = []
self.data_scales = data_scales

for fit in self.fits:
self.dataset_fits.append([data["name"] for data in fit.config["datasets"]])

Expand Down Expand Up @@ -198,3 +203,133 @@ def write_coefficients_table(self):
]
)
return L

def plot_data_scales(self, path):
    """Plot, for every fit, a heatmap of data-point counts per scale bin.

    One figure is produced per entry of ``self.data_scales``: rows are the
    data groups found in ``self.data_info`` and columns are logarithmically
    spaced scale bins. Each cell shows how many scale values of that group
    fall in the bin. Figures are written as
    ``{path}/scales_{fit.name}.pdf`` and ``{path}/scales_{fit.name}.png``.

    Parameters
    ----------
    path : str or path-like
        directory in which the figures are saved

    Raises
    ------
    ValueError
        if a fit has no scale at all for the datasets in ``self.data_info``
    """
    # Preferred row ordering of the groups. It does not depend on the fit,
    # so it is built once. Groups not listed here are placed after the
    # listed ones, keeping their original relative order (stable sort).
    order = [
        r"$\bar{t}t\bar{t}t + \bar{t}t\bar{b}b$",
        r"$\rm Higgs$",
        r"$\rm LEP$",
        r"$\bar{t}t$",
        r"$\bar{t}tV$",
        r"$t$",
        r"$tV$",
        r"$VV$",
        r"$\mathrm{FCC\textnormal{-}ee\:91\:GeV}$",
        r"$\mathrm{FCC\textnormal{-}ee\:161\:GeV}$",
        r"$\mathrm{FCC\textnormal{-}ee\:240\:GeV}$",
        r"$\mathrm{FCC\textnormal{-}ee\:365\:GeV}$",
    ]
    order_index = {name: position for position, name in enumerate(order)}

    # Collect, for each fit, the scales of every dataset grouped by data group.
    # NOTE(review): assumes ``self.data_info`` is indexed by (group, dataset)
    # and that each entry of ``self.data_scales`` maps dataset name -> scales.
    fits_datagroup_scales = []
    for fit in self.data_scales:
        fit_scales = {}
        for group, datasets in self.data_info.groupby(level=0):
            datasets = datasets.droplevel(0)
            # Seed with an empty float array so that a group with no dataset
            # in this fit still yields a (zero-count) row.
            chunks = [np.array([])]
            # Datasets listed in the group but absent from this fit
            # contribute no points, instead of raising a KeyError.
            chunks.extend(
                fit[dataset] for dataset, _ in datasets.items() if dataset in fit
            )
            # Single concatenation per group instead of growing the array
            # one dataset at a time.
            fit_scales[group] = np.concatenate(chunks)
        fits_datagroup_scales.append(fit_scales)

    # One heatmap per fit: scale bins on the x-axis, groups on the y-axis,
    # cell colour/annotation giving the number of points in the bin.
    for fit_idx, fit_scales in enumerate(fits_datagroup_scales):
        fit_name = self.fits[fit_idx].name
        group_names = list(fit_scales.keys())

        # Only groups that actually carry scales define the plotted range.
        populated = [scales for scales in fit_scales.values() if len(scales) > 0]
        if not populated:
            raise ValueError(f"No data scales available for fit {fit_name}")
        raw_min = min(scales.min() for scales in populated)
        raw_max = max(scales.max() for scales in populated)

        # 20 logarithmic bins (21 edges) spanning the observed range
        bins = np.logspace(np.log10(raw_min), np.log10(raw_max), 21)

        # Round edges to the nearest 10 below 300, to the nearest 100 above
        bins = np.where(
            bins < 300, np.round(bins / 10) * 10, np.round(bins / 100) * 100
        )

        # Rounding may have pushed the outer edges inside the data range:
        # widen them (with the same granularity) so every point is covered.
        if bins[0] > raw_min:
            bins[0] = (
                np.floor(raw_min / 10) * 10
                if raw_min < 300
                else np.floor(raw_min / 100) * 100
            )
        if bins[-1] < raw_max:
            bins[-1] = (
                np.ceil(raw_max / 10) * 10
                if raw_max < 300
                else np.ceil(raw_max / 100) * 100
            )

        sorted_group_names = sorted(
            group_names,
            key=lambda name: order_index.get(name, np.inf),
        )

        # One histogram row per group
        heatmap_data = np.array(
            [
                np.histogram(fit_scales[group], bins=bins)[0]
                for group in sorted_group_names
            ]
        )

        # Empty cells are left blank rather than annotated with "0"
        annot_data = np.where(heatmap_data == 0, "", heatmap_data)

        # Discrete colour-bar boundaries: fine steps (0, 1, 2, 5) for small
        # counts, then steps of 10 from 10 up to past the largest count.
        boundaries = np.concatenate(
            [
                np.array([0, 1, 2, 5]),
                np.arange(10, heatmap_data.max() + 10, 10),
            ]
        )
        norm = BoundaryNorm(boundaries, ncolors=256)

        edge_labels = [f"{int(edge)}" for edge in bins]

        fig, ax = plt.subplots(figsize=(10, 6))
        heatmap = sns.heatmap(
            heatmap_data,
            annot=annot_data,
            fmt="",
            cmap="Blues",
            ax=ax,
            xticklabels=edge_labels[1:],
            yticklabels=sorted_group_names,
            cbar_kws={
                "ticks": boundaries,
            },
            norm=norm,
        )

        cbar = heatmap.collections[0].colorbar
        cbar.set_label("\\# of Data points", fontsize=14)

        # Put the ticks on the cell borders so that labels mark bin edges
        ax.set_xticks(list(range(len(bins))))
        ax.set_xticklabels(edge_labels)

        ax.set_title(f"Data Scales for {self.fits[fit_idx].label}", fontsize=16)
        ax.set_xlabel(
            "Scales [GeV]",
            fontsize=14,
        )
        fig.tight_layout()

        # Save the heatmap
        fig.savefig(f"{path}/scales_{fit_name}.pdf")
        fig.savefig(f"{path}/scales_{fit_name}.png")
        # Release the figure: otherwise one open figure per fit accumulates
        # in pyplot's global registry.
        plt.close(fig)
22 changes: 22 additions & 0 deletions src/smefit/fit_manager.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# -*- coding: utf-8 -*-
import json
import pathlib

import numpy as np
import pandas as pd
import yaml
from rich.progress import track

from smefit.loader import Loader

from .coefficients import CoefficientManager
from .compute_theory import make_predictions
from .loader import load_datasets
Expand Down Expand Up @@ -128,6 +131,25 @@ def load_datasets(self):
self.config.get("external_chi2", False),
)

def load_data_scales(self):
    """Load the scales associated with every dataset of the fit.

    For each dataset listed in ``self.config["datasets"]`` the theory files
    are read through ``Loader.load_theory`` and only the last element of
    the returned tuple (the scales) is kept.

    Note that ``Loader.theory_path`` is set on the ``Loader`` class itself,
    from ``self.config["theory_path"]``.

    Returns
    -------
    dict
        scales returned by ``Loader.load_theory``, keyed by dataset name
    """
    dataset_names = [entry["name"] for entry in self.config["datasets"]]

    # The scales do not depend on these settings: the theory is loaded with
    # placeholder options only to reach the scales.
    dummy_options = {
        "operators_to_keep": {},
        "order": "LO",
        "use_quad": False,
        "use_theory_covmat": False,
        "use_multiplicative_prescription": False,
    }

    scales_by_dataset = {}
    for name in dataset_names:
        Loader.theory_path = pathlib.Path(self.config["theory_path"])
        _, _, _, _, scales_by_dataset[name] = Loader.load_theory(
            name, **dummy_options
        )

    return scales_by_dataset

@property
def smeft_predictions(self):
"""Compute |SMEFT| predictions for each replica.
Expand Down