Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mlpf/plotting/data_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def load_nano(fn):
"PuppiMET_phi",
"RawPFMET_pt",
"RawPFMET_phi",
"RawPuppiMET_pt",
"RawPuppiMET_phi",
"Pileup_nPU",
"Pileup_nTrueInt",
"GenVtx_z",
Expand Down
48 changes: 27 additions & 21 deletions mlpf/plotting/plot_loss_curves.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def loss_plot(epochs, train, test, margin=0.05, smoothing=False, ylabel="", titl
l0 = "Training"
l1 = "Validation"

p0 = plt.plot(epochs, train, alpha=alpha, label=l0, marker="o", ls="--")
p1 = plt.plot(epochs, test, alpha=alpha, label=l1, marker="x")
p0 = plt.plot(epochs, train, alpha=alpha, label=l0, marker="o", ls="--", lw=3, ms=10)
p1 = plt.plot(epochs, test, alpha=alpha, label=l1, marker="x", lw=3, ms=10)

if smoothing:
train_smooth = np.convolve(train[~np.isnan(train)], np.ones(5) / 5, mode="valid")
Expand All @@ -82,17 +82,23 @@ def loss_plot(epochs, train, test, margin=0.05, smoothing=False, ylabel="", titl
if last_valid_loss is not np.nan:
plt.ylim(last_valid_loss * (1.0 - margin), last_valid_loss * (1.0 + margin))

plt.legend(loc=3, frameon=False, fontsize=30)
plt.xlabel("epoch")
plt.legend(loc=3, frameon=False, fontsize=35)
plt.xlabel("Epoch")
plt.ylabel(ylabel)
mplhep.cms.label("", data=False, rlabel="Run 3 configuration")
mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (14 TeV)")
return fig, ax


@click.command()
@click.option("--input-dirs", "-i", multiple=True, required=True, help="Input directories containing history/epoch_*.json files.")
@click.option("--labels", "-l", multiple=True, help="Labels for each input directory. Must be the same number as input-dirs.")
@click.option("--output-dir", "-o", required=True, type=str, help="Output directory for plots.")
@click.option(
"--input-dirs",
"-i",
multiple=True,
help="Input directories containing history/epoch_*.json files.",
default=["/mnt/work/huggingface/particleflow/cms/v2.6.0pre1/pyg-cms_20250722_101813_274478"],
)
@click.option("--labels", "-l", multiple=True, help="Labels for each input directory. Must be the same number as input-dirs.", default=["v2.6.0pre1"])
@click.option("--output-dir", "-o", type=str, help="Output directory for plots.", default="plots")
def main(input_dirs, labels, output_dir):
"""
Generates loss curve plots from training history files.
Expand Down Expand Up @@ -123,7 +129,7 @@ def main(input_dirs, labels, output_dir):

# Total loss
fig, ax = loss_plot(
history.index + 1, history["train_Total"].values, history["valid_Total"].values, margin=0.1, ylabel="Total loss", title=label
history.index + 1, history["train_Total"].values, history["valid_Total"].values, margin=0.15, ylabel="Total Loss [a.u.]", title=label
)
plt.xticks(range(1, len(history) + 1))
plt.savefig(output_path / f"{label}_loss.pdf")
Expand All @@ -136,7 +142,7 @@ def main(input_dirs, labels, output_dir):
history["train_Classification"].values * 100,
history["valid_Classification"].values * 100,
margin=0.05,
ylabel="Particle ID loss x100",
ylabel="Particle ID Loss x100 [a.u.]",
title=label,
)
plt.xticks(range(1, len(history) + 1))
Expand All @@ -149,8 +155,8 @@ def main(input_dirs, labels, output_dir):
history.index + 1,
history["train_Classification_binary"].values,
history["valid_Classification_binary"].values,
margin=0.1,
ylabel="Binary classification loss",
margin=0.15,
ylabel="Binary Classification Loss [a.u.]",
title=label,
)
plt.xticks(range(1, len(history) + 1))
Expand All @@ -162,7 +168,7 @@ def main(input_dirs, labels, output_dir):
if all([f"train_{loss}" in history.columns for loss in reg_losses]):
reg_loss = sum([history[f"train_{loss}"].values for loss in reg_losses])
val_reg_loss = sum([history[f"valid_{loss}"].values for loss in reg_losses])
fig, ax = loss_plot(history.index + 1, reg_loss, val_reg_loss, margin=0.2, ylabel="Regression loss", title=label)
fig, ax = loss_plot(history.index + 1, reg_loss, val_reg_loss, margin=0.2, ylabel="Regression Loss [a.u.]", title=label)
plt.xticks(range(1, len(history) + 1))
plt.savefig(output_path / f"{label}_reg_loss.pdf")
plt.close(fig)
Expand All @@ -174,7 +180,7 @@ def main(input_dirs, labels, output_dir):
history["train_ispu"].values,
history["valid_ispu"].values,
margin=0.5,
ylabel="PU classification loss",
ylabel="PU Classification Loss [a.u.]",
title=label,
)
plt.xticks(range(1, len(history) + 1))
Expand All @@ -186,10 +192,10 @@ def main(input_dirs, labels, output_dir):
# fig.suptitle(label, fontsize=35)

loss_info = [
{"name": "Total", "label": "Total Loss"},
{"name": "Classification", "label": "PID Classification Loss"},
{"name": "Classification_binary", "label": "Binary Classification Loss"},
{"name": "Regression", "label": "Regression Loss"},
{"name": "Total", "label": "Total Loss [a.u.]"},
{"name": "Classification", "label": "PID Classification Loss [a.u.]"},
{"name": "Classification_binary", "label": "Binary Classification Loss [a.u.]"},
{"name": "Regression", "label": "Regression Loss [a.u.]"},
]

for i, (ax, info) in enumerate(zip(axs, loss_info)):
Expand Down Expand Up @@ -238,10 +244,10 @@ def main(input_dirs, labels, output_dir):
continue
label = labels[i] if labels else Path(input_dirs[i]).name
plt.plot(history.index + 1, history["valid_Total"], marker="o", label=label)
plt.ylabel("Total valid. loss")
plt.xlabel("epoch")
plt.ylabel("Total Valid. Loss [a.u.]")
plt.xlabel("Epoch")
plt.legend(loc="best")
mplhep.cms.label("", data=False, rlabel="Run 3 configuration")
mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (14 TeV)")
plt.savefig(output_path / "loss_comparison.pdf")
plt.close()

Expand Down
26 changes: 13 additions & 13 deletions mlpf/plotting/plot_met_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@ def plot_met_distribution(
h2 = to_bh(data_mlpf["PuppiMET_pt"], bins)

plt.sca(a0)
x0 = mplhep.histplot(h0, histtype="step", lw=2, label="Gen.", ls="--", color=gen_color)
x0 = mplhep.histplot(h0, histtype="step", lw=2, label="Gen", ls="--", color=gen_color)
x1 = mplhep.histplot(h1, histtype="step", lw=2, label="PF-PUPPI", ls=pf_linestyle, color=pf_color)
x2 = mplhep.histplot(h2, histtype="step", lw=2, label=mlpf_label, ls=mlpf_linestyle, color=mlpf_color)

if logy:
plt.yscale("log")
a0.set_ylim(bottom=1, top=a0.get_ylim()[1] * 1000)

mplhep.cms.label("", data=False, com=tev, year="Run 3", ax=a0)
mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (13.6 TeV)", ax=a0)
a0.text(
sample_label_coords[0],
sample_label_coords[1],
Expand Down Expand Up @@ -127,7 +127,7 @@ def met_response_plot(
ax = plt.axes()
b = np.linspace(0, 5, 101)

mplhep.cms.label("", data=False, com=tev, year="Run 3", ax=ax)
mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (13.6 TeV)", ax=ax)
ax.text(
sample_label_coords[0],
sample_label_coords[1],
Expand Down Expand Up @@ -325,7 +325,7 @@ def plot_met_response_vs_pu(resp_pf, resp_mlpf, data_pf, data_mlpf, output_dir,
ax.set_xlabel("True $N_{PV}$")
ax.set_ylabel("MET response resolution (IQR/med.)")
ax.set_ylim(0, 1.5)
mplhep.cms.label(ax=ax, data=False, com=tev, year="Run 3")
mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (13.6 TeV)", ax=ax)
ax.text(
kwargs["sample_label_coords"][0],
kwargs["sample_label_coords"][1],
Expand Down Expand Up @@ -368,16 +368,16 @@ def make_plots(input_pf_parquet, input_mlpf_parquet, output_dir, sample_name, te

# plotting style variables
legend_fontsize = 30
legend_loc = (0.5, 0.55)
legend_loc = (0.5, 0.5)
legend_loc_scalereso = (0.50, 0.65)
legend_loc_met_response = (0.3, 0.45)
sample_label_fontsize = 30
sample_label_fontsize = 35
addtext_fontsize = 25
jet_label_coords_single = 0.02, 0.88
sample_label_coords = 0.02, 0.96
gen_color = "#648df4"
pf_color = "#f3a041"
mlpf_color = "#d23b3d"
jet_label_coords_single = 0.05, 0.88
sample_label_coords = 0.05, 0.95
gen_color = "#9C9CA1"
pf_color = "#5790FC"
mlpf_color = "#E42536"
pf_linestyle = "-."
mlpf_linestyle = "-"
pf_label = "PF-PUPPI"
Expand Down Expand Up @@ -511,7 +511,7 @@ def varbins(*args):
ax.set_xscale("log")
ax.set_ylim(0.0, 4.0)
plt.axhline(1.0, color="black", ls="--")
mplhep.cms.label(ax=ax, data=False, com=tev, year="Run 3")
mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (13.6 TeV)", ax=ax)
ax.text(
sample_label_coords[0], sample_label_coords[1], plot_sample_name, transform=ax.transAxes, fontsize=sample_label_fontsize, ha="left", va="top"
)
Expand All @@ -533,7 +533,7 @@ def varbins(*args):
ax.legend(fontsize=legend_fontsize, loc=legend_loc_scalereso)
ax.set_xscale("log")
ax.set_ylim(0.0, 2.0)
mplhep.cms.label(ax=ax, data=False, com=tev, year="Run 3")
mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (13.6 TeV)", ax=ax)
ax.text(
sample_label_coords[0], sample_label_coords[1], plot_sample_name, transform=ax.transAxes, fontsize=sample_label_fontsize, ha="left", va="top"
)
Expand Down
20 changes: 10 additions & 10 deletions mlpf/plotting/plot_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,14 @@ def make_plots(input_pf_parquet, input_mlpf_parquet, corrections_file, output_di
legend_loc_effpur = (0.5, 0.68)
legend_loc_scalereso = (0.40, 0.50)
legend_loc_jet_response = (0.05, 0.55)
sample_label_fontsize = 30
sample_label_fontsize = 35
addtext_fontsize = 25
jet_label_coords = 0.02, 0.86
jet_label_coords_single = 0.02, 0.88
sample_label_coords = 0.02, 0.96
gen_color = "#648df4"
pf_color = "#f3a041"
mlpf_color = "#d23b3d"
jet_label_coords = 0.05, 0.85
jet_label_coords_single = 0.05, 0.87
sample_label_coords = 0.05, 0.95
gen_color = "#9C9CA1"
pf_color = "#5790FC"
mlpf_color = "#E42536"
gen_linestyle = "--"
pf_linestyle = "-."
mlpf_linestyle = "-"
Expand Down Expand Up @@ -184,9 +184,9 @@ def plot_kinematic_distribution(
h2 = to_bh(awkward.flatten(data_mlpf[f"{jet_prefix}_{variable_reco}"][mlpf_jet_mask]), bins)

plt.sca(a0)
x0 = mplhep.histplot(h0, histtype="step", lw=2, label="Gen.", ls=gen_linestyle)
x1 = mplhep.histplot(h1, histtype="step", lw=2, label=pf_label, ls=pf_linestyle)
x2 = mplhep.histplot(h2, histtype="step", lw=2, label=mlpf_label, ls=mlpf_linestyle)
x0 = mplhep.histplot(h0, histtype="step", lw=2, label="Gen", ls=gen_linestyle, color=gen_color)
x1 = mplhep.histplot(h1, histtype="step", lw=2, label=pf_label, ls=pf_linestyle, color=pf_color)
x2 = mplhep.histplot(h2, histtype="step", lw=2, label=mlpf_label, ls=mlpf_linestyle, color=mlpf_color)

if logy:
plt.yscale("log")
Expand Down
39 changes: 15 additions & 24 deletions notebooks/cms/cms-validate-root-vs-postprocessing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,21 @@
"outputs": [],
"source": [
"matplotlib.rcParams['axes.labelsize'] = 35\n",
"legend_fontsize = 30\n",
"legend_fontsize = 35\n",
"sample_label_fontsize = 30\n",
"addtext_fontsize = 25\n",
"addtext_fontsize = 30\n",
"\n",
"jet_label_coords = 0.02, 0.82\n",
"jet_label_coords_single = 0.02, 0.86\n",
"sample_label_coords = 0.02, 0.96\n",
"jet_label_coords = 0.05, 0.80\n",
"jet_label_coords_single = 0.05, 0.86\n",
"sample_label_coords = 0.05, 0.95\n",
"jet_label_ak4 = \"AK4 ref. jets, $p_T$ > 3 GeV\"\n",
"particle_label = \"$p_T$ > 0.5 GeV, $|\\eta|$ < 5\"\n",
"legend_loc = (0.54, 0.48)\n",
"\n",
"default_cycler = plt.rcParams['axes.prop_cycle']\n",
"\n",
"pythia_color = \"#648df4\"\n",
"target_color = \"#944b8a\"\n",
"pythia_color = \"#9C9CA1\"\n",
"target_color = \"#964A8B\"\n",
"\n",
"pythia_linestyle = \"--\"\n",
"target_linestyle = \"-\""
Expand Down Expand Up @@ -104,7 +105,7 @@
"outputs": [],
"source": [
"#files = [pickle.load(open(fn, \"rb\")) for fn in glob.glob(\"/local/joosep/mlpf/cms/20250508_cmssw_15_0_5_d3c6d1/validation_plots/out*.pkl\")]\n",
"files = [pickle.load(open(fn, \"rb\")) for fn in glob.glob(\"/mnt/work/particleflow/20250508_cmssw_15_0_5_d3c6d1/validation_plots/out*.pkl\")]\n",
"files = [pickle.load(open(fn, \"rb\")) for fn in glob.glob(\"/mnt/work/particleflow/CMSSW_15_0_5_mlpf_v2.6.0pre1_puppi_2372e2/validation_plots/out*.pkl\")]\n",
"ret = reduce(add_results, files, {})\n",
"\n",
"sample_keys = sorted(set([\"/\".join(k.split(\"/\")[0:2]) for k in ret.keys() if not k.startswith(\"combined\")]))\n",
Expand Down Expand Up @@ -139,7 +140,7 @@
"}\n",
"\n",
"def sample_label(ax, sample, additional_text=\"\"):\n",
" plt.text(sample_label_coords[0], sample_label_coords[1], EVALUATION_DATASET_NAMES[sample_labels[sample.replace(\"combined/\", \"\")]] + \"\\n\" + additional_text, ha=\"left\", va=\"top\", transform=ax.transAxes)"
" plt.text(sample_label_coords[0], sample_label_coords[1], EVALUATION_DATASET_NAMES[sample_labels[sample.replace(\"combined/\", \"\")]] + \"\\n\" + additional_text, ha=\"left\", va=\"top\", transform=ax.transAxes, fontsize=sample_label_fontsize)"
]
},
{
Expand Down Expand Up @@ -167,16 +168,6 @@
" plt.savefig(\"{}_particle_met_2d.pdf\".format(sample.replace(\"/\", \"_\")), bbox_inches=\"tight\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "44a895d1-f5f9-4f09-8134-126c34da6422",
"metadata": {},
"outputs": [],
"source": [
"sample_keys_combined"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -195,7 +186,7 @@
" mplhep.histplot(h1, label=\"Target\", lw=2, color=target_color, ls=target_linestyle)\n",
" plt.xscale(\"log\")\n",
" plt.yscale(\"log\")\n",
" plt.legend(loc=(0.65, 0.7), fontsize=legend_fontsize)\n",
" plt.legend(loc=legend_loc, fontsize=legend_fontsize)\n",
" plt.ylim(1, 1e8)\n",
" mplhep.cms.label(\"\", data=False, com=14, year='Run 3')\n",
" sample_label(a0, sample)\n",
Expand Down Expand Up @@ -272,13 +263,13 @@
" mplhep.histplot(h0, label=\"Pythia\", lw=2, color=pythia_color, ls=pythia_linestyle)\n",
" mplhep.histplot(h1, label=\"Target\", lw=2, color=target_color, ls=target_linestyle)\n",
" plt.xscale(\"log\")\n",
" plt.legend(fontsize=legend_fontsize)\n",
" plt.legend(fontsize=legend_fontsize, loc=legend_loc)\n",
" mplhep.cms.label(\"\", data=False, com=14, year='Run 3')\n",
" sample_label(a0, sample)\n",
" a0.text(jet_label_coords[0], jet_label_coords[1], jet_label_ak4, transform=a0.transAxes, fontsize=addtext_fontsize)\n",
" plt.yscale(\"log\")\n",
" plt.ylabel(\"Count\")\n",
" plt.ylim(1,1e8)\n",
" plt.ylim(100,1e8)\n",
"\n",
" plt.sca(a1)\n",
" \n",
Expand Down Expand Up @@ -311,7 +302,7 @@
" plt.figure()\n",
" ax = plt.axes()\n",
" mplhep.histplot(ret[f\"{sample}/jets_pt_ratio_target_pumask\"][bh.rebin(rebin)], yerr=False, label=\"Target\", lw=2, color=target_color, ls=target_linestyle)\n",
" plt.legend(fontsize=legend_fontsize)\n",
" plt.legend(loc=legend_loc, fontsize=legend_fontsize)\n",
" mplhep.cms.label(\"\", data=False, com=14, year='Run 3')\n",
" sample_label(ax, sample, jet_label_ak4)\n",
" a0.text(jet_label_coords_single[0], jet_label_coords_single[1], jet_label_ak4, transform=a0.transAxes, fontsize=addtext_fontsize)\n",
Expand All @@ -336,7 +327,7 @@
" plt.sca(a0)\n",
" mplhep.histplot(ret[f\"{sample}/met_pythia\"][bh.rebin(rebin)], yerr=False, label=\"Pythia\", lw=2, color=pythia_color, ls=pythia_linestyle)\n",
" mplhep.histplot(ret[f\"{sample}/met_target_pumask\"][bh.rebin(rebin)], yerr=False, label=\"Target\", lw=2, color=target_color, ls=target_linestyle)\n",
" plt.legend(loc=(0.65, 0.7), fontsize=legend_fontsize)\n",
" plt.legend(loc=legend_loc, fontsize=legend_fontsize)\n",
" plt.yscale(\"log\")\n",
" plt.xscale(\"log\")\n",
" mplhep.cms.label(\"\", data=False, com=14, year='Run 3')\n",
Expand Down
Loading
Loading