Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,5 @@ nobackup/
fitting/results/
fitting/templates/
plots/
*.txt
*.ipynb_checkpoints/
170 changes: 170 additions & 0 deletions fitting/check_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
from __future__ import annotations

import matplotlib.pyplot as plt
import mplhep as hep
import numpy as np
import uproot

# --- CONFIGURATION ---
# Make sure this points to the STABLE tag you decided on
FILENAME = "parT_results/25Nov19_stable_v14_private/2022/testsignalregion.root"
CATEGORY = "pass_bb"
LUMI = 34.7

hep.style.use("CMS")

# 1. VISUAL CONFIGURATION
# Matches image_1dc440.jpg style
color_map = {
r"$\gamma$+jets": "#1f4e5f", # Dark Teal (Main background)
r"Other": "#5e6c7a", # Slate Grey
r"$t\bar{t}$": "#7f964f", # Olive Green
r"W+$\gamma$": "#d16a4c", # Salmon/Red
r"Z+$\gamma$": "#6a3d6a", # Purple
"Data": "black",
}

# Stack order (Bottom -> Top)
stack_order = [
r"$\gamma$+jets",
r"Other",
r"$t\bar{t}$",
r"W+$\gamma$",
r"Z+$\gamma$",
]


def main():
print(f"Opening {FILENAME}...")
try:
f = uproot.open(FILENAME)
except FileNotFoundError:
print(f"Error: Could not find {FILENAME}. Check your path/tag.")
return

hists = {}
data_hist = None

# Pythonic iteration (fixes SIM118)
keys = [k for k in f if CATEGORY in k]
print(f"Found {len(keys)} histograms for category '{CATEGORY}'")

if not keys:
print("Error: No keys found. Check if the category name is correct.")
return

for key in keys:
h = f[key].to_hist()
name = key.split(";")[0] # remove ;1 suffix

# 2. ROBUST PROCESS GROUPING
# Logic to map file names to the labels in our legend
if "Run2022" in name or "Data" in name or "EGamma" in name:
proc = "Data"
elif "Zgamma" in name:
proc = r"Z+$\gamma$"
elif "TTGamma" in name or "TTbar" in name or "TTTo" in name:
proc = r"$t\bar{t}$"
elif "WGamma" in name:
proc = r"W+$\gamma$"
# This catches Gamma+Jets (often named GJ_PTG or GJet)
elif "GJet" in name or "GJ_" in name or "QCD" in name:
proc = r"$\gamma$+jets"
elif "ZJets" in name or "DYJets" in name:
proc = r"Other" # Merging Z+jets into Other based on plot size
elif "WJets" in name:
proc = r"Other" # Merging W+jets into Other
else:
proc = r"Other"

# Accumulate
if proc == "Data":
if data_hist is None:
data_hist = h
else:
data_hist += h
else:
if proc not in hists:
hists[proc] = h
else:
hists[proc] += h

# 3. BUILD STACK
# Filter out empty processes and enforce order
final_stack_names = [p for p in stack_order if p in hists]
stack_hists = [hists[p] for p in final_stack_names]
stack_colors = [color_map.get(p, "grey") for p in final_stack_names]

# 4. PLOTTING
fig, (ax, rax) = plt.subplots(
2, 1, figsize=(10, 10), gridspec_kw={"height_ratios": [3, 1]}, sharex=True
)

# Main Stack
if stack_hists:
hep.histplot(
stack_hists,
label=final_stack_names,
stack=True,
histtype="fill",
color=stack_colors,
ax=ax,
)

# Data
if data_hist:
hep.histplot(
data_hist,
label="Data",
histtype="errorbar",
color="black",
ax=ax,
markersize=10,
yerr=True,
)

# Ratio Calculation
total_mc = sum(stack_hists)
r_num = data_hist.values()
r_den = total_mc.values()
# Safe division
ratio = np.divide(r_num, r_den, out=np.ones_like(r_num), where=r_den != 0)

# Ratio Plot
centers = data_hist.axes[0].centers
rax.errorbar(centers, ratio, yerr=0, fmt="ko", markersize=4)

# 5. STYLING
ax.set_ylabel("Events / GeV")
ax.set_xlim(0, 200) # Match reference X-axis

# Legend: Reverse handles so top of stack = top of legend
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], title=None, loc="upper right", ncol=2, fontsize=14)

# Text Overlay (The "TXbb > 0.95" text)
ax.text(
0.95,
0.92,
r"TXbb $> 0.95$ Region, $200 < p_T < 1200$ GeV",
transform=ax.transAxes,
ha="right",
fontsize=16,
)

# Ratio Styling
rax.set_ylabel("Data / Bkg")
rax.set_ylim(0, 2.5)
rax.axhline(1, color="gray", linestyle="--")
rax.set_xlabel(r"Jet 0 $m_{sd}$ [GeV]", fontsize=18)

# CMS Label
hep.cms.label("Private Work", data=True, lumi=LUMI, year="2022", ax=ax, loc=0)

out_name = "check_zgamma_plot_styled.png"
plt.savefig(out_name)
print(f"\nPlot saved to {out_name}")


if __name__ == "__main__":
main()
78 changes: 42 additions & 36 deletions fitting/make_hists.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,35 @@
from __future__ import annotations

import argparse
import json
import os
from pathlib import Path

import hist
import json
import os
import uproot

from hbb import utils


def fill_hists(outdict, events, region, reg_cfg, obs_cfg, qq_true):

h = hist.Hist(hist.axis.Regular(obs_cfg["nbins"], obs_cfg["min"], obs_cfg["max"], name=obs_cfg["name"], label=obs_cfg["name"]))
h = hist.Hist(
hist.axis.Regular(
obs_cfg["nbins"],
obs_cfg["min"],
obs_cfg["max"],
name=obs_cfg["name"],
label=obs_cfg["name"],
)
)

bins_list = reg_cfg["bins"]
bin_pname = reg_cfg["bin_pname"]
str_bin_br = reg_cfg["branch_name"]

for _process_name, data in events.items():

#TODO add in systematics functionality
# TODO add in systematics functionality
weight_val = data["finalWeight"].astype(float)
s = "nominal"

Expand All @@ -37,17 +45,17 @@ def fill_hists(outdict, events, region, reg_cfg, obs_cfg, qq_true):
pre_selection = (obs_br > obs_cfg["min"]) & (obs_br < obs_cfg["max"])

selection_dict = {
"pass_bb": pre_selection & (Txbbxcc > 0.95) & (Txbb > Txcc),
"pass_cc": pre_selection & (Txbbxcc > 0.95) & (Txcc > Txbb),
"pass_bb": pre_selection & (Txbbxcc > 0.95) & (Txbb > Txcc),
"pass_cc": pre_selection & (Txbbxcc > 0.95) & (Txcc > Txbb),
"fail": pre_selection & (Txbbxcc <= 0.95),
"pass": pre_selection & (Txbbxcc > 0.95)
"pass": pre_selection & (Txbbxcc > 0.95),
}

cut_bb = (genf == 3)
cut_bb = genf == 3
cut_qq = (genf > 0) & (genf < 3)

for i in range(len(bins_list) - 1):
bin_cut = (bin_br > bins_list[i]) & (bin_br < bins_list[i+1]) & pre_selection
bin_cut = (bin_br > bins_list[i]) & (bin_br < bins_list[i + 1]) & pre_selection

for category, selection in selection_dict.items():
if qq_true:
Expand All @@ -57,7 +65,7 @@ def fill_hists(outdict, events, region, reg_cfg, obs_cfg, qq_true):
obs_br[selection & bin_cut & cut_qq],
weight=weight_val[selection & bin_cut & cut_qq],
)
if not name in outdict:
if name not in outdict:
outdict[name] = h.copy()
else:
outdict[name] += h.copy()
Expand All @@ -68,7 +76,7 @@ def fill_hists(outdict, events, region, reg_cfg, obs_cfg, qq_true):
obs_br[selection & bin_cut & cut_bb],
weight=weight_val[selection & bin_cut & cut_bb],
)
if not name in outdict:
if name not in outdict:
outdict[name] = h.copy()
else:
outdict[name] += h.copy()
Expand All @@ -80,20 +88,22 @@ def fill_hists(outdict, events, region, reg_cfg, obs_cfg, qq_true):
obs_br[selection & bin_cut],
weight=weight_val[selection & bin_cut],
)
if not name in outdict:
if name not in outdict:
outdict[name] = h.copy()
else:
outdict[name] += h.copy()
return outdict


def main(args):
year = args.year
tag = args.tag

path_to_dir = f"/eos/uscms/store/group/lpchbbrun3/skims/{tag}"

samples_qq = ['Wjets','Zjets','EWKW','EWKZ','EWKV']

# path_to_dir = f"/eos/uscms/store/group/lpchbbrun3/skims/{tag}"
path_to_dir = f"/eos/uscms/store/group/lpchbbrun3/gmachado/{tag}"

samples_qq = ["Wjets", "Zjets", "EWKW", "EWKZ", "EWKV"]

columns = [
"weight",
"FatJet0_pt",
Expand All @@ -110,31 +120,31 @@ def main(args):
out_path = f"results/{tag}/{year}"
output_file = f"{out_path}/signalregion.root"

if not os.path.exists(out_path):
os.makedirs(out_path)
if not Path(out_path).exists():
Path(out_path).mkdir(parents=True)

if os.path.isfile(output_file):
os.remove(output_file)
if Path(output_file).is_file():
Path(output_file).unlink()
fout = uproot.create(output_file)

# So I can remember the settings I used for each set of results produced
os.popen(f'cp setup.json {out_path}')
with open('setup.json') as f:
os.popen(f"cp setup.json {out_path}")
with Path("setup.json").open() as f:
setup = json.load(f)
cats = setup["categories"]
obs_cfg = setup["observable"]

with open('pmap_run3.json') as f:
with Path("pmap_run3.json").open() as f:
pmap = json.load(f)

filters = [
("FatJet0_pt", ">", 450),
("FatJet0_pt", "<", 1200),
("VBFPair_mjj", ">", -2),
("VBFPair_mjj", "<", 13000),
("FatJet0_pt", ">", 450),
("FatJet0_pt", "<", 1200),
("VBFPair_mjj", ">", -2),
("VBFPair_mjj", "<", 13000),
]

if not obs_cfg["branch_name"] in columns:
if obs_cfg["branch_name"] not in columns:
columns.append(obs_cfg["branch_name"])

out_hists = {}
Expand All @@ -148,7 +158,7 @@ def main(args):
{process: [dataset]},
columns=columns,
region=cfg["name"],
filters=filters
filters=filters,
)

if not events:
Expand All @@ -161,6 +171,7 @@ def main(args):

print(f"Histograms saved to {output_file}")


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Make histograms for a given year.")
parser.add_argument(
Expand All @@ -170,12 +181,7 @@ def main(args):
required=True,
choices=["2022", "2022EE", "2023", "2023BPix"],
)
parser.add_argument(
"--tag",
help="tag",
type=str,
required=True
)
parser.add_argument("--tag", help="tag", type=str, required=True)
args = parser.parse_args()

main(args)
Loading