Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 22 additions & 45 deletions examples/1D_example/run_gmm_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,34 +149,21 @@ def main():
high=high,
name="Signal",
)
hist_bkg1 = create_hist(
data["bkg1"]["NN_output"],
weights=data["bkg1"]["weight"],
bins=n_bins,
low=low,
high=high,
name="Bkg1",
)
hist_bkg2 = create_hist(
data["bkg2"]["NN_output"],
weights=data["bkg2"]["weight"],
bins=n_bins,
low=low,
high=high,
name="Bkg2",
)
hist_bkg3 = create_hist(
data["bkg3"]["NN_output"],
weights=data["bkg3"]["weight"],
bins=n_bins,
low=low,
high=high,
name="Bkg3",
)
bkg_hists = [hist_bkg1, hist_bkg2, hist_bkg3]
bkg_processes = [f"bkg{i}" for i in range(1, 6)]
bkg_hists = [
create_hist(
data[proc]["NN_output"],
weights=data[proc]["weight"],
bins=n_bins,
low=low,
high=high,
name=f"{proc.capitalize()}",
)
for proc in bkg_processes
]

# plot the backgrounds:
process_labels = ["Background 1", "Background 2", "Background 3"]
process_labels = [f"Background {i}" for i in range(1, len(bkg_processes) + 1)]
signal_labels = ["Signal x 100"]

# For demonstration, we compare multiple binning schemes.
Expand Down Expand Up @@ -356,25 +343,15 @@ def train_step(
bins=opt_bin_edges,
name="Signal_opt",
)
h_bkg1_opt = create_hist(
data["bkg1"]["NN_output"],
weights=data["bkg1"]["weight"],
bins=opt_bin_edges,
name="Bkg1_opt",
)
h_bkg2_opt = create_hist(
data["bkg2"]["NN_output"],
weights=data["bkg2"]["weight"],
bins=opt_bin_edges,
name="Bkg2_opt",
)
h_bkg3_opt = create_hist(
data["bkg3"]["NN_output"],
weights=data["bkg3"]["weight"],
bins=opt_bin_edges,
name="Bkg3_opt",
)
opt_bkg_hists = [h_bkg1_opt, h_bkg2_opt, h_bkg3_opt]
opt_bkg_hists = [
create_hist(
data[proc]["NN_output"],
weights=data[proc]["weight"],
bins=opt_bin_edges,
name=f"{proc}_opt",
)
for proc in bkg_processes
]

# Compute significance from these optimized histograms.
Z_opt = compute_significance_from_hists(h_signal_opt, opt_bkg_hists)
Expand Down
67 changes: 22 additions & 45 deletions examples/1D_example/run_sigmoid_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,34 +174,21 @@ def main():
high=high,
name="Signal",
)
hist_bkg1 = create_hist(
data["bkg1"]["NN_output"],
weights=data["bkg1"]["weight"],
bins=n_bins,
low=low,
high=high,
name="Bkg1",
)
hist_bkg2 = create_hist(
data["bkg2"]["NN_output"],
weights=data["bkg2"]["weight"],
bins=n_bins,
low=low,
high=high,
name="Bkg2",
)
hist_bkg3 = create_hist(
data["bkg3"]["NN_output"],
weights=data["bkg3"]["weight"],
bins=n_bins,
low=low,
high=high,
name="Bkg3",
)
bkg_hists = [hist_bkg1, hist_bkg2, hist_bkg3]
bkg_processes = [f"bkg{i}" for i in range(1, 6)]
bkg_hists = [
create_hist(
data[proc]["NN_output"],
weights=data[proc]["weight"],
bins=n_bins,
low=low,
high=high,
name=f"{proc.capitalize()}",
)
for proc in bkg_processes
]

# plot the backgrounds:
process_labels = ["Background 1", "Background 2", "Background 3"]
process_labels = [f"Background {i}" for i in range(1, len(bkg_processes) + 1)]
signal_labels = ["Signal x 100"]

# For demonstration, we compare multiple binning schemes.
Expand Down Expand Up @@ -385,25 +372,15 @@ def train_step(
bins=opt_bin_edges,
name="Signal_opt",
)
h_bkg1_opt = create_hist(
data["bkg1"]["NN_output"],
weights=data["bkg1"]["weight"],
bins=opt_bin_edges,
name="Bkg1_opt",
)
h_bkg2_opt = create_hist(
data["bkg2"]["NN_output"],
weights=data["bkg2"]["weight"],
bins=opt_bin_edges,
name="Bkg2_opt",
)
h_bkg3_opt = create_hist(
data["bkg3"]["NN_output"],
weights=data["bkg3"]["weight"],
bins=opt_bin_edges,
name="Bkg3_opt",
)
opt_bkg_hists = [h_bkg1_opt, h_bkg2_opt, h_bkg3_opt]
opt_bkg_hists = [
create_hist(
data[proc]["NN_output"],
weights=data[proc]["weight"],
bins=opt_bin_edges,
name=f"{proc}_opt",
)
for proc in bkg_processes
]

# Compute significance from these optimized histograms.
Z_opt = compute_significance_from_hists(h_signal_opt, opt_bkg_hists)
Expand Down
39 changes: 32 additions & 7 deletions src/gatohep/data_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ def generate_toy_data_1D(
xs_bkg1: float = 100,
xs_bkg2: float = 80,
xs_bkg3: float = 50,
xs_bkg4: float = 20,
xs_bkg5: float = 10,
lumi: float = 100.0,
noise_scale: float = 0.3,
seed: int | None = None,
Expand All @@ -169,6 +171,10 @@ def generate_toy_data_1D(
Cross-section for the second background component. Default is 80.
xs_bkg3 : float, optional
Cross-section for the third background component. Default is 50.
xs_bkg4 : float, optional
Cross-section for the fourth background component. Default is 20.
xs_bkg5 : float, optional
Cross-section for the fifth background component. Default is 10.
lumi : float, optional
Luminosity for scaling event weights. Default is 100.
seed : int or None, optional
Expand All @@ -184,32 +190,51 @@ def generate_toy_data_1D(
if seed is not None:
np.random.seed(seed)

tot_xs_bkg = xs_bkg1 + xs_bkg2 + xs_bkg3
tot_xs_bkg = xs_bkg1 + xs_bkg2 + xs_bkg3 + xs_bkg4 + xs_bkg5
n_bkg1 = int(n_bkg * xs_bkg1 / tot_xs_bkg)
n_bkg2 = int(n_bkg * xs_bkg2 / tot_xs_bkg)
n_bkg3 = n_bkg - (n_bkg1 + n_bkg2)
n_bkg3 = int(n_bkg * xs_bkg3 / tot_xs_bkg)
n_bkg4 = int(n_bkg * xs_bkg4 / tot_xs_bkg)
n_bkg5 = n_bkg - (n_bkg1 + n_bkg2 + n_bkg3 + n_bkg4)

counts = dict(signal=n_signal, bkg1=n_bkg1, bkg2=n_bkg2, bkg3=n_bkg3)
xs = dict(signal=xs_signal, bkg1=xs_bkg1, bkg2=xs_bkg2, bkg3=xs_bkg3)
counts = dict(
signal=n_signal,
bkg1=n_bkg1,
bkg2=n_bkg2,
bkg3=n_bkg3,
bkg4=n_bkg4,
bkg5=n_bkg5,
)
xs = dict(
signal=xs_signal,
bkg1=xs_bkg1,
bkg2=xs_bkg2,
bkg3=xs_bkg3,
bkg4=xs_bkg4,
bkg5=xs_bkg5,
)

X = {
"signal": _sample("signal1", n_signal, seed),
"bkg1": _sample("bkg1", n_bkg1, seed + 1 if seed else None),
"bkg2": _sample("bkg2", n_bkg2, seed + 2 if seed else None),
"bkg3": _sample("bkg3", n_bkg3, seed + 3 if seed else None),
"bkg4": _sample("bkg4", n_bkg4, seed + 4 if seed else None),
"bkg5": _sample("bkg5", n_bkg5, seed + 5 if seed else None),
}

pdf_sig = multivariate_normal(MEANS["signal1"], COV)
pdf_bkg = {
p: multivariate_normal(MEANS[p], COV) for p in ("bkg1", "bkg2", "bkg3")
p: multivariate_normal(MEANS[p], COV)
for p in ("bkg1", "bkg2", "bkg3", "bkg4", "bkg5")
}
total_bkg_xs = xs_bkg1 + xs_bkg2 + xs_bkg3
total_bkg_xs = xs_bkg1 + xs_bkg2 + xs_bkg3 + xs_bkg4 + xs_bkg5

def _pb(x):
return sum((xs[p] / total_bkg_xs) * pdf_bkg[p].pdf(x) for p in pdf_bkg)

data = {}
for proc in ("signal", "bkg1", "bkg2", "bkg3"):
for proc in ("signal", "bkg1", "bkg2", "bkg3", "bkg4", "bkg5"):
Xp = X[proc]
ps = pdf_sig.pdf(Xp)
pb = _pb(Xp)
Expand Down