From 16236b817ae3db7548888c1a8f416a5bffe8eff7 Mon Sep 17 00:00:00 2001 From: Florian Mausolf Date: Fri, 14 Nov 2025 11:16:53 +0100 Subject: [PATCH] include bkg 4 and 5 in 1D data generation --- examples/1D_example/run_gmm_example.py | 67 +++++++--------------- examples/1D_example/run_sigmoid_example.py | 67 +++++++--------------- src/gatohep/data_generation.py | 39 ++++++++++--- 3 files changed, 76 insertions(+), 97 deletions(-) diff --git a/examples/1D_example/run_gmm_example.py b/examples/1D_example/run_gmm_example.py index c9bb5a7..8327ddf 100644 --- a/examples/1D_example/run_gmm_example.py +++ b/examples/1D_example/run_gmm_example.py @@ -149,34 +149,21 @@ def main(): high=high, name="Signal", ) - hist_bkg1 = create_hist( - data["bkg1"]["NN_output"], - weights=data["bkg1"]["weight"], - bins=n_bins, - low=low, - high=high, - name="Bkg1", - ) - hist_bkg2 = create_hist( - data["bkg2"]["NN_output"], - weights=data["bkg2"]["weight"], - bins=n_bins, - low=low, - high=high, - name="Bkg2", - ) - hist_bkg3 = create_hist( - data["bkg3"]["NN_output"], - weights=data["bkg3"]["weight"], - bins=n_bins, - low=low, - high=high, - name="Bkg3", - ) - bkg_hists = [hist_bkg1, hist_bkg2, hist_bkg3] + bkg_processes = [f"bkg{i}" for i in range(1, 6)] + bkg_hists = [ + create_hist( + data[proc]["NN_output"], + weights=data[proc]["weight"], + bins=n_bins, + low=low, + high=high, + name=f"{proc.capitalize()}", + ) + for proc in bkg_processes + ] # plot the backgrounds: - process_labels = ["Background 1", "Background 2", "Background 3"] + process_labels = [f"Background {i}" for i in range(1, len(bkg_processes) + 1)] signal_labels = ["Signal x 100"] # For demonstration, we compare multiple binning schemes. @@ -356,25 +343,15 @@ def train_step( bins=opt_bin_edges, name="Signal_opt", ) - h_bkg1_opt = create_hist( - data["bkg1"]["NN_output"], - weights=data["bkg1"]["weight"], - bins=opt_bin_edges, - name="Bkg1_opt", - ) - h_bkg2_opt = create_hist( - data["bkg2"]["NN_output"], - weights=data["bkg2"]["weight"], - bins=opt_bin_edges, - name="Bkg2_opt", - ) - h_bkg3_opt = create_hist( - data["bkg3"]["NN_output"], - weights=data["bkg3"]["weight"], - bins=opt_bin_edges, - name="Bkg3_opt", - ) - opt_bkg_hists = [h_bkg1_opt, h_bkg2_opt, h_bkg3_opt] + opt_bkg_hists = [ + create_hist( + data[proc]["NN_output"], + weights=data[proc]["weight"], + bins=opt_bin_edges, + name=f"{proc}_opt", + ) + for proc in bkg_processes + ] # Compute significance from these optimized histograms. Z_opt = compute_significance_from_hists(h_signal_opt, opt_bkg_hists) diff --git a/examples/1D_example/run_sigmoid_example.py b/examples/1D_example/run_sigmoid_example.py index eefd7e5..b9dcd45 100644 --- a/examples/1D_example/run_sigmoid_example.py +++ b/examples/1D_example/run_sigmoid_example.py @@ -174,34 +174,21 @@ def main(): high=high, name="Signal", ) - hist_bkg1 = create_hist( - data["bkg1"]["NN_output"], - weights=data["bkg1"]["weight"], - bins=n_bins, - low=low, - high=high, - name="Bkg1", - ) - hist_bkg2 = create_hist( - data["bkg2"]["NN_output"], - weights=data["bkg2"]["weight"], - bins=n_bins, - low=low, - high=high, - name="Bkg2", - ) - hist_bkg3 = create_hist( - data["bkg3"]["NN_output"], - weights=data["bkg3"]["weight"], - bins=n_bins, - low=low, - high=high, - name="Bkg3", - ) - bkg_hists = [hist_bkg1, hist_bkg2, hist_bkg3] + bkg_processes = [f"bkg{i}" for i in range(1, 6)] + bkg_hists = [ + create_hist( + data[proc]["NN_output"], + weights=data[proc]["weight"], + bins=n_bins, + low=low, + high=high, + name=f"{proc.capitalize()}", + ) + for proc in bkg_processes + ] # plot the backgrounds: - process_labels = ["Background 1", "Background 2", "Background 3"] + process_labels = [f"Background {i}" for i in range(1, len(bkg_processes) + 1)] signal_labels = ["Signal x 100"] # For demonstration, we compare multiple binning schemes. @@ -385,25 +372,15 @@ def train_step( bins=opt_bin_edges, name="Signal_opt", ) - h_bkg1_opt = create_hist( - data["bkg1"]["NN_output"], - weights=data["bkg1"]["weight"], - bins=opt_bin_edges, - name="Bkg1_opt", - ) - h_bkg2_opt = create_hist( - data["bkg2"]["NN_output"], - weights=data["bkg2"]["weight"], - bins=opt_bin_edges, - name="Bkg2_opt", - ) - h_bkg3_opt = create_hist( - data["bkg3"]["NN_output"], - weights=data["bkg3"]["weight"], - bins=opt_bin_edges, - name="Bkg3_opt", - ) - opt_bkg_hists = [h_bkg1_opt, h_bkg2_opt, h_bkg3_opt] + opt_bkg_hists = [ + create_hist( + data[proc]["NN_output"], + weights=data[proc]["weight"], + bins=opt_bin_edges, + name=f"{proc}_opt", + ) + for proc in bkg_processes + ] # Compute significance from these optimized histograms. Z_opt = compute_significance_from_hists(h_signal_opt, opt_bkg_hists) diff --git a/src/gatohep/data_generation.py b/src/gatohep/data_generation.py index eac9019..6fc33e0 100644 --- a/src/gatohep/data_generation.py +++ b/src/gatohep/data_generation.py @@ -148,6 +148,8 @@ def generate_toy_data_1D( xs_bkg1: float = 100, xs_bkg2: float = 80, xs_bkg3: float = 50, + xs_bkg4: float = 20, + xs_bkg5: float = 10, lumi: float = 100.0, noise_scale: float = 0.3, seed: int | None = None, @@ -169,6 +171,10 @@ def generate_toy_data_1D( Cross-section for the second background component. Default is 15. xs_bkg3 : float, optional Cross-section for the third background component. Default is 10. + xs_bkg4 : float, optional + Cross-section for the fourth background component. Default is 20. + xs_bkg5 : float, optional + Cross-section for the fifth background component. Default is 10. lumi : float, optional Luminosity for scaling event weights. Default is 100. seed : int or None, optional @@ -184,32 +190,51 @@ def generate_toy_data_1D( if seed is not None: np.random.seed(seed) - tot_xs_bkg = xs_bkg1 + xs_bkg2 + xs_bkg3 + tot_xs_bkg = xs_bkg1 + xs_bkg2 + xs_bkg3 + xs_bkg4 + xs_bkg5 n_bkg1 = int(n_bkg * xs_bkg1 / tot_xs_bkg) n_bkg2 = int(n_bkg * xs_bkg2 / tot_xs_bkg) - n_bkg3 = n_bkg - (n_bkg1 + n_bkg2) + n_bkg3 = int(n_bkg * xs_bkg3 / tot_xs_bkg) + n_bkg4 = int(n_bkg * xs_bkg4 / tot_xs_bkg) + n_bkg5 = n_bkg - (n_bkg1 + n_bkg2 + n_bkg3 + n_bkg4) - counts = dict(signal=n_signal, bkg1=n_bkg1, bkg2=n_bkg2, bkg3=n_bkg3) - xs = dict(signal=xs_signal, bkg1=xs_bkg1, bkg2=xs_bkg2, bkg3=xs_bkg3) + counts = dict( + signal=n_signal, + bkg1=n_bkg1, + bkg2=n_bkg2, + bkg3=n_bkg3, + bkg4=n_bkg4, + bkg5=n_bkg5, + ) + xs = dict( + signal=xs_signal, + bkg1=xs_bkg1, + bkg2=xs_bkg2, + bkg3=xs_bkg3, + bkg4=xs_bkg4, + bkg5=xs_bkg5, + ) X = { "signal": _sample("signal1", n_signal, seed), "bkg1": _sample("bkg1", n_bkg1, seed + 1 if seed else None), "bkg2": _sample("bkg2", n_bkg2, seed + 2 if seed else None), "bkg3": _sample("bkg3", n_bkg3, seed + 3 if seed else None), + "bkg4": _sample("bkg4", n_bkg4, seed + 4 if seed else None), + "bkg5": _sample("bkg5", n_bkg5, seed + 5 if seed else None), } pdf_sig = multivariate_normal(MEANS["signal1"], COV) pdf_bkg = { - p: multivariate_normal(MEANS[p], COV) for p in ("bkg1", "bkg2", "bkg3") + p: multivariate_normal(MEANS[p], COV) + for p in ("bkg1", "bkg2", "bkg3", "bkg4", "bkg5") } - total_bkg_xs = xs_bkg1 + xs_bkg2 + xs_bkg3 + total_bkg_xs = xs_bkg1 + xs_bkg2 + xs_bkg3 + xs_bkg4 + xs_bkg5 def _pb(x): return sum((xs[p] / total_bkg_xs) * pdf_bkg[p].pdf(x) for p in pdf_bkg) data = {} - for proc in ("signal", "bkg1", "bkg2", "bkg3"): + for proc in ("signal", "bkg1", "bkg2", "bkg3", "bkg4", "bkg5"): Xp = X[proc] ps = pdf_sig.pdf(Xp) pb = _pb(Xp)