#!/usr/bin/env python
"""
Run the event mixer.

Reads a configuration file (see invisible_cities/config/mixer.conf), computes
the expected number of events per (g4volume, isotope) component for the given
exposure, poissonizes them, and writes mixed isaura files with Event_Mixer.
"""

import os
import argparse
import numpy as np
import pandas as pd

from invisible_cities.core.system_of_units import kg, dalton, year
from invisible_cities.core.configure import read_config_file
from invisible_cities.evm.mixer import Event_Mixer, _check_enough_nevents
from invisible_cities.evm.mixer import get_mixer_nevents, get_reco_and_sim_nevents

# 2nubb half-life taken from EXO-200 Phys. Rev. C 89, 015502
T12_2nubb = 2.165e+21 * year


def main():
    """Parse the configuration file and run the event mixer."""
    # parse the config filename here (not at import time) so that importing
    # this module never triggers command-line argument parsing
    parser = argparse.ArgumentParser(description="Run the event mixer by parsing the configuration file")
    parser.add_argument("config", type=str, help="config file")
    args = parser.parse_args()
    config_filename = os.path.expandvars(args.config)

    conf = read_config_file(config_filename)

    detector_db      = conf.get("detector_db")
    inpath           = conf.get("inpath")
    outpath          = conf.get("outpath")
    isotopes         = conf.get("isotopes")
    xenon_mass       = conf.get("xenon_mass")
    enrichment       = conf.get("enrichment")
    exposure         = conf.get("exposure")
    T12_0nubb        = conf.get("T12_0nubb")
    verbosity        = conf.get("verbosity", default=0)
    ic_efficiencies  = conf.get("ic_efficiencies")
    nevents_per_file = conf.get("nevents_per_file")

    # validate with an explicit raise: an assert would be silently stripped
    # when running python with -O
    if not (0. <= enrichment <= 1.):
        raise ValueError(f"enrichment must be in [0, 1], got {enrichment}")

    # compute initial number of 136Xe isotopes
    N0 = enrichment * (xenon_mass / (136. * dalton))

    # get event df with (g4volume, isotope, nevts)
    nevent_df = get_mixer_nevents(exposure, detector_db, isotopes)

    # number of decays of signal-like events: N = N0 * ln(2)/T12 * exposure
    if "0nubb" in isotopes:
        if not T12_0nubb:
            raise ValueError("0nubb half-life (T12_0nubb) is not provided")
        nevts = N0 * (np.log(2) / T12_0nubb) * exposure
        nevent_df.loc[len(nevent_df)] = ("ACTIVE", "0nubb", nevts)

    if "2nubb" in isotopes:
        nevts = N0 * (np.log(2) / T12_2nubb) * exposure
        nevent_df.loc[len(nevent_df)] = ("ACTIVE", "2nubb", nevts)

    # load processing efficiencies
    eff_df = pd.read_csv(os.path.expandvars(ic_efficiencies))

    # add reconstruction efficiency and poissonize
    # NOTE(review): this multiplication aligns rows positionally; it assumes
    # eff_df rows come in the same (G4Volume, Isotope) order as nevent_df —
    # TODO confirm against how the ic_efficiencies csv is produced
    nevent_df.nevts = nevent_df.nevts * (eff_df.nreco / eff_df.nsim)
    nevent_df.nevts = np.random.poisson(nevent_df.nevts)

    # check input events are enough to run the mixer
    _check_enough_nevents(nevent_df, eff_df)

    # run mixer
    mixer = Event_Mixer(inpath, outpath, nevent_df, nevents_per_file, verbosity)
    mixer.run()


if __name__ == "__main__":
    main()
+ +:ic_efficiencies: + csv file with number of reconstructed (after IC processing) and simulated events for each component. + Numbers can be computed from get_reco_and_sim_nevents function in evm/mixer.py, and saved with + pandas.DataFrame.to_csv function + +:xenon_mass: + total xenon mass of the detector + +:enrichment: + 136Xe enrichment factor (value between 0 and 1) + +:exposure: + total exposure of the production. + +:T12_0nubb: + 0nubb half-life. If 0nubb is not included in :isotopes:, this parameter is not required + +:nevents_per_file: + number of events per output file. + +:outpath: + output filename structure as a function of a file number variable, "file_number". + By convention "file_number" must be placed after the first "_". See example below. + """ +verbosity = 1 +detector_db = "next100" +inpath = "$ICDIR/database/test_data/mixer/{g4volume}/{isotope}/*_test.h5" +isotopes = ["0nubb", "2nubb"] +ic_efficiencies = "/path/to/ic_efficiencies.csv" + +xenon_mass = 100. * kg +enrichment = 0.9 +exposure = 1.
* year +T12_0nubb = 1.e+27 * year + +nevents_per_file = 10 +outpath = "/tmp/mixer_{file_number}_tag.h5" diff --git a/invisible_cities/core/system_of_units.py b/invisible_cities/core/system_of_units.py index 247e208630..4946648018 100644 --- a/invisible_cities/core/system_of_units.py +++ b/invisible_cities/core/system_of_units.py @@ -204,11 +204,13 @@ milligram = 1.e-3 * gram ton = 1.e+3 * kilogram kiloton = 1.e+3 * ton +dalton = 1.660539e-27 * kilogram # symbols kg = kilogram g = gram mg = milligram +Da = dalton # # Power [E][T^-1] diff --git a/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_1_demopp_test.h5 b/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_1_demopp_test.h5 new file mode 100644 index 0000000000..3b79305069 --- /dev/null +++ b/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_1_demopp_test.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3776b161f4473188c4d8475213a0fa054731915d426bb34b97ba3c000abe0c64 +size 161483 diff --git a/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_2_demopp_test.h5 b/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_2_demopp_test.h5 new file mode 100644 index 0000000000..cca187b2ba --- /dev/null +++ b/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_2_demopp_test.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d02bb4ec9b02a8c18321269668edb36ebba37f8f7e3bebd8f2b037123d7c8232 +size 164790 diff --git a/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_3_demopp_test.h5 b/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_3_demopp_test.h5 new file mode 100644 index 0000000000..3d141441e0 --- /dev/null +++ b/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_3_demopp_test.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6a8b696647cb01cec6b4fe4aa9b1b235a3ccd9c2dbcb173b872eea07c4f0e98 +size 163716 diff --git 
a/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_4_demopp_test.h5 b/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_4_demopp_test.h5 new file mode 100644 index 0000000000..dd5774dd5c --- /dev/null +++ b/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_4_demopp_test.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e62a9d261ca5e25537c2cefe2b5a6ee38c91bf2026c44cec53289994d99ae485 +size 173311 diff --git a/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_5_demopp_test.h5 b/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_5_demopp_test.h5 new file mode 100644 index 0000000000..8d980387bf --- /dev/null +++ b/invisible_cities/database/test_data/mixer/ACTIVE/0nubb/isaura_5_demopp_test.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:220ea66c4f7bf1359d8e364893b9ab8052bf468e2625e7df345663cc0cc0a5cc +size 163584 diff --git a/invisible_cities/database/test_data/mixer/ACTIVE/2nubb/isaura_1_demopp_test.h5 b/invisible_cities/database/test_data/mixer/ACTIVE/2nubb/isaura_1_demopp_test.h5 new file mode 100644 index 0000000000..97b811b8c4 --- /dev/null +++ b/invisible_cities/database/test_data/mixer/ACTIVE/2nubb/isaura_1_demopp_test.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7bbbfeb288f72807f3ca594d324d7c83866157bcb121592174855e4d5b41204 +size 163584 diff --git a/invisible_cities/database/test_data/mixer/ACTIVE/2nubb/isaura_2_demopp_test.h5 b/invisible_cities/database/test_data/mixer/ACTIVE/2nubb/isaura_2_demopp_test.h5 new file mode 100644 index 0000000000..71a2234b84 --- /dev/null +++ b/invisible_cities/database/test_data/mixer/ACTIVE/2nubb/isaura_2_demopp_test.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:becf05042e5668f108053f117a79a262c46a580d1f2971d198bdfd85dc70e26c +size 164993 diff --git a/invisible_cities/database/test_data/mixer/ACTIVE/2nubb/isaura_3_demopp_test.h5 
import os
import glob
import warnings
import tables as tb
import pandas as pd
import numpy as np

from ..io.dst_io import load_dst
from ..io.dst_io import df_writer

from ..core.system_of_units import mBq
from ..database.load_db import RadioactivityData


def get_file_number(filename):
    """Extract the integer file number from a filename shaped 'name_{N}_...h5'."""
    return int(filename.split("/")[-1].split(".h5")[0].split("_")[1])


class Event_Mixer():
    '''
    This class writes MC mixed isaura files. It reads the files separately for
    each MC component pair (isotope, g4volume), selects the provided number of
    events for each component, and writes them consecutively.
    The input files and events are read randomly and therefore the output is
    non-deterministic.

    Parameters:
    ----------
    :inpath: (str) the input path of the MC files, which must explicitly depend
        on the simulated component through "g4volume" and "isotope" variables.
        For example: "/somepath/{g4volume}/{isotope}"

    :outpath: (str) the output path where mixed files will be saved; must
        contain a "file_number" placeholder.

    :nevents_df: (pd.DataFrame) dataframe with 3 columns: G4Volume, Isotope, nevts

    :nevents_per_file: (int) the number of events to save in each mixed file.
        Each mixed file will contain the fraction of events for each component
        based on :nevents_df:
    '''

    def __init__(self, inpath           : str
                     , outpath          : str
                     , nevents_df       : pd.DataFrame
                     , nevents_per_file : int
                     , verbosity        : int = 0):
        '''
        Initializes the constant (fixed-value) class attributes; the running
        counters (saved_events, out_file_number, nevents_in_file) are updated
        by the read/write methods.
        '''
        self.verbosity = verbosity
        if self.verbosity: print(">> Initializing mixer...")
        self.inpath  = os.path.expandvars(inpath)
        self.outpath = os.path.expandvars(outpath)

        # cast and set index; copy so the caller's dataframe is not mutated
        self.nevents_df = nevents_df.astype({"nevts": int}, copy=True)
        self.nevents_df = self.nevents_df.set_index(["Isotope", "G4Volume"])
        # bookkeeping: save the requested events next to the output files
        # (os.path.join avoids writing to "/nevents.csv" when outpath has no dir)
        self.nevents_df.to_csv(os.path.join(os.path.dirname(self.outpath), "nevents.csv"))

        self.nevents_per_file = nevents_per_file
        self.saved_events     = 0

        if self.verbosity:
            print(" ------------------------")
            print(" Number of events to mix:")
            print(" ------------------------")
            print( self.nevents_df.__repr__())
            print("Total:", self.nevents_df.nevts.sum())

        # number of output files
        self.nfiles_out = \
            int(np.ceil(self.nevents_df.nevts.sum() / self.nevents_per_file))

        # (group, node) pair of isaura file tables (ignoring Filters).
        # Order matters: "eventMap" must come before the MC tables because
        # _select_data uses the freshly selected eventMap to pick MC rows.
        self.tables = dict(( ( "events"      , ("Run"     , "events")      )
                           , ( "eventMap"    , ("Run"     , "eventMap")    )
                           , ( "dst"         , ("DST"     , "Events")      )
                           , ( "summary"     , ("Summary" , "Events")      )
                           , ( "tracking"    , ("Tracking", "Tracks")      )
                           , ( "hits"        , ("MC"      , "hits")        )
                           , ( "particles"   , ("MC"      , "particles")   )
                           , ("sns_response" , ("MC"      , "sns_response"))
                           ))

        # output file
        self.out_file_number = 0
        self.h5out           = tb.open_file(self.outpath.format(file_number=0), "w")
        self.nevents_in_file = 0
        if self.verbosity:
            print(">> Writing:", self.outpath.format(file_number=self.out_file_number))
        return


    def run(self):
        '''
        Runs the event mixer.
        For each (isotope, g4volume) component:
            - shuffle input filenames
            - read and write data until all required events are saved
        Once all the output files are written, the working dataframes are disposed.
        '''
        for isotope, g4volume in self.nevents_df.index:

            total_nevts = self.nevents_df.loc[isotope, g4volume].nevts
            if total_nevts == 0: continue
            filenames = sorted( glob.glob(self.inpath.format(g4volume=g4volume, isotope=isotope))
                              , key = get_file_number)

            np.random.shuffle(filenames)

            written_nevents = 0
            for filename in filenames:

                self.events_ = load_dst(filename, *self.tables["events"])
                if len(self.events_) == 0: continue
                self._read_data(filename, isotope, g4volume)

                # write nevts to output files
                nevts = min(total_nevts - written_nevents, len(self.events_))
                self._write_data(nevts)
                written_nevents += nevts

                if (written_nevents == total_nevts): break
        self.h5out.close()
        self._dispose_dfs()

        # internal invariant: every requested event was written
        assert (self.saved_events == self.nevents_df.nevts.sum())
        return


    def _read_data(self, filename, isotope, g4volume):
        '''
        Reads each table in filename into self.<key>_ dataframes, indexed by
        event number (simplifying selection in _select_data). Also drops the
        timestamp columns and tags rows with file number and component
        (isotope, g4volume).
        '''
        for key, table in self.tables.items():

            if (key == "eventMap"):
                setattr(self, key + "_", load_dst(filename, *table).set_index("evt_number"))

            elif (key in ("dst", "tracking", "summary")):
                setattr(self, key + "_", load_dst(filename, *table)
                                         .rename({"event": "evt_number"}, axis=1)
                                         .set_index("evt_number"))

            elif (key in ("hits", "particles", "sns_response")):
                setattr(self, key + "_", load_dst(filename, *table).set_index("event_id"))

        # drop timestamp
        self.events_.drop("timestamp", axis=1, inplace=True)
        self.dst_   .drop(     "time", axis=1, inplace=True)

        # add file number and component info (plain attribute access instead
        # of exec(): safer and robust to arbitrary isotope/volume strings)
        file_number = get_file_number(filename)
        for key in ("events", "eventMap", "dst", "tracking", "summary"):
            df = getattr(self, key + "_")
            df.loc[:, "file"    ] = file_number
            df.loc[:, "Isotope" ] = isotope
            df.loc[:, "G4Volume"] = g4volume
        return


    def _open_next_file(self):
        '''Closes the current output file and opens the next one.'''
        self.h5out.close()
        self.out_file_number += 1
        self.nevents_in_file  = 0
        self.h5out = tb.open_file(self.outpath.format(file_number=self.out_file_number), "w")
        if self.verbosity:
            print(">> Writing:", self.outpath.format(file_number=self.out_file_number))
        return


    def _write_tables(self):
        '''Writes the currently selected dataframes to the open output file.'''
        df_writer(self.h5out, self.events, "Run", "events", 'ZLIB4')
        for key, table in self.tables.items():
            if key != "events":
                df_writer(self.h5out, getattr(self, key).reset_index(), *table, 'ZLIB4')
        return


    def _write_data(self, nevents):
        '''
        Writes nevents from the current component dataframes to output files:
            - randomly selects nevents from the dataframes
            - if the current file has enough space, saves the data there
            - otherwise fills the current file and continues writing in new files
        '''
        # select nevents
        self.events_ = self.events_.sample(n=nevents)

        # empty space in current output file
        nidle = int(self.nevents_per_file - self.nevents_in_file)
        if (nidle >= nevents):
            self.events = self.events_
            self._select_data()
            self._write_tables()
            self.nevents_in_file += nevents

            if (nidle == nevents): # file is full, open new one
                self._open_next_file()

        # not enough space in current output file
        else:
            # finish writing the current output file
            self.events = self.events_[:nidle].copy()
            self._select_data()
            self._write_tables()

            # write new file(s) (the loop avoids recursive calling of self._write_data)
            for fidx in np.arange(0, np.ceil((nevents-nidle)/self.nevents_per_file).astype(int)):

                init = int(nidle + self.nevents_per_file* fidx)
                last = int(nidle + self.nevents_per_file*(fidx+1)) # notice that len(self.events_) == nevents

                self.events = self.events_[init:last].copy()

                self._open_next_file()
                self._select_data()
                self._write_tables()
                self.nevents_in_file += len(self.events)
        return


    def _select_data(self):
        '''
        Selects from each self.<key>_ dataframe the rows matching self.events
        and stores them in self.<key>; then assigns a unique event-id to the
        'event' column, propagated to dst/tracking/summary.
        '''
        for key, table in self.tables.items():
            if (key == "events"): continue

            src = getattr(self, key + "_")
            if (table[0] != "MC"):
                setattr(self, key, src.loc[self.events.evt_number])
            else: # MC tables: self.eventMap was selected just above in this loop
                setattr(self, key, src.loc[src.index.intersection(self.eventMap.nexus_evt)])

        # add unique event-id to 'event' column
        indexes = ["evt_number", "G4Volume", "Isotope", "file"]
        self.events.set_index(indexes, inplace=True)

        n = len(self.events)
        self.events["event"] = self.saved_events + np.arange(0, n)
        self.saved_events += n

        for key in ("dst", "tracking", "summary"):
            df = getattr(self, key)
            df.set_index(indexes[1:], inplace=True, append=True)
            df["event"] = self.events.event # aligned on the shared MultiIndex

        self.events.reset_index(inplace=True)
        return


    def _dispose_dfs(self):
        '''
        Deletes the working dataframe attributes (self.<key> and self.<key>_).
        '''
        for key in self.tables:
            delattr(self, key)
            delattr(self, key + "_")
        return


def get_mixer_nevents(exposure : float, detector_db : str = "next100", isotopes : list = "all"):
    '''
    This function computes the number of events of each (isotope, volume)
    component based on the activity assumptions provided in the database.

    Parameters:
    ----------
    :exposure: exposure time
    :detector_db: detector database
    :isotopes: (default "all") list with the isotopes to simulate;
        signal-like isotopes "0nubb" and "2nubb" are ignored here

    Output:
    ------
    pandas.DataFrame object with three columns: (G4Volume, Isotope, nevts),
    where nevts is the number of events of each component for the given exposure
    '''
    # get activities and efficiencies from database
    act, eff = RadioactivityData(detector_db)

    # if a list of isotopes is provided, warn missing and select them
    if not (isotopes == "all"):
        # warn about isotopes absent from both database tables
        act_in  = np.isin(isotopes, act.Isotope.unique())
        eff_in  = np.isin(isotopes, eff.Isotope.unique())
        missing = ~(act_in | eff_in)

        if missing.any():
            isos = [iso for b, iso in zip(missing, isotopes) if b and (iso not in ["0nubb", "2nubb"])]
            if len(isos) > 0:
                warnings.warn(f"Missing database isotopes: {isos}")

        # select requested isotopes
        act = act[act.Isotope.isin(isotopes)]
        eff = eff[eff.Isotope.isin(isotopes)]

    # warn about missing components; note the negations: we report the
    # components that are NOT present in the other table
    act_uniq = act.value_counts(subset=["G4Volume", "Isotope"]).index
    eff_uniq = eff.value_counts(subset=["G4Volume", "Isotope"]).index

    act_in = act_uniq.isin(eff_uniq)
    if not act_in.all():
        warnings.warn(f"Components missing at Efficiency table: {str(act_uniq[~act_in].to_list())}")

    eff_in = eff_uniq.isin(act_uniq)
    if not eff_in.all():
        warnings.warn(f"Components missing at Activity table: {str(eff_uniq[~eff_in].to_list())}")

    # create nevents df and return it
    df = pd.merge(act, eff, on=["G4Volume", "Isotope"])
    df.loc[:, "nevts"] = (df.TotalActivity * mBq) * df.MCEfficiency * exposure
    df = df.drop(columns=["TotalActivity", "MCEfficiency"])
    return df
def get_reco_and_sim_nevents(inpath : str, components : list)->pd.DataFrame:
    '''
    This function computes, for each (G4Volume, Isotope) component, the number
    of events that were simulated (nsim) and that survived IC processing (nreco).

    Parameters:
    ----------
    :inpath: path pattern for the input files, containing {g4volume} and
        {isotope} placeholders (see mixer.conf doc)
    :components: list of (G4Volume, Isotope) pairs

    Output:
    ------
    pandas.DataFrame object with four columns: (G4Volume, Isotope, nreco, nsim),
    where nreco is the number of reconstructed events and nsim the number of
    simulated events.

    Raises an Exception if no input file matches a requested component.
    '''
    inpath  = os.path.expandvars(inpath)
    reco_df = pd.DataFrame(columns=["G4Volume", "Isotope", "nreco", "nsim"])

    for g4volume, isotope in components:
        filenames = glob.glob(inpath.format(g4volume=g4volume, isotope=isotope))
        if len(filenames) == 0:
            raise Exception(f"No files found for component: {g4volume}, {isotope}")

        nreco = 0
        nsim  = 0
        for filename in filenames:
            events = load_dst(filename, "Run", "events")
            conf   = load_dst(filename, "MC", "configuration").set_index("param_key")

            nreco += len(events)
            # the number of simulated events is stored in the nexus configuration table
            nsim  += int(conf.loc["saved_events", "param_value"])
        reco_df.loc[len(reco_df)] = (g4volume, isotope, nreco, nsim)

    return reco_df


def _check_enough_nevents(nevent_df : pd.DataFrame, eff_df : pd.DataFrame):
    '''
    Checks that the number of reconstructed input events (eff_df.nreco) is
    enough to supply the requested number of mixed events (nevent_df.nevts)
    for every (G4Volume, Isotope) component; raises otherwise.
    '''
    indexes   = ["G4Volume", "Isotope"]
    nevent_df = nevent_df.set_index(indexes)
    eff_df    = eff_df   .set_index(indexes)
    # component-wise check nreco >= nevts; the Series align on the shared index
    sel = pd.Series.ge(eff_df.nreco, nevent_df.nevts)

    if sel.all(): return # enough events
    # not enough events: report every failing component
    msg = "Not enough input data for: \n"
    for (g4volume, isotope) in nevent_df[~sel].index:
        nevts = nevent_df.loc[g4volume, isotope].nevts
        nreco = eff_df   .loc[g4volume, isotope].nreco
        msg += f"{g4volume}, {isotope}: required {nevts} got {nreco} \n"
    raise Exception(msg)
import os
import glob
import pandas as pd
import tables as tb

from ..io.dst_io import load_dst, load_dsts

from ..core.system_of_units import mBq
from ..database.load_db import RadioactivityData

from .mixer import get_file_number
from .mixer import Event_Mixer
from .mixer import get_mixer_nevents


def test_Event_Mixer_writes_all_tables(ICDATADIR, output_tmpdir):
    '''
    Runs the event mixer class to test that the output files contain all the
    expected tables.
    '''
    # mixer config
    inpath  = os.path.join(ICDATADIR   , "mixer/{g4volume}/{isotope}/*_test.h5")
    outpath = os.path.join(output_tmpdir, "mixer_{file_number}_test_all_tables.h5")
    nevents_per_file = 10

    # create event dataframe
    df = pd.DataFrame(columns = ["G4Volume", "Isotope", "nevts"])
    df.loc[len(df)] = ["ACTIVE", "0nubb",  3]
    df.loc[len(df)] = ["ACTIVE", "2nubb", 10]
    df.nevts = df.nevts.astype(int)

    # run mixer
    mixer = Event_Mixer(inpath, outpath, df, nevents_per_file)
    mixer.run()

    filenames = sorted(glob.glob(outpath.format(file_number="*")), key=get_file_number)
    for filename in filenames:
        with tb.open_file(filename, "r") as h5out:
            assert hasattr(h5out.root, "Run/events")
            assert hasattr(h5out.root, "DST/Events")
            assert hasattr(h5out.root, "Tracking/Tracks")
            assert hasattr(h5out.root, "Summary/Events")
            assert hasattr(h5out.root, "MC/hits")
            assert hasattr(h5out.root, "MC/particles")
            assert hasattr(h5out.root, "MC/sns_response")


def test_Event_Mixer_nevents(ICDATADIR, output_tmpdir):
    '''
    Runs the event mixer class to test that the output files contain the total
    number of events per file and the number of events of each component
    provided as input. Also checks the unique event-id.
    '''
    # mixer config
    inpath  = os.path.join(ICDATADIR   , "mixer/{g4volume}/{isotope}/*_test.h5")
    outpath = os.path.join(output_tmpdir, "mixer_{file_number}_test_nevents.h5")
    nevents_per_file = 10

    # create event dataframe
    df = pd.DataFrame(columns = ["G4Volume", "Isotope", "nevts"])
    df.loc[len(df)] = ["ACTIVE", "0nubb",  3]
    df.loc[len(df)] = ["ACTIVE", "2nubb", 10]
    df.nevts = df.nevts.astype(int)

    # run mixer
    mixer = Event_Mixer(inpath, outpath, df, nevents_per_file)
    mixer.run()

    # test total events per file
    filenames = sorted(glob.glob(outpath.format(file_number="*")), key=get_file_number)
    nevents_per_file = min(nevents_per_file, df.nevts.sum())
    for filename in filenames[:-1]: # last file could not be full
        events = load_dst(filename, "Run", "events")
        assert (len(events) == nevents_per_file)

    # test total events per component
    indexes    = ["G4Volume", "Isotope", "evt_number", "file"]
    nevents_df = mixer.nevents_df.reset_index().set_index(indexes[:2])

    for key in ("events", "dst", "tracking", "summary"):
        dst = load_dsts(filenames, *mixer.tables[key])
        nev = dst.groupby(indexes[:2]) \
                 .apply(lambda df: df.set_index(indexes[2:]).index.nunique()).to_frame()\
                 .rename({0: "nevts"}, axis=1)

        pd.testing.assert_frame_equal(nev, nevents_df)

    # test unique-id
    for key in ("events", "dst", "tracking", "summary"):
        dst = load_dsts(filenames, *mixer.tables[key])
        assert dst.event.nunique() == nevents_df.nevts.sum()


def test_get_mixer_nevents():
    '''
    Tests that get_mixer_nevents returns the expected number of events for a
    simple user case.
    '''
    detector_db = "next100"
    isotopes    = ["Bi214", "Co60"]
    exposure    = 1 # dummy

    got = get_mixer_nevents(exposure, detector_db, isotopes)
    got = got.set_index(["G4Volume", "Isotope"]).nevts

    act, eff = RadioactivityData(detector_db)
    act = act[act.Isotope.isin(isotopes)].set_index(["G4Volume", "Isotope"])
    eff = eff[eff.Isotope.isin(isotopes)].set_index(["G4Volume", "Isotope"])
    # database activities are expressed in mBq: include the unit factor,
    # exactly as get_mixer_nevents does
    expected = (act.TotalActivity * mBq * eff.MCEfficiency * exposure).dropna().rename("nevts")

    # sort both sides: pd.merge (inside get_mixer_nevents) preserves input
    # order while Series alignment sorts the index, so compare order-independently
    pd.testing.assert_series_equal(got.sort_index(), expected.sort_index())