diff --git a/lstchain/io/io.py b/lstchain/io/io.py index a5d76e359a..ae1922442e 100644 --- a/lstchain/io/io.py +++ b/lstchain/io/io.py @@ -85,7 +85,6 @@ dl1_mon_tel_CatB_ped_key = "/dl1/monitoring/telescope/catB/pedestal" dl1_mon_tel_CatB_cal_key = "/dl1/monitoring/telescope/catB/calibration" dl1_mon_tel_CatB_flat_key = "/dl1/monitoring/telescope/catB/flatfield" - dl1_params_lstcam_key = "/dl1/event/telescope/parameters/LST_LSTCam" dl1_images_lstcam_key = "/dl1/event/telescope/image/LST_LSTCam" dl2_params_lstcam_key = "/dl2/event/telescope/parameters/LST_LSTCam" @@ -304,6 +303,7 @@ def auto_merge_h5files( file_list, output_filename="merged.h5", nodes_keys=None, + keys_to_copy=None, merge_arrays=False, filters=HDF5_ZSTD_FILTERS, progress_bar=True, @@ -319,6 +319,7 @@ def auto_merge_h5files( file_list: list of path output_filename: path nodes_keys: list of path + keys_to_copy: list of nodes that must be copied and not merged (because the same in all files) merge_arrays: bool filters progress_bar: bool @@ -336,6 +337,8 @@ def auto_merge_h5files( else: keys = set(nodes_keys) + keys_to_copy = set() if keys_to_copy is None else set(keys_to_copy).intersection(keys) + bar = tqdm(total=len(file_list), disable=not progress_bar) with open_file(output_filename, 'w', filters=filters) as merge_file: with open_file(file_list[0]) as f1: @@ -343,8 +346,21 @@ def auto_merge_h5files( bar.update(1) for filename in file_list[1:]: + common_keys = keys.intersection(get_dataset_keys(filename)) + + # do not merge specific nodes with equal data in all files + common_keys=common_keys.difference(keys_to_copy) + with open_file(filename) as file: + + # check value of Table.nrow for keys copied from the first file + for k in keys_to_copy: + first_node = merge_file.root[k] + present_node = file.root[k] + if first_node.nrows != present_node.nrows: + raise ValueError("Length of key {} from file {} different than in file {}".format(k, filename, file_list[0])) + for k in common_keys: in_node = file.root[k] out_node = merge_file.root[k] diff --git a/lstchain/scripts/lstchain_merge_hdf5_files.py b/lstchain/scripts/lstchain_merge_hdf5_files.py index 9c7adfd530..5fad0acba0 100644 --- a/lstchain/scripts/lstchain_merge_hdf5_files.py +++ b/lstchain/scripts/lstchain_merge_hdf5_files.py @@ -22,6 +22,22 @@ from lstchain.io import auto_merge_h5files from lstchain.io import get_dataset_keys +from lstchain.io.io import ( + dl1_params_tel_mon_ped_key, + dl1_params_tel_mon_cal_key, + dl1_params_tel_mon_flat_key, + dl1_mon_tel_CatB_cal_key, + dl1_mon_tel_CatB_ped_key, + dl1_mon_tel_CatB_flat_key +) + +default_keys_to_copy = [dl1_params_tel_mon_ped_key, + dl1_params_tel_mon_cal_key, + dl1_params_tel_mon_flat_key, + dl1_mon_tel_CatB_cal_key, + dl1_mon_tel_CatB_ped_key, + dl1_mon_tel_CatB_flat_key +] parser = argparse.ArgumentParser(description='Merge HDF5 files') @@ -72,6 +88,11 @@ help='Skip checks when merging files' ) +parser.add_argument( + '--keys-to-copy', + nargs="*", default=[], + help='List of duplicated keys to be copied and not to be merged' +) def main(): args = parser.parse_args() @@ -91,10 +112,13 @@ def main(): else: keys = None + keys_to_copy = default_keys_to_copy + args.keys_to_copy + auto_merge_h5files( file_list, args.output_file, nodes_keys=keys, + keys_to_copy=keys_to_copy, progress_bar=not args.no_progress, run_checks=not args.skip_checks )