diff --git a/plugins/decl_hdf5/README.md b/plugins/decl_hdf5/README.md index 48b76044..1bfaf49d 100644 --- a/plugins/decl_hdf5/README.md +++ b/plugins/decl_hdf5/README.md @@ -42,7 +42,7 @@ The possible values for the keys are as follow: can be replaced inside the `DATA_SECTION`. * `datasets`: a key-value map associating a PDI type to string keys. Each string is the name of a dataset to create in the file on first - access, with the type described in the value. The string key can also be + access, with the type described in the value. The string key is a regular expression (regex), and can be used to define "generic keys", that can be used in `DATA_IO_DESC` for the keyword dataset. * `collision_policy`: a string identifying a \ref COLLISION_POLICY @@ -114,7 +114,8 @@ The possible values for the keys are as follow: It defaults to selecting the whole data. * `dataset_selection`: a `SELECTION_DESC` specifying the selection of data in the file data to write or read. - This is only valid if the dataset is defined in the datasets. + This is only valid if the dataset is explicitly defined in the `datasets` + section. * `attributes`: a key-value map specifying the set of attributes to read from (respectively, write to) the file when the associated dataset is read (respectively, written). 
diff --git a/plugins/decl_hdf5/dataset_op.cxx b/plugins/decl_hdf5/dataset_op.cxx index 983e961e..f5105f97 100644 --- a/plugins/decl_hdf5/dataset_op.cxx +++ b/plugins/decl_hdf5/dataset_op.cxx @@ -29,7 +29,6 @@ #endif #include -#include #include #include #include @@ -42,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -70,6 +70,8 @@ using PDI::Type_error; using PDI::Value_error; using std::dynamic_pointer_cast; using std::function; +using std::pair; +using std::regex; using std::string; using std::stringstream; using std::tie; @@ -212,7 +214,7 @@ void Dataset_op::fletcher(Context& ctx, Expression value) } } -void Dataset_op::execute(Context& ctx, hid_t h5_file, bool use_mpio, const unordered_map& dsets) +void Dataset_op::execute(Context& ctx, hid_t h5_file, bool use_mpio, const unordered_map>& dsets) { Raii_hid xfer_lst = make_raii_hid(H5Pcreate(H5P_DATASET_XFER), H5Pclose); #ifdef H5_HAVE_PARALLEL @@ -346,7 +348,7 @@ hid_t Dataset_op::dataset_creation_plist(Context& ctx, const Datatype* dataset_t return dset_plist; } -void Dataset_op::do_write(Context& ctx, hid_t h5_file, hid_t write_lst, const unordered_map& dsets) +void Dataset_op::do_write(Context& ctx, hid_t h5_file, hid_t write_lst, const unordered_map>& dsets) { string dataset_name = m_dataset.to_string(ctx); ctx.logger().trace("Preparing for writing `{}' dataset", dataset_name); @@ -364,65 +366,62 @@ void Dataset_op::do_write(Context& ctx, hid_t h5_file, hid_t write_lst, const un Datatype_sptr dataset_type; Raii_hid h5_file_type, h5_file_space; - int counter_dataset_found = 0; + bool bool_dataset_found = false; + pair< string, pair > dset_found; ctx.logger().trace("search `{}' in the list of datasets section", dataset_name); - for (auto&& dsets_elem: dsets) { - // create regex from string - std::regex dsets_elem_regex(dsets_elem.first); - // try if dataset_name is including in regex - if (std::regex_match(dataset_name, dsets_elem_regex)) { - counter_dataset_found++; - 
ctx.logger().trace(" `{}' match an element of datasets(defined as regex) with value := `{}'", dataset_name, dsets_elem.first); - } - } - - ctx.logger().trace("Found `{}' match(s) in the list of datasets section for `{}'", counter_dataset_found, dataset_name); - - if (counter_dataset_found > 1) { - // if we found two or more element in the list of datasets, we can't choose the right dataset (if the elements found have different size, subsize, type, ...) - // send a error a message to the user - std::stringstream msg_dataset_found; - msg_dataset_found << "\nThe elements that match " << dataset_name << " are:" << std::endl; - for (auto&& dsets_elem: dsets) { - // create regex from string - std::regex dsets_elem_regex(dsets_elem.first); - // try if dataset_name is including in regex - if (std::regex_match(dataset_name, dsets_elem_regex)) { - msg_dataset_found << " - " << dsets_elem.first << std::endl; + for (auto&& dsets_elem = dsets.begin(); dsets_elem != dsets.end(); ++dsets_elem) { + if (std::regex_match(dataset_name, dsets_elem->second.first)) { + if (!bool_dataset_found) { + bool_dataset_found = true; + ctx.logger().trace(" `{}' match an element of datasets(defined as regex) with value := `{}'", dataset_name, dsets_elem->first); + dset_found = *dsets_elem; + } else { + // if we found an other element in the list of datasets, we can't choose the right dataset + // (if the elements found have different size, subsize, type, ...) 
+ // send a error a message to the user + std::list list_dataset_found; + list_dataset_found.emplace_back(dset_found.first); + list_dataset_found.emplace_back(dsets_elem->first); + + ++dsets_elem; // get the next element in the iterator on dsets + // loop over the rest of the elements in the iterator on dsets + for (dsets_elem; dsets_elem != dsets.end(); ++dsets_elem) { + if (std::regex_match(dataset_name, dsets_elem->second.first)) { + list_dataset_found.emplace_back(dsets_elem->first); + } + } + list_dataset_found.sort(); // sort the list of dataset + + std::string msg_dataset_found = fmt::format( + "\nThe elements that match {} are:\n - {}\nAttention: The elements are considered as a regex.", + dataset_name, + fmt::join(list_dataset_found, "\n - ") + ); + + throw Config_error{ + m_dataset_selection.selection_tree(), + "Found `{}' match(s) in the list of datasets section for `{}'. Cannot choose the right element in datasets.{}", + list_dataset_found.size(), + dataset_name, + msg_dataset_found + }; } } - msg_dataset_found << "Attention: The elements are considered as a regex."; - - throw Config_error{ - m_dataset_selection.selection_tree(), - "Found `{}' match(s) in the list of datasets section for `{}'. 
Cannot choose the right element in datasets.{}", - counter_dataset_found, - dataset_name, - msg_dataset_found.str() - }; } - if (counter_dataset_found == 1) { - for (auto&& dataset_type_iter_regex = dsets.begin(); dataset_type_iter_regex != dsets.end(); ++dataset_type_iter_regex) { - std::regex dsets_elem_regex(dataset_type_iter_regex->first); - if (std::regex_match(dataset_name, dsets_elem_regex)) { - // we found the dataset - ctx.logger().trace("Get the regex in the list of datasets section := `{}'", dataset_type_iter_regex->first); - dataset_type = dataset_type_iter_regex->second->evaluate(ctx); - tie(h5_file_space, h5_file_type) = space(dataset_type); - ctx.logger().trace("Applying `{}' dataset selection", dataset_name); - m_dataset_selection.apply(ctx, h5_file_space, h5_mem_space); - break; // stop the "for" loop - } - } + if (bool_dataset_found) { + ctx.logger().trace("Get the regex in the list of datasets section := `{}'", dset_found.first); + dataset_type = dset_found.second.second->evaluate(ctx); + tie(h5_file_space, h5_file_type) = space(dataset_type); + ctx.logger().trace("Applying `{}' dataset selection", dataset_name); + m_dataset_selection.apply(ctx, h5_file_space, h5_mem_space); } else { if (!m_dataset_selection.size().empty()) { - throw Config_error{m_dataset_selection.selection_tree(), "Dataset selection is invalid in implicit dataset `{}'", dataset_name}; - } else { - dataset_type = ref.type(); - tie(h5_file_space, h5_file_type) = space(dataset_type, true); + throw Config_error{m_dataset_selection.selection_tree(), "Dataset selection is invalid for implicit dataset `{}'", dataset_name}; } + dataset_type = ref.type(); + tie(h5_file_space, h5_file_type) = space(dataset_type, true); } ctx.logger().trace("Validating `{}' dataset dataspaces selection", dataset_name); diff --git a/plugins/decl_hdf5/dataset_op.h b/plugins/decl_hdf5/dataset_op.h index aaa63bec..fed57bf5 100644 --- a/plugins/decl_hdf5/dataset_op.h +++ b/plugins/decl_hdf5/dataset_op.h @@ 
-1,5 +1,5 @@ /******************************************************************************* - * Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA) + * Copyright (C) 2015-2025 Commissariat a l'energie atomique et aux energies alternatives (CEA) * Copyright (C) 2021-2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC) * All rights reserved. * @@ -32,6 +32,7 @@ #include #endif +#include #include #include @@ -188,12 +189,22 @@ class Dataset_op * \param use_mpio whether the hdf5 read/write is parallel * \param dsets the type of the explicitly typed datasets */ - void execute(PDI::Context& ctx, hid_t h5_file, bool use_mpio, const std::unordered_map& dsets); + void execute( + PDI::Context& ctx, + hid_t h5_file, + bool use_mpio, + const std::unordered_map>& dsets + ); private: void do_read(PDI::Context& ctx, hid_t h5_file, hid_t read_lst); - void do_write(PDI::Context& ctx, hid_t h5_file, hid_t xfer_lst, const std::unordered_map& dsets); + void do_write( + PDI::Context& ctx, + hid_t h5_file, + hid_t xfer_lst, + const std::unordered_map>& dsets + ); }; } // namespace decl_hdf5 diff --git a/plugins/decl_hdf5/file_op.cxx b/plugins/decl_hdf5/file_op.cxx index 61f29682..66912f7b 100644 --- a/plugins/decl_hdf5/file_op.cxx +++ b/plugins/decl_hdf5/file_op.cxx @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA) + * Copyright (C) 2015-2025 Commissariat a l'energie atomique et aux energies alternatives (CEA) * Copyright (C) 2021-2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC) * All rights reserved. 
* @@ -89,7 +89,12 @@ vector File_op::parse(Context& ctx, PC_tree_t tree) #endif } else if (key == "datasets") { each(value, [&](PC_tree_t dset_name, PC_tree_t dset_type) { - template_op.m_datasets.emplace(to_string(dset_name), ctx.datatype(dset_type)); + std::string dset_name_string = to_string(dset_name); + std::regex dset_regex(dset_name_string); + template_op.m_datasets.emplace( + dset_name_string, + std::pair(dset_regex, ctx.datatype(dset_type)) + ); }); } else if (key == "deflate") { deflate = value; @@ -230,7 +235,7 @@ File_op::File_op(const File_op& other) , m_dset_size_ops{other.m_dset_size_ops} { for (auto&& dataset: other.m_datasets) { - m_datasets.emplace(dataset.first, dataset.second); + m_datasets.emplace(dataset.first, std::pair(dataset.second.first, dataset.second.second)); } } diff --git a/plugins/decl_hdf5/file_op.h b/plugins/decl_hdf5/file_op.h index ab2126b1..2006be1c 100644 --- a/plugins/decl_hdf5/file_op.h +++ b/plugins/decl_hdf5/file_op.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (C) 2015-2021 Commissariat a l'energie atomique et aux energies alternatives (CEA) + * Copyright (C) 2015-2025 Commissariat a l'energie atomique et aux energies alternatives (CEA) * Copyright (C) 2021 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC) * All rights reserved. 
* @@ -32,6 +32,7 @@ #include #endif +#include #include #include #include @@ -67,7 +68,7 @@ class File_op #endif /// type of the datasets for which an explicit type is specified - std::unordered_map m_datasets; + std::unordered_map> m_datasets; /// the dataset operations std::vector m_dset_ops; diff --git a/plugins/decl_hdf5/tests/decl_hdf5_tests.cxx b/plugins/decl_hdf5/tests/decl_hdf5_tests.cxx index ed90acae..77f93a3e 100644 --- a/plugins/decl_hdf5/tests/decl_hdf5_tests.cxx +++ b/plugins/decl_hdf5/tests/decl_hdf5_tests.cxx @@ -1448,12 +1448,15 @@ TEST_F(decl_hdf5_test, check_config_error_for_two_regex_found) int has_failed = 0; std::string true_errmsg - = "Error while triggering event `write_event': Config_error in lines 32 - 33: Found `2' match(s) in the list of datasets " + = "Error while triggering event `write_event': Config_error in lines 44 - 45: Found `4' match(s) in the list of datasets " "section for `group123/array_data'. Cannot choose the right element in datasets.\n" "The elements that match group123/array_data are:\n" - " - group[0-9]+/array_data\n" " - group.*/array_data\n" + " - group1.*/array_data\n" + " - group12.*/array_data\n" + " - group[0-9]+/array_data\n" "Attention: The elements are considered as a regex."; + PDI_status_t true_status = PDI_ERR_CONFIG; context_check_error ctx{true_errmsg, true_status, has_failed}; @@ -1490,6 +1493,18 @@ TEST_F(decl_hdf5_test, check_config_error_for_two_regex_found) " size: [3, 8] \n" " type: array \n" " subtype: int \n" + " group1.*/array_data: \n" + " size: [3, 8] \n" + " type: array \n" + " subtype: int \n" + " group/.*/array_data: \n" + " size: [3, 8] \n" + " type: array \n" + " subtype: int \n" + " group12.*/array_data: \n" + " size: [3, 8] \n" + " type: array \n" + " subtype: int \n" " write: \n" " array_data: \n" " dataset: 'group${index}/array_data' \n" @@ -1551,7 +1566,7 @@ TEST_F(decl_hdf5_test, check_config_error_for_no_regex_found) { SetUp("decl_hdf5_test_no_regex.h5"); int has_failed = 0; - 
std::string true_errmsg = "Error while triggering event `write_event': Config_error in lines 19 - 20: Dataset selection is invalid in implicit " + std::string true_errmsg = "Error while triggering event `write_event': Config_error in lines 19 - 20: Dataset selection is invalid for implicit " "dataset `group123/array_data'"; PDI_status_t true_status = PDI_ERR_CONFIG; context_check_error ctx{true_errmsg, true_status, has_failed};