pdidev · jmorice91 · Oct 1, 2025 · Oct 1, 2025 · Oct 1, 2025 · Oct 1, 2025
@@ -42,7 +42,7 @@ The possible values for the keys are as follow:
   can be replaced inside the `DATA_SECTION`.
 * `datasets`: a key-value map associating a PDI type to string keys.
   Each string is the name of a dataset to create in the file on first
-  access, with the type described in the value. The string key can also be 
+  access, with the type described in the value. The string key is 
   a regular expression (regex), and can be used to define "generic keys",
   that can be used in `DATA_IO_DESC` for the keyword dataset.
 * `collision_policy`: a string identifying a \ref COLLISION_POLICY
@@ -114,7 +114,8 @@ The possible values for the keys are as follow:
   It defaults to selecting the whole data.
 * `dataset_selection`: a `SELECTION_DESC` specifying the selection of
   data in the file data to write or read.
-  This is only valid if the dataset is defined in the datasets.
+  This is only valid if the dataset is explicitly defined in the `datasets`
+  section.
 * `attributes`: a key-value map specifying the set of attributes to read from
   (respectively, write to) the file when the associated dataset is read
   (respectively, written).

@@ -29,7 +29,6 @@
 #endif
 
 #include <algorithm>
-#include <regex>
 #include <sstream>
 #include <tuple>
 #include <vector>
@@ -42,6 +41,7 @@
 #include <pdi/datatype.h>
 #include <pdi/datatype_template.h>
 #include <pdi/error.h>
+#include <pdi/fmt.h>
 #include <pdi/paraconf_wrapper.h>
 #include <pdi/ref_any.h>
 #include <pdi/scalar_datatype.h>
@@ -70,6 +70,8 @@ using PDI::Type_error;
 using PDI::Value_error;
 using std::dynamic_pointer_cast;
 using std::function;
+using std::pair;
+using std::regex;
 using std::string;
 using std::stringstream;
 using std::tie;
@@ -212,7 +214,7 @@ void Dataset_op::fletcher(Context& ctx, Expression value)
 	}
 }
 
-void Dataset_op::execute(Context& ctx, hid_t h5_file, bool use_mpio, const unordered_map<string, Datatype_template_sptr>& dsets)
+void Dataset_op::execute(Context& ctx, hid_t h5_file, bool use_mpio, const unordered_map<string, pair<regex, Datatype_template_sptr>>& dsets)
 {
 	Raii_hid xfer_lst = make_raii_hid(H5Pcreate(H5P_DATASET_XFER), H5Pclose);
 #ifdef H5_HAVE_PARALLEL
@@ -346,7 +348,7 @@ hid_t Dataset_op::dataset_creation_plist(Context& ctx, const Datatype* dataset_t
 	return dset_plist;
 }
 
-void Dataset_op::do_write(Context& ctx, hid_t h5_file, hid_t write_lst, const unordered_map<string, Datatype_template_sptr>& dsets)
+void Dataset_op::do_write(Context& ctx, hid_t h5_file, hid_t write_lst, const unordered_map<string, pair<regex, Datatype_template_sptr>>& dsets)
 {
 	string dataset_name = m_dataset.to_string(ctx);
 	ctx.logger().trace("Preparing for writing `{}' dataset", dataset_name);
@@ -364,65 +366,62 @@ void Dataset_op::do_write(Context& ctx, hid_t h5_file, hid_t write_lst, const un
 	Datatype_sptr dataset_type;
 	Raii_hid h5_file_type, h5_file_space;
 
-	int counter_dataset_found = 0;
+	bool bool_dataset_found = false;
+	pair< string, pair<regex, Datatype_template_sptr> > dset_found;
 	ctx.logger().trace("search `{}' in the list of datasets section", dataset_name);
 
-	for (auto&& dsets_elem: dsets) {
-		// create regex from string
-		std::regex dsets_elem_regex(dsets_elem.first);
-		// try if dataset_name is including in regex
-		if (std::regex_match(dataset_name, dsets_elem_regex)) {
-			counter_dataset_found++;
-			ctx.logger().trace(" `{}' match an element of datasets(defined as regex) with value := `{}'", dataset_name, dsets_elem.first);
-		}
-	}
-
-	ctx.logger().trace("Found `{}' match(s) in the list of datasets section for `{}'", counter_dataset_found, dataset_name);
-
-	if (counter_dataset_found > 1) {
-		// if we found two or more element in the list of datasets, we can't choose the right dataset (if the elements found have different size, subsize, type, ...)
-		// send a error a message to the user
-		std::stringstream msg_dataset_found;
-		msg_dataset_found << "\nThe elements that match " << dataset_name << " are:" << std::endl;
-		for (auto&& dsets_elem: dsets) {
-			// create regex from string
-			std::regex dsets_elem_regex(dsets_elem.first);
-			// try if dataset_name is including in regex
-			if (std::regex_match(dataset_name, dsets_elem_regex)) {
-				msg_dataset_found << " - " << dsets_elem.first << std::endl;
+	for (auto&& dsets_elem = dsets.begin(); dsets_elem != dsets.end(); ++dsets_elem) {
+		if (std::regex_match(dataset_name, dsets_elem->second.first)) {
+			if (!bool_dataset_found) {
+				bool_dataset_found = true;
+				ctx.logger().trace(" `{}' match an element of datasets(defined as regex) with value := `{}'", dataset_name, dsets_elem->first);
+				dset_found = *dsets_elem;
+			} else {
+				// if we find another matching element in the list of datasets, we cannot choose the right dataset
+				// (the matching elements may have different size, subsize, type, ...)
+				// so report an error message to the user
+				std::list<string> list_dataset_found;
+				list_dataset_found.emplace_back(dset_found.first);
+				list_dataset_found.emplace_back(dsets_elem->first);
+
+				++dsets_elem; // get the next element in the iterator on dsets
+				// loop over the rest of the elements in the iterator on dsets
+				for (dsets_elem; dsets_elem != dsets.end(); ++dsets_elem) {
+					if (std::regex_match(dataset_name, dsets_elem->second.first)) {
+						list_dataset_found.emplace_back(dsets_elem->first);
+					}
+				}
+				list_dataset_found.sort(); // sort the list of dataset
+
+				std::string msg_dataset_found = fmt::format(
+					"\nThe elements that match {} are:\n - {}\nAttention: The elements are considered as a regex.",
+					dataset_name,
+					fmt::join(list_dataset_found, "\n - ")
+				);
+
+				throw Config_error{
+					m_dataset_selection.selection_tree(),
+					"Found `{}' match(s) in the list of datasets section for `{}'. Cannot choose the right element in datasets.{}",
+					list_dataset_found.size(),
+					dataset_name,
+					msg_dataset_found
+				};
 			}
 		}
-		msg_dataset_found << "Attention: The elements are considered as a regex.";
-
-		throw Config_error{
-			m_dataset_selection.selection_tree(),
-			"Found `{}' match(s) in the list of datasets section for `{}'. Cannot choose the right element in datasets.{}",
-			counter_dataset_found,
-			dataset_name,
-			msg_dataset_found.str()
-		};
 	}
 
-	if (counter_dataset_found == 1) {
-		for (auto&& dataset_type_iter_regex = dsets.begin(); dataset_type_iter_regex != dsets.end(); ++dataset_type_iter_regex) {
-			std::regex dsets_elem_regex(dataset_type_iter_regex->first);
-			if (std::regex_match(dataset_name, dsets_elem_regex)) {
-				// we found the dataset
-				ctx.logger().trace("Get the regex in the list of datasets section := `{}'", dataset_type_iter_regex->first);
-				dataset_type = dataset_type_iter_regex->second->evaluate(ctx);
-				tie(h5_file_space, h5_file_type) = space(dataset_type);
-				ctx.logger().trace("Applying `{}' dataset selection", dataset_name);
-				m_dataset_selection.apply(ctx, h5_file_space, h5_mem_space);
-				break; // stop the "for" loop
-			}
-		}
+	if (bool_dataset_found) {
+		ctx.logger().trace("Get the regex in the list of datasets section := `{}'", dset_found.first);
+		dataset_type = dset_found.second.second->evaluate(ctx);
+		tie(h5_file_space, h5_file_type) = space(dataset_type);
+		ctx.logger().trace("Applying `{}' dataset selection", dataset_name);
+		m_dataset_selection.apply(ctx, h5_file_space, h5_mem_space);
 	} else {
 		if (!m_dataset_selection.size().empty()) {
-			throw Config_error{m_dataset_selection.selection_tree(), "Dataset selection is invalid in implicit dataset `{}'", dataset_name};
-		} else {
-			dataset_type = ref.type();
-			tie(h5_file_space, h5_file_type) = space(dataset_type, true);
+			throw Config_error{m_dataset_selection.selection_tree(), "Dataset selection is invalid for implicit dataset `{}'", dataset_name};
 		}
+		dataset_type = ref.type();
+		tie(h5_file_space, h5_file_type) = space(dataset_type, true);
 	}
 
 	ctx.logger().trace("Validating `{}' dataset dataspaces selection", dataset_name);

@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA)
+ * Copyright (C) 2015-2025 Commissariat a l'energie atomique et aux energies alternatives (CEA)
  * Copyright (C) 2021-2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
  * All rights reserved.
  *
@@ -32,6 +32,7 @@
 #include <mpi.h>
 #endif
 
+#include <regex>
 #include <string>
 #include <unordered_map>
 
@@ -188,12 +189,22 @@ class Dataset_op
 	 * \param use_mpio whether the hdf5 read/write is parallel
 	 * \param dsets the type of the explicitly typed datasets
 	 */
-	void execute(PDI::Context& ctx, hid_t h5_file, bool use_mpio, const std::unordered_map<std::string, PDI::Datatype_template_sptr>& dsets);
+	void execute(
+		PDI::Context& ctx,
+		hid_t h5_file,
+		bool use_mpio,
+		const std::unordered_map<std::string, std::pair<std::regex, PDI::Datatype_template_sptr>>& dsets
+	);
 
 private:
 	void do_read(PDI::Context& ctx, hid_t h5_file, hid_t read_lst);
 
-	void do_write(PDI::Context& ctx, hid_t h5_file, hid_t xfer_lst, const std::unordered_map<std::string, PDI::Datatype_template_sptr>& dsets);
+	void do_write(
+		PDI::Context& ctx,
+		hid_t h5_file,
+		hid_t xfer_lst,
+		const std::unordered_map<std::string, std::pair<std::regex, PDI::Datatype_template_sptr>>& dsets
+	);
 };
 
 } // namespace decl_hdf5

@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA)
+ * Copyright (C) 2015-2025 Commissariat a l'energie atomique et aux energies alternatives (CEA)
  * Copyright (C) 2021-2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
  * All rights reserved.
  *
@@ -89,7 +89,12 @@ vector<File_op> File_op::parse(Context& ctx, PC_tree_t tree)
 #endif
 		} else if (key == "datasets") {
 			each(value, [&](PC_tree_t dset_name, PC_tree_t dset_type) {
-				template_op.m_datasets.emplace(to_string(dset_name), ctx.datatype(dset_type));
+				std::string dset_name_string = to_string(dset_name);
+				std::regex dset_regex(dset_name_string);
+				template_op.m_datasets.emplace(
+					dset_name_string,
+					std::pair<std::regex, PDI::Datatype_template_sptr>(dset_regex, ctx.datatype(dset_type))
+				);
 			});
 		} else if (key == "deflate") {
 			deflate = value;
@@ -230,7 +235,7 @@ File_op::File_op(const File_op& other)
 	, m_dset_size_ops{other.m_dset_size_ops}
 {
 	for (auto&& dataset: other.m_datasets) {
-		m_datasets.emplace(dataset.first, dataset.second);
+		m_datasets.emplace(dataset.first, std::pair<std::regex, PDI::Datatype_template_sptr>(dataset.second.first, dataset.second.second));
 	}
 }
 

@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2015-2021 Commissariat a l'energie atomique et aux energies alternatives (CEA)
+ * Copyright (C) 2015-2025 Commissariat a l'energie atomique et aux energies alternatives (CEA)
  * Copyright (C) 2021 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
  * All rights reserved.
  *
@@ -32,6 +32,7 @@
 #include <mpi.h>
 #endif
 
+#include <regex>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -67,7 +68,7 @@ class File_op
 #endif
 
 	/// type of the datasets for which an explicit type is specified
-	std::unordered_map<std::string, PDI::Datatype_template_sptr> m_datasets;
+	std::unordered_map<std::string, std::pair<std::regex, PDI::Datatype_template_sptr>> m_datasets;
 
 	/// the dataset operations
 	std::vector<Dataset_op> m_dset_ops;

@@ -1448,12 +1448,15 @@ TEST_F(decl_hdf5_test, check_config_error_for_two_regex_found)
 	int has_failed = 0;
 
 	std::string true_errmsg
-		= "Error while triggering event `write_event': Config_error in lines 32 - 33: Found `2' match(s) in the list of datasets "
+		= "Error while triggering event `write_event': Config_error in lines 44 - 45: Found `4' match(s) in the list of datasets "
 		  "section for `group123/array_data'. Cannot choose the right element in datasets.\n"
 		  "The elements that match group123/array_data are:\n"
-		  " - group[0-9]+/array_data\n"
 		  " - group.*/array_data\n"
+		  " - group1.*/array_data\n"
+		  " - group12.*/array_data\n"
+		  " - group[0-9]+/array_data\n"
 		  "Attention: The elements are considered as a regex.";
+
 	PDI_status_t true_status = PDI_ERR_CONFIG;
 	context_check_error ctx{true_errmsg, true_status, has_failed};
 
@@ -1490,6 +1493,18 @@ TEST_F(decl_hdf5_test, check_config_error_for_two_regex_found)
 		  "        size: [3, 8]                                                 \n"
 		  "        type: array                                                  \n"
 		  "        subtype: int                                                 \n"
+		  "      group1.*/array_data:                                           \n"
+		  "        size: [3, 8]                                                 \n"
+		  "        type: array                                                  \n"
+		  "        subtype: int                                                 \n"
+		  "      group/.*/array_data:                                           \n"
+		  "        size: [3, 8]                                                 \n"
+		  "        type: array                                                  \n"
+		  "        subtype: int                                                 \n"
+		  "      group12.*/array_data:                                          \n"
+		  "        size: [3, 8]                                                 \n"
+		  "        type: array                                                  \n"
+		  "        subtype: int                                                 \n"
 		  "    write:                                                           \n"
 		  "      array_data:                                                    \n"
 		  "        dataset: 'group${index}/array_data'                          \n"
@@ -1551,7 +1566,7 @@ TEST_F(decl_hdf5_test, check_config_error_for_no_regex_found)
 {
 	SetUp("decl_hdf5_test_no_regex.h5");
 	int has_failed = 0;
-	std::string true_errmsg = "Error while triggering event `write_event': Config_error in lines 19 - 20: Dataset selection is invalid in implicit "
+	std::string true_errmsg = "Error while triggering event `write_event': Config_error in lines 19 - 20: Dataset selection is invalid for implicit "
 							  "dataset `group123/array_data'";
 	PDI_status_t true_status = PDI_ERR_CONFIG;
 	context_check_error ctx{true_errmsg, true_status, has_failed};