Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions plugins/decl_hdf5/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ The possible values for the keys are as follow:
can be replaced inside the `DATA_SECTION`.
* `datasets`: a key-value map associating a PDI type to string keys.
Each string is the name of a dataset to create in the file on first
access, with the type described in the value. The string key can also be
access, with the type described in the value. The string key is
a regular expression (regex), and can be used to define "generic keys",
that can be used in `DATA_IO_DESC` for the keyword dataset.
that can be used in `DATA_IO_DESC` for the `dataset` keyword. The regex uses the Modified ECMAScript regular expression grammar.
* `collision_policy`: a string identifying a \ref COLLISION_POLICY
* `deflate`: an integer value (from 0 to 9) defining the default deflate (GNU
gzip) compression level to use for datasets created in this file.
Expand Down Expand Up @@ -114,7 +114,8 @@ The possible values for the keys are as follow:
It defaults to selecting the whole data.
* `dataset_selection`: a `SELECTION_DESC` specifying the selection of
data in the file data to write or read.
This is only valid if the dataset is defined in the datasets.
This is only valid if the dataset is explicitly defined in the `datasets`
section.
* `attributes`: a key-value map specifying the set of attributes to read from
(respectively, write to) the file when the associated dataset is read
(respectively, written).
Expand Down
79 changes: 79 additions & 0 deletions plugins/decl_hdf5/dataset_explicit_type.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*******************************************************************************
* Copyright (C) 2025 Commissariat a l'energie atomique et aux energies alternatives (CEA)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of CEA nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/


#ifndef DECL_HDF5_DATASET_EXPLICIT_TYPE_H_
#define DECL_HDF5_DATASET_EXPLICIT_TYPE_H_

// #include <hdf5.h>
// #ifdef H5_HAVE_PARALLEL
// #include <mpi.h>
// #endif

#include <regex>
#include <string>
#include <utility>

#include <paraconf.h>

#include <pdi/pdi_fwd.h>

//#include <pdi/context.h>
//#include <pdi/expression.h>

namespace decl_hdf5 {

/** Information about the type that should be used to create a dataset, as
 * provided in the Yaml file.
 *
 * An instance bundles the textual definition of a dataset key, the regex
 * compiled from it, the location of the definition in the Yaml file and the
 * type template to use for the datasets whose name matches the regex.
 */
struct Dataset_explicit_type {
	std::string m_definition; ///< definition from the YAML as a string, kept for debugging purposes
	std::regex m_regex; ///< the parsed regex that determines if the provided type applies (depends only on m_definition and the regex grammar)
	int m_begin_line = 0; ///< begin line number in the YAML, for debugging purposes (as provided by the YAML parser)
	int m_end_line = 0; ///< end line number in the YAML, for debugging purposes (as provided by the YAML parser)
	PDI::Datatype_template_sptr m_type; ///< the type to use for the dataset in case the regex matches

	/// Builds an empty description: empty definition, default regex, line numbers set to 0
	Dataset_explicit_type() = default;

	/** Builds a description from its components
	 *
	 * \param def the definition of the dataset key as written in the Yaml file
	 * \param b_line the line where the definition begins in the Yaml file
	 * \param e_line the line where the definition ends in the Yaml file
	 * \param regex the regex compiled from def
	 * \param type the type to use for the datasets whose name matches regex
	 */
	Dataset_explicit_type(std::string def, int b_line, int e_line, std::regex regex, PDI::Datatype_template_sptr type)
		// the initializers are listed in member declaration order, which is the
		// order in which they actually run; the by-value arguments are moved
		// into place to avoid a second copy (notably of the regex)
		: m_definition(std::move(def))
		, m_regex(std::move(regex))
		, m_begin_line(b_line)
		, m_end_line(e_line)
		, m_type(std::move(type))
	{}

	/** Builds the part of an error message that locates the definition in the Yaml file
	 *
	 * \return " defined in line N" when the definition begins and ends on the
	 *         same line, " defined in lines N - M" otherwise
	 */
	std::string get_msg_err_line() const
	{
		// the begin line is shifted by one when printed (presumably because the
		// YAML parser counts lines from 0 -- to be confirmed)
		if (m_begin_line == m_end_line) {
			return " defined in line " + std::to_string(m_begin_line + 1);
		}
		// NOTE(review): m_end_line is printed without the +1 applied to
		// m_begin_line; presumably the end mark already points one line past
		// the last line of the definition -- confirm against the YAML parser.
		return " defined in lines " + std::to_string(m_begin_line + 1) + " - " + std::to_string(m_end_line);
	}
};

} // namespace decl_hdf5

#endif // DECL_HDF5_DATASET_EXPLICIT_TYPE_H_
102 changes: 51 additions & 51 deletions plugins/decl_hdf5/dataset_op.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <pdi/datatype.h>
#include <pdi/datatype_template.h>
#include <pdi/error.h>
#include <pdi/fmt.h>
#include <pdi/paraconf_wrapper.h>
#include <pdi/ref_any.h>
#include <pdi/scalar_datatype.h>
Expand Down Expand Up @@ -70,6 +71,8 @@ using PDI::Type_error;
using PDI::Value_error;
using std::dynamic_pointer_cast;
using std::function;
using std::pair;
using std::regex;
using std::string;
using std::stringstream;
using std::tie;
Expand Down Expand Up @@ -212,7 +215,7 @@ void Dataset_op::fletcher(Context& ctx, Expression value)
}
}

void Dataset_op::execute(Context& ctx, hid_t h5_file, bool use_mpio, const unordered_map<string, Datatype_template_sptr>& dsets)
void Dataset_op::execute(Context& ctx, hid_t h5_file, bool use_mpio, const std::vector<Dataset_explicit_type>& dsets)
{
Raii_hid xfer_lst = make_raii_hid(H5Pcreate(H5P_DATASET_XFER), H5Pclose);
#ifdef H5_HAVE_PARALLEL
Expand Down Expand Up @@ -346,7 +349,7 @@ hid_t Dataset_op::dataset_creation_plist(Context& ctx, const Datatype* dataset_t
return dset_plist;
}

void Dataset_op::do_write(Context& ctx, hid_t h5_file, hid_t write_lst, const unordered_map<string, Datatype_template_sptr>& dsets)
void Dataset_op::do_write(Context& ctx, hid_t h5_file, hid_t write_lst, const std::vector<Dataset_explicit_type>& dsets)
{
string dataset_name = m_dataset.to_string(ctx);
ctx.logger().trace("Preparing for writing `{}' dataset", dataset_name);
Expand All @@ -364,65 +367,62 @@ void Dataset_op::do_write(Context& ctx, hid_t h5_file, hid_t write_lst, const un
Datatype_sptr dataset_type;
Raii_hid h5_file_type, h5_file_space;

int counter_dataset_found = 0;
bool dataset_found = false;
Dataset_explicit_type dset_found;
ctx.logger().trace("search `{}' in the list of datasets section", dataset_name);

for (auto&& dsets_elem: dsets) {
// create regex from string
std::regex dsets_elem_regex(dsets_elem.first);
// try if dataset_name is including in regex
if (std::regex_match(dataset_name, dsets_elem_regex)) {
counter_dataset_found++;
ctx.logger().trace(" `{}' match an element of datasets(defined as regex) with value := `{}'", dataset_name, dsets_elem.first);
}
}
for (auto&& dsets_elem = dsets.begin(); dsets_elem != dsets.end(); ++dsets_elem) {
if (std::regex_match(dataset_name, dsets_elem->m_regex)) {
if (!dataset_found) {
dataset_found = true;
ctx.logger().trace(" `{}' match an element of datasets(defined as regex) with value := `{}'", dataset_name, dsets_elem->m_definition);
dset_found = *dsets_elem;
} else {
// if we found an other element in the list of datasets, we can't choose the right dataset
// (if the elements found have different size, subsize, type, ...)
// send a error a message to the user
std::vector<string> list_dataset_found;
list_dataset_found.emplace_back(dset_found.m_definition + dset_found.get_msg_err_line());
list_dataset_found.emplace_back(dsets_elem->m_definition + dsets_elem->get_msg_err_line());
++dsets_elem; // get the next element in the iterator on dsets
// loop over the rest of the elements in the iterator on dsets
for (dsets_elem; dsets_elem != dsets.end(); ++dsets_elem) {
if (std::regex_match(dataset_name, dsets_elem->m_regex)) {
list_dataset_found.emplace_back(dsets_elem->m_definition + dsets_elem->get_msg_err_line());
}
}

ctx.logger().trace("Found `{}' match(s) in the list of datasets section for `{}'", counter_dataset_found, dataset_name);

if (counter_dataset_found > 1) {
// if we found two or more element in the list of datasets, we can't choose the right dataset (if the elements found have different size, subsize, type, ...)
// send a error a message to the user
std::stringstream msg_dataset_found;
msg_dataset_found << "\nThe elements that match " << dataset_name << " are:" << std::endl;
for (auto&& dsets_elem: dsets) {
// create regex from string
std::regex dsets_elem_regex(dsets_elem.first);
// try if dataset_name is including in regex
if (std::regex_match(dataset_name, dsets_elem_regex)) {
msg_dataset_found << " - " << dsets_elem.first << std::endl;
// Remark: message error is defined outside Config_error because is too long.
const char* msg_config_error
= "Found `{0}' match(es) in the list of datasets section for `{1}'."
" Cannot choose the right element in datasets.\n"
"The elements that match `{1}' are:\n"
" - {2}\n"
"Attention: The elements are considered as a regex.";

throw Config_error{
m_dataset_selection.selection_tree(),
msg_config_error,
list_dataset_found.size(),
dataset_name,
fmt::join(list_dataset_found, "\n - ")
};
}
}
msg_dataset_found << "Attention: The elements are considered as a regex.";

throw Config_error{
m_dataset_selection.selection_tree(),
"Found `{}' match(s) in the list of datasets section for `{}'. Cannot choose the right element in datasets.{}",
counter_dataset_found,
dataset_name,
msg_dataset_found.str()
};
}

if (counter_dataset_found == 1) {
for (auto&& dataset_type_iter_regex = dsets.begin(); dataset_type_iter_regex != dsets.end(); ++dataset_type_iter_regex) {
std::regex dsets_elem_regex(dataset_type_iter_regex->first);
if (std::regex_match(dataset_name, dsets_elem_regex)) {
// we found the dataset
ctx.logger().trace("Get the regex in the list of datasets section := `{}'", dataset_type_iter_regex->first);
dataset_type = dataset_type_iter_regex->second->evaluate(ctx);
tie(h5_file_space, h5_file_type) = space(dataset_type);
ctx.logger().trace("Applying `{}' dataset selection", dataset_name);
m_dataset_selection.apply(ctx, h5_file_space, h5_mem_space);
break; // stop the "for" loop
}
}
if (dataset_found) {
ctx.logger().trace("Get the regex in the list of datasets section := `{}'", dset_found.m_definition);
dataset_type = dset_found.m_type->evaluate(ctx);
tie(h5_file_space, h5_file_type) = space(dataset_type);
ctx.logger().trace("Applying `{}' dataset selection", dataset_name);
m_dataset_selection.apply(ctx, h5_file_space, h5_mem_space);
} else {
if (!m_dataset_selection.size().empty()) {
throw Config_error{m_dataset_selection.selection_tree(), "Dataset selection is invalid in implicit dataset `{}'", dataset_name};
} else {
dataset_type = ref.type();
tie(h5_file_space, h5_file_type) = space(dataset_type, true);
throw Config_error{m_dataset_selection.selection_tree(), "Dataset selection is invalid for implicit dataset `{}'", dataset_name};
}
dataset_type = ref.type();
tie(h5_file_space, h5_file_type) = space(dataset_type, true);
}

ctx.logger().trace("Validating `{}' dataset dataspaces selection", dataset_name);
Expand Down
10 changes: 6 additions & 4 deletions plugins/decl_hdf5/dataset_op.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA)
* Copyright (C) 2015-2025 Commissariat a l'energie atomique et aux energies alternatives (CEA)
* Copyright (C) 2021-2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
* All rights reserved.
*
Expand Down Expand Up @@ -32,6 +32,7 @@
#include <mpi.h>
#endif

#include <regex>
#include <string>
#include <unordered_map>

Expand All @@ -43,6 +44,7 @@

#include "attribute_op.h"
#include "collision_policy.h"
#include "dataset_explicit_type.h"
#include "selection.h"

namespace decl_hdf5 {
Expand Down Expand Up @@ -186,14 +188,14 @@ class Dataset_op
* \param ctx the context in which to operate
* \param h5_file the already opened HDF5 file id
* \param use_mpio whether the hdf5 read/write is parallel
* \param dsets the type of the explicitly typed datasets
* \param dsets the vector of the explicitly typed datasets defined in Yaml file.
*/
void execute(PDI::Context& ctx, hid_t h5_file, bool use_mpio, const std::unordered_map<std::string, PDI::Datatype_template_sptr>& dsets);
void execute(PDI::Context& ctx, hid_t h5_file, bool use_mpio, const std::vector<Dataset_explicit_type>& dsets);

private:
void do_read(PDI::Context& ctx, hid_t h5_file, hid_t read_lst);

void do_write(PDI::Context& ctx, hid_t h5_file, hid_t xfer_lst, const std::unordered_map<std::string, PDI::Datatype_template_sptr>& dsets);
void do_write(PDI::Context& ctx, hid_t h5_file, hid_t xfer_lst, const std::vector<Dataset_explicit_type>& dsets);
};

} // namespace decl_hdf5
Expand Down
18 changes: 15 additions & 3 deletions plugins/decl_hdf5/file_op.cxx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA)
* Copyright (C) 2015-2025 Commissariat a l'energie atomique et aux energies alternatives (CEA)
* Copyright (C) 2021-2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
* All rights reserved.
*
Expand Down Expand Up @@ -89,7 +89,19 @@ vector<File_op> File_op::parse(Context& ctx, PC_tree_t tree)
#endif
} else if (key == "datasets") {
each(value, [&](PC_tree_t dset_name, PC_tree_t dset_type) {
template_op.m_datasets.emplace(to_string(dset_name), ctx.datatype(dset_type));
std::string dset_name_value = to_string(dset_name);
std::regex dset_regex(dset_name_value, std::regex::ECMAScript);
if (dset_type.node && dset_name.node) {
	// both Yaml nodes are available: record the definition together with
	// the lines it spans so that later error messages can locate it
	template_op.m_datasets.emplace_back(
		dset_name_value,
		dset_name.node->start_mark.line,
		dset_type.node->end_mark.line,
		dset_regex,
		ctx.datatype(dset_type)
	);
} else {
	// the error must actually be thrown: constructing the Config_error
	// without `throw' silently discarded it and ignored the invalid entry
	throw Config_error{key_tree, "Error in the definition of dataset `{}' in datasets section.", dset_name_value};
}
});
} else if (key == "deflate") {
deflate = value;
Expand Down Expand Up @@ -230,7 +242,7 @@ File_op::File_op(const File_op& other)
, m_dset_size_ops{other.m_dset_size_ops}
{
for (auto&& dataset: other.m_datasets) {
m_datasets.emplace(dataset.first, dataset.second);
m_datasets.emplace_back(dataset);
}
}

Expand Down
8 changes: 5 additions & 3 deletions plugins/decl_hdf5/file_op.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (C) 2015-2021 Commissariat a l'energie atomique et aux energies alternatives (CEA)
* Copyright (C) 2015-2025 Commissariat a l'energie atomique et aux energies alternatives (CEA)
* Copyright (C) 2021 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
* All rights reserved.
*
Expand Down Expand Up @@ -32,6 +32,7 @@
#include <mpi.h>
#endif

//#include <regex>
#include <string>
#include <unordered_map>
#include <vector>
Expand All @@ -43,6 +44,7 @@

#include "attribute_op.h"
#include "collision_policy.h"
#include "dataset_explicit_type.h"
#include "dataset_op.h"

namespace decl_hdf5 {
Expand All @@ -66,8 +68,8 @@ class File_op
PDI::Expression m_communicator;
#endif

/// type of the datasets for which an explicit type is specified
std::unordered_map<std::string, PDI::Datatype_template_sptr> m_datasets;
/// type information for the datasets for which an explicit type is specified
std::vector<Dataset_explicit_type> m_datasets;

/// the dataset operations
std::vector<Dataset_op> m_dset_ops;
Expand Down
Loading
Loading