From 721a5b9ef2e136963c4327ee43a2594b66a847d8 Mon Sep 17 00:00:00 2001 From: Jacob Heflin Date: Mon, 19 May 2025 20:05:26 -0500 Subject: [PATCH 1/5] adjusted xyz_to_mol to take in different forms of xyz data --- src/cxx/nux/xyz_to_mol.cpp | 81 ++++++++++++++++++----------- tests/cxx/unit_tests/xyz_to_mol.cpp | 28 +++++++++- 2 files changed, 77 insertions(+), 32 deletions(-) diff --git a/src/cxx/nux/xyz_to_mol.cpp b/src/cxx/nux/xyz_to_mol.cpp index c3166a6..b8eaf05 100644 --- a/src/cxx/nux/xyz_to_mol.cpp +++ b/src/cxx/nux/xyz_to_mol.cpp @@ -18,7 +18,6 @@ #include #include #include -#include namespace nux { @@ -29,45 +28,67 @@ MODULE_CTOR(XYZToMolecule) { } MODULE_RUN(XYZToMolecule) { - const auto& [filename] = simde::MoleculeFromString::unwrap_inputs(inputs); - - auto& z_from_sym = submods.at("Z from symbol"); - auto& atom_from_z = submods.at("Atom from z"); + const auto& [xyz_data] = simde::MoleculeFromString::unwrap_inputs(inputs); + auto& z_from_sym = submods.at("Z from symbol"); + auto& atom_from_z = submods.at("Atom from z"); + std::string line; chemist::Molecule mol; - std::ifstream xyz_file(filename); - - if(!xyz_file) { throw std::runtime_error("File not found: " + filename); } - - std::string line; + std::ifstream file(xyz_data); + std::stringstream buffer; + if(file.is_open()) { + buffer << file.rdbuf(); + file.close(); + } else { + buffer << xyz_data; + if(buffer.str().empty()) { + throw std::runtime_error( + "File or XYZ data not valid, empty string"); + } + } - // Skips the 1st line of the file, associated with the number of - // atoms in the file, and the 2nd line, the comment line. - std::getline(xyz_file, line); - std::getline(xyz_file, line); + int i = 0; - while(std::getline(xyz_file, line)) { + while(std::getline(buffer, line)) { + i++; + int num_atoms; + if(i == 1) { + try { + num_atoms = std::stoi(line); + } catch(const std::invalid_argument& e) { + throw std::runtime_error("Formatting of XYZ data incorrect"); + } + continue; + } + if(i == 2) { continue; } std::istringstream iss(line); - std::vector tokens; - std::string token; - while(std::getline(iss, token, ' ')) { - if(token.size()) { tokens.emplace_back(token); } + std::string atom_string; + double x, y, z; + if(!(iss >> atom_string >> x >> y >> z)) { + throw std::runtime_error("This is not a good coordinate"); } - auto Z = z_from_sym.run_as(tokens.at(0)); - auto x = std::stod(tokens.at(1)); - auto y = std::stod(tokens.at(2)); - auto z = std::stod(tokens.at(3)); - auto atm = atom_from_z.run_as(Z); - atm.x() = x; - atm.y() = y; - atm.z() = z; - mol.push_back(atm); - } + auto Z = z_from_sym.run_as(atom_string); + auto atom = atom_from_z.run_as(Z); + atom.x() = x; + atom.y() = y; + atom.z() = z; - xyz_file.close(); + mol.push_back(atom); + if(!(buffer.peek() == EOF)) { + continue; + } else { + if(mol.size() == num_atoms) { + continue; + } else { + throw std::runtime_error("Incorrect format for XYZ file: " + "Number of atoms and number of atom " + "coordinates do not match"); + } + } + } auto rv = results(); return simde::MoleculeFromString::wrap_results(rv, mol); } diff --git a/tests/cxx/unit_tests/xyz_to_mol.cpp b/tests/cxx/unit_tests/xyz_to_mol.cpp index d6547fb..258b8d9 100644 --- a/tests/cxx/unit_tests/xyz_to_mol.cpp +++ b/tests/cxx/unit_tests/xyz_to_mol.cpp @@ -36,13 +36,19 @@ TEST_CASE("XYZToMolecule") { xyz_mod.change_submod("Z from symbol", z_mod); xyz_mod.change_submod("Atom from z", atom_mod); - SECTION("No XYZ file found") { + SECTION("XYZ File Non-existent") { std::string file = "file.xyz"; REQUIRE_THROWS_AS(xyz_mod.run_as(file), std::runtime_error); } - SECTION("Full XYZ To Molecule run") { + SECTION("XYZ Data Incorrect") { + std::string data = "2\nComment\nH 0 0 0"; + REQUIRE_THROWS_AS(xyz_mod.run_as(data), + std::runtime_error); + } + + SECTION("Full XYZ To Molecule run: File") { auto atom0{make_atoms(1)}; auto atom1{make_atoms(1)}; atom1.z() = 1; @@ -68,4 +74,22 @@ TEST_CASE("XYZToMolecule") { REQUIRE(mol == test_mol); REQUIRE(del_file.good() == false); } + + SECTION("Full XYZ To Molecule run: Data") { + auto atom0{make_atoms(1)}; + auto atom1{make_atoms(1)}; + atom1.z() = 1; + + simde::type::molecule test_mol{atom0, atom1}; + + std::stringstream xyz_data; + xyz_data << "2\n"; + xyz_data << "This is a comment!\n"; + xyz_data << "H 0 0 0\n"; + xyz_data << "H 0 0 1\n"; + + auto mol = xyz_mod.run_as(xyz_data.str()); + + REQUIRE(mol == test_mol); + } } From 245261d6d7188d9d41f13508ee1ba62bd5ed3a6e Mon Sep 17 00:00:00 2001 From: Jacob Heflin Date: Tue, 20 May 2025 14:14:21 -0500 Subject: [PATCH 2/5] Separate out XYZ To Molecule and XYZ File To Molecule --- src/cxx/nux/nux.cpp | 3 ++ src/cxx/nux/nux_modules.hpp | 5 ++++ src/cxx/nux/xyz_to_mol.cpp | 44 ++++++++++++++++++++++------- tests/cxx/unit_tests/xyz_to_mol.cpp | 3 +- 4 files changed, 44 insertions(+), 11 deletions(-) diff --git a/src/cxx/nux/nux.cpp b/src/cxx/nux/nux.cpp index 7f77484..5dadf88 100644 --- a/src/cxx/nux/nux.cpp +++ b/src/cxx/nux/nux.cpp @@ -22,6 +22,9 @@ namespace nux { void load_modules(pluginplay::ModuleManager& mm) { mm.add_module("Born-Oppenheimer Approximation"); mm.add_module("XYZ To Molecule"); + mm.add_module("XYZ File to Molecule"); + + set_defaults(mm); } } // namespace nux diff --git a/src/cxx/nux/nux_modules.hpp b/src/cxx/nux/nux_modules.hpp index d8a65d3..bdfd427 100644 --- a/src/cxx/nux/nux_modules.hpp +++ b/src/cxx/nux/nux_modules.hpp @@ -21,5 +21,10 @@ namespace nux { DECLARE_MODULE(BOApproximation); DECLARE_MODULE(XYZToMolecule); +DECLARE_MODULE(XYZFileToMolecule); +inline void set_defaults(pluginplay::ModuleManager& mm) { + mm.change_submod("XYZ File To Molecule", "XYZ to molecule", + "XYZ To Molecule"); +} } // namespace nux diff --git a/src/cxx/nux/xyz_to_mol.cpp b/src/cxx/nux/xyz_to_mol.cpp index b8eaf05..ecb98fb 100644 --- a/src/cxx/nux/xyz_to_mol.cpp +++ b/src/cxx/nux/xyz_to_mol.cpp @@ -35,17 +35,11 @@ MODULE_RUN(XYZToMolecule) { std::string line; chemist::Molecule mol; - std::ifstream file(xyz_data); std::stringstream buffer; - if(file.is_open()) { - buffer << file.rdbuf(); - file.close(); - } else { - buffer << xyz_data; - if(buffer.str().empty()) { - throw std::runtime_error( - "File or XYZ data not valid, empty string"); - } + + buffer << xyz_data; + if(buffer.str().empty()) { + throw std::runtime_error("File or XYZ data not valid, empty string"); } int i = 0; @@ -93,4 +87,34 @@ MODULE_RUN(XYZToMolecule) { return simde::MoleculeFromString::wrap_results(rv, mol); } +MODULE_CTOR(XYZFileToMolecule) { + satisfies_property_type(); + add_submodule("XYZ to molecule"); +} + +MODULE_RUN(XYZFileToMolecule) { + const auto& [xyz_filename] = + simde::MoleculeFromString::unwrap_inputs(inputs); + + auto& xyz_parser = submods.at("XYZ to molecule"); + + std::ifstream file(xyz_filename); + std::stringstream buffer; + try { + if(file.is_open()) { + buffer << file.rdbuf(); + file.close(); + } else { + throw std::runtime_error(""); + } + } catch(const std::runtime_error& e) { + std::cerr << "File not found: " << xyz_filename << std::endl; + } + + chemist::Molecule mol = + xyz_parser.run_as(buffer.str()); + + auto rv = results(); + return simde::MoleculeFromString::wrap_results(rv, mol); +} } // namespace nux diff --git a/tests/cxx/unit_tests/xyz_to_mol.cpp b/tests/cxx/unit_tests/xyz_to_mol.cpp index 258b8d9..74e9427 100644 --- a/tests/cxx/unit_tests/xyz_to_mol.cpp +++ b/tests/cxx/unit_tests/xyz_to_mol.cpp @@ -66,7 +66,8 @@ TEST_CASE("XYZToMolecule") { std::string filename = "h2.xyz"; - auto mol = xyz_mod.run_as(filename); + auto& xyz_file_mod = mm.at("XYZ File to Molecule"); + auto mol = xyz_file_mod.run_as(filename); remove("h2.xyz"); std::ifstream del_file("h2.xyz"); From 67ce605f33db02cfdbca7944699c81316c461549 Mon Sep 17 00:00:00 2001 From: "Jonathan M. Waldrop" Date: Tue, 20 May 2025 15:49:30 -0500 Subject: [PATCH 3/5] Update pull_request.yaml --- .github/workflows/pull_request.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index 2bee7d1..c6d996f 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -26,7 +26,6 @@ jobs: with: config_file: '.github/.licenserc.yaml' source_dir: 'include src tests' - compilers: '["gcc-11"]' + compilers: '["gcc-11", "clang-14"]' doc_target: 'nux_cxx_api' - build_fail_on_warning: false secrets: inherit From a0b11fcb629ca43484a3be9077ecff9213a0e050 Mon Sep 17 00:00:00 2001 From: Jacob Heflin Date: Tue, 20 May 2025 16:17:09 -0500 Subject: [PATCH 4/5] Separated out xyz modules to separate source files, as well as the test files; made suggested changes --- src/cxx/nux/xyz_file_to_mol.cpp | 50 ++++++++++++++++ src/cxx/nux/xyz_to_mol.cpp | 58 +++---------------- tests/cxx/unit_tests/xyz_file_to_mol.cpp | 74 ++++++++++++++++++++++++ tests/cxx/unit_tests/xyz_to_mol.cpp | 38 ++---------- 4 files changed, 137 insertions(+), 83 deletions(-) create mode 100644 src/cxx/nux/xyz_file_to_mol.cpp create mode 100644 tests/cxx/unit_tests/xyz_file_to_mol.cpp diff --git a/src/cxx/nux/xyz_file_to_mol.cpp b/src/cxx/nux/xyz_file_to_mol.cpp new file mode 100644 index 0000000..5decdb6 --- /dev/null +++ b/src/cxx/nux/xyz_file_to_mol.cpp @@ -0,0 +1,50 @@ + +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nux_modules.hpp" +#include +#include + +namespace nux { + +MODULE_CTOR(XYZFileToMolecule) { + satisfies_property_type(); + add_submodule("XYZ to molecule"); +} + +MODULE_RUN(XYZFileToMolecule) { + const auto& [xyz_filename] = + simde::MoleculeFromString::unwrap_inputs(inputs); + + auto& xyz_parser = submods.at("XYZ to molecule"); + + std::ifstream file(xyz_filename); + std::stringstream buffer; + + if(!file.is_open()) { + throw std::runtime_error("File not found: " + xyz_filename); + } + buffer << file.rdbuf(); + file.close(); + + chemist::Molecule mol = + xyz_parser.run_as(buffer.str()); + + auto rv = results(); + return simde::MoleculeFromString::wrap_results(rv, mol); +} +} // namespace nux diff --git a/src/cxx/nux/xyz_to_mol.cpp b/src/cxx/nux/xyz_to_mol.cpp index ecb98fb..b92a649 100644 --- a/src/cxx/nux/xyz_to_mol.cpp +++ b/src/cxx/nux/xyz_to_mol.cpp @@ -15,7 +15,6 @@ */ #include "nux_modules.hpp" -#include #include #include @@ -42,17 +41,13 @@ MODULE_RUN(XYZToMolecule) { throw std::runtime_error("File or XYZ data not valid, empty string"); } + int num_atoms; int i = 0; while(std::getline(buffer, line)) { i++; - int num_atoms; if(i == 1) { - try { - num_atoms = std::stoi(line); - } catch(const std::invalid_argument& e) { - throw std::runtime_error("Formatting of XYZ data incorrect"); - } + num_atoms = std::stoi(line); continue; } if(i == 2) { continue; } @@ -60,7 +55,8 @@ MODULE_RUN(XYZToMolecule) { std::string atom_string; double x, y, z; if(!(iss >> atom_string >> x >> y >> z)) { - throw std::runtime_error("This is not a good coordinate"); + throw std::runtime_error("Incorrect XYZ Coordinate format: " + + line); } auto Z = z_from_sym.run_as(atom_string); @@ -70,50 +66,12 @@ MODULE_RUN(XYZToMolecule) { atom.z() = z; mol.push_back(atom); - - if(!(buffer.peek() == EOF)) { - continue; - } else { - if(mol.size() == num_atoms) { - continue; - } else { - throw std::runtime_error("Incorrect format for XYZ file: " - "Number of atoms and number of atom " - "coordinates do not match"); - } - } } - auto rv = results(); - return simde::MoleculeFromString::wrap_results(rv, mol); -} - -MODULE_CTOR(XYZFileToMolecule) { - satisfies_property_type(); - add_submodule("XYZ to molecule"); -} - -MODULE_RUN(XYZFileToMolecule) { - const auto& [xyz_filename] = - simde::MoleculeFromString::unwrap_inputs(inputs); - - auto& xyz_parser = submods.at("XYZ to molecule"); - - std::ifstream file(xyz_filename); - std::stringstream buffer; - try { - if(file.is_open()) { - buffer << file.rdbuf(); - file.close(); - } else { - throw std::runtime_error(""); - } - } catch(const std::runtime_error& e) { - std::cerr << "File not found: " << xyz_filename << std::endl; + if(!(mol.size() == num_atoms)) { + throw std::runtime_error("Incorrect format for XYZ file: " + "Number of atoms and number of atom " + "coordinates do not match"); } - - chemist::Molecule mol = - xyz_parser.run_as(buffer.str()); - auto rv = results(); return simde::MoleculeFromString::wrap_results(rv, mol); } diff --git a/tests/cxx/unit_tests/xyz_file_to_mol.cpp b/tests/cxx/unit_tests/xyz_file_to_mol.cpp new file mode 100644 index 0000000..aceefaa --- /dev/null +++ b/tests/cxx/unit_tests/xyz_file_to_mol.cpp @@ -0,0 +1,74 @@ + +/* + * Copyright 2024 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_nux.hpp" +#include + +TEST_CASE("XYZFileToMolecule") { + pluginplay::ModuleManager mm; + nux::load_modules(mm); + + auto make_atoms = [=](auto&& Z) { + REQUIRE(Z == 1); + double h_mass = 1837.4260218693814; + return simde::type::atom{"H", 1, h_mass, 0.0, 0.0, 0.0}; + }; + + auto atom_mod = pluginplay::make_lambda(make_atoms); + + auto z_mod = pluginplay::make_lambda( + [=](auto&& symbol) { return simde::type::atomic_number{1}; }); + + auto& xyz_file_mod = mm.at("XYZ File To Molecule"); + auto& xyz_mod = mm.at("XYZ to Molecule"); + xyz_mod.change_submod("Z from symbol", z_mod); + xyz_mod.change_submod("Atom from z", atom_mod); + + SECTION("XYZ File Non-existent") { + std::string file = "file.xyz"; + REQUIRE_THROWS_AS(xyz_file_mod.run_as(file), + std::runtime_error); + } + + SECTION("Full XYZ To Molecule run: File") { + auto atom0{make_atoms(1)}; + auto atom1{make_atoms(1)}; + atom1.z() = 1; + + simde::type::molecule test_mol{atom0, atom1}; + + std::ofstream xyz_file; + xyz_file.open("h2.xyz"); + + xyz_file << "2\n"; + xyz_file << "This is a comment!\n"; + xyz_file << "H 0 0 0\n"; + xyz_file << "H 0 0 1\n"; + xyz_file.close(); + + std::string filename = "h2.xyz"; + + auto& xyz_file_mod = mm.at("XYZ File to Molecule"); + auto mol = xyz_file_mod.run_as(filename); + + remove("h2.xyz"); + std::ifstream del_file("h2.xyz"); + + REQUIRE(mol == test_mol); + REQUIRE(del_file.good() == false); + } +} diff --git a/tests/cxx/unit_tests/xyz_to_mol.cpp b/tests/cxx/unit_tests/xyz_to_mol.cpp index 74e9427..1946807 100644 --- a/tests/cxx/unit_tests/xyz_to_mol.cpp +++ b/tests/cxx/unit_tests/xyz_to_mol.cpp @@ -36,44 +36,16 @@ TEST_CASE("XYZToMolecule") { xyz_mod.change_submod("Z from symbol", z_mod); xyz_mod.change_submod("Atom from z", atom_mod); - SECTION("XYZ File Non-existent") { - std::string file = "file.xyz"; - REQUIRE_THROWS_AS(xyz_mod.run_as(file), - std::runtime_error); - } - - SECTION("XYZ Data Incorrect") { + SECTION("XYZ Atom Count Incorrect") { std::string data = "2\nComment\nH 0 0 0"; REQUIRE_THROWS_AS(xyz_mod.run_as(data), std::runtime_error); } - SECTION("Full XYZ To Molecule run: File") { - auto atom0{make_atoms(1)}; - auto atom1{make_atoms(1)}; - atom1.z() = 1; - - simde::type::molecule test_mol{atom0, atom1}; - - std::ofstream xyz_file; - xyz_file.open("h2.xyz"); - - xyz_file << "2\n"; - xyz_file << "This is a comment!\n"; - xyz_file << "H 0 0 0\n"; - xyz_file << "H 0 0 1\n"; - xyz_file.close(); - - std::string filename = "h2.xyz"; - - auto& xyz_file_mod = mm.at("XYZ File to Molecule"); - auto mol = xyz_file_mod.run_as(filename); - - remove("h2.xyz"); - std::ifstream del_file("h2.xyz"); - - REQUIRE(mol == test_mol); - REQUIRE(del_file.good() == false); + SECTION("XYZ Atom Coordinate Data Incorrect") { + std::string data = "2\nComment\nH 0 0 K\nH 0 0 1"; + REQUIRE_THROWS_AS(xyz_mod.run_as(data), + std::runtime_error); } SECTION("Full XYZ To Molecule run: Data") { From 2e317f8fd76221e43b5541d5167e7048d5d0bdd5 Mon Sep 17 00:00:00 2001 From: Jacob Heflin Date: Tue, 20 May 2025 16:33:56 -0500 Subject: [PATCH 5/5] added static cast of into to long unsigned int for gcc-11 error --- src/cxx/nux/xyz_to_mol.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/cxx/nux/xyz_to_mol.cpp b/src/cxx/nux/xyz_to_mol.cpp index b92a649..dd1fbde 100644 --- a/src/cxx/nux/xyz_to_mol.cpp +++ b/src/cxx/nux/xyz_to_mol.cpp @@ -41,13 +41,18 @@ MODULE_RUN(XYZToMolecule) { throw std::runtime_error("File or XYZ data not valid, empty string"); } - int num_atoms; + long unsigned int num_atoms; int i = 0; while(std::getline(buffer, line)) { i++; if(i == 1) { - num_atoms = std::stoi(line); + int num = std::stoi(line); + if(num < 0) { + throw std::out_of_range("Negative number for atom count: " + + line); + } + num_atoms = static_cast(num); continue; } if(i == 2) { continue; }