diff --git a/.github/enable_openmp.cmake b/.github/enable_openmp.cmake new file mode 100644 index 00000000..f3f31ee0 --- /dev/null +++ b/.github/enable_openmp.cmake @@ -0,0 +1 @@ +set(INTEGRALS_ENABLE_OPENMP ON) diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index 8a1a561c..f8fca5ab 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -39,3 +39,9 @@ jobs: with: compilers: '["gcc-11", "clang-14"]' repo_toolchain: ".github/enable_sigma.cmake" + + test_enable_openmp: + uses: NWChemEx/.github/.github/workflows/test_nwx_library.yaml@master + with: + compilers: '["gcc-11", "clang-14"]' + repo_toolchain: ".github/enable_openmp.cmake" diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e72fd80..78d425b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,7 @@ cmaize_option_list( BUILD_TESTING OFF "Should we build the tests?" BUILD_PYBIND11_PYBINDINGS ON "Build pybind11 python3 bindings?" ENABLE_SIGMA OFF "Should we enable Sigma for uncertainty tracking?" + INTEGRALS_ENABLE_OPENMP OFF "Should we enable OpenMP for threading?" ) # Can't build from github due to Libint setup. @@ -60,11 +61,18 @@ cmaize_find_or_build_dependency( ENABLE_SIGMA=${ENABLE_SIGMA} ) +set(project_depends Libint2 simde) + +if("${INTEGRALS_ENABLE_OPENMP}") +find_package(OpenMP REQUIRED) +list(APPEND project_depends OpenMP::OpenMP_CXX) +endif() + cmaize_add_library( ${PROJECT_NAME} SOURCE_DIR "${INTEGRALS_SOURCE_DIR}/${PROJECT_NAME}" INCLUDE_DIRS "${INTEGRALS_INCLUDE_DIR}/${PROJECT_NAME}" - DEPENDS Libint2 simde + DEPENDS "${project_depends}" ) include(nwx_pybind11) diff --git a/src/integrals/libint/libint.cpp b/src/integrals/libint/libint.cpp index f488577f..7cd0c278 100644 --- a/src/integrals/libint/libint.cpp +++ b/src/integrals/libint/libint.cpp @@ -23,9 +23,30 @@ #include "libint_visitor.hpp" #include +#ifdef _OPENMP +#include +#endif + namespace integrals::libint { namespace { +#ifdef _OPENMP +int get_num_threads() { + int num_threads; +#pragma omp parallel + { num_threads = omp_get_num_threads(); } + return num_threads; +} + +int get_thread_num() { return omp_get_thread_num(); } +#else + +int get_num_threads() { return 1; } + +int get_thread_num() { return 0; } + +#endif + template auto build_eigen_buffer(const std::vector& basis_sets, parallelzone::runtime::RuntimeView& rv, double thresh) { @@ -52,25 +73,46 @@ template auto fill_tensor(const std::vector& basis_sets, const chemist::qm_operator::OperatorBase& op, parallelzone::runtime::RuntimeView& rv, double thresh) { + using size_type = decltype(N); + // Dimensional information - std::vector dims_shells(N); - for(decltype(N) i = 0; i < N; ++i) dims_shells[i] = basis_sets[i].size(); + std::vector dim_stepsizes(N, 1); + size_type num_shell_combinations = 1; - auto pbuffer = build_eigen_buffer(basis_sets, rv, thresh); + for(size_type i = 0; i < N; ++i) { + num_shell_combinations *= basis_sets[i].size(); + for(size_type j = i; j < N - 1; ++j) { + dim_stepsizes[i] *= basis_sets[j].size(); + } + } - // Make libint engine + // Make an engine for each thread + int num_threads = get_num_threads(); + std::vector engines(num_threads); LibintVisitor visitor(basis_sets, thresh); op.visit(visitor); - auto engine = visitor.engine(); - const auto& buf = engine.results(); + for(int i = 0; i != num_threads; ++i) { engines[i] = visitor.engine(); } // Fill in values - std::vector shells(N, 0); - while(shells[0] < dims_shells[0]) { - detail_::run_engine_(engine, basis_sets, shells, + auto pbuffer = build_eigen_buffer(basis_sets, rv, thresh); +#ifdef _OPENMP +#pragma omp parallel for +#endif + for(size_type i_pair = 0; i_pair != num_shell_combinations; ++i_pair) { + auto thread_id = get_thread_num(); + + std::vector shells(N); + auto shell_ord = i_pair; + for(size_type i = 0; i < N; ++i) { + shells[i] = shell_ord / dim_stepsizes[i]; + shell_ord = shell_ord % dim_stepsizes[i]; + } + + detail_::run_engine_(engines[thread_id], basis_sets, shells, std::make_index_sequence()); - auto vals = buf[0]; + const auto& buf = engines[thread_id].results(); + auto vals = buf[0]; if(vals) { auto ord = detail_::shells2ord(basis_sets, shells); auto n_ord = ord.size(); @@ -79,17 +121,6 @@ auto fill_tensor(const std::vector& basis_sets, pbuffer->set_data(ord[i_ord], update); } } - - // Increment index - shells[N - 1] += 1; - for(decltype(N) i = 1; i < N; ++i) { - if(shells[N - i] >= dims_shells[N - i]) { - // Reset this dimension and increment the next one - // shells[0] accumulates until we reach the end - shells[N - i] = 0; - shells[N - i - 1] += 1; - } - } } auto pshape = pbuffer->layout().shape().clone();