diff --git a/.github/enable_openmp.cmake b/.github/enable_openmp.cmake
new file mode 100644
index 00000000..f3f31ee0
--- /dev/null
+++ b/.github/enable_openmp.cmake
@@ -0,0 +1 @@
+set(INTEGRALS_ENABLE_OPENMP ON)
diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml
index 8a1a561c..f8fca5ab 100644
--- a/.github/workflows/pull_request.yaml
+++ b/.github/workflows/pull_request.yaml
@@ -39,3 +39,9 @@ jobs:
     with:
       compilers: '["gcc-11", "clang-14"]'
       repo_toolchain: ".github/enable_sigma.cmake"
+
+  test_enable_openmp:
+    uses: NWChemEx/.github/.github/workflows/test_nwx_library.yaml@master
+    with:
+      compilers: '["gcc-11", "clang-14"]'
+      repo_toolchain: ".github/enable_openmp.cmake"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5e72fd80..78d425b3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,6 +39,7 @@ cmaize_option_list(
     BUILD_TESTING OFF "Should we build the tests?"
     BUILD_PYBIND11_PYBINDINGS ON "Build pybind11 python3 bindings?"
     ENABLE_SIGMA OFF "Should we enable Sigma for uncertainty tracking?"
+    INTEGRALS_ENABLE_OPENMP OFF "Should we enable OpenMP for threading?"
 )
 
 # Can't build from github due to Libint setup.
@@ -60,11 +61,18 @@ cmaize_find_or_build_dependency(
                ENABLE_SIGMA=${ENABLE_SIGMA}
 )
 
+set(project_depends Libint2 simde)
+
+if("${INTEGRALS_ENABLE_OPENMP}")
+find_package(OpenMP REQUIRED)
+list(APPEND project_depends OpenMP::OpenMP_CXX)
+endif()
+
 cmaize_add_library(
     ${PROJECT_NAME}
     SOURCE_DIR "${INTEGRALS_SOURCE_DIR}/${PROJECT_NAME}"
     INCLUDE_DIRS "${INTEGRALS_INCLUDE_DIR}/${PROJECT_NAME}"
-    DEPENDS Libint2 simde
+    DEPENDS "${project_depends}"
 )
 
 include(nwx_pybind11)
diff --git a/src/integrals/libint/libint.cpp b/src/integrals/libint/libint.cpp
index f488577f..7cd0c278 100644
--- a/src/integrals/libint/libint.cpp
+++ b/src/integrals/libint/libint.cpp
@@ -23,9 +23,30 @@
 #include "libint_visitor.hpp"
 #include <type_traits>
 
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
 namespace integrals::libint {
 namespace {
 
+#ifdef _OPENMP
+int get_num_threads() {
+    int num_threads;
+#pragma omp parallel
+    { num_threads = omp_get_num_threads(); }
+    return num_threads;
+}
+
+int get_thread_num() { return omp_get_thread_num(); }
+#else
+
+int get_num_threads() { return 1; }
+
+int get_thread_num() { return 0; }
+
+#endif
+
 template<typename FloatType>
 auto build_eigen_buffer(const std::vector<libint2::BasisSet>& basis_sets,
                         parallelzone::runtime::RuntimeView& rv, double thresh) {
@@ -52,25 +73,46 @@ template<std::size_t N, typename FloatType>
 auto fill_tensor(const std::vector<libint2::BasisSet>& basis_sets,
                  const chemist::qm_operator::OperatorBase& op,
                  parallelzone::runtime::RuntimeView& rv, double thresh) {
+    using size_type = decltype(N);
+
     // Dimensional information
-    std::vector<std::size_t> dims_shells(N);
-    for(decltype(N) i = 0; i < N; ++i) dims_shells[i] = basis_sets[i].size();
+    std::vector<size_type> dim_stepsizes(N, 1);
+    size_type num_shell_combinations = 1;
 
-    auto pbuffer = build_eigen_buffer<FloatType>(basis_sets, rv, thresh);
+    for(size_type i = 0; i < N; ++i) {
+        num_shell_combinations *= basis_sets[i].size();
+        for(size_type j = i; j < N - 1; ++j) {
+            dim_stepsizes[i] *= basis_sets[j].size();
+        }
+    }
 
-    // Make libint engine
+    // Make an engine for each thread
+    int num_threads = get_num_threads();
+    std::vector<libint2::Engine> engines(num_threads);
     LibintVisitor visitor(basis_sets, thresh);
     op.visit(visitor);
-    auto engine     = visitor.engine();
-    const auto& buf = engine.results();
+    for(int i = 0; i != num_threads; ++i) { engines[i] = visitor.engine(); }
 
     // Fill in values
-    std::vector<std::size_t> shells(N, 0);
-    while(shells[0] < dims_shells[0]) {
-        detail_::run_engine_(engine, basis_sets, shells,
+    auto pbuffer = build_eigen_buffer<FloatType>(basis_sets, rv, thresh);
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+    for(size_type i_pair = 0; i_pair != num_shell_combinations; ++i_pair) {
+        auto thread_id = get_thread_num();
+
+        std::vector<size_type> shells(N);
+        auto shell_ord = i_pair;
+        for(size_type i = 0; i < N; ++i) {
+            shells[i] = shell_ord / dim_stepsizes[i];
+            shell_ord = shell_ord % dim_stepsizes[i];
+        }
+
+        detail_::run_engine_(engines[thread_id], basis_sets, shells,
                              std::make_index_sequence<N>());
 
-        auto vals = buf[0];
+        const auto& buf = engines[thread_id].results();
+        auto vals       = buf[0];
         if(vals) {
             auto ord   = detail_::shells2ord(basis_sets, shells);
             auto n_ord = ord.size();
@@ -79,17 +121,6 @@ auto fill_tensor(const std::vector<libint2::BasisSet>& basis_sets,
                 pbuffer->set_data(ord[i_ord], update);
             }
         }
-
-        // Increment index
-        shells[N - 1] += 1;
-        for(decltype(N) i = 1; i < N; ++i) {
-            if(shells[N - i] >= dims_shells[N - i]) {
-                // Reset this dimension and increment the next one
-                // shells[0] accumulates until we reach the end
-                shells[N - i] = 0;
-                shells[N - i - 1] += 1;
-            }
-        }
     }
 
     auto pshape = pbuffer->layout().shape().clone();