From f08be9cf358395a322c05871d9d17e38bb32a177 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Fri, 18 Oct 2024 13:35:15 -0600 Subject: [PATCH 1/5] RBILUK: Use new KK::sptrsv block support instead of KK::trsv Signed-off-by: James Foucar --- .../src/Ifpack2_Experimental_RBILUK_decl.hpp | 2 + .../src/Ifpack2_Experimental_RBILUK_def.hpp | 90 ++++++++++++------- 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp index 5e8378c027cb..2052c6e530e2 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp @@ -336,6 +336,8 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix //! The inverse of the diagonal Teuchos::RCP D_block_inverse_; + + Kokkos::View tmp_; }; diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp index f68d8d96a793..e50a71649527 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp @@ -18,7 +18,7 @@ #include "Ifpack2_LocalFilter.hpp" #include "Ifpack2_Utilities.hpp" #include "Ifpack2_RILUK.hpp" -#include "KokkosSparse_trsv.hpp" +#include "KokkosSparse_sptrsv.hpp" //#define IFPACK2_RBILUK_INITIAL //#define IFPACK2_RBILUK_INITIAL_NOKK @@ -194,6 +194,11 @@ void RBILUK::allocate_L_and_U_blocks () U_block_->setAllToScalar (STM::zero ()); D_block_->setAllToScalar (STM::zero ()); + // Allocate temp space for apply + if (this->isKokkosKernelsSpiluk_) { + const auto numRows = L_block_->getLocalNumRows(); + tmp_ = decltype(tmp_)("RBILUK::tmp_", numRows * blockSize_); + } } this->isAllocated_ = true; } @@ -1070,7 +1075,7 @@ apply (const Tpetra::MultiVectorgetCrsGraph().getLocalRowPtrsHost(); - auto L_entries_host = L_block_->getCrsGraph().getLocalIndicesHost(); - auto U_row_ptrs_host = U_block_->getCrsGraph().getLocalRowPtrsHost(); - auto U_entries_host = U_block_->getCrsGraph().getLocalIndicesHost(); - auto L_values_host = L_block_->getValuesHost(); - auto U_values_host = U_block_->getValuesHost(); - - row_map_type* L_row_ptrs_host_ri = reinterpret_cast(&L_row_ptrs_host); - index_type* L_entries_host_ri = reinterpret_cast(&L_entries_host); - row_map_type* U_row_ptrs_host_ri = reinterpret_cast(&U_row_ptrs_host); - index_type* U_entries_host_ri = reinterpret_cast(&U_entries_host); - values_type* L_values_host_ri = reinterpret_cast(&L_values_host); - values_type* U_values_host_ri = reinterpret_cast(&U_values_host); + // Kokkos kernels impl. + auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly); + auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite); + + auto lclL = L_block_->getLocalMatrixDevice(); + auto L_rowmap = lclL.graph.row_map; + auto L_entries = lclL.graph.entries; + auto L_values = lclL.values; + + auto lclU = U_block_->getLocalMatrixDevice(); + auto U_rowmap = lclU.graph.row_map; + auto U_entries = lclU.graph.entries; + auto U_values = lclU.values; const auto numRows = L_block_->getLocalNumRows(); - local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries_host.size(), *L_values_host_ri, *L_row_ptrs_host_ri, *L_entries_host_ri, blockSize_); - local_matrix_host_type U_block_local_host("U_block_local_host", numRows, numRows, U_entries_host.size(), *U_values_host_ri, *U_row_ptrs_host_ri, *U_entries_host_ri, blockSize_); + local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries.size(), L_values, L_rowmap, L_entries, blockSize_); + local_matrix_host_type U_block_local_host("U_block_local_host", numRows, numRows, U_entries.size(), U_values, U_rowmap, U_entries, blockSize_); if (mode == Teuchos::NO_TRANS) { - KokkosSparse::trsv("L", "N", "N", L_block_local_host, X_view, Y_view); - KokkosSparse::trsv("U", "N", "N", U_block_local_host, Y_view, Y_view); - KokkosBlas::axpby(alpha, Y_view, beta, Y_view); + KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_RP; + { + KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_); + KokkosSparse::Experimental::sptrsv_symbolic(KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values); + Kokkos::fence(); + + const LO numVecs = X.getNumVectors(); + for (LO vec = 0; vec < numVecs; ++vec) { + auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec); + auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec); + KokkosSparse::Experimental::sptrsv_solve(KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_); + } + Kokkos::fence(); + + KernelHandle_->destroy_sptrsv_handle(); + } + + { + KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_); + KokkosSparse::Experimental::sptrsv_symbolic(KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values); + Kokkos::fence(); + + const LO numVecs = X.getNumVectors(); + for (LO vec = 0; vec < numVecs; ++vec) { + auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec); + KokkosSparse::Experimental::sptrsv_solve(KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view); + } + Kokkos::fence(); + + KernelHandle_->destroy_sptrsv_handle(); + } + + KokkosBlas::axpby(alpha, Y_views, beta, Y_views); } else { - KokkosSparse::trsv("U", "T", "N", U_block_local_host, X_view, Y_view); - KokkosSparse::trsv("L", "T", "N", L_block_local_host, Y_view, Y_view); - KokkosBlas::axpby(alpha, Y_view, beta, Y_view); + TEUCHOS_TEST_FOR_EXCEPTION( + true, std::runtime_error, + "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm"); } //Y.getWrappedDualView().sync(); From 0a6fbd74207fb8d98675eb9ad470032166609537 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Tue, 22 Oct 2024 12:30:17 -0600 Subject: [PATCH 2/5] Remove unused host matrices Signed-off-by: James Foucar --- packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp index e50a71649527..a0736e8e9047 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp @@ -1108,8 +1108,6 @@ apply (const Tpetra::MultiVectorgetLocalNumRows(); - local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries.size(), L_values, L_rowmap, L_entries, blockSize_); - local_matrix_host_type U_block_local_host("U_block_local_host", numRows, numRows, U_entries.size(), U_values, U_rowmap, U_entries, blockSize_); if (mode == Teuchos::NO_TRANS) { KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_RP; From a38eb95298e24aaddff9447d6825de6dabffd7ba Mon Sep 17 00:00:00 2001 From: James Foucar Date: Fri, 25 Oct 2024 12:40:36 -0600 Subject: [PATCH 3/5] Fix tmp_ type, Kokkos::complex vs. std::complex mismatch Signed-off-by: James Foucar --- packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp index 2052c6e530e2..84f5d421c6cd 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp @@ -337,7 +337,7 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix //! The inverse of the diagonal Teuchos::RCP D_block_inverse_; - Kokkos::View tmp_; + Kokkos::View tmp_; }; From a93e316f5190895ea107857f272bbd2165ff0d91 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Fri, 25 Oct 2024 15:53:44 -0600 Subject: [PATCH 4/5] Move sptrsv handle setup out of apply Signed-off-by: James Foucar --- .../src/Ifpack2_Experimental_RBILUK_decl.hpp | 2 ++ .../src/Ifpack2_Experimental_RBILUK_def.hpp | 34 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp index 84f5d421c6cd..ed918212c44d 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp @@ -165,6 +165,8 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix // kk_handle_type;//test Teuchos::RCP KernelHandle_; + Teuchos::RCP L_Sptrsv_KernelHandle_; + Teuchos::RCP U_Sptrsv_KernelHandle_; //@} diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp index a0736e8e9047..650f0f3ed58a 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp @@ -327,12 +327,21 @@ void RBILUK::initialize () if (this->isKokkosKernelsSpiluk_) { this->KernelHandle_ = Teuchos::rcp (new kk_handle_type ()); + const auto numRows = this->A_local_->getLocalNumRows(); KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1, - this->A_local_->getLocalNumRows(), + numRows, 2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1), 2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1), blockSize_); this->Graph_->initialize(KernelHandle_); // this calls spiluk_symbolic + + this->L_Sptrsv_KernelHandle_ = Teuchos::rcp (new kk_handle_type ()); + this->U_Sptrsv_KernelHandle_ = Teuchos::rcp (new kk_handle_type ()); + + KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1; + + this->L_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_); + this->U_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_); } else { this->Graph_->initialize (); @@ -919,6 +928,10 @@ void RBILUK::compute () KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), this->LevelOfFill_, A_local_rowmap, A_local_entries, A_local_values, L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values ); + + // Now call symbolic for sptrsvs + KokkosSparse::Experimental::sptrsv_symbolic(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values); + KokkosSparse::Experimental::sptrsv_symbolic(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values); } } // Stop timing @@ -1107,39 +1120,24 @@ apply (const Tpetra::MultiVectorgetLocalNumRows(); - if (mode == Teuchos::NO_TRANS) { - KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_RP; { - KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_); - KokkosSparse::Experimental::sptrsv_symbolic(KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values); - Kokkos::fence(); - const LO numVecs = X.getNumVectors(); for (LO vec = 0; vec < numVecs; ++vec) { auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec); auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec); - KokkosSparse::Experimental::sptrsv_solve(KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_); + KokkosSparse::Experimental::sptrsv_solve(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_); } Kokkos::fence(); - - KernelHandle_->destroy_sptrsv_handle(); } { - KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_); - KokkosSparse::Experimental::sptrsv_symbolic(KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values); - Kokkos::fence(); - const LO numVecs = X.getNumVectors(); for (LO vec = 0; vec < numVecs; ++vec) { auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec); - KokkosSparse::Experimental::sptrsv_solve(KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view); + KokkosSparse::Experimental::sptrsv_solve(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view); } Kokkos::fence(); - - KernelHandle_->destroy_sptrsv_handle(); } KokkosBlas::axpby(alpha, Y_views, beta, Y_views); From 643ff700bff96963ce106549c9d08610fe9bade0 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Fri, 25 Oct 2024 16:00:23 -0600 Subject: [PATCH 5/5] Remove fences Signed-off-by: James Foucar --- packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp index 650f0f3ed58a..592d4dcfafd2 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp @@ -1128,7 +1128,6 @@ apply (const Tpetra::MultiVector