Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ifpack2: Add support for using kokkos kernels under RBILUK #12911

Merged
merged 12 commits into from
Apr 29, 2024
Merged
11 changes: 8 additions & 3 deletions packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix
local_ordinal_type,
global_ordinal_type,
node_type> crs_matrix_type;

using crs_graph_type = Tpetra::CrsGraph<local_ordinal_type,
global_ordinal_type,
node_type>;
Expand All @@ -175,11 +175,16 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix

template <class NewMatrixType> friend class RBILUK;

typedef typename block_crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
typedef typename block_crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
typedef typename block_crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;

//@}
//! \name Implementation of KK ILU(k).
//@{

typedef typename crs_matrix_type::local_matrix_device_type local_matrix_device_type;

typedef typename block_crs_matrix_type::local_matrix_device_type local_matrix_device_type;
typedef typename block_crs_matrix_type::local_matrix_host_type local_matrix_host_type;
typedef typename local_matrix_device_type::StaticCrsGraphType::row_map_type lno_row_view_t;
typedef typename local_matrix_device_type::StaticCrsGraphType::entries_type lno_nonzero_view_t;
typedef typename local_matrix_device_type::values_type scalar_nonzero_view_t;
Expand Down
718 changes: 425 additions & 293 deletions packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions packages/ifpack2/src/Ifpack2_IlukGraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ class IlukGraph : public Teuchos::Describable {
size_t getNumGlobalDiagonals() const { return NumGlobalDiagonals_; }

private:
typedef typename GraphType::map_type map_type;
typedef typename GraphType::map_type map_type;

/// \brief Copy constructor (UNIMPLEMENTED; DO NOT USE).
///
Expand Down Expand Up @@ -313,7 +313,7 @@ void IlukGraph<GraphType, KKHandleType>::initialize()
: Kokkos::ceil(static_cast<double>(RowMaxNumIndices)
* Kokkos::pow(overalloc, levelfill));
});

};

bool insertError; // No error found yet while inserting entries
Expand Down Expand Up @@ -415,7 +415,7 @@ void IlukGraph<GraphType, KKHandleType>::initialize()
L_Graph_->getLocalRowCopy(i, CurrentRow_view, LenL); // Get L Indices
CurrentRow[LenL] = i; // Put in Diagonal
if (LenU > 0) {
ArrayView<local_ordinal_type> URowView = CurrentRow.view (LenL+1,LenU);
ArrayView<local_ordinal_type> URowView = CurrentRow.view (LenL+1,LenU);
nonconst_local_inds_host_view_type URowView_v(URowView.data(),URowView.size());

// Get U Indices
Expand Down Expand Up @@ -584,7 +584,7 @@ void IlukGraph<GraphType, KKHandleType>::initialize(const Teuchos::RCP<KKHandleT

typedef typename Kokkos::View<size_type*, array_layout, device_type> lno_row_view_t;
typedef typename Kokkos::View<data_type*, array_layout, device_type> lno_nonzero_view_t;

constructOverlapGraph();

// FIXME (mfh 23 Dec 2013) Use size_t or whatever
Expand Down Expand Up @@ -614,7 +614,7 @@ void IlukGraph<GraphType, KKHandleType>::initialize(const Teuchos::RCP<KKHandleT
catch (std::runtime_error &e) {
symbolicError = true;
data_type nnzL = static_cast<data_type>(Overalloc_)*L_entries.extent(0);
data_type nnzU = static_cast<data_type>(Overalloc_)*U_entries.extent(0);
data_type nnzU = static_cast<data_type>(Overalloc_)*U_entries.extent(0);
KernelHandle->get_spiluk_handle()->reset_handle(NumMyRows, nnzL, nnzU);
Kokkos::resize(L_entries, KernelHandle->get_spiluk_handle()->get_nnzL());
Kokkos::resize(U_entries, KernelHandle->get_spiluk_handle()->get_nnzU());
Expand All @@ -628,17 +628,17 @@ void IlukGraph<GraphType, KKHandleType>::initialize(const Teuchos::RCP<KKHandleT

Kokkos::resize(L_entries, KernelHandle->get_spiluk_handle()->get_nnzL());
Kokkos::resize(U_entries, KernelHandle->get_spiluk_handle()->get_nnzU());

RCP<Teuchos::ParameterList> params = Teuchos::parameterList ();
params->set ("Optimize Storage",false);

L_Graph_ = rcp (new crs_graph_type (OverlapGraph_->getRowMap (),
OverlapGraph_->getRowMap (),
OverlapGraph_->getRowMap (),
L_row_map, L_entries));
U_Graph_ = rcp (new crs_graph_type (OverlapGraph_->getRowMap (),
OverlapGraph_->getRowMap (),
OverlapGraph_->getRowMap (),
U_row_map, U_entries));

RCP<const map_type> L_DomainMap = OverlapGraph_->getRowMap ();
RCP<const map_type> L_RangeMap = Graph_->getRangeMap ();
RCP<const map_type> U_DomainMap = Graph_->getDomainMap ();
Expand Down
6 changes: 3 additions & 3 deletions packages/ifpack2/src/Ifpack2_RILUK_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ class RILUK:
<typename lno_row_view_t::const_value_type, typename lno_nonzero_view_t::const_value_type, typename scalar_nonzero_view_t::value_type,
HandleExecSpace, TemporaryMemorySpace,PersistentMemorySpace > kk_handle_type;
typedef Ifpack2::IlukGraph<Tpetra::CrsGraph<local_ordinal_type, global_ordinal_type, node_type>, kk_handle_type> iluk_graph_type;

/// \brief Constructor that takes a Tpetra::RowMatrix.
///
/// \param A_in [in] The input matrix.
Expand Down Expand Up @@ -402,7 +402,7 @@ class RILUK:
}

//! Get a rough estimate of cost per iteration
size_t getNodeSmootherComplexity() const;
size_t getNodeSmootherComplexity() const;



Expand Down Expand Up @@ -599,7 +599,7 @@ class RILUK:
/// may be computed using a crs_matrix_type that initialize() constructs
/// temporarily.
Teuchos::RCP<const row_matrix_type> A_local_;
lno_row_view_t A_local_rowmap_;
lno_row_view_t A_local_rowmap_;
lno_nonzero_view_t A_local_entries_;
scalar_nonzero_view_t A_local_values_;
std::vector<local_matrix_device_type> A_local_diagblks;
Expand Down
44 changes: 22 additions & 22 deletions packages/ifpack2/src/Ifpack2_RILUK_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ RILUK<MatrixType>::RILUK (const Teuchos::RCP<const crs_matrix_type>& Matrix_in)


template<class MatrixType>
RILUK<MatrixType>::~RILUK()
RILUK<MatrixType>::~RILUK()
{
if (!isKokkosKernelsStream_) {
if (Teuchos::nonnull (KernelHandle_)) {
Expand Down Expand Up @@ -315,12 +315,12 @@ void RILUK<MatrixType>::allocate_L_and_U ()
for (int i = 0; i < num_streams_; i++) {
L_v_[i] = null;
U_v_[i] = null;

L_v_[i] = rcp (new crs_matrix_type (Graph_v_[i]->getL_Graph ()));
U_v_[i] = rcp (new crs_matrix_type (Graph_v_[i]->getU_Graph ()));
L_v_[i]->setAllToScalar (STS::zero ()); // Zero out L and U matrices
U_v_[i]->setAllToScalar (STS::zero ());

L_v_[i]->fillComplete ();
U_v_[i]->fillComplete ();
}
Expand Down Expand Up @@ -620,19 +620,19 @@ void RILUK<MatrixType>::initialize ()
if (this->isKokkosKernelsSpiluk_) {
if (!isKokkosKernelsStream_) {
this->KernelHandle_ = Teuchos::rcp (new kk_handle_type ());
KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
A_local_->getLocalNumRows(),
2*A_local_->getLocalNumEntries()*(LevelOfFill_+1),
2*A_local_->getLocalNumEntries()*(LevelOfFill_+1),
2*A_local_->getLocalNumEntries()*(LevelOfFill_+1) );
Graph_->initialize (KernelHandle_); // this calls spiluk_symbolic
}
else {
KernelHandle_v_ = std::vector< Teuchos::RCP<kk_handle_type> >(num_streams_);
for (int i = 0; i < num_streams_; i++) {
KernelHandle_v_[i] = Teuchos::rcp (new kk_handle_type ());
KernelHandle_v_[i]->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
KernelHandle_v_[i]->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
A_local_diagblks[i].numRows(),
2*A_local_diagblks[i].nnz()*(LevelOfFill_+1),
2*A_local_diagblks[i].nnz()*(LevelOfFill_+1),
2*A_local_diagblks[i].nnz()*(LevelOfFill_+1) );
Graph_v_[i]->initialize (KernelHandle_v_[i]); // this calls spiluk_symbolic
}
Expand All @@ -655,7 +655,7 @@ void RILUK<MatrixType>::initialize ()
L_solver_->setMatrices (L_v_);
}
L_solver_->initialize ();
//NOTE (Nov-09-2022):
//NOTE (Nov-09-2022):
//For Cuda >= 11.3 (using cusparseSpSV), skip trisolve computes here.
//Instead, call trisolve computes within RILUK compute
#if !defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || !defined(KOKKOS_ENABLE_CUDA) || (CUDA_VERSION < 11030)
Expand Down Expand Up @@ -925,7 +925,7 @@ void RILUK<MatrixType>::compute ()

nonconst_local_inds_host_view_type InI_sub(InI.data()+NumL+1,MaxNumEntries-NumL-1);
nonconst_values_host_view_type InV_sub(reinterpret_cast<IST*>(InV.data())+NumL+1,MaxNumEntries-NumL-1);

U_->getLocalRowCopy (local_row, InI_sub,InV_sub, NumU);
NumIn = NumL+NumU+1;

Expand All @@ -939,12 +939,12 @@ void RILUK<MatrixType>::compute ()
for (size_t jj = 0; jj < NumL; ++jj) {
local_ordinal_type j = InI[jj];
IST multiplier = InV[jj]; // current_mults++;

InV[jj] *= static_cast<scalar_type>(DV(j));

U_->getLocalRowView(j, UUI, UUV); // View of row above
NumUU = UUI.size();

if (RelaxValue_ == STM::zero ()) {
for (size_t k = 0; k < NumUU; ++k) {
const int kk = colflag[UUI[k]];
Expand Down Expand Up @@ -1001,7 +1001,7 @@ void RILUK<MatrixType>::compute ()
}

if (NumU) {
// Replace current row of L and U
// Replace current row of L and U
U_->replaceLocalValues (local_row, InI (NumL+1, NumU), InV (NumL+1, NumU));
}

Expand Down Expand Up @@ -1087,7 +1087,7 @@ void RILUK<MatrixType>::compute ()
for(int i = 0; i < num_streams_; i++) {
L_v_[i]->resumeFill ();
U_v_[i]->resumeFill ();

if (L_v_[i]->isStaticGraph () || L_v_[i]->isLocallyIndexed ()) {
L_v_[i]->setAllToScalar (STS::zero ()); // Zero out L and U matrices
U_v_[i]->setAllToScalar (STS::zero ());
Expand All @@ -1108,16 +1108,16 @@ void RILUK<MatrixType>::compute ()
auto U_entries = lclU.graph.entries;
auto U_values = lclU.values;

KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_,
A_local_rowmap_, A_local_entries_, A_local_values_,
KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_,
A_local_rowmap_, A_local_entries_, A_local_values_,
L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );

L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());

L_solver_->setMatrix (L_);
U_solver_->setMatrix (U_);
}
}
else {
std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
Expand All @@ -1131,7 +1131,7 @@ void RILUK<MatrixType>::compute ()
L_rowmap_v[i] = lclL.graph.row_map;
L_entries_v[i] = lclL.graph.entries;
L_values_v[i] = lclL.values;

auto lclU = U_v_[i]->getLocalMatrixDevice();
U_rowmap_v[i] = lclU.graph.row_map;
U_entries_v[i] = lclU.graph.entries;
Expand Down Expand Up @@ -1210,7 +1210,7 @@ apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_t

const scalar_type one = STS::one ();
const scalar_type zero = STS::zero ();

Teuchos::Time timer ("RILUK::apply");
double startTime = timer.wallTime();
{ // Start timing
Expand Down Expand Up @@ -1301,7 +1301,7 @@ apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_t
U_solver_->apply (Y, Y, mode); // Solve U Y = Y.
#endif
}
else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
//NOTE (Nov-15-2022):
//This is a workaround for Cuda >= 11.3 (using cusparseSpSV)
Expand Down Expand Up @@ -1425,8 +1425,8 @@ std::string RILUK<MatrixType>::description () const
os << "Level-of-fill: " << getLevelOfFill() << ", ";

if(isKokkosKernelsSpiluk_) os<<"KK-SPILUK, ";
if(isKokkosKernelsStream_) os<<"KK-Stream, ";
if(isKokkosKernelsStream_) os<<"KK-Stream, ";

if (A_.is_null ()) {
os << "Matrix: null";
}
Expand Down
Loading
Loading