diff --git a/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp b/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp index ebf713ef64a4..21c15108496f 100644 --- a/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp @@ -638,7 +638,7 @@ initialize () int block_size = List_.get("partitioner: block size"); TEUCHOS_TEST_FOR_EXCEPT_MSG (block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs."); - A_bcrs = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(A_), block_size); + A_bcrs = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(A_), block_size, false); A_ = A_bcrs; hasBlockCrsMatrix_ = true; Kokkos::DefaultExecutionSpace().fence(); diff --git a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp index 6fbccc4d92ee..4c872368a43c 100644 --- a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp @@ -105,7 +105,7 @@ namespace Ifpack2 { (block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs."); { IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA::convertToBlockCrsMatrix"); - impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(matrix), block_size); + impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(matrix), block_size, false); IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) } } diff --git a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_decl.hpp b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_decl.hpp index 2b838977e957..32e2d83d4d24 100644 --- a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_decl.hpp @@ -99,7 +99,7 @@ namespace Tpetra { /// - Point rows corresponding to a particular mesh node must be stored consecutively. template Teuchos::RCP> - getBlockCrsGraph(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize); + getBlockCrsGraph(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize, bool use_local_ID=true); /// \brief Non-member constructor that creates a BlockCrsMatrix from an existing point CrsMatrix. /// @@ -110,7 +110,7 @@ namespace Tpetra { /// - Point rows corresponding to a particular mesh node must be stored consecutively. template Teuchos::RCP> - convertToBlockCrsMatrix(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize); + convertToBlockCrsMatrix(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize, bool use_local_ID=true); /// \brief Fill all point entries in a logical block of a CrsMatrix with zeroes. This should be called /// before convertToBlockCrsMatrix if your Crs is not filled. Performed on host. diff --git a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp index f0e3414f41be..21e000ba5e30 100644 --- a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp +++ b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp @@ -293,7 +293,7 @@ namespace Tpetra { template Teuchos::RCP > - getBlockCrsGraph(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize) + getBlockCrsGraph(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize, bool use_LID) { /* @@ -342,7 +342,41 @@ namespace Tpetra { const offset_type bs2 = blockSize * blockSize; - { + if (use_LID) { + TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillCrsGraph"); + auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); + auto pointRowptr = pointLocalGraph.row_map; + auto pointColind = pointLocalGraph.entries; + + TEUCHOS_TEST_FOR_EXCEPTION(pointColind.extent(0) % bs2 != 0, + std::runtime_error, "Tpetra::getBlockCrsGraph: " + "local number of non zero entries is not a multiple of blockSize^2 "); + + LO block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize; + row_map_type blockRowptr("blockRowptr", block_rows+1); + entries_type blockColind("blockColind", pointColind.extent(0)/(bs2)); + + Kokkos::parallel_for("fillMesh",range_type(0,block_rows), KOKKOS_LAMBDA(const LO i) { + + const LO offset_b = pointRowptr(i*blockSize)/bs2; + const LO offset_b_max = pointRowptr((i+1)*blockSize)/bs2; + + if (i==block_rows-1) + blockRowptr(i+1) = offset_b_max; + blockRowptr(i) = offset_b; + + const LO offset_p = pointRowptr(i*blockSize); + + for (LO k=0; kfillComplete(meshDomainMap,meshRangeMap); + Kokkos::DefaultExecutionSpace().fence(); + } + else { TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillCrsGraph"); auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); auto pointRowptr = pointLocalGraph.row_map; @@ -371,15 +405,15 @@ namespace Tpetra { LO filled_block = 0; for (LO p_i=0; p_i Teuchos::RCP > - convertToBlockCrsMatrix(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize) + convertToBlockCrsMatrix(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize, bool use_LID) { /* @@ -427,13 +461,45 @@ namespace Tpetra { const offset_type bs2 = blockSize * blockSize; - auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize); + auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize, use_LID); + + if (use_LID) { + TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix"); + auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); + auto pointRowptr = pointLocalGraph.row_map; + auto pointColind = pointLocalGraph.entries; - auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap(); - auto localPointColMap = pointMatrix.getColMap()->getLocalMap(); + offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize; + values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2); + auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly); + auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map; + + Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) { + const offset_type blkBeg = blockRowptr[i]; + const offset_type numBlocks = blockRowptr[i+1] - blkBeg; + + // For each block in the row... + for (offset_type block=0; block < numBlocks; block++) { + + // For each entry in the block... + for(LO little_row=0; little_rowgetColMap()->getLocalMap(); + auto localPointColMap = pointMatrix.getColMap()->getLocalMap(); + values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2); { auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); @@ -1029,7 +1095,7 @@ namespace Tpetra { template void blockCrsMatrixWriter(BlockCrsMatrix const &A, std::ostream &os); \ template void blockCrsMatrixWriter(BlockCrsMatrix const &A, std::ostream &os, Teuchos::ParameterList const ¶ms); \ template void writeMatrixStrip(BlockCrsMatrix const &A, std::ostream &os, Teuchos::ParameterList const ¶ms); \ - template Teuchos::RCP > convertToBlockCrsMatrix(const CrsMatrix& pointMatrix, const LO &blockSize); \ + template Teuchos::RCP > convertToBlockCrsMatrix(const CrsMatrix& pointMatrix, const LO &blockSize, bool use_LID); \ template Teuchos::RCP > fillLogicalBlocks(const CrsMatrix& pointMatrix, const LO &blockSize); \ template Teuchos::RCP > unfillFormerBlockCrs(const CrsMatrix& pointMatrix); \ template Teuchos::RCP > convertToCrsMatrix(const BlockCrsMatrix& blockMatrix);