From 988e11f43495c1adaf8ea06d57dc34d8a9cad2f5 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 17 Apr 2024 09:05:04 -0600 Subject: [PATCH 1/3] allow the converter to use LIDs or GIDs --- .../src/Ifpack2_BlockRelaxation_def.hpp | 2 +- .../src/Ifpack2_BlockTriDiContainer_def.hpp | 2 +- .../Tpetra_BlockCrsMatrix_Helpers_decl.hpp | 4 +- .../src/Tpetra_BlockCrsMatrix_Helpers_def.hpp | 153 +++++++++++++----- 4 files changed, 116 insertions(+), 45 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp b/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp index ebf713ef64a4..21c15108496f 100644 --- a/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp @@ -638,7 +638,7 @@ initialize () int block_size = List_.get("partitioner: block size"); TEUCHOS_TEST_FOR_EXCEPT_MSG (block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs."); - A_bcrs = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(A_), block_size); + A_bcrs = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(A_), block_size, false); A_ = A_bcrs; hasBlockCrsMatrix_ = true; Kokkos::DefaultExecutionSpace().fence(); diff --git a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp index 6fbccc4d92ee..4c872368a43c 100644 --- a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp @@ -105,7 +105,7 @@ namespace Ifpack2 { (block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs."); { IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA::convertToBlockCrsMatrix"); - impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(matrix), block_size); + impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(matrix), block_size, false); IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) } } diff --git a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_decl.hpp b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_decl.hpp index 2b838977e957..32e2d83d4d24 100644 --- a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_decl.hpp @@ -99,7 +99,7 @@ namespace Tpetra { /// - Point rows corresponding to a particular mesh node must be stored consecutively. template Teuchos::RCP> - getBlockCrsGraph(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize); + getBlockCrsGraph(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize, bool use_local_ID=true); /// \brief Non-member constructor that creates a BlockCrsMatrix from an existing point CrsMatrix. /// @@ -110,7 +110,7 @@ namespace Tpetra { /// - Point rows corresponding to a particular mesh node must be stored consecutively. template Teuchos::RCP> - convertToBlockCrsMatrix(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize); + convertToBlockCrsMatrix(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize, bool use_local_ID=true); /// \brief Fill all point entries in a logical block of a CrsMatrix with zeroes. This should be called /// before convertToBlockCrsMatrix if your Crs is not filled. Performed on host. diff --git a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp index f0e3414f41be..48b0321a617b 100644 --- a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp +++ b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp @@ -293,7 +293,7 @@ namespace Tpetra { template Teuchos::RCP > - getBlockCrsGraph(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize) + getBlockCrsGraph(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize, bool use_LID) { /* @@ -342,7 +342,41 @@ namespace Tpetra { const offset_type bs2 = blockSize * blockSize; - { + if (use_LID) { + TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillCrsGraph"); + auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); + auto pointRowptr = pointLocalGraph.row_map; + auto pointColind = pointLocalGraph.entries; + + TEUCHOS_TEST_FOR_EXCEPTION(pointColind.extent(0) % bs2 != 0, + std::runtime_error, "Tpetra::getBlockCrsGraph: " + "local number of non zero entries is not a multiple of blockSize^2 "); + + LO block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize; + row_map_type blockRowptr("blockRowptr", block_rows+1); + entries_type blockColind("blockColind", pointColind.extent(0)/(bs2)); + + Kokkos::parallel_for("fillMesh",range_type(0,block_rows), KOKKOS_LAMBDA(const LO i) { + + const LO offset_b = pointRowptr(i*blockSize)/bs2; + const LO offset_b_max = pointRowptr((i+1)*blockSize)/bs2; + + if (i==block_rows-1) + blockRowptr(i+1) = offset_b_max; + blockRowptr(i) = offset_b; + + const LO offset_p = pointRowptr(i*blockSize); + + for (LO k=0; kfillComplete(meshDomainMap,meshRangeMap); + Kokkos::DefaultExecutionSpace().fence(); + } + else { TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillCrsGraph"); auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); auto pointRowptr = pointLocalGraph.row_map; @@ -395,7 +429,7 @@ namespace Tpetra { template Teuchos::RCP > - convertToBlockCrsMatrix(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize) + convertToBlockCrsMatrix(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize, bool use_LID) { /* @@ -427,64 +461,101 @@ namespace Tpetra { const offset_type bs2 = blockSize * blockSize; - auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize); + auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize, use_LID); - auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap(); - auto localPointColMap = pointMatrix.getColMap()->getLocalMap(); - - { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix"); - values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2); + if (use_LID) { + auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize); { + TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix"); auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); auto pointRowptr = pointLocalGraph.row_map; auto pointColind = pointLocalGraph.entries; offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize; + values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2); auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly); auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map; - auto blockColind = meshCrsGraph->getLocalGraphDevice().entries; - - row_map_type pointGColind("pointGColind", pointColind.extent(0)); - - Kokkos::parallel_for("computePointGColind",range_type(0,pointColind.extent(0)),KOKKOS_LAMBDA(const LO i) { - pointGColind(i) = localPointColMap.getGlobalElement(pointColind(i)); - }); - - row_map_type blockGColind("blockGColind", blockColind.extent(0)); - - Kokkos::parallel_for("computeBlockGColind",range_type(0,blockGColind.extent(0)),KOKKOS_LAMBDA(const LO i) { - blockGColind(i) = localMeshColMap.getGlobalElement(blockColind(i)); - }); Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) { const offset_type blkBeg = blockRowptr[i]; const offset_type numBlocks = blockRowptr[i+1] - blkBeg; - for (offset_type point_i=0; point_i < pointRowptr[i*blockSize + 1] - pointRowptr[i*blockSize]; point_i++) { + // For each block in the row... + for (offset_type block=0; block < numBlocks; block++) { - offset_type block_inv=static_cast(-1); - offset_type little_col_inv=static_cast(-1); - for (offset_type block_2=0; block_2 < numBlocks; block_2++) { - for (LO little_col_2=0; little_col_2 < blockSize; little_col_2++) { - if (blockGColind(blkBeg+block_2)*blockSize + little_col_2 == pointGColind(pointRowptr[i*blockSize] + point_i)) { - block_inv = block_2; - little_col_inv = little_col_2; - break; - } + // For each entry in the block... + for(LO little_row=0; little_row(-1)) - break; } - for(LO little_row=0; little_rowgetColMap()->getLocalMap(); + auto localPointColMap = pointMatrix.getColMap()->getLocalMap(); + + { + TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix"); + values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2); + { + auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); + auto pointRowptr = pointLocalGraph.row_map; + auto pointColind = pointLocalGraph.entries; + + offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize; + auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly); + auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map; + auto blockColind = meshCrsGraph->getLocalGraphDevice().entries; + + row_map_type pointGColind("pointGColind", pointColind.extent(0)); + + Kokkos::parallel_for("computePointGColind",range_type(0,pointColind.extent(0)),KOKKOS_LAMBDA(const LO i) { + pointGColind(i) = localPointColMap.getGlobalElement(pointColind(i)); + }); + + row_map_type blockGColind("blockGColind", blockColind.extent(0)); + + Kokkos::parallel_for("computeBlockGColind",range_type(0,blockGColind.extent(0)),KOKKOS_LAMBDA(const LO i) { + blockGColind(i) = localMeshColMap.getGlobalElement(blockColind(i)); + }); + + Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) { + const offset_type blkBeg = blockRowptr[i]; + const offset_type numBlocks = blockRowptr[i+1] - blkBeg; + + for (offset_type point_i=0; point_i < pointRowptr[i*blockSize + 1] - pointRowptr[i*blockSize]; point_i++) { + + offset_type block_inv=static_cast(-1); + offset_type little_col_inv=static_cast(-1); + for (offset_type block_2=0; block_2 < numBlocks; block_2++) { + for (LO little_col_2=0; little_col_2 < blockSize; little_col_2++) { + if (blockGColind(blkBeg+block_2)*blockSize + little_col_2 == pointGColind(pointRowptr[i*blockSize] + point_i)) { + block_inv = block_2; + little_col_inv = little_col_2; + break; + } + } + if (block_inv!=static_cast(-1)) + break; + } + + for(LO little_row=0; little_row const &A, std::ostream &os); \ template void blockCrsMatrixWriter(BlockCrsMatrix const &A, std::ostream &os, Teuchos::ParameterList const ¶ms); \ template void writeMatrixStrip(BlockCrsMatrix const &A, std::ostream &os, Teuchos::ParameterList const ¶ms); \ - template Teuchos::RCP > convertToBlockCrsMatrix(const CrsMatrix& pointMatrix, const LO &blockSize); \ + template Teuchos::RCP > convertToBlockCrsMatrix(const CrsMatrix& pointMatrix, const LO &blockSize, bool use_LID); \ template Teuchos::RCP > fillLogicalBlocks(const CrsMatrix& pointMatrix, const LO &blockSize); \ template Teuchos::RCP > unfillFormerBlockCrs(const CrsMatrix& pointMatrix); \ template Teuchos::RCP > convertToCrsMatrix(const BlockCrsMatrix& blockMatrix); From 8dfc23944812a4ae582edc6bd1a0ab5fab59c36d Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 17 Apr 2024 11:03:08 -0600 Subject: [PATCH 2/3] fix variable name --- .../tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp index 48b0321a617b..5ff0daa98d54 100644 --- a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp +++ b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp @@ -405,15 +405,15 @@ namespace Tpetra { LO filled_block = 0; for (LO p_i=0; p_i Date: Thu, 18 Apr 2024 10:06:03 -0600 Subject: [PATCH 3/3] Fix the Werror=shadow --- .../src/Tpetra_BlockCrsMatrix_Helpers_def.hpp | 139 +++++++++--------- 1 file changed, 67 insertions(+), 72 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp index 5ff0daa98d54..21e000ba5e30 100644 --- a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp +++ b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp @@ -464,98 +464,93 @@ namespace Tpetra { auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize, use_LID); if (use_LID) { - auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize); - { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix"); - auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); - auto pointRowptr = pointLocalGraph.row_map; - auto pointColind = pointLocalGraph.entries; + TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix"); + auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); + auto pointRowptr = pointLocalGraph.row_map; + auto pointColind = pointLocalGraph.entries; - offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize; - values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2); - auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly); - auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map; + offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize; + values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2); + auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly); + auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map; - Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) { - const offset_type blkBeg = blockRowptr[i]; - const offset_type numBlocks = blockRowptr[i+1] - blkBeg; + Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) { + const offset_type blkBeg = blockRowptr[i]; + const offset_type numBlocks = blockRowptr[i+1] - blkBeg; - // For each block in the row... - for (offset_type block=0; block < numBlocks; block++) { + // For each block in the row... + for (offset_type block=0; block < numBlocks; block++) { - // For each entry in the block... - for(LO little_row=0; little_rowgetColMap()->getLocalMap(); auto localPointColMap = pointMatrix.getColMap()->getLocalMap(); + values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2); { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix"); - values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2); - { - auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); - auto pointRowptr = pointLocalGraph.row_map; - auto pointColind = pointLocalGraph.entries; - - offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize; - auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly); - auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map; - auto blockColind = meshCrsGraph->getLocalGraphDevice().entries; - - row_map_type pointGColind("pointGColind", pointColind.extent(0)); - - Kokkos::parallel_for("computePointGColind",range_type(0,pointColind.extent(0)),KOKKOS_LAMBDA(const LO i) { - pointGColind(i) = localPointColMap.getGlobalElement(pointColind(i)); - }); + auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); + auto pointRowptr = pointLocalGraph.row_map; + auto pointColind = pointLocalGraph.entries; - row_map_type blockGColind("blockGColind", blockColind.extent(0)); + offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize; + auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly); + auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map; + auto blockColind = meshCrsGraph->getLocalGraphDevice().entries; - Kokkos::parallel_for("computeBlockGColind",range_type(0,blockGColind.extent(0)),KOKKOS_LAMBDA(const LO i) { - blockGColind(i) = localMeshColMap.getGlobalElement(blockColind(i)); - }); + row_map_type pointGColind("pointGColind", pointColind.extent(0)); - Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) { - const offset_type blkBeg = blockRowptr[i]; - const offset_type numBlocks = blockRowptr[i+1] - blkBeg; - - for (offset_type point_i=0; point_i < pointRowptr[i*blockSize + 1] - pointRowptr[i*blockSize]; point_i++) { - - offset_type block_inv=static_cast(-1); - offset_type little_col_inv=static_cast(-1); - for (offset_type block_2=0; block_2 < numBlocks; block_2++) { - for (LO little_col_2=0; little_col_2 < blockSize; little_col_2++) { - if (blockGColind(blkBeg+block_2)*blockSize + little_col_2 == pointGColind(pointRowptr[i*blockSize] + point_i)) { - block_inv = block_2; - little_col_inv = little_col_2; - break; - } - } - if (block_inv!=static_cast(-1)) + Kokkos::parallel_for("computePointGColind",range_type(0,pointColind.extent(0)),KOKKOS_LAMBDA(const LO i) { + pointGColind(i) = localPointColMap.getGlobalElement(pointColind(i)); + }); + + row_map_type blockGColind("blockGColind", blockColind.extent(0)); + + Kokkos::parallel_for("computeBlockGColind",range_type(0,blockGColind.extent(0)),KOKKOS_LAMBDA(const LO i) { + blockGColind(i) = localMeshColMap.getGlobalElement(blockColind(i)); + }); + + Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) { + const offset_type blkBeg = blockRowptr[i]; + const offset_type numBlocks = blockRowptr[i+1] - blkBeg; + + for (offset_type point_i=0; point_i < pointRowptr[i*blockSize + 1] - pointRowptr[i*blockSize]; point_i++) { + + offset_type block_inv=static_cast(-1); + offset_type little_col_inv=static_cast(-1); + for (offset_type block_2=0; block_2 < numBlocks; block_2++) { + for (LO little_col_2=0; little_col_2 < blockSize; little_col_2++) { + if (blockGColind(blkBeg+block_2)*blockSize + little_col_2 == pointGColind(pointRowptr[i*blockSize] + point_i)) { + block_inv = block_2; + little_col_inv = little_col_2; break; + } } + if (block_inv!=static_cast(-1)) + break; + } - for(LO little_row=0; little_row