Skip to content

Commit

Permalink
Merge Pull Request #12934 from kliegeois/Trilinos/fix-converter-c
Browse files Browse the repository at this point in the history
Automatically Merged using Trilinos Pull Request AutoTester
PR Title: b'Tpetra: fix converter bug'
PR Author: kliegeois
  • Loading branch information
trilinos-autotester authored Apr 18, 2024
2 parents a28cd86 + 9c905e0 commit b273621
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 15 deletions.
2 changes: 1 addition & 1 deletion packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ initialize ()
int block_size = List_.get<int>("partitioner: block size");
TEUCHOS_TEST_FOR_EXCEPT_MSG
(block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
A_bcrs = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_), block_size);
A_bcrs = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_), block_size, false);
A_ = A_bcrs;
hasBlockCrsMatrix_ = true;
Kokkos::DefaultExecutionSpace().fence();
Expand Down
2 changes: 1 addition & 1 deletion packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ namespace Ifpack2 {
(block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
{
IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA::convertToBlockCrsMatrix");
impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(matrix), block_size);
impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(matrix), block_size, false);
IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ namespace Tpetra {
/// - Point rows corresponding to a particular mesh node must be stored consecutively.
template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<Tpetra::CrsGraph<LO, GO, Node>>
getBlockCrsGraph(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize);
getBlockCrsGraph(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize, bool use_local_ID=true);

/// \brief Non-member constructor that creates a BlockCrsMatrix from an existing point CrsMatrix.
///
Expand All @@ -110,7 +110,7 @@ namespace Tpetra {
/// - Point rows corresponding to a particular mesh node must be stored consecutively.
template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node>>
convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize);
convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize, bool use_local_ID=true);

/// \brief Fill all point entries in a logical block of a CrsMatrix with zeroes. This should be called
/// before convertToBlockCrsMatrix if your Crs is not filled. Performed on host.
Expand Down
88 changes: 77 additions & 11 deletions packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ namespace Tpetra {

template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<Tpetra::CrsGraph<LO, GO, Node> >
getBlockCrsGraph(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize)
getBlockCrsGraph(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize, bool use_LID)
{

/*
Expand Down Expand Up @@ -342,7 +342,41 @@ namespace Tpetra {

const offset_type bs2 = blockSize * blockSize;

{
if (use_LID) {
TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillCrsGraph");
auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
auto pointRowptr = pointLocalGraph.row_map;
auto pointColind = pointLocalGraph.entries;

TEUCHOS_TEST_FOR_EXCEPTION(pointColind.extent(0) % bs2 != 0,
std::runtime_error, "Tpetra::getBlockCrsGraph: "
"local number of non zero entries is not a multiple of blockSize^2 ");

LO block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
row_map_type blockRowptr("blockRowptr", block_rows+1);
entries_type blockColind("blockColind", pointColind.extent(0)/(bs2));

Kokkos::parallel_for("fillMesh",range_type(0,block_rows), KOKKOS_LAMBDA(const LO i) {

const LO offset_b = pointRowptr(i*blockSize)/bs2;
const LO offset_b_max = pointRowptr((i+1)*blockSize)/bs2;

if (i==block_rows-1)
blockRowptr(i+1) = offset_b_max;
blockRowptr(i) = offset_b;

const LO offset_p = pointRowptr(i*blockSize);

for (LO k=0; k<offset_b_max-offset_b; ++k) {
blockColind(offset_b + k) = pointColind(offset_p + k * blockSize)/blockSize;
}
});

meshCrsGraph = rcp(new crs_graph_type(meshRowMap, meshColMap, blockRowptr, blockColind));
meshCrsGraph->fillComplete(meshDomainMap,meshRangeMap);
Kokkos::DefaultExecutionSpace().fence();
}
else {
TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillCrsGraph");
auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
auto pointRowptr = pointLocalGraph.row_map;
Expand Down Expand Up @@ -371,15 +405,15 @@ namespace Tpetra {
LO filled_block = 0;
for (LO p_i=0; p_i<offset_p_max-offset_p; ++p_i) {
auto bcol_GID = localPointColMap.getGlobalElement(pointColind(offset_p + p_i))/blockSize;
auto lcol_GID = localMeshColMap.getLocalElement(bcol_GID);
auto bcol_LID = localMeshColMap.getLocalElement(bcol_GID);

bool visited = false;
for (LO k=0; k<filled_block; ++k) {
if (blockColind(offset_b + k) == lcol_GID)
if (blockColind(offset_b + k) == bcol_LID)
visited = true;
}
if (!visited) {
blockColind(offset_b + filled_block) = lcol_GID;
blockColind(offset_b + filled_block) = bcol_LID;
++filled_block;
}
}
Expand All @@ -395,7 +429,7 @@ namespace Tpetra {

template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node> >
convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize)
convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize, bool use_LID)
{

/*
Expand Down Expand Up @@ -427,13 +461,45 @@ namespace Tpetra {

const offset_type bs2 = blockSize * blockSize;

auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize);
auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize, use_LID);

if (use_LID) {
TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix");
auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
auto pointRowptr = pointLocalGraph.row_map;
auto pointColind = pointLocalGraph.entries;

auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap();
auto localPointColMap = pointMatrix.getColMap()->getLocalMap();
offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2);
auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly);
auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map;

Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
const offset_type blkBeg = blockRowptr[i];
const offset_type numBlocks = blockRowptr[i+1] - blkBeg;

// For each block in the row...
for (offset_type block=0; block < numBlocks; block++) {

// For each entry in the block...
for(LO little_row=0; little_row<blockSize; little_row++) {
offset_type point_row_offset = pointRowptr[i*blockSize + little_row];
for(LO little_col=0; little_col<blockSize; little_col++) {
blockValues((blkBeg+block) * bs2 + little_row * blockSize + little_col) =
pointValues[point_row_offset + block*blockSize + little_col];
}
}

{
}
});
blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockValues, blockSize));
Kokkos::DefaultExecutionSpace().fence();
}
else {
TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix");
auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap();
auto localPointColMap = pointMatrix.getColMap()->getLocalMap();

values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2);
{
auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
Expand Down Expand Up @@ -1029,7 +1095,7 @@ namespace Tpetra {
template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os); \
template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
template void writeMatrixStrip(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
template Teuchos::RCP<BlockCrsMatrix<S, LO, GO, NODE> > convertToBlockCrsMatrix(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize); \
template Teuchos::RCP<BlockCrsMatrix<S, LO, GO, NODE> > convertToBlockCrsMatrix(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize, bool use_LID); \
template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > fillLogicalBlocks(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize); \
template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > unfillFormerBlockCrs(const CrsMatrix<S, LO, GO, NODE>& pointMatrix); \
template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > convertToCrsMatrix(const BlockCrsMatrix<S, LO, GO, NODE>& blockMatrix);
Expand Down

0 comments on commit b273621

Please sign in to comment.