Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tpetra: fix converter bug #12934

Merged
merged 3 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ initialize ()
int block_size = List_.get<int>("partitioner: block size");
TEUCHOS_TEST_FOR_EXCEPT_MSG
(block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
A_bcrs = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_), block_size);
A_bcrs = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_), block_size, false);
A_ = A_bcrs;
hasBlockCrsMatrix_ = true;
Kokkos::DefaultExecutionSpace().fence();
Expand Down
2 changes: 1 addition & 1 deletion packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ namespace Ifpack2 {
(block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
{
IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA::convertToBlockCrsMatrix");
impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(matrix), block_size);
impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(matrix), block_size, false);
IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ namespace Tpetra {
/// - Point rows corresponding to a particular mesh node must be stored consecutively.
template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<Tpetra::CrsGraph<LO, GO, Node>>
getBlockCrsGraph(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize);
getBlockCrsGraph(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize, bool use_local_ID=true);

/// \brief Non-member constructor that creates a BlockCrsMatrix from an existing point CrsMatrix.
///
Expand All @@ -110,7 +110,7 @@ namespace Tpetra {
/// - Point rows corresponding to a particular mesh node must be stored consecutively.
template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node>>
convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize);
convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize, bool use_local_ID=true);

/// \brief Fill all point entries in a logical block of a CrsMatrix with zeroes. This should be called
/// before convertToBlockCrsMatrix if your Crs is not filled. Performed on host.
Expand Down
88 changes: 77 additions & 11 deletions packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ namespace Tpetra {

template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<Tpetra::CrsGraph<LO, GO, Node> >
getBlockCrsGraph(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize)
getBlockCrsGraph(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize, bool use_LID)
{

/*
Expand Down Expand Up @@ -342,7 +342,41 @@ namespace Tpetra {

const offset_type bs2 = blockSize * blockSize;

{
if (use_LID) {
TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillCrsGraph");
auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
auto pointRowptr = pointLocalGraph.row_map;
auto pointColind = pointLocalGraph.entries;

TEUCHOS_TEST_FOR_EXCEPTION(pointColind.extent(0) % bs2 != 0,
std::runtime_error, "Tpetra::getBlockCrsGraph: "
"local number of non zero entries is not a multiple of blockSize^2 ");

LO block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
row_map_type blockRowptr("blockRowptr", block_rows+1);
entries_type blockColind("blockColind", pointColind.extent(0)/(bs2));

Kokkos::parallel_for("fillMesh",range_type(0,block_rows), KOKKOS_LAMBDA(const LO i) {

const LO offset_b = pointRowptr(i*blockSize)/bs2;
const LO offset_b_max = pointRowptr((i+1)*blockSize)/bs2;

if (i==block_rows-1)
blockRowptr(i+1) = offset_b_max;
blockRowptr(i) = offset_b;

const LO offset_p = pointRowptr(i*blockSize);

for (LO k=0; k<offset_b_max-offset_b; ++k) {
blockColind(offset_b + k) = pointColind(offset_p + k * blockSize)/blockSize;
}
});

meshCrsGraph = rcp(new crs_graph_type(meshRowMap, meshColMap, blockRowptr, blockColind));
meshCrsGraph->fillComplete(meshDomainMap,meshRangeMap);
Kokkos::DefaultExecutionSpace().fence();
}
else {
TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillCrsGraph");
auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
auto pointRowptr = pointLocalGraph.row_map;
Expand Down Expand Up @@ -371,15 +405,15 @@ namespace Tpetra {
LO filled_block = 0;
for (LO p_i=0; p_i<offset_p_max-offset_p; ++p_i) {
auto bcol_GID = localPointColMap.getGlobalElement(pointColind(offset_p + p_i))/blockSize;
auto lcol_GID = localMeshColMap.getLocalElement(bcol_GID);
auto bcol_LID = localMeshColMap.getLocalElement(bcol_GID);

bool visited = false;
for (LO k=0; k<filled_block; ++k) {
if (blockColind(offset_b + k) == lcol_GID)
if (blockColind(offset_b + k) == bcol_LID)
visited = true;
}
if (!visited) {
blockColind(offset_b + filled_block) = lcol_GID;
blockColind(offset_b + filled_block) = bcol_LID;
++filled_block;
}
}
Expand All @@ -395,7 +429,7 @@ namespace Tpetra {

template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node> >
convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize)
convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize, bool use_LID)
{

/*
Expand Down Expand Up @@ -427,13 +461,45 @@ namespace Tpetra {

const offset_type bs2 = blockSize * blockSize;

auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize);
auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize, use_LID);

if (use_LID) {
TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix");
auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
auto pointRowptr = pointLocalGraph.row_map;
auto pointColind = pointLocalGraph.entries;

auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap();
auto localPointColMap = pointMatrix.getColMap()->getLocalMap();
offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2);
auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly);
auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map;

Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
const offset_type blkBeg = blockRowptr[i];
const offset_type numBlocks = blockRowptr[i+1] - blkBeg;

// For each block in the row...
for (offset_type block=0; block < numBlocks; block++) {

// For each entry in the block...
for(LO little_row=0; little_row<blockSize; little_row++) {
offset_type point_row_offset = pointRowptr[i*blockSize + little_row];
for(LO little_col=0; little_col<blockSize; little_col++) {
blockValues((blkBeg+block) * bs2 + little_row * blockSize + little_col) =
pointValues[point_row_offset + block*blockSize + little_col];
}
}

{
}
});
blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockValues, blockSize));
Kokkos::DefaultExecutionSpace().fence();
}
else {
TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix");
auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap();
auto localPointColMap = pointMatrix.getColMap()->getLocalMap();

values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2);
{
auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
Expand Down Expand Up @@ -1029,7 +1095,7 @@ namespace Tpetra {
template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os); \
template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
template void writeMatrixStrip(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
template Teuchos::RCP<BlockCrsMatrix<S, LO, GO, NODE> > convertToBlockCrsMatrix(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize); \
template Teuchos::RCP<BlockCrsMatrix<S, LO, GO, NODE> > convertToBlockCrsMatrix(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize, bool use_LID); \
template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > fillLogicalBlocks(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize); \
template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > unfillFormerBlockCrs(const CrsMatrix<S, LO, GO, NODE>& pointMatrix); \
template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > convertToCrsMatrix(const BlockCrsMatrix<S, LO, GO, NODE>& blockMatrix);
Expand Down
Loading