Skip to content

Commit

Permalink
use GIDs to convert from point matrix to block matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
kliegeois committed Apr 11, 2024
1 parent f5169f5 commit db3283e
Showing 1 changed file with 70 additions and 26 deletions.
96 changes: 70 additions & 26 deletions packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,10 @@ namespace Tpetra {
RCP<const map_type> meshColMap = createMeshMap<LO,GO,Node>(blockSize, pointColMap);
if(meshColMap.is_null()) throw std::runtime_error("ERROR: Cannot create mesh colmap");

auto localMeshColMap = meshColMap->getLocalMap();
auto localPointColMap = pointColMap.getLocalMap();
auto localPointRowMap = pointRowMap.getLocalMap();

const map_type &pointDomainMap = *(pointMatrix.getDomainMap());
RCP<const map_type> meshDomainMap = createMeshMap<LO,GO,Node>(blockSize, pointDomainMap);

Expand Down Expand Up @@ -362,9 +366,22 @@ namespace Tpetra {
blockRowptr(i) = offset_b;

const LO offset_p = pointRowptr(i*blockSize);
const LO offset_p_max = pointRowptr((i+1)*blockSize);

LO filled_block = 0;
for (LO p_i=0; p_i<offset_p_max-offset_p; ++p_i) {
auto bcol_GID = localPointColMap.getGlobalElement(pointColind(offset_p + p_i))/blockSize;
auto lcol_GID = localMeshColMap.getLocalElement(bcol_GID);

for (LO k=0; k<offset_b_max-offset_b; ++k) {
blockColind(offset_b + k) = pointColind(offset_p + k * blockSize)/blockSize;
bool visited = false;
for (LO k=0; k<filled_block; ++k) {
if (blockColind(offset_b + k) == lcol_GID)
visited = true;
}
if (!visited) {
blockColind(offset_b + filled_block) = lcol_GID;
++filled_block;
}
}
});

Expand Down Expand Up @@ -411,35 +428,62 @@ namespace Tpetra {
const offset_type bs2 = blockSize * blockSize;

auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize);

auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap();
auto localPointColMap = pointMatrix.getColMap()->getLocalMap();

{
TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix");
auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
auto pointRowptr = pointLocalGraph.row_map;
auto pointColind = pointLocalGraph.entries;

offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2);
auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly);
auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map;

Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
const offset_type blkBeg = blockRowptr[i];
const offset_type numBlocks = blockRowptr[i+1] - blkBeg;

// For each block in the row...
for (offset_type block=0; block < numBlocks; block++) {

// For each entry in the block...
for(LO little_row=0; little_row<blockSize; little_row++) {
offset_type point_row_offset = pointRowptr[i*blockSize + little_row];
for(LO little_col=0; little_col<blockSize; little_col++) {
blockValues((blkBeg+block) * bs2 + little_row * blockSize + little_col) =
pointValues[point_row_offset + block*blockSize + little_col];
}
}
{
auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
auto pointRowptr = pointLocalGraph.row_map;
auto pointColind = pointLocalGraph.entries;

}
offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly);
auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map;
auto blockColind = meshCrsGraph->getLocalGraphDevice().entries;

row_map_type pointGColind("pointGColind", pointColind.extent(0));

Kokkos::parallel_for("computePointGColind",range_type(0,pointColind.extent(0)),KOKKOS_LAMBDA(const LO i) {
pointGColind(i) = localPointColMap.getGlobalElement(pointColind(i));
});

row_map_type blockGColind("blockGColind", blockColind.extent(0));

Kokkos::parallel_for("computeBlockGColind",range_type(0,blockGColind.extent(0)),KOKKOS_LAMBDA(const LO i) {
blockGColind(i) = localMeshColMap.getGlobalElement(blockColind(i));
});

Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
const offset_type blkBeg = blockRowptr[i];
const offset_type numBlocks = blockRowptr[i+1] - blkBeg;

for (offset_type point_i=0; point_i < pointRowptr[i*blockSize + 1] - pointRowptr[i*blockSize]; point_i++) {

offset_type block_inv=static_cast<offset_type>(-1);
offset_type little_col_inv=static_cast<offset_type>(-1);
for (offset_type block_2=0; block_2 < numBlocks; block_2++) {
for (LO little_col_2=0; little_col_2 < blockSize; little_col_2++) {
if (blockGColind(blkBeg+block_2)*blockSize + little_col_2 == pointGColind(pointRowptr[i*blockSize] + point_i)) {
block_inv = block_2;
little_col_inv = little_col_2;
break;
}
}
if (block_inv!=static_cast<offset_type>(-1))
break;
}

for(LO little_row=0; little_row<blockSize; little_row++) {
offset_type point_row_offset = pointRowptr[i*blockSize + little_row];
blockValues((blkBeg+block_inv) * bs2 + little_row * blockSize + little_col_inv) = pointValues[pointRowptr[i*blockSize+little_row] + point_i];
}
}
});
}
blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockValues, blockSize));
Kokkos::DefaultExecutionSpace().fence();
}
Expand Down

0 comments on commit db3283e

Please sign in to comment.