diff --git a/.gitignore b/.gitignore index 22d232fe90..3d3d8ff0ab 100644 --- a/.gitignore +++ b/.gitignore @@ -79,6 +79,7 @@ src/test/zboxloop.exe src/test/test_mp src/test/test_mp_pcg src/test/test_mp_pcg_3d +src/test/test_csr_overlap src/examples/ex[0-9] src/examples/ex[0-9].exe src/examples/ex1[0-8] diff --git a/src/parcsr_mv/CMakeLists.txt b/src/parcsr_mv/CMakeLists.txt index 7c0f810394..bdd160cbfe 100644 --- a/src/parcsr_mv/CMakeLists.txt +++ b/src/parcsr_mv/CMakeLists.txt @@ -38,6 +38,7 @@ set(REGULAR_SRCS par_csr_communication.c par_csr_filter.c par_csr_matop.c + par_csr_overlap.c par_csr_matrix.c par_csr_matrix_stats.c par_csr_matmat.c diff --git a/src/parcsr_mv/Makefile b/src/parcsr_mv/Makefile index 05fa5ae7f4..220cfe92dd 100644 --- a/src/parcsr_mv/Makefile +++ b/src/parcsr_mv/Makefile @@ -27,6 +27,7 @@ HEADERS =\ par_chord_matrix.h\ par_csr_communication.h\ par_csr_matrix.h\ + par_csr_overlap.h\ par_vector.h FILES =\ @@ -52,6 +53,7 @@ FILES =\ par_csr_matmat.c\ par_csr_matvec.c\ par_csr_matop_marked.c\ + par_csr_overlap.c\ par_csr_triplemat.c\ par_make_system.c\ par_vector.c\ diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index 82bf54ff91..b2417e351b 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -535,6 +535,71 @@ typedef struct * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ +/****************************************************************************** + * + * Header info for overlapping domain decomposition + * + *****************************************************************************/ + +#ifndef hypre_PAR_CSR_OVERLAP_HEADER +#define hypre_PAR_CSR_OVERLAP_HEADER + +/*-------------------------------------------------------------------------- + * hypre_OverlapData + * + * Data structure for overlapping domain decomposition. + * Stores information about the extended subdomain for a processor. 
+ *--------------------------------------------------------------------------*/ + +typedef struct hypre_OverlapData_struct +{ + /* Overlap configuration */ + HYPRE_Int overlap_order; /* Overlap order (delta >= 0) */ + + /* Original local partition info */ + HYPRE_Int num_local_rows; /* Original local rows */ + HYPRE_BigInt first_row_index; /* First row owned by this proc */ + HYPRE_BigInt last_row_index; /* Last row owned by this proc */ + + /* Extended subdomain information */ + HYPRE_Int num_extended_rows; /* Total rows in extended domain */ + HYPRE_Int num_overlap_rows; /* External rows (from overlap) */ + HYPRE_BigInt *extended_row_indices; /* Global indices of all extended rows */ + HYPRE_Int *row_is_owned; /* 1 if row is owned, 0 if external */ + + /* Communication package for fetching overlap data */ + hypre_ParCSRCommPkg *overlap_comm_pkg; + + /* External CSR matrix (fetched from other procs) */ + hypre_CSRMatrix *A_ext; /* CSR matrix of external rows */ + HYPRE_BigInt *ext_map; /* Global row indices for external rows */ + +} hypre_OverlapData; + +/*-------------------------------------------------------------------------- + * Accessor macros for hypre_OverlapData + *--------------------------------------------------------------------------*/ + +#define hypre_OverlapDataOverlapOrder(data) ((data)->overlap_order) +#define hypre_OverlapDataNumLocalRows(data) ((data)->num_local_rows) +#define hypre_OverlapDataFirstRowIndex(data) ((data)->first_row_index) +#define hypre_OverlapDataLastRowIndex(data) ((data)->last_row_index) +#define hypre_OverlapDataNumExtendedRows(data) ((data)->num_extended_rows) +#define hypre_OverlapDataNumOverlapRows(data) ((data)->num_overlap_rows) +#define hypre_OverlapDataExtendedRowIndices(data) ((data)->extended_row_indices) +#define hypre_OverlapDataRowIsOwned(data) ((data)->row_is_owned) +#define hypre_OverlapDataOverlapCommPkg(data) ((data)->overlap_comm_pkg) +#define hypre_OverlapDataExternalMatrix(data) ((data)->A_ext) +#define 
hypre_OverlapDataExternalRowMap(data) ((data)->ext_map) + +#endif /* hypre_PAR_CSR_OVERLAP_HEADER */ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + /****************************************************************************** * * Tree structure for keeping track of numbers (e.g. column numbers) - @@ -1311,6 +1376,19 @@ HYPRE_Int hypre_ParVectorGetValuesDevice(hypre_ParVector *vector, HYPRE_Int num_ /* HYPRE_parcsr_vector.c */ HYPRE_Int hypre_ParVectorStridedCopy( hypre_ParVector *x, HYPRE_Int istride, HYPRE_Int ostride, HYPRE_Int size, HYPRE_Complex *data ); + +/* par_csr_overlap.c */ +hypre_OverlapData* hypre_OverlapDataCreate( void ); +HYPRE_Int hypre_OverlapDataDestroy( hypre_OverlapData *overlap_data ); +HYPRE_Int hypre_ParCSRMatrixComputeOverlap( hypre_ParCSRMatrix *A, HYPRE_Int overlap_order, + hypre_OverlapData **overlap_data_ptr ); +HYPRE_Int hypre_ParCSRMatrixGetExternalMatrix( hypre_ParCSRMatrix *A, + hypre_OverlapData *overlap_data ); +HYPRE_Int hypre_ParCSRMatrixCreateExtendedMatrix( hypre_ParCSRMatrix *A, + hypre_OverlapData *overlap_data, + hypre_CSRMatrix **A_local_ptr, + HYPRE_BigInt **col_map_ptr, + HYPRE_Int *num_cols_local_ptr ); /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
diff --git a/src/parcsr_mv/_hypre_parcsr_mv_mup.h b/src/parcsr_mv/_hypre_parcsr_mv_mup.h index f8ee8b1e5c..8568640a20 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv_mup.h +++ b/src/parcsr_mv/_hypre_parcsr_mv_mup.h @@ -286,6 +286,20 @@ hypre_NumbersQuery_dbl( hypre_NumbersNode *node, const HYPRE_Int n ); HYPRE_Int hypre_NumbersQuery_long_dbl( hypre_NumbersNode *node, const HYPRE_Int n ); +hypre_OverlapData* +hypre_OverlapDataCreate_flt( void ); +hypre_OverlapData* +hypre_OverlapDataCreate_dbl( void ); +hypre_OverlapData* +hypre_OverlapDataCreate_long_dbl( void ); + +HYPRE_Int +hypre_OverlapDataDestroy_flt( hypre_OverlapData *overlap_data ); +HYPRE_Int +hypre_OverlapDataDestroy_dbl( hypre_OverlapData *overlap_data ); +HYPRE_Int +hypre_OverlapDataDestroy_long_dbl( hypre_OverlapData *overlap_data ); + void hypre_ParAat_RowSizes_flt( HYPRE_Int **C_diag_i, HYPRE_Int **C_offd_i, HYPRE_Int *B_marker, HYPRE_Int *A_diag_i, HYPRE_Int *A_diag_j, HYPRE_Int *A_offd_i, HYPRE_Int *A_offd_j, HYPRE_BigInt *A_col_map_offd, HYPRE_Int *A_ext_i, HYPRE_BigInt *A_ext_j, HYPRE_BigInt *A_ext_row_map, HYPRE_Int *C_diag_size, HYPRE_Int *C_offd_size, HYPRE_Int num_rows_diag_A, HYPRE_Int num_cols_offd_A, HYPRE_Int num_rows_A_ext, HYPRE_BigInt first_col_diag_A, HYPRE_BigInt first_row_index_A ); void @@ -587,6 +601,13 @@ hypre_ParCSRMatrixCompressOffdMap_dbl( hypre_ParCSRMatrix *A ); HYPRE_Int hypre_ParCSRMatrixCompressOffdMap_long_dbl( hypre_ParCSRMatrix *A ); +HYPRE_Int +hypre_ParCSRMatrixComputeOverlap_flt( hypre_ParCSRMatrix *A, HYPRE_Int overlap_order, hypre_OverlapData **overlap_data_ptr ); +HYPRE_Int +hypre_ParCSRMatrixComputeOverlap_dbl( hypre_ParCSRMatrix *A, HYPRE_Int overlap_order, hypre_OverlapData **overlap_data_ptr ); +HYPRE_Int +hypre_ParCSRMatrixComputeOverlap_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int overlap_order, hypre_OverlapData **overlap_data_ptr ); + HYPRE_Int hypre_ParCSRMatrixComputeScalingTagged_flt( hypre_ParCSRMatrix *A, HYPRE_Int type, HYPRE_MemoryLocation memloc_tags, 
HYPRE_Int num_tags, HYPRE_Int *tags, hypre_ParVector **scaling_ptr ); HYPRE_Int @@ -636,6 +657,13 @@ hypre_ParCSRMatrixCreateAssumedPartition_dbl( hypre_ParCSRMatrix *matrix ); HYPRE_Int hypre_ParCSRMatrixCreateAssumedPartition_long_dbl( hypre_ParCSRMatrix *matrix ); +HYPRE_Int +hypre_ParCSRMatrixCreateExtendedMatrix_flt( hypre_ParCSRMatrix *A, hypre_OverlapData *overlap_data, hypre_CSRMatrix **A_local_ptr, HYPRE_BigInt **col_map_ptr, HYPRE_Int *num_cols_local_ptr ); +HYPRE_Int +hypre_ParCSRMatrixCreateExtendedMatrix_dbl( hypre_ParCSRMatrix *A, hypre_OverlapData *overlap_data, hypre_CSRMatrix **A_local_ptr, HYPRE_BigInt **col_map_ptr, HYPRE_Int *num_cols_local_ptr ); +HYPRE_Int +hypre_ParCSRMatrixCreateExtendedMatrix_long_dbl( hypre_ParCSRMatrix *A, hypre_OverlapData *overlap_data, hypre_CSRMatrix **A_local_ptr, HYPRE_BigInt **col_map_ptr, HYPRE_Int *num_cols_local_ptr ); + hypre_ParCSRMatrix* hypre_ParCSRMatrixCreateFromDenseBlockMatrix_flt( MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts, HYPRE_BigInt *col_starts, hypre_DenseBlockMatrix *B ); hypre_ParCSRMatrix* @@ -783,6 +811,13 @@ hypre_ParCSRMatrixGenerateFFFCHost_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_mar HYPRE_Int hypre_ParCSRMatrixGenerateFFFCHost_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGetExternalMatrix_flt( hypre_ParCSRMatrix *A, hypre_OverlapData *overlap_data ); +HYPRE_Int +hypre_ParCSRMatrixGetExternalMatrix_dbl( hypre_ParCSRMatrix *A, hypre_OverlapData *overlap_data ); +HYPRE_Int +hypre_ParCSRMatrixGetExternalMatrix_long_dbl( hypre_ParCSRMatrix *A, hypre_OverlapData *overlap_data ); + HYPRE_Int hypre_ParCSRMatrixGetLocalRange_flt( hypre_ParCSRMatrix *matrix, HYPRE_BigInt *row_start, HYPRE_BigInt *row_end, HYPRE_BigInt *col_start, HYPRE_BigInt *col_end ); HYPRE_Int diff --git 
a/src/parcsr_mv/_hypre_parcsr_mv_mup_def.h b/src/parcsr_mv/_hypre_parcsr_mv_mup_def.h index 0863b0b593..81ac951636 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv_mup_def.h +++ b/src/parcsr_mv/_hypre_parcsr_mv_mup_def.h @@ -95,6 +95,8 @@ #define hypre_NumbersNEntered HYPRE_FIXEDPRECISION_FUNC ( hypre_NumbersNEntered ) #define hypre_NumbersNewNode HYPRE_FIXEDPRECISION_FUNC ( hypre_NumbersNewNode ) #define hypre_NumbersQuery HYPRE_FIXEDPRECISION_FUNC ( hypre_NumbersQuery ) +#define hypre_OverlapDataCreate HYPRE_FIXEDPRECISION_FUNC ( hypre_OverlapDataCreate ) +#define hypre_OverlapDataDestroy HYPRE_FIXEDPRECISION_FUNC ( hypre_OverlapDataDestroy ) #define hypre_ParAat_RowSizes HYPRE_FIXEDPRECISION_FUNC ( hypre_ParAat_RowSizes ) #define hypre_ParBooleanAAt HYPRE_FIXEDPRECISION_FUNC ( hypre_ParBooleanAAt ) #define hypre_ParBooleanMatmul HYPRE_FIXEDPRECISION_FUNC ( hypre_ParBooleanMatmul ) @@ -142,6 +144,7 @@ #define hypre_ParCSRMatrixColSum HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixColSum ) #define hypre_ParCSRMatrixColSumHost HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixColSumHost ) #define hypre_ParCSRMatrixCompressOffdMap HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCompressOffdMap ) +#define hypre_ParCSRMatrixComputeOverlap HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixComputeOverlap ) #define hypre_ParCSRMatrixComputeScalingTagged HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixComputeScalingTagged ) #define hypre_ParCSRMatrixCopy HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCopy ) #define hypre_ParCSRMatrixCopyColMapOffdToDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCopyColMapOffdToDevice ) @@ -149,6 +152,7 @@ #define hypre_ParCSRMatrixCopy_C HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCopy_C ) #define hypre_ParCSRMatrixCreate HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCreate ) #define hypre_ParCSRMatrixCreateAssumedPartition HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCreateAssumedPartition ) +#define hypre_ParCSRMatrixCreateExtendedMatrix 
HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCreateExtendedMatrix ) #define hypre_ParCSRMatrixCreateFromDenseBlockMatrix HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCreateFromDenseBlockMatrix ) #define hypre_ParCSRMatrixCreateFromParVector HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCreateFromParVector ) #define hypre_ParCSRMatrixDestroy HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixDestroy ) @@ -171,6 +175,7 @@ #define hypre_ParCSRMatrixGenerateFFFC3 HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGenerateFFFC3 ) #define hypre_ParCSRMatrixGenerateFFFCD3 HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGenerateFFFCD3 ) #define hypre_ParCSRMatrixGenerateFFFCHost HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGenerateFFFCHost ) +#define hypre_ParCSRMatrixGetExternalMatrix HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGetExternalMatrix ) #define hypre_ParCSRMatrixGetLocalRange HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGetLocalRange ) #define hypre_ParCSRMatrixGetRow HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGetRow ) #define hypre_ParCSRMatrixGetRowHost HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGetRowHost ) diff --git a/src/parcsr_mv/_hypre_parcsr_mv_mup_undef.h b/src/parcsr_mv/_hypre_parcsr_mv_mup_undef.h index 7f9aa3906c..fb125c2693 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv_mup_undef.h +++ b/src/parcsr_mv/_hypre_parcsr_mv_mup_undef.h @@ -92,6 +92,8 @@ #undef hypre_NumbersNEntered #undef hypre_NumbersNewNode #undef hypre_NumbersQuery +#undef hypre_OverlapDataCreate +#undef hypre_OverlapDataDestroy #undef hypre_ParAat_RowSizes #undef hypre_ParBooleanAAt #undef hypre_ParBooleanMatmul @@ -139,6 +141,7 @@ #undef hypre_ParCSRMatrixColSum #undef hypre_ParCSRMatrixColSumHost #undef hypre_ParCSRMatrixCompressOffdMap +#undef hypre_ParCSRMatrixComputeOverlap #undef hypre_ParCSRMatrixComputeScalingTagged #undef hypre_ParCSRMatrixCopy #undef hypre_ParCSRMatrixCopyColMapOffdToDevice @@ -146,6 +149,7 @@ #undef hypre_ParCSRMatrixCopy_C #undef hypre_ParCSRMatrixCreate 
#undef hypre_ParCSRMatrixCreateAssumedPartition +#undef hypre_ParCSRMatrixCreateExtendedMatrix #undef hypre_ParCSRMatrixCreateFromDenseBlockMatrix #undef hypre_ParCSRMatrixCreateFromParVector #undef hypre_ParCSRMatrixDestroy @@ -168,6 +172,7 @@ #undef hypre_ParCSRMatrixGenerateFFFC3 #undef hypre_ParCSRMatrixGenerateFFFCD3 #undef hypre_ParCSRMatrixGenerateFFFCHost +#undef hypre_ParCSRMatrixGetExternalMatrix #undef hypre_ParCSRMatrixGetLocalRange #undef hypre_ParCSRMatrixGetRow #undef hypre_ParCSRMatrixGetRowHost diff --git a/src/parcsr_mv/headers b/src/parcsr_mv/headers index c7e6b97d09..a03ebe9074 100755 --- a/src/parcsr_mv/headers +++ b/src/parcsr_mv/headers @@ -42,6 +42,7 @@ cat par_csr_assumed_part.h >> $INTERNAL_HEADER cat new_commpkg.h >> $INTERNAL_HEADER cat par_vector.h >> $INTERNAL_HEADER cat par_csr_matrix.h >> $INTERNAL_HEADER +cat par_csr_overlap.h >> $INTERNAL_HEADER cat numbers.h >> $INTERNAL_HEADER cat par_chord_matrix.h >> $INTERNAL_HEADER cat par_make_system.h >> $INTERNAL_HEADER diff --git a/src/parcsr_mv/mup.fixed b/src/parcsr_mv/mup.fixed index e26fcfd8e0..41f71207a2 100644 --- a/src/parcsr_mv/mup.fixed +++ b/src/parcsr_mv/mup.fixed @@ -37,6 +37,8 @@ hypre_NumbersEnter hypre_NumbersNEntered hypre_NumbersNewNode hypre_NumbersQuery +hypre_OverlapDataCreate +hypre_OverlapDataDestroy hypre_ParAat_RowSizes hypre_ParBooleanAAt hypre_ParBooleanMatmul @@ -84,6 +86,7 @@ hypre_ParCSRMatrixClone_v2 hypre_ParCSRMatrixColSum hypre_ParCSRMatrixColSumHost hypre_ParCSRMatrixCompressOffdMap +hypre_ParCSRMatrixComputeOverlap hypre_ParCSRMatrixComputeScalingTagged hypre_ParCSRMatrixCopy hypre_ParCSRMatrixCopyColMapOffdToDevice @@ -91,6 +94,7 @@ hypre_ParCSRMatrixCopyColMapOffdToHost hypre_ParCSRMatrixCopy_C hypre_ParCSRMatrixCreate hypre_ParCSRMatrixCreateAssumedPartition +hypre_ParCSRMatrixCreateExtendedMatrix hypre_ParCSRMatrixCreateFromDenseBlockMatrix hypre_ParCSRMatrixCreateFromParVector hypre_ParCSRMatrixDestroy @@ -113,6 +117,7 @@ 
hypre_ParCSRMatrixGenerateFFFC hypre_ParCSRMatrixGenerateFFFC3 hypre_ParCSRMatrixGenerateFFFCD3 hypre_ParCSRMatrixGenerateFFFCHost +hypre_ParCSRMatrixGetExternalMatrix hypre_ParCSRMatrixGetLocalRange hypre_ParCSRMatrixGetRow hypre_ParCSRMatrixGetRowHost diff --git a/src/parcsr_mv/mup_fixed.c b/src/parcsr_mv/mup_fixed.c index 53a37d5e31..3cb4fe04c1 100644 --- a/src/parcsr_mv/mup_fixed.c +++ b/src/parcsr_mv/mup_fixed.c @@ -320,6 +320,22 @@ hypre_NumbersQuery( hypre_NumbersNode *node, const HYPRE_Int n ) /*--------------------------------------------------------------------------*/ +hypre_OverlapData* +hypre_OverlapDataCreate( void ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_OverlapDataCreate)( ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_OverlapDataDestroy( hypre_OverlapData *overlap_data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_OverlapDataDestroy)( overlap_data ); +} + +/*--------------------------------------------------------------------------*/ + void hypre_ParAat_RowSizes( HYPRE_Int **C_diag_i, HYPRE_Int **C_offd_i, HYPRE_Int *B_marker, HYPRE_Int *A_diag_i, HYPRE_Int *A_diag_j, HYPRE_Int *A_offd_i, HYPRE_Int *A_offd_j, HYPRE_BigInt *A_col_map_offd, HYPRE_Int *A_ext_i, HYPRE_BigInt *A_ext_j, HYPRE_BigInt *A_ext_row_map, HYPRE_Int *C_diag_size, HYPRE_Int *C_offd_size, HYPRE_Int num_rows_diag_A, HYPRE_Int num_cols_offd_A, HYPRE_Int num_rows_A_ext, HYPRE_BigInt first_col_diag_A, HYPRE_BigInt first_row_index_A ) { @@ -664,6 +680,14 @@ hypre_ParCSRMatrixCompressOffdMap( hypre_ParCSRMatrix *A ) /*--------------------------------------------------------------------------*/ +HYPRE_Int +hypre_ParCSRMatrixComputeOverlap( hypre_ParCSRMatrix *A, HYPRE_Int overlap_order, hypre_OverlapData **overlap_data_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixComputeOverlap)( A, overlap_order, overlap_data_ptr ); +} + +/*--------------------------------------------------------------------------*/ 
+ HYPRE_Int hypre_ParCSRMatrixComputeScalingTagged( hypre_ParCSRMatrix *A, HYPRE_Int type, HYPRE_MemoryLocation memloc_tags, HYPRE_Int num_tags, HYPRE_Int *tags, hypre_ParVector **scaling_ptr ) { @@ -720,6 +744,14 @@ hypre_ParCSRMatrixCreateAssumedPartition( hypre_ParCSRMatrix *matrix ) /*--------------------------------------------------------------------------*/ +HYPRE_Int +hypre_ParCSRMatrixCreateExtendedMatrix( hypre_ParCSRMatrix *A, hypre_OverlapData *overlap_data, hypre_CSRMatrix **A_local_ptr, HYPRE_BigInt **col_map_ptr, HYPRE_Int *num_cols_local_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixCreateExtendedMatrix)( A, overlap_data, A_local_ptr, col_map_ptr, num_cols_local_ptr ); +} + +/*--------------------------------------------------------------------------*/ + hypre_ParCSRMatrix* hypre_ParCSRMatrixCreateFromDenseBlockMatrix( MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts, HYPRE_BigInt *col_starts, hypre_DenseBlockMatrix *B ) { @@ -888,6 +920,14 @@ hypre_ParCSRMatrixGenerateFFFCHost( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, /*--------------------------------------------------------------------------*/ +HYPRE_Int +hypre_ParCSRMatrixGetExternalMatrix( hypre_ParCSRMatrix *A, hypre_OverlapData *overlap_data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixGetExternalMatrix)( A, overlap_data ); +} + +/*--------------------------------------------------------------------------*/ + HYPRE_Int hypre_ParCSRMatrixGetLocalRange( hypre_ParCSRMatrix *matrix, HYPRE_BigInt *row_start, HYPRE_BigInt *row_end, HYPRE_BigInt *col_start, HYPRE_BigInt *col_end ) { diff --git a/src/parcsr_mv/par_csr_matrix.c b/src/parcsr_mv/par_csr_matrix.c index 9834d50225..876f457078 100644 --- a/src/parcsr_mv/par_csr_matrix.c +++ b/src/parcsr_mv/par_csr_matrix.c @@ -995,7 +995,7 @@ hypre_ParCSRMatrixPrintIJ( const hypre_ParCSRMatrix *matrix, if ((file = fopen(new_filename, "w")) == NULL) { - 
hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: can't open output file %s\n"); + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: can't open output file\n"); return hypre_error_flag; } @@ -1453,7 +1453,7 @@ hypre_ParCSRMatrixReadIJ( MPI_Comm comm, if ((file = fopen(new_filename, "r")) == NULL) { - hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: can't open output file %s\n"); + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: can't open output file\n"); return hypre_error_flag; } diff --git a/src/parcsr_mv/par_csr_overlap.c b/src/parcsr_mv/par_csr_overlap.c new file mode 100644 index 0000000000..80f5506b9d --- /dev/null +++ b/src/parcsr_mv/par_csr_overlap.c @@ -0,0 +1,738 @@ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/****************************************************************************** + * + * Overlapping domain decomposition computation for ParCSR matrices + * + *****************************************************************************/ + +#include "_hypre_utilities.h" +#include "_hypre_parcsr_mv.h" + +/*-------------------------------------------------------------------------- + * Create and initialize an overlap data structure. 
+ *--------------------------------------------------------------------------*/ + +hypre_OverlapData* +hypre_OverlapDataCreate(void) +{ + hypre_OverlapData *overlap_data; + + overlap_data = hypre_CTAlloc(hypre_OverlapData, 1, HYPRE_MEMORY_HOST); + + hypre_OverlapDataOverlapOrder(overlap_data) = 0; + hypre_OverlapDataNumLocalRows(overlap_data) = 0; + hypre_OverlapDataFirstRowIndex(overlap_data) = 0; + hypre_OverlapDataLastRowIndex(overlap_data) = -1; + hypre_OverlapDataNumExtendedRows(overlap_data) = 0; + hypre_OverlapDataNumOverlapRows(overlap_data) = 0; + hypre_OverlapDataExtendedRowIndices(overlap_data) = NULL; + hypre_OverlapDataRowIsOwned(overlap_data) = NULL; + hypre_OverlapDataOverlapCommPkg(overlap_data) = NULL; + hypre_OverlapDataExternalMatrix(overlap_data) = NULL; + hypre_OverlapDataExternalRowMap(overlap_data) = NULL; + + return overlap_data; +} + +/*-------------------------------------------------------------------------- + * Destroy an overlap data structure and free all associated memory. + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_OverlapDataDestroy(hypre_OverlapData *overlap_data) +{ + if (overlap_data) + { + hypre_TFree(hypre_OverlapDataExtendedRowIndices(overlap_data), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_OverlapDataRowIsOwned(overlap_data), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_OverlapDataExternalRowMap(overlap_data), HYPRE_MEMORY_HOST); + + if (hypre_OverlapDataOverlapCommPkg(overlap_data)) + { + hypre_MatvecCommPkgDestroy(hypre_OverlapDataOverlapCommPkg(overlap_data)); + } + + if (hypre_OverlapDataExternalMatrix(overlap_data)) + { + hypre_CSRMatrixDestroy(hypre_OverlapDataExternalMatrix(overlap_data)); + } + + hypre_TFree(overlap_data, HYPRE_MEMORY_HOST); + } + + return hypre_error_flag; +} + +/*-------------------------------------------------------------------------- + * Check if a sorted BigInt array contains a value using binary search. 
+ *--------------------------------------------------------------------------*/ + +static HYPRE_Int +hypre_BigIntArrayContains(HYPRE_BigInt *array, HYPRE_Int size, HYPRE_BigInt value) +{ + HYPRE_Int left = 0; + HYPRE_Int right = size - 1; + + while (left <= right) + { + HYPRE_Int mid = (left + right) / 2; + if (array[mid] == value) + { + return 1; + } + else if (array[mid] < value) + { + left = mid + 1; + } + else + { + right = mid - 1; + } + } + + return 0; +} + +/*-------------------------------------------------------------------------- + * Compute the union of two sorted BigInt arrays, returning a new sorted array. + *--------------------------------------------------------------------------*/ + +static HYPRE_BigInt* +hypre_BigIntArrayUnion(HYPRE_BigInt *arr1, HYPRE_Int size1, + HYPRE_BigInt *arr2, HYPRE_Int size2, + HYPRE_Int *result_size) +{ + HYPRE_Int alloc_size = size1 + size2; + HYPRE_BigInt *result; + HYPRE_Int i = 0, j = 0, k = 0; + + if (alloc_size == 0) + { + *result_size = 0; + return NULL; + } + + result = hypre_TAlloc(HYPRE_BigInt, alloc_size, HYPRE_MEMORY_HOST); + + while (i < size1 && j < size2) + { + if (arr1[i] < arr2[j]) + { + result[k++] = arr1[i++]; + } + else if (arr1[i] > arr2[j]) + { + result[k++] = arr2[j++]; + } + else + { + result[k++] = arr1[i++]; + j++; + } + } + + while (i < size1) + { + result[k++] = arr1[i++]; + } + + while (j < size2) + { + result[k++] = arr2[j++]; + } + + *result_size = k; + + return result; +} + +/*-------------------------------------------------------------------------- + * Compute the overlapping domain decomposition for a ParCSR matrix. + * + * Given a matrix A and overlap order delta, this function computes + * the extended set of row indices that includes all rows within + * delta hops of the locally owned rows. 
+ * + * Arguments: + * A - Input ParCSR matrix + * overlap_order - Number of overlap levels (delta >= 0) + * overlap_data_ptr - Output overlap data structure + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixComputeOverlap(hypre_ParCSRMatrix *A, + HYPRE_Int overlap_order, + hypre_OverlapData **overlap_data_ptr) +{ + MPI_Comm comm = hypre_ParCSRMatrixComm(A); + HYPRE_Int num_procs, my_id; + hypre_OverlapData *overlap_data; + + /* Matrix data */ + hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); + hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); + HYPRE_Int *offd_i = hypre_CSRMatrixI(offd); + HYPRE_Int *offd_j = hypre_CSRMatrixJ(offd); + HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); + + HYPRE_Int num_rows = hypre_CSRMatrixNumRows(diag); + HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd); + HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(A); + HYPRE_BigInt global_num_rows = hypre_ParCSRMatrixGlobalNumRows(A); + + /* Working sets */ + HYPRE_BigInt *current_set = NULL; + HYPRE_Int current_size = 0; + HYPRE_BigInt *new_set = NULL; + HYPRE_Int new_size = 0; + HYPRE_BigInt *external_indices = NULL; + HYPRE_Int num_external = 0; + + HYPRE_Int level, i, j, jj; + + hypre_MPI_Comm_size(comm, &num_procs); + hypre_MPI_Comm_rank(comm, &my_id); + + /* Create overlap data structure */ + overlap_data = hypre_OverlapDataCreate(); + hypre_OverlapDataOverlapOrder(overlap_data) = overlap_order; + hypre_OverlapDataNumLocalRows(overlap_data) = num_rows; + hypre_OverlapDataFirstRowIndex(overlap_data) = first_row; + hypre_OverlapDataLastRowIndex(overlap_data) = first_row + num_rows - 1; + + /* Initialize current set with local rows */ + current_size = num_rows; + if (num_rows > 0) + { + current_set = hypre_TAlloc(HYPRE_BigInt, num_rows, HYPRE_MEMORY_HOST); + for (i = 0; i < num_rows; i++) + { + current_set[i] = first_row + (HYPRE_BigInt) i; + } + } + + /* If overlap_order is 0, we're done - just 
local rows */ + if (overlap_order == 0 || num_procs == 1) + { + hypre_OverlapDataNumExtendedRows(overlap_data) = num_rows; + hypre_OverlapDataNumOverlapRows(overlap_data) = 0; + hypre_OverlapDataExtendedRowIndices(overlap_data) = current_set; + + /* Build ownership array - all owned */ + if (num_rows > 0) + { + hypre_OverlapDataRowIsOwned(overlap_data) = hypre_TAlloc(HYPRE_Int, num_rows, HYPRE_MEMORY_HOST); + for (i = 0; i < num_rows; i++) + { + hypre_OverlapDataRowIsOwned(overlap_data)[i] = 1; + } + } + + *overlap_data_ptr = overlap_data; + return hypre_error_flag; + } + + /* Iteratively expand the set for each level of overlap */ + for (level = 0; level < overlap_order; level++) + { + /* Find all neighbors of current set that are not already in current set */ + /* First, collect potential new external indices */ + HYPRE_BigInt *potential_external = NULL; + HYPRE_Int num_potential = 0; + HYPRE_Int potential_alloc = 0; + + /* Estimate allocation */ + potential_alloc = num_cols_offd; + if (potential_alloc > 0) + { + potential_external = hypre_TAlloc(HYPRE_BigInt, potential_alloc, HYPRE_MEMORY_HOST); + } + + /* For each row in current set, find its off-processor neighbors */ + for (i = 0; i < current_size; i++) + { + HYPRE_BigInt global_row = current_set[i]; + + /* Check if this row is local */ + if (global_row >= first_row && global_row < first_row + num_rows) + { + HYPRE_Int local_row = (HYPRE_Int)(global_row - first_row); + + /* Check off-diagonal neighbors */ + for (jj = offd_i[local_row]; jj < offd_i[local_row + 1]; jj++) + { + HYPRE_Int col = offd_j[jj]; + HYPRE_BigInt global_col = col_map_offd[col]; + + /* Check if already in current set */ + if (!hypre_BigIntArrayContains(current_set, current_size, global_col)) + { + /* Check if already in potential list */ + HYPRE_Int found = 0; + for (j = 0; j < num_potential; j++) + { + if (potential_external[j] == global_col) + { + found = 1; + break; + } + } + + if (!found) + { + /* Add to potential list */ + if 
(num_potential >= potential_alloc) + { + potential_alloc = hypre_max(2 * potential_alloc, num_potential + 100); + potential_external = hypre_TReAlloc(potential_external, HYPRE_BigInt, + potential_alloc, HYPRE_MEMORY_HOST); + } + potential_external[num_potential++] = global_col; + } + } + } + } + } + + /* Sort potential external indices */ + if (num_potential > 1) + { + hypre_BigQsort0(potential_external, 0, num_potential - 1); + } + + /* At this point, for multi-processor case with overlap > 1, + * we need to communicate to find rows that are neighbors of external rows. + * For level > 0, we need to get the sparsity pattern of external rows + * to find their neighbors. */ + + if (level > 0) + { + hypre_ParCSRCommPkg *ext_comm_pkg = NULL; + hypre_CSRMatrix *A_ext = NULL; + void *request = NULL; + + /* Build comm pkg for external indices (works with num_external = 0) */ + hypre_ParCSRFindExtendCommPkg(comm, global_num_rows, first_row, num_rows, + hypre_ParCSRMatrixRowStarts(A), + hypre_ParCSRMatrixAssumedPartition(A), + num_external, external_indices, &ext_comm_pkg); + + /* Fetch graph of external connections A_ext (pattern only, no data needed) */ + /* All processes must call this, even with num_external = 0 */ + hypre_ParcsrGetExternalRowsInit(A, num_external, external_indices, + ext_comm_pkg, 0, &request); + A_ext = hypre_ParcsrGetExternalRowsWait(request); + + /* Find neighbors of external rows (only if this process received rows) */ + if (A_ext) + { + HYPRE_Int *ext_i = hypre_CSRMatrixI(A_ext); + HYPRE_BigInt *ext_j = hypre_CSRMatrixBigJ(A_ext); + HYPRE_Int ext_num_rows = hypre_CSRMatrixNumRows(A_ext); + + for (i = 0; i < ext_num_rows; i++) + { + for (jj = ext_i[i]; jj < ext_i[i + 1]; jj++) + { + HYPRE_BigInt neighbor = ext_j[jj]; + + /* Check if already in current set or potential */ + if (!hypre_BigIntArrayContains(current_set, current_size, neighbor)) + { + HYPRE_Int found = 0; + for (j = 0; j < num_potential; j++) + { + if (potential_external[j] == neighbor) 
+ { + found = 1; + break; + } + } + + if (!found) + { + if (num_potential >= potential_alloc) + { + potential_alloc = hypre_max(2 * potential_alloc, num_potential + 100); + potential_external = hypre_TReAlloc(potential_external, HYPRE_BigInt, + potential_alloc, HYPRE_MEMORY_HOST); + } + potential_external[num_potential++] = neighbor; + } + } + } + } + + hypre_CSRMatrixDestroy(A_ext); + } + + if (ext_comm_pkg) + { + hypre_MatvecCommPkgDestroy(ext_comm_pkg); + } + + /* Re-sort after adding more */ + if (num_potential > 1) + { + hypre_BigQsort0(potential_external, 0, num_potential - 1); + + /* Remove duplicates */ + HYPRE_Int unique_count = 1; + for (i = 1; i < num_potential; i++) + { + if (potential_external[i] != potential_external[unique_count - 1]) + { + potential_external[unique_count++] = potential_external[i]; + } + } + num_potential = unique_count; + } + } + + /* Merge current set with potential external to get new set */ + new_set = hypre_BigIntArrayUnion(current_set, current_size, + potential_external, num_potential, + &new_size); + + /* Update external indices for next level */ + hypre_TFree(external_indices, HYPRE_MEMORY_HOST); + num_external = num_potential; + external_indices = potential_external; + + /* Replace current set */ + hypre_TFree(current_set, HYPRE_MEMORY_HOST); + current_set = new_set; + current_size = new_size; + } + + /* Store final extended set */ + hypre_OverlapDataNumExtendedRows(overlap_data) = current_size; + hypre_OverlapDataNumOverlapRows(overlap_data) = current_size - num_rows; + hypre_OverlapDataExtendedRowIndices(overlap_data) = current_set; + + /* Build ownership array */ + if (current_size > 0) + { + hypre_OverlapDataRowIsOwned(overlap_data) = hypre_TAlloc(HYPRE_Int, current_size, + HYPRE_MEMORY_HOST); + for (i = 0; i < current_size; i++) + { + HYPRE_BigInt row = current_set[i]; + if (row >= first_row && row < first_row + num_rows) + { + hypre_OverlapDataRowIsOwned(overlap_data)[i] = 1; + } + else + { + 
hypre_OverlapDataRowIsOwned(overlap_data)[i] = 0; + } + } + } + + /* Build communication package for overlap if needed */ + if (current_size > num_rows && num_procs > 1) + { + /* Collect external row indices */ + HYPRE_BigInt *ext_indices = hypre_TAlloc(HYPRE_BigInt, current_size - num_rows, HYPRE_MEMORY_HOST); + HYPRE_Int ext_count = 0; + + for (i = 0; i < current_size; i++) + { + if (!hypre_OverlapDataRowIsOwned(overlap_data)[i]) + { + ext_indices[ext_count++] = current_set[i]; + } + } + + /* Build comm pkg */ + hypre_ParCSRFindExtendCommPkg(comm, global_num_rows, first_row, num_rows, + hypre_ParCSRMatrixRowStarts(A), + hypre_ParCSRMatrixAssumedPartition(A), + ext_count, ext_indices, + &hypre_OverlapDataOverlapCommPkg(overlap_data)); + + /* Store external row map */ + hypre_OverlapDataExternalRowMap(overlap_data) = ext_indices; + } + + /* Clean up */ + hypre_TFree(external_indices, HYPRE_MEMORY_HOST); + + *overlap_data_ptr = overlap_data; + + return hypre_error_flag; +} + +/*-------------------------------------------------------------------------- + * Fetch the actual CSR matrix including data for overlap rows from + * neighboring processors. 
+ * + * Arguments: + * A - Input ParCSR matrix + * overlap_data - Overlap data with communication package + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixGetExternalMatrix(hypre_ParCSRMatrix *A, + hypre_OverlapData *overlap_data) +{ + HYPRE_Int num_external; + HYPRE_BigInt *ext_indices; + hypre_ParCSRCommPkg *comm_pkg; + void *request = NULL; + + if (!overlap_data) + { + return hypre_error_flag; + } + + num_external = hypre_OverlapDataNumOverlapRows(overlap_data); + ext_indices = hypre_OverlapDataExternalRowMap(overlap_data); + comm_pkg = hypre_OverlapDataOverlapCommPkg(overlap_data); + + /* If no external rows, nothing to do */ + if (num_external == 0 || !comm_pkg) + { + return hypre_error_flag; + } + + /* Destroy existing external matrix if any */ + if (hypre_OverlapDataExternalMatrix(overlap_data)) + { + hypre_CSRMatrixDestroy(hypre_OverlapDataExternalMatrix(overlap_data)); + hypre_OverlapDataExternalMatrix(overlap_data) = NULL; + } + + /* Fetch external matrix (with data) */ + hypre_ParcsrGetExternalRowsInit(A, num_external, ext_indices, comm_pkg, 1, &request); + hypre_OverlapDataExternalMatrix(overlap_data) = hypre_ParcsrGetExternalRowsWait(request); + + return hypre_error_flag; +} + +/*-------------------------------------------------------------------------- + * Extract the local submatrix including overlap rows. + * Returns a Square CSR matrix containing all rows in the extended domain, + * with column indices restricted to only those in the extended domain. + * Entries referencing columns outside the extended domain are dropped. 
+ * + * Arguments: + * A - Input ParCSR matrix + * overlap_data - Overlap data with external rows + * A_local_ptr - Output CSR matrix for local extended domain (square) + * col_map_ptr - Output array mapping local col index to global col index + * num_cols_local_ptr - Output number of columns in local matrix + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixCreateExtendedMatrix(hypre_ParCSRMatrix *A, + hypre_OverlapData *overlap_data, + hypre_CSRMatrix **A_local_ptr, + HYPRE_BigInt **col_map_ptr, + HYPRE_Int *num_cols_local_ptr) +{ + hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); + hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); + HYPRE_Int *diag_i = hypre_CSRMatrixI(diag); + HYPRE_Int *diag_j = hypre_CSRMatrixJ(diag); + HYPRE_Complex *diag_data = hypre_CSRMatrixData(diag); + HYPRE_Int *offd_i = hypre_CSRMatrixI(offd); + HYPRE_Int *offd_j = hypre_CSRMatrixJ(offd); + HYPRE_Complex *offd_data = hypre_CSRMatrixData(offd); + HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); + HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(A); + HYPRE_BigInt first_col = hypre_ParCSRMatrixFirstColDiag(A); + + hypre_CSRMatrix *A_ext = hypre_OverlapDataExternalMatrix(overlap_data); + HYPRE_Int *ext_i = NULL; + HYPRE_BigInt *ext_j = NULL; + HYPRE_Complex *ext_data = NULL; + HYPRE_Int num_ext_rows = 0; + + HYPRE_BigInt *extended_rows = hypre_OverlapDataExtendedRowIndices(overlap_data); + HYPRE_Int num_extended = hypre_OverlapDataNumExtendedRows(overlap_data); + HYPRE_Int *row_is_owned = hypre_OverlapDataRowIsOwned(overlap_data); + + hypre_CSRMatrix *A_local; + HYPRE_Int *A_local_i; + HYPRE_Int *A_local_j; + HYPRE_Complex *A_local_data; + HYPRE_Int A_local_nnz; + + HYPRE_BigInt *col_map = NULL; + HYPRE_Int num_cols_local; + + HYPRE_Int i, jj, k; + HYPRE_Int ext_row_counter = 0; + + /* For a square matrix, the column map IS the extended rows (which is already sorted) */ + num_cols_local = num_extended; + 
col_map = hypre_TAlloc(HYPRE_BigInt, num_cols_local, HYPRE_MEMORY_HOST); + for (i = 0; i < num_cols_local; i++) + { + col_map[i] = extended_rows[i]; + } + + /* Get external rows data */ + if (A_ext) + { + ext_i = hypre_CSRMatrixI(A_ext); + ext_j = hypre_CSRMatrixBigJ(A_ext); + ext_data = hypre_CSRMatrixData(A_ext); + num_ext_rows = hypre_CSRMatrixNumRows(A_ext); + } + + /* First pass: count nonzeros (only those with columns in extended domain) */ + A_local_nnz = 0; + ext_row_counter = 0; + + for (i = 0; i < num_extended; i++) + { + HYPRE_BigInt global_row = extended_rows[i]; + + if (row_is_owned[i]) + { + /* Local row */ + HYPRE_Int local_row = (HYPRE_Int)(global_row - first_row); + + /* Count diagonal entries that are in extended domain */ + for (jj = diag_i[local_row]; jj < diag_i[local_row + 1]; jj++) + { + HYPRE_BigInt global_col = first_col + (HYPRE_BigInt) diag_j[jj]; + /* Check if column is in extended domain */ + if (hypre_BigBinarySearch(col_map, global_col, num_cols_local) >= 0) + { + A_local_nnz++; + } + } + + /* Count off-diagonal entries that are in extended domain */ + for (jj = offd_i[local_row]; jj < offd_i[local_row + 1]; jj++) + { + HYPRE_BigInt global_col = col_map_offd[offd_j[jj]]; + if (hypre_BigBinarySearch(col_map, global_col, num_cols_local) >= 0) + { + A_local_nnz++; + } + } + } + else + { + /* External row */ + if (A_ext && ext_row_counter < num_ext_rows) + { + for (jj = ext_i[ext_row_counter]; jj < ext_i[ext_row_counter + 1]; jj++) + { + HYPRE_BigInt global_col = ext_j[jj]; + if (hypre_BigBinarySearch(col_map, global_col, num_cols_local) >= 0) + { + A_local_nnz++; + } + } + ext_row_counter++; + } + } + } + + /* Create local CSR matrix (square: num_extended x num_extended) */ + A_local = hypre_CSRMatrixCreate(num_extended, num_cols_local, A_local_nnz); + hypre_CSRMatrixInitialize(A_local); + + A_local_i = hypre_CSRMatrixI(A_local); + A_local_j = hypre_CSRMatrixJ(A_local); + A_local_data = hypre_CSRMatrixData(A_local); + + /* Second pass: 
fill the matrix */ + k = 0; + ext_row_counter = 0; + A_local_i[0] = 0; + + for (i = 0; i < num_extended; i++) + { + HYPRE_BigInt global_row = extended_rows[i]; + + if (row_is_owned[i]) + { + /* Local row */ + HYPRE_Int local_row = (HYPRE_Int)(global_row - first_row); + + /* Copy diagonal entries that are in extended domain */ + for (jj = diag_i[local_row]; jj < diag_i[local_row + 1]; jj++) + { + HYPRE_BigInt global_col = first_col + (HYPRE_BigInt) diag_j[jj]; + + /* Find local column index using binary search */ + HYPRE_Int local_col = hypre_BigBinarySearch(col_map, global_col, num_cols_local); + if (local_col >= 0) + { + A_local_j[k] = local_col; + A_local_data[k] = diag_data[jj]; + k++; + } + } + + /* Copy off-diagonal entries that are in extended domain */ + for (jj = offd_i[local_row]; jj < offd_i[local_row + 1]; jj++) + { + HYPRE_BigInt global_col = col_map_offd[offd_j[jj]]; + + HYPRE_Int local_col = hypre_BigBinarySearch(col_map, global_col, num_cols_local); + if (local_col >= 0) + { + A_local_j[k] = local_col; + A_local_data[k] = offd_data[jj]; + k++; + } + } + } + else + { + /* External row */ + if (A_ext && ext_row_counter < num_ext_rows) + { + for (jj = ext_i[ext_row_counter]; jj < ext_i[ext_row_counter + 1]; jj++) + { + HYPRE_BigInt global_col = ext_j[jj]; + + HYPRE_Int local_col = hypre_BigBinarySearch(col_map, global_col, num_cols_local); + if (local_col >= 0) + { + A_local_j[k] = local_col; + A_local_data[k] = ext_data[jj]; + k++; + } + } + ext_row_counter++; + } + } + + A_local_i[i + 1] = k; + } + + /* Sort each row by column index */ + for (i = 0; i < num_extended; i++) + { + HYPRE_Int row_start = A_local_i[i]; + HYPRE_Int row_nnz = A_local_i[i + 1] - row_start; + if (row_nnz > 1) + { + hypre_qsort2(&A_local_j[row_start], &A_local_data[row_start], 0, row_nnz - 1); + } + } + + /* Reorder the matrix to put the diagonal first */ + hypre_CSRMatrixReorder(A_local); + + /* Return results */ + *A_local_ptr = A_local; + *col_map_ptr = col_map; + 
*num_cols_local_ptr = num_cols_local; + + return hypre_error_flag; +} diff --git a/src/parcsr_mv/par_csr_overlap.h b/src/parcsr_mv/par_csr_overlap.h new file mode 100644 index 0000000000..4003a09d61 --- /dev/null +++ b/src/parcsr_mv/par_csr_overlap.h @@ -0,0 +1,65 @@ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/****************************************************************************** + * + * Header info for overlapping domain decomposition + * + *****************************************************************************/ + +#ifndef hypre_PAR_CSR_OVERLAP_HEADER +#define hypre_PAR_CSR_OVERLAP_HEADER + +/*-------------------------------------------------------------------------- + * hypre_OverlapData + * + * Data structure for overlapping domain decomposition. + * Stores information about the extended subdomain for a processor. 
+ *--------------------------------------------------------------------------*/ + +typedef struct hypre_OverlapData_struct +{ + /* Overlap configuration */ + HYPRE_Int overlap_order; /* Overlap order (delta >= 0) */ + + /* Original local partition info */ + HYPRE_Int num_local_rows; /* Original local rows */ + HYPRE_BigInt first_row_index; /* First row owned by this proc */ + HYPRE_BigInt last_row_index; /* Last row owned by this proc */ + + /* Extended subdomain information */ + HYPRE_Int num_extended_rows; /* Total rows in extended domain */ + HYPRE_Int num_overlap_rows; /* External rows (from overlap) */ + HYPRE_BigInt *extended_row_indices; /* Global indices of all extended rows */ + HYPRE_Int *row_is_owned; /* 1 if row is owned, 0 if external */ + + /* Communication package for fetching overlap data */ + hypre_ParCSRCommPkg *overlap_comm_pkg; + + /* External CSR matrix (fetched from other procs) */ + hypre_CSRMatrix *A_ext; /* CSR matrix of external rows */ + HYPRE_BigInt *ext_map; /* Global row indices for external rows */ + +} hypre_OverlapData; + +/*-------------------------------------------------------------------------- + * Accessor macros for hypre_OverlapData + *--------------------------------------------------------------------------*/ + +#define hypre_OverlapDataOverlapOrder(data) ((data)->overlap_order) +#define hypre_OverlapDataNumLocalRows(data) ((data)->num_local_rows) +#define hypre_OverlapDataFirstRowIndex(data) ((data)->first_row_index) +#define hypre_OverlapDataLastRowIndex(data) ((data)->last_row_index) +#define hypre_OverlapDataNumExtendedRows(data) ((data)->num_extended_rows) +#define hypre_OverlapDataNumOverlapRows(data) ((data)->num_overlap_rows) +#define hypre_OverlapDataExtendedRowIndices(data) ((data)->extended_row_indices) +#define hypre_OverlapDataRowIsOwned(data) ((data)->row_is_owned) +#define hypre_OverlapDataOverlapCommPkg(data) ((data)->overlap_comm_pkg) +#define hypre_OverlapDataExternalMatrix(data) ((data)->A_ext) +#define 
hypre_OverlapDataExternalRowMap(data) ((data)->ext_map) + +#endif /* hypre_PAR_CSR_OVERLAP_HEADER */ diff --git a/src/parcsr_mv/protos.h b/src/parcsr_mv/protos.h index 99907e3ad5..8f8b4583e8 100644 --- a/src/parcsr_mv/protos.h +++ b/src/parcsr_mv/protos.h @@ -625,3 +625,16 @@ HYPRE_Int hypre_ParVectorGetValuesDevice(hypre_ParVector *vector, HYPRE_Int num_ /* HYPRE_parcsr_vector.c */ HYPRE_Int hypre_ParVectorStridedCopy( hypre_ParVector *x, HYPRE_Int istride, HYPRE_Int ostride, HYPRE_Int size, HYPRE_Complex *data ); + +/* par_csr_overlap.c */ +hypre_OverlapData* hypre_OverlapDataCreate( void ); +HYPRE_Int hypre_OverlapDataDestroy( hypre_OverlapData *overlap_data ); +HYPRE_Int hypre_ParCSRMatrixComputeOverlap( hypre_ParCSRMatrix *A, HYPRE_Int overlap_order, + hypre_OverlapData **overlap_data_ptr ); +HYPRE_Int hypre_ParCSRMatrixGetExternalMatrix( hypre_ParCSRMatrix *A, + hypre_OverlapData *overlap_data ); +HYPRE_Int hypre_ParCSRMatrixCreateExtendedMatrix( hypre_ParCSRMatrix *A, + hypre_OverlapData *overlap_data, + hypre_CSRMatrix **A_local_ptr, + HYPRE_BigInt **col_map_ptr, + HYPRE_Int *num_cols_local_ptr ); diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 50c2b85545..69b4ba7d30 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -23,6 +23,7 @@ set(TEST_SRCS ams_driver.c struct_migrate.c ij_assembly.c + test_csr_overlap.c ) # Collect targets derived from source file names diff --git a/src/test/Makefile b/src/test/Makefile index 624b0a4da0..8cfc0ffc55 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -70,7 +70,10 @@ HYPRE_DRIVERS =\ ams_driver.c\ struct_migrate.c\ ij_mm.c\ - zboxloop.c\ + zboxloop.c + +HYPRE_UNIT_TESTS =\ + test_csr_overlap.c HYPRE_MP_DRIVERS =\ ij_mp.c\ @@ -102,8 +105,13 @@ endif HYPRE_F77_EXAMPLES_DRIVER_EXECS=${HYPRE_F77_EXAMPLES_DRIVERS:.c=} HYPRE_DRIVER_F77_EXECS=${HYPRE_DRIVERS_F77:.f=} HYPRE_DRIVER_CXX_EXECS=${HYPRE_DRIVERS_CXX:.cxx=} +HYPRE_UNIT_EXECS=${HYPRE_UNIT_TESTS:.c=} + +all: drivers tests 
-all: ${HYPRE_DRIVER_EXECS} +tests: ${HYPRE_UNIT_EXECS} + +drivers: ${HYPRE_DRIVER_EXECS} all77: ${HYPRE_DRIVER_F77_EXECS} @@ -120,6 +128,7 @@ distclean: clean rm -f ${HYPRE_F77_EXAMPLES_DRIVER_EXECS} rm -f ${HYPRE_DRIVER_F77_EXECS} rm -f ${HYPRE_DRIVER_CXX_EXECS} cxx_* + rm -f ${HYPRE_UNIT_EXECS} rm -f TEST_examples/*.out* rm -f TEST_examples/*.err* rm -f TEST_ij/*.out* @@ -212,6 +221,12 @@ test_mp_pcg_3d: test_mp_pcg_3d.o @echo "Building" $@ "... " ${LINK_CC} -o $@ $< ${LFLAGS} +# Unit tests + +test_csr_overlap: test_csr_overlap.o + @echo "Building" $@ "... " + ${LINK_CC} -o $@ $< ${LFLAGS} + # RDF: Keep these for now hypre_set_precond: hypre_set_precond.o diff --git a/src/test/test_csr_overlap.c b/src/test/test_csr_overlap.c new file mode 100644 index 0000000000..0fb8e5862a --- /dev/null +++ b/src/test/test_csr_overlap.c @@ -0,0 +1,2495 @@ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/*-------------------------------------------------------------------------- + * Test driver for overlap matrix extraction + * + * This test file contains: + * 1. Unit tests for overlap extraction with small hard-coded matrices + * 2. 
Benchmarking with generated laplacian matrices + *--------------------------------------------------------------------------*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <time.h> + +#include "HYPRE.h" +#include "HYPRE_parcsr_mv.h" +#include "HYPRE_parcsr_ls.h" +#include "_hypre_utilities.h" +#include "_hypre_parcsr_mv.h" + +#define CHECK_TOLERANCE 1.0e-10 + +/*-------------------------------------------------------------------------- + * Macro to print test result (PASSED/FAILED) on rank 0 + *--------------------------------------------------------------------------*/ +#define PRINT_TEST_RESULT(my_id, error) \ + do { \ + if ((my_id) == 0) \ + { \ + if ((error) == 0) \ + { \ + hypre_printf("PASSED\n"); \ + } \ + else \ + { \ + hypre_printf("FAILED\n"); \ + } \ + } \ + } while (0) + +/*-------------------------------------------------------------------------- + * Helper function to compare two CSR matrices + * Returns 0 if matrices are equal (within tolerance), 1 otherwise + * Uses Frobenius norm of the difference matrix + *--------------------------------------------------------------------------*/ +static HYPRE_Int +CompareCSRMatrices(hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Real tol) +{ + HYPRE_Int num_rows_A = hypre_CSRMatrixNumRows(A); + HYPRE_Int num_cols_A = hypre_CSRMatrixNumCols(A); + HYPRE_Int num_rows_B = hypre_CSRMatrixNumRows(B); + HYPRE_Int num_cols_B = hypre_CSRMatrixNumCols(B); + hypre_CSRMatrix *diff; + HYPRE_Real fnorm_diff, fnorm_A; + HYPRE_Real rel_error; + + /* Check dimensions */ + if (num_rows_A != num_rows_B || num_cols_A != num_cols_B) + { + hypre_printf("Matrix dimensions mismatch: (%d x %d) vs (%d x %d)\n", + num_rows_A, num_cols_A, num_rows_B, num_cols_B); + return 1; + } + + /* Compute difference matrix: diff = A - B */ + diff = hypre_CSRMatrixAdd(1.0, A, -1.0, B); + if (!diff) + { + hypre_printf("Failed to compute difference matrix\n"); + return 1; + } + + /* Compute Frobenius norm of difference */ + fnorm_diff = 
hypre_CSRMatrixFnorm(diff); + fnorm_A = hypre_CSRMatrixFnorm(A); + + /* Compute relative error */ + rel_error = (fnorm_A > 0.0) ? (fnorm_diff / fnorm_A) : fnorm_diff; + + /* Cleanup */ + hypre_CSRMatrixDestroy(diff); + + /* Check if relative error is within tolerance */ + if (rel_error > tol) + { + hypre_printf("Matrix comparison failed: ||A - B||_F / ||A||_F = %e (tolerance = %e)\n", + rel_error, tol); + return 1; + } + + return 0; +} + +/*-------------------------------------------------------------------------- + * Create a CSR matrix from hard-coded values + * num_rows: number of rows + * num_cols: number of columns + * I: row pointer array (length num_rows+1) + * J: column index array (length nnz) + * data: value array (length nnz) + *--------------------------------------------------------------------------*/ +static hypre_CSRMatrix* +CreateCSRMatrixFromData(HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int nnz, + HYPRE_Int *I, HYPRE_Int *J, HYPRE_Real *data) +{ + hypre_CSRMatrix *matrix; + HYPRE_Int i; + + matrix = hypre_CSRMatrixCreate(num_rows, num_cols, nnz); + hypre_CSRMatrixInitialize(matrix); + + /* Copy I array */ + for (i = 0; i <= num_rows; i++) + { + hypre_CSRMatrixI(matrix)[i] = I[i]; + } + + /* Copy J and data arrays */ + for (i = 0; i < nnz; i++) + { + hypre_CSRMatrixJ(matrix)[i] = J[i]; + hypre_CSRMatrixData(matrix)[i] = data[i]; + } + + hypre_CSRMatrixNumNonzeros(matrix) = nnz; + + return matrix; +} + +/*-------------------------------------------------------------------------- + * Create a simple 1D Laplacian matrix using GenerateLaplacian + * Global size: n, partitioned across num_procs processors (1D partitioning) + *--------------------------------------------------------------------------*/ +static hypre_ParCSRMatrix* +Create1DLaplacian(MPI_Comm comm, HYPRE_Int n, HYPRE_Int num_procs, HYPRE_Int my_id) +{ + HYPRE_Real values[4]; + HYPRE_Int p, q, r; + HYPRE_Int P = num_procs, Q = 1, R = 1; + + /* Compute processor coordinates for 1D 
partitioning */ + p = my_id % P; + q = ((my_id - p) / P) % Q; + r = (my_id - p - P * q) / (P * Q); + + /* Set up 1D stencil values (tridiagonal) */ + values[0] = 2.0; /* diagonal */ + values[1] = -1.0; /* x-direction */ + values[2] = 0.0; /* y-direction (not used) */ + values[3] = 0.0; /* z-direction (not used) */ + + return GenerateLaplacian(comm, n, 1, 1, P, Q, R, p, q, r, values); +} + +/*-------------------------------------------------------------------------- + * Create a 2D Laplacian matrix using GenerateLaplacian + * Global size: nx x ny, partitioned across num_procs processors (1D partitioning) + *--------------------------------------------------------------------------*/ +static hypre_ParCSRMatrix* +Create2DLaplacian(MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int num_procs, HYPRE_Int my_id) +{ + HYPRE_Real values[4]; + HYPRE_Int p, q, r; + HYPRE_Int P = num_procs, Q = 1, R = 1; + + /* Compute processor coordinates for 1D partitioning */ + p = my_id % P; + q = ((my_id - p) / P) % Q; + r = (my_id - p - P * q) / (P * Q); + + /* Set up 2D 5-point stencil values */ + values[0] = 4.0; /* diagonal */ + values[1] = -1.0; /* x-direction */ + values[2] = -1.0; /* y-direction */ + values[3] = 0.0; /* z-direction (not used) */ + + return GenerateLaplacian(comm, nx, ny, 1, P, Q, R, p, q, r, values); +} + +/*-------------------------------------------------------------------------- + * Create a 2D Laplacian matrix using GenerateLaplacian with 2D partitioning + * Global size: nx x ny, partitioned across Px x Py processors (2D partitioning) + *--------------------------------------------------------------------------*/ +static hypre_ParCSRMatrix* +Create2DLaplacian2DPart(MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, + HYPRE_Int Px, HYPRE_Int Py, HYPRE_Int my_id) +{ + HYPRE_Real values[4]; + HYPRE_Int p, q, r; + HYPRE_Int P = Px, Q = Py, R = 1; + + /* Compute processor coordinates for 2D partitioning */ + p = my_id % P; + q = ((my_id - p) / P) % Q; + r = (my_id - p - 
P * q) / (P * Q); + + /* Set up 2D 5-point stencil values */ + values[0] = 4.0; /* diagonal */ + values[1] = -1.0; /* x-direction */ + values[2] = -1.0; /* y-direction */ + values[3] = 0.0; /* z-direction (not used) */ + + return GenerateLaplacian(comm, nx, ny, 1, P, Q, R, p, q, r, values); +} + +/*-------------------------------------------------------------------------- + * Create a 3D Laplacian matrix using GenerateLaplacian with 3D partitioning + * Global size: nx x ny x nz, partitioned across Px x Py x Pz processors + *--------------------------------------------------------------------------*/ +static hypre_ParCSRMatrix* +Create3DLaplacian3DPart(MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, + HYPRE_Int Px, HYPRE_Int Py, HYPRE_Int Pz, HYPRE_Int my_id) +{ + HYPRE_Real values[4]; + HYPRE_Int p, q, r; + HYPRE_Int P = Px, Q = Py, R = Pz; + + /* Compute processor coordinates for 3D partitioning */ + p = my_id % P; + q = ((my_id - p) / P) % Q; + r = (my_id - p - P * q) / (P * Q); + + /* Set up 3D 7-point stencil values */ + values[0] = 6.0; /* diagonal */ + values[1] = -1.0; /* x-direction */ + values[2] = -1.0; /* y-direction */ + values[3] = -1.0; /* z-direction */ + + return GenerateLaplacian(comm, nx, ny, nz, P, Q, R, p, q, r, values); +} + +/*-------------------------------------------------------------------------- + * Unit test: 1D grid with 1D partitioning, overlap=1 + * Test case: 4x4 matrix on 2 processors + *--------------------------------------------------------------------------*/ +static HYPRE_Int +Test1_Grid1D_Part1D_Overlap1(MPI_Comm comm, HYPRE_Int print_matrices) +{ + hypre_ParCSRMatrix *A; + hypre_OverlapData *overlap_data; + hypre_CSRMatrix *A_local; + HYPRE_BigInt *col_map; + HYPRE_Int num_cols_local; + HYPRE_Int error = 0; + HYPRE_Int overlap_order = 1; + HYPRE_Int test_my_id, my_id, num_procs; + MPI_Comm test_comm = MPI_COMM_NULL; + HYPRE_Int participate = 0; + + hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm_size(comm, 
&num_procs); + + /* Create subcommunicator with first 2 processors */ + if (num_procs >= 2) + { + participate = (my_id < 2) ? 1 : hypre_MPI_UNDEFINED; + hypre_MPI_Comm_split(comm, participate, my_id, &test_comm); + } + else + { + if (my_id == 0) + { + hypre_printf("Test1_Grid1D_Part1D_Overlap1: Skipping (requires at least 2 processors)\n"); + } + hypre_MPI_Barrier(comm); + return 0; + } + + /* Only participating processes run the test */ + if (test_comm == MPI_COMM_NULL) + { + /* Non-participating processes must wait for test to complete */ + hypre_MPI_Barrier(comm); + return 0; + } + + hypre_MPI_Comm_rank(test_comm, &test_my_id); + if (test_my_id == 0) + { + hypre_printf("Test1_Grid1D_Part1D_Overlap1 (2 procs): "); + } + + /* Create 4x4 1D Laplacian */ + A = Create1DLaplacian(test_comm, 4, 2, test_my_id); + + /* Compute overlap */ + if (!hypre_ParCSRMatrixCommPkg(A)) + { + hypre_MatvecCommPkgCreate(A); + } + hypre_ParCSRMatrixComputeOverlap(A, overlap_order, &overlap_data); + + /* Get overlap (external) matrix */ + hypre_ParCSRMatrixGetExternalMatrix(A, overlap_data); + + /* Extract local overlap matrix */ + hypre_ParCSRMatrixCreateExtendedMatrix(A, overlap_data, &A_local, &col_map, &num_cols_local); + + /* Create expected matrix and compare */ + { + hypre_CSRMatrix *A_expected = NULL; + + if (test_my_id == 0) + { + /* Proc 0: Extended domain is rows 0,1,2 with columns 0,1,2 + * Global matrix row 0: [2, -1] at columns [0,1] -> [2, -1] at local cols [0,1] + * Global matrix row 1: [-1, 2, -1] at columns [0,1,2] -> [-1, 2, -1] at local cols [0,1,2] + * Global matrix row 2: [-1, 2] at columns [1,2] -> [-1, 2] at local cols [1,2] + * Expected 3x3 matrix: + */ + HYPRE_Int I_expected[4] = {0, 2, 5, 7}; + HYPRE_Int J_expected[7] = {0, 1, /* row 0 (global 0): columns 0,1 (global 0,1) */ + 0, 1, 2, /* row 1 (global 1): columns 0,1,2 (global 0,1,2) */ + 1, 2 /* row 2 (global 2): columns 1,2 (global 1,2) */ + }; + HYPRE_Real data_expected[7] = {2.0, -1.0, /* row 0 */ + -1.0, 
2.0, -1.0, /* row 1 */ + -1.0, 2.0 /* row 2 */ + }; + + A_expected = CreateCSRMatrixFromData(3, 3, 7, I_expected, J_expected, data_expected); + } + else if (test_my_id == 1) + { + /* Proc 1: Extended domain is rows 1,2,3 with columns 1,2,3 + * Expected 3x3 matrix: + */ + HYPRE_Int I_expected[4] = {0, 2, 5, 7}; + HYPRE_Int J_expected[7] = {0, 1, /* row 0 (global 1): columns 0,1 (global 1,2) */ + 0, 1, 2, /* row 1 (global 2): columns 0,1,2 (global 1,2,3) */ + 1, 2 /* row 2 (global 3): columns 1,2 (global 2,3) */ + }; + HYPRE_Real data_expected[7] = {2.0, -1.0, /* row 0 */ + -1.0, 2.0, -1.0, /* row 1 */ + -1.0, 2.0 /* row 2 */ + }; + + A_expected = CreateCSRMatrixFromData(3, 3, 7, I_expected, J_expected, data_expected); + } + + if (A_expected) + { + if (print_matrices) + { + char filename_expected[256]; + char filename_computed[256]; + hypre_sprintf(filename_expected, "test1_expected_ij.out.%05d", test_my_id); + hypre_sprintf(filename_computed, "test1_computed_ij.out.%05d", test_my_id); + hypre_CSRMatrixPrintIJ(A_expected, 0, 0, filename_expected); + hypre_CSRMatrixPrintIJ(A_local, 0, 0, filename_computed); + } + if (CompareCSRMatrices(A_expected, A_local, CHECK_TOLERANCE) != 0) + { + hypre_printf("Proc %d: Extracted matrix does not match expected matrix\n", test_my_id); + error = 1; + } + hypre_CSRMatrixDestroy(A_expected); + } + } + + /* Cleanup */ + if (A_local) + { + hypre_CSRMatrixDestroy(A_local); + } + if (col_map) + { + hypre_TFree(col_map, HYPRE_MEMORY_HOST); + } + if (overlap_data) + { + hypre_OverlapDataDestroy(overlap_data); + } + if (A) + { + hypre_ParCSRMatrixDestroy(A); + } + + if (test_comm != MPI_COMM_NULL) + { + PRINT_TEST_RESULT(test_my_id, error); + hypre_MPI_Comm_free(&test_comm); + } + + /* Synchronize all processes before returning */ + hypre_MPI_Barrier(comm); + + return error; +} + +/*-------------------------------------------------------------------------- + * Unit test: 1D grid with 1D partitioning, overlap=2 + * Test case: 6x6 matrix on 3 
processors + *--------------------------------------------------------------------------*/ +static HYPRE_Int +Test2_Grid1D_Part1D_Overlap2(MPI_Comm comm, HYPRE_Int print_matrices) +{ + hypre_ParCSRMatrix *A; + hypre_OverlapData *overlap_data; + hypre_CSRMatrix *A_local; + HYPRE_BigInt *col_map; + HYPRE_Int num_cols_local; + HYPRE_Int error = 0; + HYPRE_Int overlap_order = 2; + HYPRE_Int test_my_id, my_id, num_procs; + MPI_Comm test_comm = MPI_COMM_NULL; + HYPRE_Int participate = 0; + + hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm_size(comm, &num_procs); + + /* Create subcommunicator with first 3 processors */ + if (num_procs >= 3) + { + participate = (my_id < 3) ? 1 : hypre_MPI_UNDEFINED; + hypre_MPI_Comm_split(comm, participate, my_id, &test_comm); + } + else + { + if (my_id == 0) + { + hypre_printf("Test2_Grid1D_Part1D_Overlap2: Skipping (requires at least 3 processors)\n"); + } + return 0; + } + + /* Only participating processes run the test */ + if (test_comm == MPI_COMM_NULL) + { + /* Non-participating processes must still synchronize */ + hypre_MPI_Barrier(comm); + return 0; + } + + hypre_MPI_Comm_rank(test_comm, &test_my_id); + if (test_my_id == 0) + { + hypre_printf("Test2_Grid1D_Part1D_Overlap2 (3 procs): "); + } + + /* Create 6x6 1D Laplacian */ + A = Create1DLaplacian(test_comm, 6, 3, test_my_id); + + /* Compute overlap */ + if (!hypre_ParCSRMatrixCommPkg(A)) + { + hypre_MatvecCommPkgCreate(A); + } + hypre_ParCSRMatrixComputeOverlap(A, overlap_order, &overlap_data); + + /* Get overlap (external) matrix */ + hypre_ParCSRMatrixGetExternalMatrix(A, overlap_data); + + /* Extract local overlap matrix */ + hypre_ParCSRMatrixCreateExtendedMatrix(A, overlap_data, &A_local, &col_map, &num_cols_local); + + /* Create expected matrix and compare */ + { + hypre_CSRMatrix *A_expected = NULL; + + if (test_my_id == 0) + { + /* Proc 0: Owns rows 0,1. 
With overlap=2: + * - 1st hop: gets row 2 (from proc 1) + * - 2nd hop: gets row 3 (via row 2 from proc 1) + * Extended domain: rows 0,1,2,3 with columns 0,1,2,3. + * Expected 4x4 matrix: + */ + HYPRE_Int I_expected[5] = {0, 2, 5, 8, 10}; + HYPRE_Int J_expected[10] = {0, 1, /* row 0 (global 0): columns 0,1 */ + 0, 1, 2, /* row 1 (global 1): columns 0,1,2 */ + 1, 2, 3, /* row 2 (global 2): columns 1,2,3 */ + 2, 3 /* row 3 (global 3): columns 2,3 */ + }; + HYPRE_Real data_expected[10] = {2.0, -1.0, /* row 0 */ + -1.0, 2.0, -1.0, /* row 1 */ + -1.0, 2.0, -1.0, /* row 2 */ + -1.0, 2.0 /* row 3 */ + }; + + A_expected = CreateCSRMatrixFromData(4, 4, 10, I_expected, J_expected, data_expected); + } + else if (test_my_id == 1) + { + /* Proc 1: Owns rows 2,3. With overlap=2, should get rows 0,1 (from proc 0) and rows 4,5 (from proc 2) + * Extended domain (full): rows 0,1,2,3,4,5 with columns 0,1,2,3,4,5. + * Expected 6x6 matrix: + */ + HYPRE_Int I_expected[7] = {0, 2, 5, 8, 11, 14, 16}; + HYPRE_Int J_expected[16] = {0, 1, /* row 0 (global 0): columns 0,1 */ + 0, 1, 2, /* row 1 (global 1): columns 0,1,2 */ + 1, 2, 3, /* row 2 (global 2): columns 1,2,3 */ + 2, 3, 4, /* row 3 (global 3): columns 2,3,4 */ + 3, 4, 5, /* row 4 (global 4): columns 3,4,5 */ + 4, 5 /* row 5 (global 5): columns 4,5 */ + }; + HYPRE_Real data_expected[16] = {2.0, -1.0, /* row 0 */ + -1.0, 2.0, -1.0, /* row 1 */ + -1.0, 2.0, -1.0, /* row 2 */ + -1.0, 2.0, -1.0, /* row 3 */ + -1.0, 2.0, -1.0, /* row 4 */ + -1.0, 2.0 /* row 5 */ + }; + + A_expected = CreateCSRMatrixFromData(6, 6, 16, I_expected, J_expected, data_expected); + } + else if (test_my_id == 2) + { + /* Proc 2: Owns rows 4,5. With overlap=2: + * - 1st hop: gets row 3 (from proc 1) + * - 2nd hop: gets row 2 (via row 3 from proc 1) + * Extended domain: rows 2,3,4,5 with columns 2,3,4,5. 
+ * Expected 4x4 matrix: + */ + HYPRE_Int I_expected[5] = {0, 2, 5, 8, 10}; + HYPRE_Int J_expected[10] = {0, 1, /* row 0 (global 2): columns 0,1 (global 2,3) */ + 0, 1, 2, /* row 1 (global 3): columns 0,1,2 (global 2,3,4) */ + 1, 2, 3, /* row 2 (global 4): columns 1,2,3 (global 3,4,5) */ + 2, 3 /* row 3 (global 5): columns 2,3 (global 4,5) */ + }; + HYPRE_Real data_expected[10] = {2.0, -1.0, /* row 0 */ + -1.0, 2.0, -1.0, /* row 1 */ + -1.0, 2.0, -1.0, /* row 2 */ + -1.0, 2.0 /* row 3 */ + }; + + A_expected = CreateCSRMatrixFromData(4, 4, 10, I_expected, J_expected, data_expected); + } + + if (A_expected) + { + if (CompareCSRMatrices(A_expected, A_local, CHECK_TOLERANCE) != 0) + { + hypre_printf("Proc %d: Extracted matrix does not match expected matrix\n", test_my_id); + error = 1; + } + hypre_CSRMatrixDestroy(A_expected); + } + if (print_matrices) + { + char filename_expected[256]; + char filename_computed[256]; + hypre_sprintf(filename_expected, "test2_expected_ij.out.%05d", test_my_id); + hypre_sprintf(filename_computed, "test2_computed_ij.out.%05d", test_my_id); + hypre_CSRMatrixPrintIJ(A_expected, 0, 0, filename_expected); + hypre_CSRMatrixPrintIJ(A_local, 0, 0, filename_computed); + } + } + + /* Cleanup */ + if (A_local) + { + hypre_CSRMatrixDestroy(A_local); + } + if (col_map) + { + hypre_TFree(col_map, HYPRE_MEMORY_HOST); + } + if (overlap_data) + { + hypre_OverlapDataDestroy(overlap_data); + } + if (A) + { + hypre_ParCSRMatrixDestroy(A); + } + + if (test_comm != MPI_COMM_NULL) + { + PRINT_TEST_RESULT(test_my_id, error); + hypre_MPI_Comm_free(&test_comm); + } + + /* Synchronize all processes before returning */ + hypre_MPI_Barrier(comm); + + return error; +} + +/*-------------------------------------------------------------------------- + * Unit test: 1D grid with 1D partitioning, overlap=8 + * Test case: 8x8 matrix on 4 processors + * With overlap=8, each processor should gather the entire global matrix + 
*--------------------------------------------------------------------------*/ +static HYPRE_Int +Test3_Grid1D_Part1D_Overlap8(MPI_Comm comm, HYPRE_Int print_matrices) +{ + hypre_ParCSRMatrix *A; + hypre_OverlapData *overlap_data; + hypre_CSRMatrix *A_local; + HYPRE_BigInt *col_map; + HYPRE_Int num_cols_local; + HYPRE_Int error = 0; + HYPRE_Int overlap_order = 8; + HYPRE_Int test_my_id, my_id, num_procs; + MPI_Comm test_comm = MPI_COMM_NULL; + HYPRE_Int participate = 0; + + hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm_size(comm, &num_procs); + + /* Create subcommunicator with first 4 processors */ + if (num_procs >= 4) + { + participate = (my_id < 4) ? 1 : hypre_MPI_UNDEFINED; + hypre_MPI_Comm_split(comm, participate, my_id, &test_comm); + } + else + { + if (my_id == 0) + { + hypre_printf("Test3_Grid1D_Part1D_Overlap8: Skipping (requires at least 4 processors)\n"); + } + return 0; + } + + /* Only participating processes run the test */ + if (test_comm == MPI_COMM_NULL) + { + /* Non-participating processes must still synchronize */ + hypre_MPI_Barrier(comm); + return 0; + } + + hypre_MPI_Comm_rank(test_comm, &test_my_id); + if (test_my_id == 0) + { + hypre_printf("Test3_Grid1D_Part1D_Overlap8 (4 procs): "); + } + + /* Create 8x8 1D Laplacian */ + A = Create1DLaplacian(test_comm, 8, 4, test_my_id); + + /* Compute overlap */ + if (!hypre_ParCSRMatrixCommPkg(A)) + { + hypre_MatvecCommPkgCreate(A); + } + hypre_ParCSRMatrixComputeOverlap(A, overlap_order, &overlap_data); + + /* Get overlap (external) matrix */ + hypre_ParCSRMatrixGetExternalMatrix(A, overlap_data); + + /* Extract local overlap matrix */ + hypre_ParCSRMatrixCreateExtendedMatrix(A, overlap_data, &A_local, &col_map, &num_cols_local); + + /* Create expected matrix - with overlap=8, all processors should have the full 8x8 matrix */ + { + hypre_CSRMatrix *A_expected = NULL; + HYPRE_Int num_extended = hypre_OverlapDataNumExtendedRows(overlap_data); + + /* Expected: full 8x8 matrix for all processors */ + 
HYPRE_Int I_expected[9] = {0, 2, 5, 8, 11, 14, 17, 20, 22}; + HYPRE_Int J_expected[22] = {0, 1, /* row 0: columns 0,1 */ + 0, 1, 2, /* row 1: columns 0,1,2 */ + 1, 2, 3, /* row 2: columns 1,2,3 */ + 2, 3, 4, /* row 3: columns 2,3,4 */ + 3, 4, 5, /* row 4: columns 3,4,5 */ + 4, 5, 6, /* row 5: columns 4,5,6 */ + 5, 6, 7, /* row 6: columns 5,6,7 */ + 6, 7 /* row 7: columns 6,7 */ + }; + HYPRE_Real data_expected[22] = {2.0, -1.0, /* row 0 */ + -1.0, 2.0, -1.0, /* row 1 */ + -1.0, 2.0, -1.0, /* row 2 */ + -1.0, 2.0, -1.0, /* row 3 */ + -1.0, 2.0, -1.0, /* row 4 */ + -1.0, 2.0, -1.0, /* row 5 */ + -1.0, 2.0, -1.0, /* row 6 */ + -1.0, 2.0 /* row 7 */ + }; + + A_expected = CreateCSRMatrixFromData(8, 8, 22, I_expected, J_expected, data_expected); + + if (A_expected) + { + if (print_matrices) + { + char filename_expected[256]; + char filename_computed[256]; + hypre_sprintf(filename_expected, "test3_expected_ij.out.%05d", test_my_id); + hypre_sprintf(filename_computed, "test3_computed_ij.out.%05d", test_my_id); + hypre_CSRMatrixPrintIJ(A_expected, 0, 0, filename_expected); + hypre_CSRMatrixPrintIJ(A_local, 0, 0, filename_computed); + } + + /* Verify that we got all 8 rows */ + if (num_extended != 8) + { + hypre_printf("Proc %d: Expected 8 extended rows, got %d\n", test_my_id, num_extended); + error = 1; + } + + if (CompareCSRMatrices(A_expected, A_local, CHECK_TOLERANCE) != 0) + { + hypre_printf("Proc %d: Extracted matrix does not match expected matrix\n", test_my_id); + error = 1; + } + hypre_CSRMatrixDestroy(A_expected); + } + } + + /* Cleanup */ + if (A_local) + { + hypre_CSRMatrixDestroy(A_local); + } + if (col_map) + { + hypre_TFree(col_map, HYPRE_MEMORY_HOST); + } + if (overlap_data) + { + hypre_OverlapDataDestroy(overlap_data); + } + if (A) + { + hypre_ParCSRMatrixDestroy(A); + } + + if (test_comm != MPI_COMM_NULL) + { + PRINT_TEST_RESULT(test_my_id, error); + hypre_MPI_Comm_free(&test_comm); + } + + /* Synchronize all processes before returning */ + 
hypre_MPI_Barrier(comm); + + return error; +} + +/*-------------------------------------------------------------------------- + * Unit test: 2D grid with 1D partitioning, overlap=2 + * Test case: 4x4 2D grid on 2 processors + * Tests overlap extraction for 2D problem with 1D processor layout + *--------------------------------------------------------------------------*/ +static HYPRE_Int +Test4_Grid2D_Part1D_Overlap2(MPI_Comm comm, HYPRE_Int print_matrices) +{ + hypre_ParCSRMatrix *A; + hypre_OverlapData *overlap_data; + hypre_CSRMatrix *A_local; + HYPRE_BigInt *col_map; + HYPRE_Int num_cols_local; + HYPRE_Int error = 0; + HYPRE_Int overlap_order = 2; + HYPRE_Int test_my_id, my_id, num_procs; + MPI_Comm test_comm = MPI_COMM_NULL; + HYPRE_Int participate = 0; + + hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm_size(comm, &num_procs); + + /* Create subcommunicator with first 2 processors */ + if (num_procs >= 2) + { + participate = (my_id < 2) ? 1 : hypre_MPI_UNDEFINED; + hypre_MPI_Comm_split(comm, participate, my_id, &test_comm); + } + else + { + if (my_id == 0) + { + hypre_printf("Test4_Grid2D_Part1D_Overlap2: Skipping (requires at least 2 processors)\n"); + } + hypre_MPI_Barrier(comm); + return 0; + } + + /* Only participating processes run the test */ + if (test_comm == MPI_COMM_NULL) + { + /* Non-participating processes must still synchronize */ + hypre_MPI_Barrier(comm); + return 0; + } + + hypre_MPI_Comm_rank(test_comm, &test_my_id); + if (test_my_id == 0) + { + hypre_printf("Test4_Grid2D_Part1D_Overlap2 (2 procs): "); + } + + /* Create 4x4 2D Laplacian with 1D partitioning */ + A = Create2DLaplacian(test_comm, 4, 4, 2, test_my_id); + + /* Compute overlap */ + if (!hypre_ParCSRMatrixCommPkg(A)) + { + hypre_MatvecCommPkgCreate(A); + } + hypre_ParCSRMatrixComputeOverlap(A, overlap_order, &overlap_data); + + /* Get overlap (external) matrix */ + hypre_ParCSRMatrixGetExternalMatrix(A, overlap_data); + + /* Extract local overlap matrix */ + 
hypre_ParCSRMatrixCreateExtendedMatrix(A, overlap_data, &A_local, &col_map, &num_cols_local); + + /* Create expected matrix and compare */ + { + hypre_CSRMatrix *A_expected = NULL; + HYPRE_Int num_extended = hypre_OverlapDataNumExtendedRows(overlap_data); + + /* With overlap=2 on a 4x4 grid, both processors get the full 16x16 matrix */ + /* 2D 5-point stencil structure for 4x4 grid (row-major ordering) */ + HYPRE_Int I_expected[17] = {0, 3, 7, 11, 16, 20, 25, 28, 32, 36, 39, 44, 48, 53, 57, 61, 64}; + HYPRE_Int J_expected[64] = + { + 0, 1, 2, /* row 0 */ + 1, 0, 3, 8, /* row 1 */ + 2, 0, 3, 4, /* row 2 */ + 3, 1, 2, 5, 10, /* row 3 */ + 4, 2, 5, 6, /* row 4 */ + 5, 3, 4, 7, 12, /* row 5 */ + 6, 4, 7, /* row 6 */ + 7, 5, 6, 14, /* row 7 */ + 8, 1, 9, 10, /* row 8 */ + 9, 8, 11, /* row 9 */ + 10, 3, 8, 11, 12, /* row 10 */ + 11, 9, 10, 13, /* row 11 */ + 12, 5, 10, 13, 14, /* row 12 */ + 13, 11, 12, 15, /* row 13 */ + 14, 7, 12, 15, /* row 14 */ + 15, 13, 14 /* row 15 */ + }; + HYPRE_Real data_expected[64] = + { + 4.0, -1.0, -1.0, /* row 0 */ + 4.0, -1.0, -1.0, -1.0, /* row 1 */ + 4.0, -1.0, -1.0, -1.0, /* row 2 */ + 4.0, -1.0, -1.0, -1.0, -1.0, /* row 3 */ + 4.0, -1.0, -1.0, -1.0, /* row 4 */ + 4.0, -1.0, -1.0, -1.0, -1.0, /* row 5 */ + 4.0, -1.0, -1.0, /* row 6 */ + 4.0, -1.0, -1.0, -1.0, /* row 7 */ + 4.0, -1.0, -1.0, -1.0, /* row 8 */ + 4.0, -1.0, -1.0, /* row 9 */ + 4.0, -1.0, -1.0, -1.0, -1.0, /* row 10 */ + 4.0, -1.0, -1.0, -1.0, /* row 11 */ + 4.0, -1.0, -1.0, -1.0, -1.0, /* row 12 */ + 4.0, -1.0, -1.0, -1.0, /* row 13 */ + 4.0, -1.0, -1.0, -1.0, /* row 14 */ + 4.0, -1.0, -1.0 /* row 15 */ + }; + + A_expected = CreateCSRMatrixFromData(16, 16, 64, I_expected, J_expected, data_expected); + + if (A_expected) + { + if (print_matrices) + { + char filename_expected[256]; + char filename_computed[256]; + hypre_sprintf(filename_expected, "test4_expected_ij.out.%05d", test_my_id); + hypre_sprintf(filename_computed, "test4_computed_ij.out.%05d", test_my_id); + 
hypre_CSRMatrixPrintIJ(A_expected, 0, 0, filename_expected); + hypre_CSRMatrixPrintIJ(A_local, 0, 0, filename_computed); + } + if (CompareCSRMatrices(A_expected, A_local, CHECK_TOLERANCE) != 0) + { + hypre_printf("Proc %d: Extracted matrix does not match expected matrix\n", test_my_id); + error = 1; + } + hypre_CSRMatrixDestroy(A_expected); + } + } + + /* Cleanup */ + if (A_local) + { + hypre_CSRMatrixDestroy(A_local); + } + if (col_map) + { + hypre_TFree(col_map, HYPRE_MEMORY_HOST); + } + if (overlap_data) + { + hypre_OverlapDataDestroy(overlap_data); + } + if (A) + { + hypre_ParCSRMatrixDestroy(A); + } + + if (test_comm != MPI_COMM_NULL) + { + PRINT_TEST_RESULT(test_my_id, error); + hypre_MPI_Comm_free(&test_comm); + } + + /* Synchronize all processes before returning */ + hypre_MPI_Barrier(comm); + + return error; +} + +/*-------------------------------------------------------------------------- + * Unit test: 2D grid with 2D partitioning, overlap=2 + * Test case: 4x4 2D grid on 2x2 processor grid (4 processors) + * Tests overlap extraction for 2D problem with 2D processor layout + *--------------------------------------------------------------------------*/ +static HYPRE_Int +Test5_Grid2D_Part2D_Overlap1(MPI_Comm comm, HYPRE_Int print_matrices) +{ + hypre_ParCSRMatrix *A; + hypre_OverlapData *overlap_data; + hypre_CSRMatrix *A_local; + HYPRE_BigInt *col_map; + HYPRE_Int num_cols_local; + HYPRE_Int error = 0; + HYPRE_Int overlap_order = 1; + HYPRE_Int test_my_id, my_id, num_procs; + MPI_Comm test_comm = MPI_COMM_NULL; + HYPRE_Int participate = 0; + + hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm_size(comm, &num_procs); + + /* Create subcommunicator with first 4 processors */ + if (num_procs >= 4) + { + participate = (my_id < 4) ? 
1 : hypre_MPI_UNDEFINED; + hypre_MPI_Comm_split(comm, participate, my_id, &test_comm); + } + else + { + if (my_id == 0) + { + hypre_printf("Test5_Grid2D_Part2D_Overlap2: Skipping (requires at least 4 processors)\n"); + } + hypre_MPI_Barrier(comm); + return 0; + } + + /* Only participating processes run the test */ + if (test_comm == MPI_COMM_NULL) + { + /* Non-participating processes must still synchronize */ + hypre_MPI_Barrier(comm); + return 0; + } + + hypre_MPI_Comm_rank(test_comm, &test_my_id); + if (test_my_id == 0) + { + hypre_printf("Test5_Grid2D_Part2D_Overlap1 (4 procs): "); + } + + /* Create 4x4 2D Laplacian with 2x2 processor partitioning */ + A = Create2DLaplacian2DPart(test_comm, 4, 4, 2, 2, test_my_id); + + /* Compute overlap */ + if (!hypre_ParCSRMatrixCommPkg(A)) + { + hypre_MatvecCommPkgCreate(A); + } + hypre_ParCSRMatrixComputeOverlap(A, overlap_order, &overlap_data); + + /* Get overlap (external) matrix */ + hypre_ParCSRMatrixGetExternalMatrix(A, overlap_data); + + /* Extract local overlap matrix */ + hypre_ParCSRMatrixCreateExtendedMatrix(A, overlap_data, &A_local, &col_map, &num_cols_local); + + /* Create expected matrix and compare */ + { + hypre_CSRMatrix *A_expected = NULL; + + if (!A_local) + { + hypre_printf("Proc %d: Failed to extract local overlap matrix\n", test_my_id); + error = 1; + } + else + { + HYPRE_Int num_rows_local = hypre_CSRMatrixNumRows(A_local); + HYPRE_Int num_cols_local_actual = hypre_CSRMatrixNumCols(A_local); + + /* Expected matrices (8x8) for each process */ + if (num_rows_local == 8 && num_cols_local_actual == 8) + { + if (test_my_id == 0) + { + HYPRE_Int I_expected[9] = {0, 3, 7, 11, 16, 19, 22, 25, 28}; + HYPRE_Int J_expected[28] = {0, 1, 2, 0, 1, 3, 4, 0, 2, 3, 6, 1, 2, 3, 5, 7, 1, 4, 5, 3, 4, 5, 2, 6, 7, 3, 6, 7}; + HYPRE_Real data_expected[28] = + { + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, 4.0, + -1.0, -1.0, 
-1.0, 4.0 + }; + A_expected = CreateCSRMatrixFromData(8, 8, 28, I_expected, J_expected, data_expected); + } + else if (test_my_id == 1) + { + HYPRE_Int I_expected[9] = {0, 3, 6, 10, 13, 18, 22, 25, 28}; + HYPRE_Int J_expected[28] = {0, 1, 2, 0, 1, 4, 0, 2, 3, 4, 2, 3, 5, 1, 2, 4, 5, 6, 3, 4, 5, 7, 4, 6, 7, 5, 6, 7}; + HYPRE_Real data_expected[28] = + { + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, 4.0 + }; + A_expected = CreateCSRMatrixFromData(8, 8, 28, I_expected, J_expected, data_expected); + } + else if (test_my_id == 2) + { + HYPRE_Int I_expected[9] = {0, 3, 6, 10, 15, 18, 22, 25, 28}; + HYPRE_Int J_expected[28] = {0, 1, 2, 0, 1, 3, 0, 2, 3, 4, 1, 2, 3, 5, 6, 2, 4, 5, 3, 4, 5, 7, 3, 6, 7, 5, 6, 7}; + HYPRE_Real data_expected[28] = + { + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, 4.0 + }; + A_expected = CreateCSRMatrixFromData(8, 8, 28, I_expected, J_expected, data_expected); + } + else if (test_my_id == 3) + { + HYPRE_Int I_expected[9] = {0, 3, 6, 9, 12, 17, 21, 25, 28}; + HYPRE_Int J_expected[28] = {0, 1, 4, 0, 1, 5, 2, 3, 4, 2, 3, 6, 0, 2, 4, 5, 6, 1, 4, 5, 7, 3, 4, 6, 7, 5, 6, 7}; + HYPRE_Real data_expected[28] = + { + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, 4.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, 4.0 + }; + A_expected = CreateCSRMatrixFromData(8, 8, 28, I_expected, J_expected, data_expected); + } + } + else + { + hypre_printf("Proc %d: Unexpected matrix dimensions: %d x %d (expected 8 x 8)\n", + test_my_id, num_rows_local, num_cols_local_actual); + error = 1; + A_expected = NULL; + } + + if (A_expected) + { + if (print_matrices) + { + char filename_expected[256]; + char filename_computed[256]; + hypre_sprintf(filename_expected, 
"test5_expected_ij.out.%05d", test_my_id); + hypre_sprintf(filename_computed, "test5_computed_ij.out.%05d", test_my_id); + hypre_CSRMatrixPrintIJ(A_expected, 0, 0, filename_expected); + hypre_CSRMatrixPrintIJ(A_local, 0, 0, filename_computed); + } + if (CompareCSRMatrices(A_expected, A_local, CHECK_TOLERANCE) != 0) + { + hypre_printf("Proc %d: Extracted matrix does not match expected matrix\n", test_my_id); + error = 1; + } + hypre_CSRMatrixDestroy(A_expected); + } + } + } + + /* Cleanup */ + if (A_local) + { + hypre_CSRMatrixDestroy(A_local); + } + if (col_map) + { + hypre_TFree(col_map, HYPRE_MEMORY_HOST); + } + if (overlap_data) + { + hypre_OverlapDataDestroy(overlap_data); + } + if (A) + { + hypre_ParCSRMatrixDestroy(A); + } + + if (test_comm != MPI_COMM_NULL) + { + PRINT_TEST_RESULT(test_my_id, error); + hypre_MPI_Comm_free(&test_comm); + } + + /* Synchronize all processes before returning */ + hypre_MPI_Barrier(comm); + + return error; +} + +/*-------------------------------------------------------------------------- + * Unit test: 2D grid with 2D partitioning, overlap=2 + * Test case: 4x4 2D grid on 2x2 processor grid (4 processors) + * Tests overlap extraction for 2D problem with 2D processor layout and overlap=2 + *--------------------------------------------------------------------------*/ +static HYPRE_Int +Test6_Grid2D_Part2D_Overlap2(MPI_Comm comm, HYPRE_Int print_matrices) +{ + hypre_ParCSRMatrix *A; + hypre_OverlapData *overlap_data; + hypre_CSRMatrix *A_local; + HYPRE_BigInt *col_map; + HYPRE_Int num_cols_local; + HYPRE_Int error = 0; + HYPRE_Int overlap_order = 2; + HYPRE_Int test_my_id, my_id, num_procs; + MPI_Comm test_comm = MPI_COMM_NULL; + HYPRE_Int participate = 0; + + hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm_size(comm, &num_procs); + + /* Create subcommunicator with first 4 processors */ + if (num_procs >= 4) + { + participate = (my_id < 4) ? 
1 : hypre_MPI_UNDEFINED; + hypre_MPI_Comm_split(comm, participate, my_id, &test_comm); + } + else + { + if (my_id == 0) + { + hypre_printf("Test6_Grid2D_Part2D_Overlap2: Skipping (requires at least 4 processors)\n"); + } + hypre_MPI_Barrier(comm); + return 0; + } + + /* Only participating processes run the test */ + if (test_comm == MPI_COMM_NULL) + { + /* Non-participating processes must still synchronize */ + hypre_MPI_Barrier(comm); + return 0; + } + + hypre_MPI_Comm_rank(test_comm, &test_my_id); + if (test_my_id == 0) + { + hypre_printf("Test6_Grid2D_Part2D_Overlap2 (4 procs): "); + } + + /* Create 4x4 2D Laplacian with 2x2 processor partitioning */ + { + A = Create2DLaplacian2DPart(test_comm, 4, 4, 2, 2, test_my_id); + } + + /* Compute overlap */ + if (!hypre_ParCSRMatrixCommPkg(A)) + { + hypre_MatvecCommPkgCreate(A); + } + hypre_ParCSRMatrixComputeOverlap(A, overlap_order, &overlap_data); + + /* Get overlap (external) matrix */ + hypre_ParCSRMatrixGetExternalMatrix(A, overlap_data); + + /* Extract local overlap matrix */ + hypre_ParCSRMatrixCreateExtendedMatrix(A, overlap_data, &A_local, &col_map, &num_cols_local); + + /* Create expected matrix and compare */ + { + hypre_CSRMatrix *A_expected = NULL; + + if (!A_local) + { + hypre_printf("Proc %d: Failed to extract local overlap matrix\n", test_my_id); + error = 1; + } + else + { + HYPRE_Int num_rows_local = hypre_CSRMatrixNumRows(A_local); + HYPRE_Int num_cols_local_actual = hypre_CSRMatrixNumCols(A_local); + + /* Hard-coded expected matrices from test6_expected_csr.out files (13x13 matrices) */ + if (num_rows_local == 13 && num_cols_local_actual == 13) + { + if (test_my_id == 0) + { + HYPRE_Int I_expected[14] = {0, 3, 7, 11, 16, 20, 23, 28, 31, 35, 40, 43, 46, 49}; + HYPRE_Int J_expected[49] = {0, 1, 2, 0, 1, 3, 4, 0, 2, 3, 8, 1, 2, 3, 6, 9, 1, 4, + 5, 6, 4, 5, 7, 3, 4, 6, 7, 12, 5, 6, 7, 2, 8, 9, 10, 3, + 8, 9, 11, 12, 8, 10, 11, 9, 10, 11, 6, 9, 12 + }; + HYPRE_Real data_expected[49] = + { + 4.0, -1.0, 
-1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + 4.0 + }; + A_expected = CreateCSRMatrixFromData(13, 13, 49, I_expected, J_expected, data_expected); + } + else if (test_my_id == 1) + { + HYPRE_Int I_expected[14] = {0, 3, 7, 10, 15, 19, 22, 27, 31, 34, 39, 43, 46, 49}; + HYPRE_Int J_expected[49] = {0, 1, 2, 0, 1, 3, 4, 0, 2, 3, 1, 2, 3, 6, 8, 1, 4, + 5, 6, 4, 5, 7, 3, 4, 6, 7, 9, 5, 6, 7, 10, 3, 8, 9, + 6, 8, 9, 10, 11, 7, 9, 10, 12, 9, 11, 12, 10, 11, 12 + }; + HYPRE_Real data_expected[49] = + { + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0 + }; + A_expected = CreateCSRMatrixFromData(13, 13, 49, I_expected, J_expected, data_expected); + } + else if (test_my_id == 2) + { + HYPRE_Int I_expected[14] = {0, 3, 6, 10, 15, 18, 22, 27, 30, 34, 39, 42, 46, 49}; + HYPRE_Int J_expected[49] = {0, 1, 2, 0, 1, 3, 0, 2, 3, 5, 1, 2, 3, 4, 6, 3, 4, + 9, 2, 5, 6, 7, 3, 5, 6, 8, 9, 5, 7, 8, 6, 7, 8, 11, + 4, 6, 9, 10, 11, 9, 10, 12, 8, 9, 11, 12, 10, 11, 12 + }; + HYPRE_Real data_expected[49] = + { + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0 + }; + A_expected = CreateCSRMatrixFromData(13, 13, 49, I_expected, J_expected, data_expected); + } + else if (test_my_id == 3) + { + HYPRE_Int I_expected[14] = {0, 3, 6, 9, 14, 18, 21, 26, 29, 33, 
38, 42, 46, 49}; + HYPRE_Int J_expected[49] = {0, 3, 6, 1, 2, 3, 1, 2, 4, 0, 1, 3, 4, 9, 2, 3, 4, + 10, 5, 6, 7, 0, 5, 6, 8, 9, 5, 7, 8, 6, 7, 8, 11, 3, + 6, 9, 10, 11, 4, 9, 10, 12, 8, 9, 11, 12, 10, 11, 12 + }; + HYPRE_Real data_expected[49] = + { + 4.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, -1.0, + 4.0, -1.0, 4.0, -1.0, + -1.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0 + }; + A_expected = CreateCSRMatrixFromData(13, 13, 49, I_expected, J_expected, data_expected); + } + } + else + { + hypre_printf("Proc %d: Unexpected matrix dimensions: %d x %d (expected 13 x 13)\n", + test_my_id, num_rows_local, num_cols_local_actual); + error = 1; + A_expected = NULL; + } + + if (A_expected) + { + if (print_matrices) + { + char filename_expected[256]; + char filename_computed[256]; + hypre_sprintf(filename_expected, "test6_expected_ij.out.%05d", test_my_id); + hypre_sprintf(filename_computed, "test6_computed_ij.out.%05d", test_my_id); + hypre_CSRMatrixPrintIJ(A_expected, 0, 0, filename_expected); + hypre_CSRMatrixPrintIJ(A_local, 0, 0, filename_computed); + } + if (CompareCSRMatrices(A_expected, A_local, CHECK_TOLERANCE) != 0) + { + hypre_printf("Proc %d: Extracted matrix does not match expected matrix\n", test_my_id); + error = 1; + } + hypre_CSRMatrixDestroy(A_expected); + } + } + } + + /* Cleanup */ + if (A_local) + { + hypre_CSRMatrixDestroy(A_local); + } + if (col_map) + { + hypre_TFree(col_map, HYPRE_MEMORY_HOST); + } + if (overlap_data) + { + hypre_OverlapDataDestroy(overlap_data); + } + if (A) + { + hypre_ParCSRMatrixDestroy(A); + } + + if (test_comm != MPI_COMM_NULL) + { + PRINT_TEST_RESULT(test_my_id, error); + hypre_MPI_Comm_free(&test_comm); + } + + /* Synchronize all processes before returning */ + hypre_MPI_Barrier(comm); + + return error; +} + 
+/*-------------------------------------------------------------------------- + * Unit test: 2D grid with 2D partitioning, overlap=3 + * Test case: 4x4 2D grid on 2x2 processor grid (4 processors) + * Tests overlap extraction for 2D problem with 2D processor layout and overlap=3 + *--------------------------------------------------------------------------*/ +static HYPRE_Int +Test7_Grid2D_Part2D_Overlap3(MPI_Comm comm, HYPRE_Int print_matrices) +{ + hypre_ParCSRMatrix *A; + hypre_OverlapData *overlap_data; + hypre_CSRMatrix *A_local; + HYPRE_BigInt *col_map; + HYPRE_Int num_cols_local; + HYPRE_Int error = 0; + HYPRE_Int overlap_order = 3; + HYPRE_Int test_my_id, my_id, num_procs; + MPI_Comm test_comm = MPI_COMM_NULL; + HYPRE_Int participate = 0; + + hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm_size(comm, &num_procs); + + /* Create subcommunicator with first 4 processors */ + if (num_procs >= 4) + { + participate = (my_id < 4) ? 1 : hypre_MPI_UNDEFINED; + hypre_MPI_Comm_split(comm, participate, my_id, &test_comm); + } + else + { + if (my_id == 0) + { + hypre_printf("Test7_Grid2D_Part2D_Overlap3: Skipping (requires at least 4 processors)\n"); + } + hypre_MPI_Barrier(comm); + return 0; + } + + /* Only participating processes run the test */ + if (test_comm == MPI_COMM_NULL) + { + /* Non-participating processes must still synchronize */ + hypre_MPI_Barrier(comm); + return 0; + } + + hypre_MPI_Comm_rank(test_comm, &test_my_id); + if (test_my_id == 0) + { + hypre_printf("Test7_Grid2D_Part2D_Overlap3 (4 procs): "); + } + + /* Create 4x4 2D Laplacian with 2x2 processor partitioning */ + A = Create2DLaplacian2DPart(test_comm, 4, 4, 2, 2, test_my_id); + + /* Compute overlap */ + if (!hypre_ParCSRMatrixCommPkg(A)) + { + hypre_MatvecCommPkgCreate(A); + } + hypre_ParCSRMatrixComputeOverlap(A, overlap_order, &overlap_data); + + /* Get overlap (external) matrix */ + hypre_ParCSRMatrixGetExternalMatrix(A, overlap_data); + + /* Extract local overlap matrix */ + 
hypre_ParCSRMatrixCreateExtendedMatrix(A, overlap_data, &A_local, &col_map, &num_cols_local); + + /* Create expected matrix and compare */ + { + hypre_CSRMatrix *A_expected = NULL; + + if (!A_local) + { + hypre_printf("Proc %d: Failed to extract local overlap matrix\n", test_my_id); + error = 1; + } + else + { + HYPRE_Int num_rows_local = hypre_CSRMatrixNumRows(A_local); + HYPRE_Int num_cols_local_actual = hypre_CSRMatrixNumCols(A_local); + + /* Hard-coded expected matrices from test7_expected_csr.out files (15x15 matrices) */ + if (num_rows_local == 15 && num_cols_local_actual == 15) + { + if (test_my_id == 0) + { + HYPRE_Int I_expected[16] = {0, 3, 7, 11, 16, 20, 23, 28, 32, 36, 41, 44, 48, 53, 56, 59}; + HYPRE_Int J_expected[59] = {0, 1, 2, 0, 1, 3, 4, 0, 2, 3, 8, 1, 2, 3, 6, 9, 1, 4, 5, 6, 4, 5, + 7, 3, 4, 6, 7, 12, 5, 6, 7, 13, 2, 8, 9, 10, 3, 8, 9, 11, 12, 8, 10, + 11, 9, 10, 11, 14, 6, 9, 12, 13, 14, 7, 12, 13, 11, 12, 14 + }; + HYPRE_Real data_expected[59] = + { + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, -1.0, -1.0, 4.0, + -1.0, -1.0, 4.0 + }; + A_expected = CreateCSRMatrixFromData(15, 15, 59, I_expected, J_expected, data_expected); + } + else if (test_my_id == 1) + { + HYPRE_Int I_expected[16] = {0, 3, 7, 11, 16, 20, 23, 28, 32, 35, 40, 43, 48, 52, 56, 59}; + HYPRE_Int J_expected[59] = {0, 1, 2, 0, 1, 3, 4, 0, 2, 3, 8, 1, 2, 3, 6, 9, 1, 4, 5, 6, 4, 5, + 7, 3, 4, 6, 7, 11, 5, 6, 7, 12, 2, 8, 9, 3, 8, 9, 10, 11, 9, 10, 13, + 6, 9, 11, 12, 13, 7, 11, 12, 14, 10, 11, 13, 14, 12, 13, 14 + }; + HYPRE_Real data_expected[59] = + { + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + 
-1.0, 4.0, -1.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, -1.0, 4.0 + }; + A_expected = CreateCSRMatrixFromData(15, 15, 59, I_expected, J_expected, data_expected); + } + else if (test_my_id == 2) + { + HYPRE_Int I_expected[16] = {0, 3, 7, 11, 16, 19, 24, 27, 31, 36, 39, 43, 48, 52, 56, 59}; + HYPRE_Int J_expected[59] = {0, 1, 2, 0, 1, 3, 4, 0, 2, 3, 7, 1, 2, 3, 5, 8, 1, 4, 5, 3, 4, 5, + 6, 11, 5, 6, 12, 2, 7, 8, 9, 3, 7, 8, 10, 11, 7, 9, 10, 8, 9, 10, + 13, 5, 8, 11, 12, 13, 6, 11, 12, 14, 10, 11, 13, 14, 12, 13, 14 + }; + HYPRE_Real data_expected[59] = + { + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, -1.0, 4.0 + }; + A_expected = CreateCSRMatrixFromData(15, 15, 59, I_expected, J_expected, data_expected); + } + else if (test_my_id == 3) + { + HYPRE_Int I_expected[16] = {0, 3, 6, 11, 15, 18, 23, 27, 31, 36, 39, 43, 48, 52, 56, 59}; + HYPRE_Int J_expected[59] = {0, 2, 3, 1, 2, 7, 0, 1, 2, 5, 8, 0, 3, 4, 5, 3, 4, 6, 2, 3, 5, + 6, 11, 4, 5, 6, 12, 1, 7, 8, 9, 2, 7, 8, 10, 11, 7, 9, 10, 8, 9, + 10, 13, 5, 8, 11, 12, 13, 6, 11, 12, 14, 10, 11, 13, 14, 12, 13, 14 + }; + HYPRE_Real data_expected[59] = + { + 4.0, -1.0, -1.0, 4.0, + -1.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + 4.0, -1.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, 4.0, -1.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, -1.0, 4.0, -1.0, + -1.0, -1.0, 4.0 + }; + A_expected = CreateCSRMatrixFromData(15, 15, 59, 
I_expected, J_expected, data_expected); + } + } + else + { + hypre_printf("Proc %d: Unexpected matrix dimensions: %d x %d (expected 15 x 15)\n", + test_my_id, num_rows_local, num_cols_local_actual); + error = 1; + A_expected = NULL; + } + + if (A_expected) + { + if (print_matrices) + { + char filename_expected[256]; + char filename_computed[256]; + hypre_sprintf(filename_expected, "test7_expected_ij.out.%05d", test_my_id); + hypre_sprintf(filename_computed, "test7_computed_ij.out.%05d", test_my_id); + hypre_CSRMatrixPrintIJ(A_expected, 0, 0, filename_expected); + hypre_CSRMatrixPrintIJ(A_local, 0, 0, filename_computed); + } + if (CompareCSRMatrices(A_expected, A_local, CHECK_TOLERANCE) != 0) + { + hypre_printf("Proc %d: Extracted matrix does not match expected matrix\n", test_my_id); + error = 1; + } + hypre_CSRMatrixDestroy(A_expected); + } + } + } + + /* Cleanup */ + if (A_local) + { + hypre_CSRMatrixDestroy(A_local); + } + if (col_map) + { + hypre_TFree(col_map, HYPRE_MEMORY_HOST); + } + if (overlap_data) + { + hypre_OverlapDataDestroy(overlap_data); + } + if (A) + { + hypre_ParCSRMatrixDestroy(A); + } + + if (test_comm != MPI_COMM_NULL) + { + PRINT_TEST_RESULT(test_my_id, error); + hypre_MPI_Comm_free(&test_comm); + } + + /* Synchronize all processes before returning */ + hypre_MPI_Barrier(comm); + + return error; +} + +/*-------------------------------------------------------------------------- + * Unit test: 3D grid with 3D partitioning, overlap=1 + * Test case: 3x3x3 3D grid on 2x2x2 processor grid (8 processors) + * Tests overlap extraction for 3D problem with 3D processor layout and overlap=1 + *--------------------------------------------------------------------------*/ +static HYPRE_Int +Test8_Grid3D_Part3D_Overlap1(MPI_Comm comm, HYPRE_Int print_matrices) +{ + hypre_ParCSRMatrix *A; + hypre_OverlapData *overlap_data; + hypre_CSRMatrix *A_local; + HYPRE_BigInt *col_map; + HYPRE_Int num_cols_local; + HYPRE_Int error = 0; + HYPRE_Int overlap_order = 1; + 
HYPRE_Int test_my_id, my_id, num_procs; + MPI_Comm test_comm = MPI_COMM_NULL; + HYPRE_Int participate = 0; + + hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm_size(comm, &num_procs); + + /* Create subcommunicator with first 8 processors */ + if (num_procs >= 8) + { + participate = (my_id < 8) ? 1 : hypre_MPI_UNDEFINED; + hypre_MPI_Comm_split(comm, participate, my_id, &test_comm); + } + else + { + if (my_id == 0) + { + hypre_printf("Test8_Grid3D_Part3D_Overlap1: Skipping (requires at least 8 processors)\n"); + } + hypre_MPI_Barrier(comm); + return 0; + } + + /* Only participating processes run the test */ + if (test_comm == MPI_COMM_NULL) + { + /* Non-participating processes must still synchronize */ + hypre_MPI_Barrier(comm); + return 0; + } + + hypre_MPI_Comm_rank(test_comm, &test_my_id); + if (test_my_id == 0) + { + hypre_printf("Test8_Grid3D_Part3D_Overlap1 (8 procs): "); + } + + /* Create 3x3x3 3D Laplacian with 2x2x2 processor partitioning */ + A = Create3DLaplacian3DPart(test_comm, 3, 3, 3, 2, 2, 2, test_my_id); + + /* Compute overlap */ + if (!hypre_ParCSRMatrixCommPkg(A)) + { + hypre_MatvecCommPkgCreate(A); + } + hypre_ParCSRMatrixComputeOverlap(A, overlap_order, &overlap_data); + + /* Get overlap rows */ + hypre_ParCSRMatrixGetExternalMatrix(A, overlap_data); + + /* Extract local overlap matrix */ + hypre_ParCSRMatrixCreateExtendedMatrix(A, overlap_data, &A_local, &col_map, &num_cols_local); + + /* Create expected matrix and compare */ + { + hypre_CSRMatrix *A_expected = NULL; + HYPRE_Int test_my_id; + hypre_MPI_Comm_rank(test_comm, &test_my_id); + + if (!A_local) + { + hypre_printf("Proc %d: Failed to extract local overlap matrix\n", test_my_id); + error = 1; + } + else + { + HYPRE_Int num_rows_local = hypre_CSRMatrixNumRows(A_local); + HYPRE_Int num_cols_local_actual = hypre_CSRMatrixNumCols(A_local); + + /* Hard-coded expected matrices from test8_expected_csr.out files */ + if (test_my_id == 0) + { + /* 20x20, nnz=92 */ + if (num_rows_local == 20 && 
num_cols_local_actual == 20) + { + HYPRE_Int I_expected[21] = {0, 4, 9, 14, 20, 25, 31, 37, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92}; + HYPRE_Int J_expected[92] = {0, 1, 2, 4, 0, 1, 3, 5, 8, 0, 2, 3, 6, 12, 1, 2, 3, 7, 9, 13, + 0, 4, 5, 6, 16, 1, 4, 5, 7, 10, 17, 2, 4, 6, 7, 14, 18, 3, 5, + 6, 7, 11, 15, 19, 1, 8, 9, 10, 3, 8, 9, 11, 5, 8, 10, 11, 7, 9, + 10, 11, 2, 12, 13, 14, 3, 12, 13, 15, 6, 12, 14, 15, 7, 13, 14, + 15, 4, 16, 17, 18, 5, 16, 17, 19, 6, 16, 18, 19, 7, 17, 18, 19 + }; + HYPRE_Real data_expected[92] = + { + 6.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, -1.0, + 6.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, -1.0, -1.0, + 6.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0 + }; + A_expected = CreateCSRMatrixFromData(20, 20, 92, I_expected, J_expected, data_expected); + } + else + { + hypre_printf("Proc %d: Unexpected matrix dimensions: %d x %d (expected 20 x 20)\n", + test_my_id, num_rows_local, num_cols_local_actual); + error = 1; + } + } + else if (test_my_id == 1) + { + /* 12x12, nnz=48 */ + if (num_rows_local == 12 && num_cols_local_actual == 12) + { + HYPRE_Int I_expected[13] = {0, 4, 8, 12, 16, 20, 25, 30, 36, 39, 42, 45, 48}; + HYPRE_Int J_expected[48] = {0, 1, 2, 4, 0, 1, 3, 5, 0, 2, 3, 6, 1, 2, 3, 7, 0, 4, 5, 6, + 1, 4, 5, 7, 8, 2, 4, 6, 7, 10, 3, 5, 6, 7, 9, 11, 5, 8, 9, + 7, 8, 9, 6, 10, 11, 7, 10, 11 + }; + HYPRE_Real data_expected[48] = + { + 6.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, 
-1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, 6.0, -1.0, 6.0, + -1.0, -1.0, -1.0, 6.0 + }; + A_expected = CreateCSRMatrixFromData(12, 12, 48, I_expected, J_expected, data_expected); + } + else + { + hypre_printf("Proc %d: Unexpected matrix dimensions: %d x %d (expected 12 x 12)\n", + test_my_id, num_rows_local, num_cols_local_actual); + error = 1; + } + } + else if (test_my_id == 2) + { + /* 12x12, nnz=48 */ + if (num_rows_local == 12 && num_cols_local_actual == 12) + { + HYPRE_Int I_expected[13] = {0, 4, 8, 12, 16, 20, 25, 30, 36, 39, 42, 45, 48}; + HYPRE_Int J_expected[48] = {0, 1, 2, 4, 0, 1, 3, 5, 0, 2, 3, 6, 1, 2, 3, 7, + 0, 4, 5, 6, 1, 4, 5, 7, 8, 2, 4, 6, 7, 10, 3, 5, + 6, 7, 9, 11, 5, 8, 9, 7, 8, 9, 6, 10, 11, 7, 10, 11 + }; + HYPRE_Real data_expected[48] = + { + 6.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, 6.0, -1.0, 6.0, + -1.0, -1.0, -1.0, 6.0 + }; + A_expected = CreateCSRMatrixFromData(12, 12, 48, I_expected, J_expected, data_expected); + } + else + { + hypre_printf("Proc %d: Unexpected matrix dimensions: %d x %d (expected 12 x 12)\n", + test_my_id, num_rows_local, num_cols_local_actual); + error = 1; + } + } + else if (test_my_id == 3) + { + /* 7x7, nnz=23 */ + if (num_rows_local == 7 && num_cols_local_actual == 7) + { + HYPRE_Int I_expected[8] = {0, 3, 6, 9, 12, 16, 21, 23}; + HYPRE_Int J_expected[23] = {0, 1, 4, 0, 1, 5, 2, 3, 4, 2, 3, 5, 0, 2, 4, 5, 1, 3, 4, 5, 6, 5, 6}; + HYPRE_Real data_expected[23] = + { + 6.0, -1.0, -1.0, -1.0, + 6.0, -1.0, 6.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, 6.0 + }; + A_expected = CreateCSRMatrixFromData(7, 7, 23, I_expected, J_expected, data_expected); + } + else + { + hypre_printf("Proc %d: Unexpected matrix dimensions: %d x %d 
(expected 7 x 7)\n", + test_my_id, num_rows_local, num_cols_local_actual); + error = 1; + } + } + else if (test_my_id == 4) + { + /* 12x12, nnz=48 */ + if (num_rows_local == 12 && num_cols_local_actual == 12) + { + HYPRE_Int I_expected[13] = {0, 4, 8, 12, 16, 20, 25, 30, 36, 39, 42, 45, 48}; + HYPRE_Int J_expected[48] = {0, 1, 2, 4, 0, 1, 3, 5, 0, 2, 3, 6, 1, 2, 3, 7, + 0, 4, 5, 6, 1, 4, 5, 7, 8, 2, 4, 6, 7, 10, 3, 5, + 6, 7, 9, 11, 5, 8, 9, 7, 8, 9, 6, 10, 11, 7, 10, 11 + }; + HYPRE_Real data_expected[48] = + { + 6.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, 6.0, -1.0, 6.0, + -1.0, -1.0, -1.0, 6.0 + }; + A_expected = CreateCSRMatrixFromData(12, 12, 48, I_expected, J_expected, data_expected); + } + else + { + hypre_printf("Proc %d: Unexpected matrix dimensions: %d x %d (expected 12 x 12)\n", + test_my_id, num_rows_local, num_cols_local_actual); + error = 1; + } + } + else if (test_my_id == 5) + { + /* 7x7, nnz=23 */ + if (num_rows_local == 7 && num_cols_local_actual == 7) + { + HYPRE_Int I_expected[8] = {0, 3, 6, 9, 12, 16, 21, 23}; + HYPRE_Int J_expected[23] = {0, 1, 4, 0, 1, 5, 2, 3, 4, 2, 3, 5, + 0, 2, 4, 5, 1, 3, 4, 5, 6, 5, 6 + }; + HYPRE_Real data_expected[23] = + { + 6.0, -1.0, -1.0, -1.0, + 6.0, -1.0, 6.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, 6.0 + }; + A_expected = CreateCSRMatrixFromData(7, 7, 23, I_expected, J_expected, data_expected); + } + else + { + hypre_printf("Proc %d: Unexpected matrix dimensions: %d x %d (expected 7 x 7)\n", + test_my_id, num_rows_local, num_cols_local_actual); + error = 1; + } + } + else if (test_my_id == 6) + { + /* 7x7, nnz=23 */ + if (num_rows_local == 7 && num_cols_local_actual == 7) + { + HYPRE_Int I_expected[8] = {0, 3, 6, 9, 12, 16, 21, 23}; + HYPRE_Int 
J_expected[23] = {0, 1, 4, 0, 1, 5, 2, 3, 4, 2, 3, 5, + 0, 2, 4, 5, 1, 3, 4, 5, 6, 5, 6 + }; + HYPRE_Real data_expected[23] = + { + 6.0, -1.0, -1.0, -1.0, + 6.0, -1.0, 6.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, 6.0 + }; + A_expected = CreateCSRMatrixFromData(7, 7, 23, I_expected, J_expected, data_expected); + } + else + { + hypre_printf("Proc %d: Unexpected matrix dimensions: %d x %d (expected 7 x 7)\n", + test_my_id, num_rows_local, num_cols_local_actual); + error = 1; + } + } + else if (test_my_id == 7) + { + /* 4x4, nnz=10 */ + if (num_rows_local == 4 && num_cols_local_actual == 4) + { + HYPRE_Int I_expected[5] = {0, 2, 4, 6, 10}; + HYPRE_Int J_expected[10] = {0, 3, 1, 3, 2, 3, 0, 1, 2, 3}; + HYPRE_Real data_expected[10] = + { + 6.0, -1.0, 6.0, -1.0, + 6.0, -1.0, -1.0, -1.0, + -1.0, 6.0 + }; + A_expected = CreateCSRMatrixFromData(4, 4, 10, I_expected, J_expected, data_expected); + } + else + { + hypre_printf("Proc %d: Unexpected matrix dimensions: %d x %d (expected 4 x 4)\n", + test_my_id, num_rows_local, num_cols_local_actual); + error = 1; + } + } + else + { + hypre_printf("Proc %d: Unexpected processor ID (expected 0-7)\n", test_my_id); + error = 1; + A_expected = NULL; + } + + if (A_expected) + { + if (print_matrices) + { + char filename_expected[256]; + char filename_computed[256]; + hypre_sprintf(filename_expected, "test8_expected_ij.out.%05d", test_my_id); + hypre_sprintf(filename_computed, "test8_computed_ij.out.%05d", test_my_id); + hypre_CSRMatrixPrintIJ(A_expected, 0, 0, filename_expected); + hypre_CSRMatrixPrintIJ(A_local, 0, 0, filename_computed); + } + if (CompareCSRMatrices(A_expected, A_local, CHECK_TOLERANCE) != 0) + { + hypre_printf("Proc %d: Extracted matrix does not match expected matrix\n", test_my_id); + error = 1; + } + hypre_CSRMatrixDestroy(A_expected); + } + } + } + + /* Cleanup */ + if (A_local) + { + hypre_CSRMatrixDestroy(A_local); + } + if (col_map) + { + hypre_TFree(col_map, 
HYPRE_MEMORY_HOST); + } + if (overlap_data) + { + hypre_OverlapDataDestroy(overlap_data); + } + if (A) + { + hypre_ParCSRMatrixDestroy(A); + } + + if (test_comm != MPI_COMM_NULL) + { + PRINT_TEST_RESULT(test_my_id, error); + hypre_MPI_Comm_free(&test_comm); + } + + /* Synchronize all processes before returning */ + hypre_MPI_Barrier(comm); + + return error; +} + +/*-------------------------------------------------------------------------- + * Unit test: 3D grid with 3D partitioning, overlap=6 (full domain coverage) + * Test case: 3x3x3 3D grid on 2x2x2 processor grid (8 processors) + * Tests overlap extraction for 3D problem with 3D processor layout and overlap=6 + * With overlap=6, all processors should have the full global matrix (27x27) + *--------------------------------------------------------------------------*/ +static HYPRE_Int +Test9_Grid3D_Part3D_Overlap6(MPI_Comm comm, HYPRE_Int print_matrices) +{ + hypre_ParCSRMatrix *A; + hypre_OverlapData *overlap_data; + hypre_CSRMatrix *A_local; + HYPRE_BigInt *col_map; + HYPRE_Int num_cols_local; + HYPRE_Int error = 0; + HYPRE_Int overlap_order = 6; + HYPRE_Int test_my_id, my_id, num_procs; + MPI_Comm test_comm = MPI_COMM_NULL; + HYPRE_Int participate = 0; + + hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm_size(comm, &num_procs); + + /* Create subcommunicator with first 8 processors */ + if (num_procs >= 8) + { + participate = (my_id < 8) ? 
1 : hypre_MPI_UNDEFINED; + hypre_MPI_Comm_split(comm, participate, my_id, &test_comm); + } + else + { + if (my_id == 0) + { + hypre_printf("Test9_Grid3D_Part3D_Overlap6: Skipping (requires at least 8 processors)\n"); + } + hypre_MPI_Barrier(comm); + return 0; + } + + /* Only participating processes run the test */ + if (test_comm == MPI_COMM_NULL) + { + /* Non-participating processes must still synchronize */ + hypre_MPI_Barrier(comm); + return 0; + } + + hypre_MPI_Comm_rank(test_comm, &test_my_id); + if (test_my_id == 0) + { + hypre_printf("Test9_Grid3D_Part3D_Overlap6 (8 procs): "); + } + + /* Create 3x3x3 3D Laplacian with 2x2x2 processor partitioning */ + A = Create3DLaplacian3DPart(test_comm, 3, 3, 3, 2, 2, 2, test_my_id); + + /* Compute overlap */ + if (!hypre_ParCSRMatrixCommPkg(A)) + { + hypre_MatvecCommPkgCreate(A); + } + hypre_ParCSRMatrixComputeOverlap(A, overlap_order, &overlap_data); + + /* Get overlap rows */ + hypre_ParCSRMatrixGetExternalMatrix(A, overlap_data); + + /* Extract local overlap matrix */ + hypre_ParCSRMatrixCreateExtendedMatrix(A, overlap_data, &A_local, &col_map, &num_cols_local); + + /* Create expected matrix and compare */ + { + hypre_CSRMatrix *A_expected = NULL; + + if (!A_local) + { + hypre_printf("Proc %d: Failed to extract local overlap matrix\n", test_my_id); + error = 1; + } + else + { + HYPRE_Int num_rows_local = hypre_CSRMatrixNumRows(A_local); + HYPRE_Int num_cols_local_actual = hypre_CSRMatrixNumCols(A_local); + + /* All processors should have the same 27x27 matrix (full domain coverage) */ + if (num_rows_local == 27 && num_cols_local_actual == 27) + { + /* Same expected matrix for all 8 processors */ + HYPRE_Int I_expected[28] = {0, 4, 9, 14, 20, 25, 31, 37, 44, 48, 53, 58, 64, 68, 73, + 78, 84, 88, 93, 97, 102, 107, 113, 117, 122, 126, 131, 135}; + HYPRE_Int J_expected[135] = {0, 1, 2, 4, 0, 1, 3, 5, 8, 0, 2, 3, 6, 12, 1, 2, 3, 7, 9, 13, + 0, 4, 5, 6, 18, 1, 4, 5, 7, 10, 19, 2, 4, 6, 7, 14, 20, 3, 5, + 6, 7, 11, 15, 21, 
1, 8, 9, 10, 3, 8, 9, 11, 16, 5, 8, 10, 11, + 22, 7, 9, 10, 11, 17, 23, 2, 12, 13, 14, 3, 12, 13, 15, 16, 6, + 12, 14, 15, 24, 7, 13, 14, 15, 17, 25, 9, 13, 16, 17, 11, 15, + 16, 17, 26, 4, 18, 19, 20, 5, 18, 19, 21, 22, 6, 18, 20, 21, 24, + 7, 19, 20, 21, 23, 25, 10, 19, 22, 23, 11, 21, 22, 23, 26, 14, + 20, 24, 25, 15, 21, 24, 25, 26, 17, 23, 25, 26}; + HYPRE_Real data_expected[135] = { + 6.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, -1.0, + 6.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, -1.0, -1.0, + 6.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, -1.0, -1.0, + 6.0, -1.0, -1.0, -1.0, + -1.0, -1.0, 6.0, -1.0, + -1.0, -1.0, -1.0, 6.0, + -1.0, -1.0, -1.0, -1.0, + 6.0, -1.0, -1.0, -1.0, + 6.0, -1.0, -1.0, -1.0, + -1.0, 6.0, -1.0, -1.0, + -1.0, -1.0, 6.0 + }; + A_expected = CreateCSRMatrixFromData(27, 27, 135, I_expected, J_expected, data_expected); + } + else + { + hypre_printf("Proc %d: Unexpected matrix dimensions: %d x %d (expected 27 x 27)\n", + test_my_id, num_rows_local, num_cols_local_actual); + error = 1; + A_expected = NULL; + } + + if (A_expected) + { + if (print_matrices) + { + char filename_expected[256]; + char filename_computed[256]; + hypre_sprintf(filename_expected, "test9_expected_ij.out.%05d", test_my_id); + hypre_sprintf(filename_computed, "test9_computed_ij.out.%05d", test_my_id); + hypre_CSRMatrixPrintIJ(A_expected, 0, 0, filename_expected); + hypre_CSRMatrixPrintIJ(A_local, 0, 0, filename_computed); + } + if (CompareCSRMatrices(A_expected, A_local, CHECK_TOLERANCE) != 0) + { 
+ hypre_printf("Proc %d: Extracted matrix does not match expected matrix\n", test_my_id); + error = 1; + } + hypre_CSRMatrixDestroy(A_expected); + } + } + } + + /* Cleanup */ + if (A_local) + { + hypre_CSRMatrixDestroy(A_local); + } + if (col_map) + { + hypre_TFree(col_map, HYPRE_MEMORY_HOST); + } + if (overlap_data) + { + hypre_OverlapDataDestroy(overlap_data); + } + if (A) + { + hypre_ParCSRMatrixDestroy(A); + } + + if (test_comm != MPI_COMM_NULL) + { + PRINT_TEST_RESULT(test_my_id, error); + hypre_MPI_Comm_free(&test_comm); + } + + /* Synchronize all processes before returning */ + hypre_MPI_Barrier(comm); + + return error; +} + +/*-------------------------------------------------------------------------- + * Benchmark: Generate laplacian and test overlap extraction + *--------------------------------------------------------------------------*/ +static HYPRE_Int +BenchmarkOverlap(MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, + HYPRE_Int Px, HYPRE_Int Py, HYPRE_Int Pz, + HYPRE_Int overlap_order, HYPRE_Int print_matrices) +{ + HYPRE_Int my_id, num_procs; + HYPRE_ParCSRMatrix A; + hypre_OverlapData *overlap_data; + hypre_CSRMatrix *A_local; + HYPRE_BigInt *col_map; + HYPRE_Int num_cols_local; + HYPRE_Real time_start, time_end, time_overlap, time_extract; + HYPRE_Int num_extended, num_local, num_overlap; + + hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm_size(comm, &num_procs); + + if (my_id == 0) + { + hypre_printf("\nBenchmark: Overlap extraction\n"); + hypre_printf(" Problem size: %d x %d x %d\n", nx, ny, nz); + hypre_printf(" Processor grid: %d x %d x %d\n", Px, Py, Pz); + hypre_printf(" Overlap order: %d\n", overlap_order); + } + + /* Generate laplacian using GenerateLaplacian */ + { + HYPRE_Real *values; + HYPRE_Int p, q, r; + + /* Compute processor coordinates */ + p = my_id % Px; + q = ((my_id - p) / Px) % Py; + r = (my_id - p - Px * q) / (Px * Py); + + /* Set up 7-point stencil values */ + values = hypre_CTAlloc(HYPRE_Real, 4, 
HYPRE_MEMORY_HOST); + if (nx > 1) + { + values[0] += 2.0; + values[1] = -1.0; + } + if (ny > 1) + { + values[0] += 2.0; + values[2] = -1.0; + } + if (nz > 1) + { + values[0] += 2.0; + values[3] = -1.0; + } + + A = GenerateLaplacian(comm, nx, ny, nz, Px, Py, Pz, p, q, r, values); + + hypre_TFree(values, HYPRE_MEMORY_HOST); + } + + /* Print global matrix if requested */ + if (print_matrices) + { + char filename[256]; + hypre_sprintf(filename, "benchmark_global_matrix_ij.out"); + hypre_ParCSRMatrixPrintIJ(A, 0, 0, filename); + if (my_id == 0) + { + hypre_printf(" Printed global matrix to %s.\n", filename); + } + } + + /* Benchmark overlap computation */ + if (!hypre_ParCSRMatrixCommPkg(A)) + { + hypre_MatvecCommPkgCreate(A); + } + + time_start = hypre_MPI_Wtime(); + hypre_ParCSRMatrixComputeOverlap(A, overlap_order, &overlap_data); + hypre_MPI_Barrier(comm); + time_end = hypre_MPI_Wtime(); + time_overlap = time_end - time_start; + + /* Benchmark overlap row fetching */ + time_start = hypre_MPI_Wtime(); + hypre_ParCSRMatrixGetExternalMatrix(A, overlap_data); + hypre_MPI_Barrier(comm); + time_end = hypre_MPI_Wtime(); + time_overlap += (time_end - time_start); + + /* Benchmark local matrix extraction */ + time_start = hypre_MPI_Wtime(); + hypre_ParCSRMatrixCreateExtendedMatrix(A, overlap_data, &A_local, &col_map, &num_cols_local); + hypre_MPI_Barrier(comm); + time_end = hypre_MPI_Wtime(); + time_extract = time_end - time_start; + + /* Print overlapped matrices if requested */ + if (print_matrices) + { + char filename[256]; + hypre_sprintf(filename, "benchmark_overlap_matrix_ij.out.%05d", my_id); + hypre_CSRMatrixPrintIJ(A_local, 0, 0, filename); + } + + /* Gather statistics */ + num_extended = hypre_OverlapDataNumExtendedRows(overlap_data); + num_local = hypre_OverlapDataNumLocalRows(overlap_data); + num_overlap = hypre_OverlapDataNumOverlapRows(overlap_data); + + if (my_id == 0) + { + hypre_printf(" Overlap computation time: %e seconds\n", time_overlap); + 
hypre_printf(" Extraction time: %e seconds\n", time_extract); + hypre_printf(" Total time: %e seconds\n", time_overlap + time_extract); + } + + /* Print per-processor stats */ + { + HYPRE_Int i; + HYPRE_Int *local_nnz = hypre_TAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST); + HYPRE_Int *local_extended = hypre_TAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST); + HYPRE_Int *local_overlap = hypre_TAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST); + HYPRE_Int nnz_local = hypre_CSRMatrixNumNonzeros(A_local); + + hypre_MPI_Gather(&nnz_local, 1, HYPRE_MPI_INT, local_nnz, 1, HYPRE_MPI_INT, 0, comm); + hypre_MPI_Gather(&num_extended, 1, HYPRE_MPI_INT, local_extended, 1, HYPRE_MPI_INT, 0, comm); + hypre_MPI_Gather(&num_overlap, 1, HYPRE_MPI_INT, local_overlap, 1, HYPRE_MPI_INT, 0, comm); + + if (my_id == 0) + { + hypre_printf("\n Per-processor statistics:\n"); + hypre_printf(" Proc Extended Rows Overlap Rows Local NNZ\n"); + for (i = 0; i < num_procs; i++) + { + hypre_printf(" %3d %8d %8d %10d\n", + i, local_extended[i], local_overlap[i], local_nnz[i]); + } + } + + hypre_TFree(local_nnz, HYPRE_MEMORY_HOST); + hypre_TFree(local_extended, HYPRE_MEMORY_HOST); + hypre_TFree(local_overlap, HYPRE_MEMORY_HOST); + } + + /* Cleanup */ + hypre_CSRMatrixDestroy(A_local); + hypre_TFree(col_map, HYPRE_MEMORY_HOST); + hypre_OverlapDataDestroy(overlap_data); + HYPRE_ParCSRMatrixDestroy(A); + + return 0; +} + +/*-------------------------------------------------------------------------- + * Main function + *--------------------------------------------------------------------------*/ +int +main(int argc, char *argv[]) +{ + MPI_Comm comm; + HYPRE_Int my_id, num_procs; + HYPRE_Int error = 0; + HYPRE_Int test_mode = 1; /* 1=unit tests, 0=benchmark */ + HYPRE_Int nx, ny, nz; + HYPRE_Int Px, Py, Pz; + HYPRE_Int overlap_order = 1; + HYPRE_Int print_matrices = 0; + HYPRE_Int i; + + /* Initialize MPI and HYPRE */ + hypre_MPI_Init(&argc, &argv); + comm = hypre_MPI_COMM_WORLD; + hypre_MPI_Comm_rank(comm, 
&my_id); + hypre_MPI_Comm_size(comm, &num_procs); + + HYPRE_Init(); + + /* Parse command line */ + i = 1; + nx = num_procs * 64; ny = 64; nz = 64; + Px = num_procs; Py = 1; Pz = 1; + while (i < argc) + { + if (strcmp(argv[i], "-benchmark") == 0) + { + test_mode = 0; + i++; + } + else if (strcmp(argv[i], "-n") == 0) + { + nx = atoi(argv[++i]); + ny = atoi(argv[++i]); + nz = atoi(argv[++i]); + i++; + } + else if (strcmp(argv[i], "-P") == 0) + { + Px = atoi(argv[++i]); + Py = atoi(argv[++i]); + Pz = atoi(argv[++i]); + i++; + } + else if (strcmp(argv[i], "-ov") == 0) + { + overlap_order = atoi(argv[++i]); + i++; + } + else if (strcmp(argv[i], "-print") == 0) + { + print_matrices = 1; + i++; + } + else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-help") == 0) + { + if (my_id == 0) + { + hypre_printf("Usage: %s [options]\n", argv[0]); + hypre_printf("Options:\n"); + hypre_printf(" -benchmark : Run benchmark instead of unit tests\n"); + hypre_printf(" -n : Problem size (default: 64*num_procs 64 64)\n"); + hypre_printf(" -P : Processor grid (default: num_procs 1 1)\n"); + hypre_printf(" -ov : Overlap order (default: 1)\n"); + hypre_printf(" -print : Print expected and computed matrices to .ij files\n"); + hypre_printf(" -h, -help : Print this help\n"); + } + HYPRE_Finalize(); + hypre_MPI_Finalize(); + return 0; + } + else + { + if (my_id == 0) + { + hypre_printf("Unknown option: %s\n", argv[i]); + } + i++; + } + } + + if (test_mode) + { + /* Run unit tests */ + if (my_id == 0) + { + hypre_printf("\n========================================\n"); + hypre_printf("Unit Tests for Overlap Extraction\n"); + hypre_printf("========================================\n\n"); + } + + error += Test1_Grid1D_Part1D_Overlap1(comm, print_matrices); + error += Test2_Grid1D_Part1D_Overlap2(comm, print_matrices); + error += Test3_Grid1D_Part1D_Overlap8(comm, print_matrices); + error += Test4_Grid2D_Part1D_Overlap2(comm, print_matrices); + error += Test5_Grid2D_Part2D_Overlap1(comm, print_matrices); + error
+= Test6_Grid2D_Part2D_Overlap2(comm, print_matrices); + error += Test7_Grid2D_Part2D_Overlap3(comm, print_matrices); + error += Test8_Grid3D_Part3D_Overlap1(comm, print_matrices); + error += Test9_Grid3D_Part3D_Overlap6(comm, print_matrices); + + if (my_id == 0) + { + hypre_printf("\n"); + if (error == 0) + { + hypre_printf("All unit tests PASSED\n"); + } + else + { + hypre_printf("Some unit tests FAILED (errors: %d)\n", error); + } + } + } + else + { + /* Run Warmup */ + if (my_id == 0) + { + hypre_printf("\nWarmup phase..."); + } + BenchmarkOverlap(comm, 10 * num_procs, 10, 10, num_procs, 1, 1, 0, 0); + + /* Run Benchmark */ + BenchmarkOverlap(comm, nx, ny, nz, Px, Py, Pz, overlap_order, print_matrices); + } + + HYPRE_Finalize(); + hypre_MPI_Finalize(); + + return error; +} diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 50e2a288f8..798728331b 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -1027,7 +1027,7 @@ hypre_MPI_Comm_split( hypre_MPI_Comm comm, HYPRE_Int m, hypre_MPI_Comm *comms ) { - hypre_assert(n >= 0); + hypre_assert(n >= 0 || n == hypre_MPI_UNDEFINED); hypre_assert(m >= 0); return (HYPRE_Int) MPI_Comm_split(comm, (hypre_int)n, (hypre_int)m, comms);