From 1550fd99fec2caf862df4814ec21fef590f52f37 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Fri, 8 Nov 2024 20:25:52 -0700 Subject: [PATCH 01/50] Tacho : new options (dofs-per-node, pivot-tol, amd) Signed-off-by: iyamazaki --- packages/amesos2/src/Amesos2_Tacho_decl.hpp | 2 + packages/amesos2/src/Amesos2_Tacho_def.hpp | 27 +++++-- .../frosch/test/Thyra_Xpetra_Laplace/main.cpp | 1 + .../shylu_node/tacho/cmake/Tacho_config.h.in | 3 + .../tacho/example/Tacho_ExampleDriver.hpp | 36 +++++++-- .../tacho/src/Tacho_CrsMatrixBase.hpp | 5 +- .../shylu_node/tacho/src/Tacho_Driver.hpp | 54 ++++++++++++- .../tacho/src/impl/Tacho_Driver_Impl.hpp | 19 ++++- .../tacho/src/impl/Tacho_GraphTools.hpp | 5 ++ .../tacho/src/impl/Tacho_GraphTools_Metis.cpp | 35 ++++++-- .../tacho/src/impl/Tacho_GraphTools_Metis.hpp | 13 ++- .../tacho/src/impl/Tacho_LU_Internal.hpp | 19 ++++- .../tacho/src/impl/Tacho_Lapack_Team.hpp | 79 +++++++++++++++++-- .../src/impl/Tacho_NumericTools_Base.hpp | 3 +- .../src/impl/Tacho_NumericTools_LevelSet.hpp | 21 +++-- .../src/impl/Tacho_NumericTools_Serial.hpp | 3 +- .../impl/Tacho_TeamFunctor_FactorizeLU.hpp | 23 +++++- 17 files changed, 301 insertions(+), 47 deletions(-) diff --git a/packages/amesos2/src/Amesos2_Tacho_decl.hpp b/packages/amesos2/src/Amesos2_Tacho_decl.hpp index 95c71b184dc6..07acdaa91e49 100644 --- a/packages/amesos2/src/Amesos2_Tacho_decl.hpp +++ b/packages/amesos2/src/Amesos2_Tacho_decl.hpp @@ -196,6 +196,8 @@ class TachoSolver : public SolverCore int small_problem_threshold_size; int streams; bool verbose; + int dofs_per_node; + bool pivot_pert; // int num_kokkos_threads; // int max_num_superblocks; } data_; diff --git a/packages/amesos2/src/Amesos2_Tacho_def.hpp b/packages/amesos2/src/Amesos2_Tacho_def.hpp index 221e505dbc54..e4f1bd98566b 100644 --- a/packages/amesos2/src/Amesos2_Tacho_def.hpp +++ b/packages/amesos2/src/Amesos2_Tacho_def.hpp @@ -27,10 +27,12 @@ TachoSolver::TachoSolver( Teuchos::RCP B ) : SolverCore(A, X, B) { - 
data_.method = 1; // Cholesky - data_.variant = 2; // solver variant - data_.streams = 1; // # of streams - data_.verbose = false; // verbose + data_.method = 1; // Cholesky + data_.variant = 2; // solver variant + data_.streams = 1; // # of streams + data_.dofs_per_node = 1; // DoFs / node + data_.pivot_pert = false; // Diagonal perturbation + data_.verbose = false; // verbose } @@ -82,7 +84,11 @@ TachoSolver::symbolicFactorization_impl() // data_.solver.setMaxNumberOfSuperblocks(data_.max_num_superblocks); // Symbolic factorization currently must be done on host - data_.solver.analyze(this->globalNumCols_, host_row_ptr_view_, host_cols_view_); + if (data_.dofs_per_node > 1) { + data_.solver.analyze(this->globalNumCols_, data_.dofs_per_node, host_row_ptr_view_, host_cols_view_); + } else { + data_.solver.analyze(this->globalNumCols_, host_row_ptr_view_, host_cols_view_); + } data_.solver.initialize(); } return status; @@ -102,6 +108,11 @@ TachoSolver::numericFactorization_impl() if(do_optimization()) { this->matrixA_->returnValues_kokkos_view(device_nzvals_view_); } + if (data_.pivot_pert) { + data_.solver.useDefaultPivotTolerance(); + } else { + data_.solver.useNoPivotTolerance(); + } data_.solver.factorize(device_nzvals_view_); } return status; @@ -223,6 +234,10 @@ TachoSolver::setParameters_impl(const Teuchos::RCPget ("verbose", false); // # of streams data_.streams = parameterList->get ("num-streams", 1); + // DoFs / node + data_.dofs_per_node = parameterList->get ("dofs-per-node", 1); + // Perturb tiny pivots + data_.pivot_pert = parameterList->get ("perturb-pivot", false); // TODO: Confirm param options // data_.num_kokkos_threads = parameterList->get("kokkos-threads", 1); // data_.max_num_superblocks = parameterList->get("max-num-superblocks", 4); @@ -243,6 +258,8 @@ TachoSolver::getValidParameters_impl() const pl->set("small problem threshold size", 1024, "Problem size threshold below with Tacho uses LAPACK."); pl->set("verbose", false, "Verbosity"); 
pl->set("num-streams", 1, "Number of GPU streams"); + pl->set("dofs-per-node", 1, "DoFs per node"); + pl->set("perturb-pivot", false, "Perturb tiny pivots"); // TODO: Confirm param options // pl->set("kokkos-threads", 1, "Number of threads"); diff --git a/packages/shylu/shylu_dd/frosch/test/Thyra_Xpetra_Laplace/main.cpp b/packages/shylu/shylu_dd/frosch/test/Thyra_Xpetra_Laplace/main.cpp index 18b413f02d96..3e0fbdb81231 100644 --- a/packages/shylu/shylu_dd/frosch/test/Thyra_Xpetra_Laplace/main.cpp +++ b/packages/shylu/shylu_dd/frosch/test/Thyra_Xpetra_Laplace/main.cpp @@ -270,6 +270,7 @@ int main(int argc, char *argv[]) } else { assert(false); } + writeMM("Laplace.mtx",KMonolithic); RCP > xSolution = MultiVectorFactory::Build(KMonolithic->getMap(),1); RCP > xRightHandSide = MultiVectorFactory::Build(KMonolithic->getMap(),1); diff --git a/packages/shylu/shylu_node/tacho/cmake/Tacho_config.h.in b/packages/shylu/shylu_node/tacho/cmake/Tacho_config.h.in index 9daaa2f69860..a537bf9648c0 100644 --- a/packages/shylu/shylu_node/tacho/cmake/Tacho_config.h.in +++ b/packages/shylu/shylu_node/tacho/cmake/Tacho_config.h.in @@ -25,6 +25,9 @@ /* Define if want to build with CHOLMOD enabled */ #cmakedefine TACHO_HAVE_SUITESPARSE +/* Define if want to build with TrilinosSS enabled */ +#cmakedefine TACHO_HAVE_TRILINOS_SS + /* Define if want to build with VTune enabled */ #cmakedefine TACHO_HAVE_VTUNE diff --git a/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp b/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp index 450e04608954..dce1a645b801 100644 --- a/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp +++ b/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp @@ -25,8 +25,11 @@ template int driver(int argc, char *argv[]) { std::string file = "test.mtx"; std::string graph_file = ""; std::string weight_file = ""; + int dofs_per_node = 1; + bool perturbPivot = false; int nrhs = 1; bool randomRHS = true; + bool onesRHS = false; 
std::string method_name = "chol"; int method = 1; // 1 - Chol, 2 - LDL, 3 - SymLU int small_problem_thres = 1024; @@ -47,6 +50,8 @@ template int driver(int argc, char *argv[]) { opts.set_option("file", "Input file (MatrixMarket SPD matrix)", &file); opts.set_option("graph", "Input condensed graph", &graph_file); opts.set_option("weight", "Input condensed graph weight", &weight_file); + opts.set_option("dofs-per-node", "# DoFs per node", &dofs_per_node); + opts.set_option("perturb", "Flag to perturb tiny pivots", &perturbPivot); opts.set_option("nrhs", "Number of RHS vectors", &nrhs); opts.set_option("method", "Solution method: chol, ldl, lu", &method_name); opts.set_option("small-problem-thres", "LAPACK is used smaller than this thres", &small_problem_thres); @@ -55,6 +60,7 @@ template int driver(int argc, char *argv[]) { opts.set_option("device-solve-thres", "Device function is used above this subproblem size", &device_solve_thres); opts.set_option("variant", "algorithm variant in levelset scheduling; 0, 1 and 2", &variant); opts.set_option("nstreams", "# of streams used in CUDA; on host, it is ignored", &nstreams); + opts.set_option("one-rhs", "Set RHS to be ones", &onesRHS); opts.set_option("no-warmup", "Flag to turn off warmup", &no_warmup); opts.set_option("nfacts", "# of factorizations to perform", &nfacts); opts.set_option("nsolves", "# of solves to perform", &nsolves); @@ -125,6 +131,8 @@ template int driver(int argc, char *argv[]) { if (!in.good()) { std::cout << "Failed in open the file: " << graph_file << std::endl; return -1; + } else if (verbose) { + std::cout << " > Condensed graph file: " << graph_file << std::endl; } in >> m_graph; @@ -135,8 +143,10 @@ template int driver(int argc, char *argv[]) { aj_graph = ordinal_type_array_host("aj", ap_graph(m_graph)); for (ordinal_type i = 0; i < m_graph; ++i) { const ordinal_type jbeg = ap_graph(i), jend = ap_graph(i + 1); - for (ordinal_type j = jbeg; j < jend; ++j) + for (ordinal_type j = jbeg; j < jend; 
++j) { in >> aj_graph(j); + aj_graph(j) --; // base-one + } } } @@ -146,6 +156,8 @@ template int driver(int argc, char *argv[]) { if (!in.good()) { std::cout << "Failed in open the file: " << weight_file << std::endl; return -1; + } else if (verbose) { + std::cout << " > Weight file for condensed graph: " << weight_file << std::endl; } ordinal_type m(0); in >> m; @@ -160,17 +172,21 @@ template int driver(int argc, char *argv[]) { Tacho::Driver solver; /// common options - solver.setSolutionMethod(method); - solver.setSmallProblemThresholdsize(small_problem_thres); solver.setVerbose(verbose); + solver.setSolutionMethod(method); + solver.setLevelSetOptionAlgorithmVariant(variant); + solver.setLevelSetOptionNumStreams(nstreams); /// graph options solver.setOrderConnectedGraphSeparately(); /// levelset options + solver.setSmallProblemThresholdsize(small_problem_thres); solver.setLevelSetOptionDeviceFunctionThreshold(device_factor_thres, device_solve_thres); - solver.setLevelSetOptionAlgorithmVariant(variant); - solver.setLevelSetOptionNumStreams(nstreams); + if (perturbPivot) { + if (verbose) std::cout << " > perturb tiny pivots" << std::endl; + solver.useDefaultPivotTolerance(); + } auto values_on_device = Kokkos::create_mirror_view(typename device_type::memory_space(), A.Values()); Kokkos::deep_copy(values_on_device, A.Values()); @@ -178,7 +194,10 @@ template int driver(int argc, char *argv[]) { /// inputs are used for graph reordering and analysis if (m_graph > 0 && m_graph < A.NumRows()) solver.analyze(A.NumRows(), A.RowPtr(), A.Cols(), m_graph, ap_graph, aj_graph, aw_graph); - else + else if (dofs_per_node > 1) { + if (verbose) std::cout << " > DoFs / node = " << dofs_per_node << std::endl; + solver.analyze(A.NumRows(), dofs_per_node, A.RowPtr(), A.Cols()); + } else solver.analyze(A.NumRows(), A.RowPtr(), A.Cols()); /// create numeric tools and levelset tools @@ -202,7 +221,10 @@ template int driver(int argc, char *argv[]) { t("t", A.NumRows(), nrhs); // temp 
workspace (store permuted rhs) { - if (randomRHS) { + if (onesRHS) { + const value_type one(1.0); + Kokkos::deep_copy (b, one); + } else if (randomRHS) { Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(b, random, value_type(1)); } else { diff --git a/packages/shylu/shylu_node/tacho/src/Tacho_CrsMatrixBase.hpp b/packages/shylu/shylu_node/tacho/src/Tacho_CrsMatrixBase.hpp index 5f5278497a86..ed9b3e0ae693 100644 --- a/packages/shylu/shylu_node/tacho/src/Tacho_CrsMatrixBase.hpp +++ b/packages/shylu/shylu_node/tacho/src/Tacho_CrsMatrixBase.hpp @@ -371,7 +371,8 @@ inline static void applyPermutationToCrsMatrixLower(/* */ CrsMatrixType &A, cons template inline double computeRelativeResidual(const CrsMatrixBase &A, const Kokkos::View &x, - const Kokkos::View &b) { + const Kokkos::View &b, + const bool verbose = false) { const bool test = (size_t(A.NumRows()) != size_t(A.NumCols()) || size_t(A.NumRows()) != size_t(b.extent(0)) || size_t(x.extent(0)) != size_t(b.extent(0)) || size_t(x.extent(1)) != size_t(b.extent(1))); if (test) @@ -405,6 +406,8 @@ inline double computeRelativeResidual(const CrsMatrixBase diff += arith_traits::real((h_b(i, p) - s) * arith_traits::conj(h_b(i, p) - s)); } } + if (verbose) + std::cout << " Relative residual norm = " << sqrt(diff) << " / " << sqrt(norm) << " = " << sqrt(diff/norm) << std::endl; return sqrt(diff / norm); } diff --git a/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp b/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp index 274f4c952092..a8da49d93806 100644 --- a/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp +++ b/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp @@ -16,6 +16,7 @@ /// \author Kyungjoo Kim (kyukim@sandia.gov) #include "Tacho.hpp" +#include "Tacho_Util.hpp" #include #include @@ -24,7 +25,7 @@ namespace Tacho { /// forward decl class Graph; -#if defined(TACHO_HAVE_METIS) +#if defined(TACHO_HAVE_METIS) || defined(TACHO_HAVE_TRILINOS_SS) class GraphTools_Metis; #else class 
GraphTools; @@ -42,6 +43,7 @@ template class NumericToolsLe template struct Driver { public: using value_type = ValueType; + using mag_type = typename ArithTraits::mag_type; using device_type = DeviceType; using exec_space = typename device_type::execution_space; using exec_memory_space = typename device_type::memory_space; @@ -63,7 +65,7 @@ template struct Driver { using crs_matrix_type = CrsMatrixBase; using crs_matrix_type_host = CrsMatrixBase; -#if defined(TACHO_HAVE_METIS) +#if defined(TACHO_HAVE_METIS) || defined(TACHO_HAVE_TRILINOS_SS) using graph_tools_type = GraphTools_Metis; #else using graph_tools_type = GraphTools; @@ -160,6 +162,8 @@ template struct Driver { ordinal_type _variant; // algorithmic variant in levelset 0: naive, 1: invert diagonals ordinal_type _nstreams; // on cuda, multi streams are used + mag_type _pivot_tol; // tolerance for tiny pivot perturbation + // parallelism and memory constraint is made via this parameter ordinal_type _max_num_superblocks; // # of superblocks in the memoyrpool @@ -206,6 +210,10 @@ template struct Driver { void setLevelSetOptionNumStreams(const ordinal_type nstreams); void setLevelSetOptionAlgorithmVariant(const ordinal_type variant); + void setPivotTolerance(const mag_type pivot_tol); + void useNoPivotTolerance(); + void useDefaultPivotTolerance(); + /// /// get interface /// @@ -222,6 +230,7 @@ template struct Driver { template int analyze(const ordinal_type m, const arg_size_type_array &ap, const arg_ordinal_type_array &aj, const bool duplicate = false) { + _m = m; if (duplicate) { @@ -270,6 +279,7 @@ template struct Driver { const arg_perm_type_array &perm, const arg_perm_type_array &peri, const bool duplicate = false) { _m = m; + // this takes the user-specified perm, such that analyze() won't call graph partitioner if (duplicate) { /// for most cases, ap and aj are from host; so construct ap and aj and mirror to device _h_ap = size_type_array_host(Kokkos::ViewAllocateWithoutInitializing("h_ap"), 
ap.extent(0)); @@ -375,6 +385,46 @@ template struct Driver { return analyze(); } + template + int analyze(const ordinal_type m, const ordinal_type blk_size, + const arg_size_type_array &ap, const arg_ordinal_type_array &aj, + const bool duplicate = false) { + + if (blk_size > 1) { + //condense graph before calling analyze + const size_type nnz = ap(m); + size_type m_graph = m / blk_size; + size_type nnz_graph = nnz / (blk_size*blk_size); + TACHO_TEST_FOR_EXCEPTION((m != blk_size * m_graph || nnz != blk_size*blk_size * nnz_graph), + std::logic_error, "Failed to initialize the condensed graph"); + + size_type_array_host ap_graph + (Kokkos::ViewAllocateWithoutInitializing("ap_graph"), 1+m_graph); + ordinal_type_array_host aj_graph + (Kokkos::ViewAllocateWithoutInitializing("aj_graph"), nnz_graph); + ordinal_type_array_host aw_graph + (Kokkos::ViewAllocateWithoutInitializing("wgs"), m_graph); + // condense the graph + nnz_graph = 0; + ap_graph(0) = 0; + for (size_type i = 0; i < m; i += blk_size) { + for (size_type k = ap(i); k < ap(i+1); k++) { + if (aj(k)%blk_size == 0) { + aj_graph(nnz_graph) = aj(k)/blk_size; + nnz_graph++; + } + aw_graph(i/blk_size) = blk_size; + ap_graph((i/blk_size)+1) = nnz_graph; + } + } + TACHO_TEST_FOR_EXCEPTION((nnz != blk_size*blk_size * nnz_graph), + std::logic_error, "Failed to condense graph"); + return analyze(m, ap, aj, m_graph, ap_graph, aj_graph, aw_graph, duplicate); + } else { + return analyze(m, ap, aj, duplicate); + } + } + int initialize(); int factorize(const value_type_array &ax); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp index bf5e720265ee..605130e14fa9 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp @@ -26,7 +26,7 @@ Driver::Driver() _h_perm(), _peri(), _h_peri(), _m_graph(0), _nnz_graph(0), _h_ap_graph(), _h_aj_graph(), 
_h_perm_graph(), _h_peri_graph(), _nsupernodes(0), _N(nullptr), _verbose(0), _small_problem_thres(1024), _serial_thres_size(-1), _mb(-1), _nb(-1), _front_update_mode(-1), _levelset(0), _device_level_cut(0), _device_factor_thres(128), - _device_solve_thres(128), _variant(2), _nstreams(16), _max_num_superblocks(-1) {} + _device_solve_thres(128), _variant(2), _nstreams(16), _pivot_tol(0.0), _max_num_superblocks(-1) {} /// /// duplicate the object @@ -157,6 +157,19 @@ template void Driver::setLevelSetOptionNumStr _nstreams = nstreams; } +template void Driver::setPivotTolerance(const mag_type pivot_tol) { + _pivot_tol = pivot_tol; +} + +template void Driver::useNoPivotTolerance() { + _pivot_tol = 0.0; +} + +template void Driver::useDefaultPivotTolerance() { + using arith_traits = ArithTraits; + _pivot_tol = sqrt(arith_traits::epsilon()); +} + /// /// get interface /// @@ -373,7 +386,7 @@ template int Driver::factorize(const value_ty if (_m < _small_problem_thres) { factorize_small_host(ax); } else { - _N->factorize(ax, _verbose); + _N->factorize(ax, _pivot_tol, _verbose); } return 0; } @@ -541,7 +554,7 @@ double Driver::computeRelativeResidual(const value_type_array &ax, const CrsMatrixBase A; A.setExternalMatrix(_m, _m, _nnz, _ap, _aj, ax); - return Tacho::computeRelativeResidual(A, x, b); + return Tacho::computeRelativeResidual(A, x, b, _verbose); } template diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools.hpp index 9d48cd14fb96..a4a7c2948e46 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools.hpp @@ -67,6 +67,11 @@ class GraphTools { _perm(i) = i; _peri(i) = i; } + if (verbose) { + printf("Summary: GraphTools (Default)\n"); + printf("=============================\n"); + printf( " Use Natural Ordering\n\n" ); + } } ordinal_type_array PermVector() const { return _perm; } diff --git 
a/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.cpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.cpp index a85ef651cc4a..a475f729f38a 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.cpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.cpp @@ -13,7 +13,7 @@ #include "Tacho_Util.hpp" -#if defined(TACHO_HAVE_METIS) +#if defined(TACHO_HAVE_METIS) || defined(TACHO_HAVE_TRILINOS_SS) #include "Tacho_GraphTools_Metis.hpp" namespace Tacho { @@ -39,8 +39,15 @@ GraphTools_Metis::GraphTools_Metis(const Graph &g) { for (ordinal_type i = 0; i < static_cast(_adjncy.extent(0)); ++i) _adjncy(i) = g_col_idx(i); +#if defined(TACHO_HAVE_METIS) + _algo = 2; METIS_SetDefaultOptions(_options); _options[METIS_OPTION_NUMBERING] = 0; +#elif defined(TACHO_HAVE_TRILINOS_SS) + _algo = 1; +#else + _algo = 0; +#endif _perm_t = idx_t_array(do_not_initialize_tag("idx_t_perm"), _nvts); _peri_t = idx_t_array(do_not_initialize_tag("idx_t_peri"), _nvts); @@ -52,7 +59,12 @@ GraphTools_Metis::GraphTools_Metis(const Graph &g) { GraphTools_Metis::~GraphTools_Metis() {} void GraphTools_Metis::setVerbose(const bool verbose) { _verbose = verbose; } -void GraphTools_Metis::setOption(const int id, const idx_t value) { _options[id] = value; } +void GraphTools_Metis::setOption(const int id, const idx_t value) { +#if defined(TACHO_HAVE_METIS) + _options[id] = value; +#endif +} +void GraphTools_Metis::setAlgorithm(const int algo) { _algo = algo; } /// /// reorder by amd @@ -81,13 +93,12 @@ void GraphTools_Metis::reorder(const ordinal_type verbose) { Kokkos::Timer timer; double t_metis = 0; - int algo = 2; - if (algo == 0) { + if (_algo == 0) { for (ordinal_type i = 0; i < _nvts; ++i) { _perm(i) = i; _peri(i) = i; } - } else if (algo == 1) { + } else if (_algo == 1) { int ierr = 0; double amd_info[TRILINOS_AMD_INFO]; @@ -100,8 +111,10 @@ void GraphTools_Metis::reorder(const ordinal_type verbose) { _peri(_perm(i)) = i; } - 
TACHO_TEST_FOR_EXCEPTION(ierr != METIS_OK, std::runtime_error, "Failed in trilinos_amd"); + // ierr != TRILINOS_AMD_OK && ierr != TRILINOS_AMD_OK_BUT_JUMBLED + TACHO_TEST_FOR_EXCEPTION(ierr < TRILINOS_AMD_OK, std::runtime_error, "Failed in trilinos_amd"); } else { +#if defined(TACHO_HAVE_METIS) int ierr = 0; idx_t *xadj = (idx_t *)_xadj.data(); @@ -121,11 +134,19 @@ void GraphTools_Metis::reorder(const ordinal_type verbose) { } TACHO_TEST_FOR_EXCEPTION(ierr != METIS_OK, std::runtime_error, "Failed in METIS_NodeND"); +#else + TACHO_TEST_FOR_EXCEPTION(true, std::runtime_error, "METIS is not enabled"); +#endif } _is_ordered = true; if (verbose) { - printf("Summary: GraphTools (Metis)\n"); + if (_algo == 0) + printf("Summary: GraphTools (Natural)\n"); + else if (_algo == 1) + printf("Summary: GraphTools (AMD)\n"); + else + printf("Summary: GraphTools (Metis)\n"); printf("===========================\n"); switch (verbose) { diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.hpp index e3dd1856e601..87119b84de0f 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.hpp @@ -16,11 +16,12 @@ #include "Tacho_Util.hpp" -#if defined(TACHO_HAVE_METIS) #include "Tacho_Graph.hpp" #include "trilinos_amd.h" -#include "metis.h" +#if defined(TACHO_HAVE_METIS) + #include "metis.h" +#endif namespace Tacho { @@ -28,6 +29,9 @@ class GraphTools_Metis { public: typedef typename UseThisDevice::type host_device_type; + #if !defined(TACHO_HAVE_METIS) + typedef ordinal_type idx_t; + #endif typedef Kokkos::View idx_t_array; typedef Kokkos::View ordinal_type_array; @@ -36,7 +40,10 @@ class GraphTools_Metis { idx_t _nvts; idx_t_array _xadj, _adjncy, _vwgt; + int _algo; + #if defined(TACHO_HAVE_METIS) idx_t _options[METIS_NOPTIONS]; + #endif // metis output idx_t_array _perm_t, _peri_t; @@ -61,6 +68,7 @@ class 
GraphTools_Metis { void setVerbose(const bool verbose); void setOption(const int id, const idx_t value); + void setAlgorithm(const int algo); template ordering_type amd_order(ordering_type n, const ordering_type *xadj, @@ -82,4 +90,3 @@ class GraphTools_Metis { } // namespace Tacho #endif -#endif diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Internal.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Internal.hpp index b30c0c85c34f..26f4c4c202c6 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Internal.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Internal.hpp @@ -25,7 +25,6 @@ template <> struct LU { template KOKKOS_INLINE_FUNCTION static int invoke(MemberType &member, const ViewTypeA &A, const ViewTypeP &P) { typedef typename ViewTypeA::non_const_value_type value_type; - // typedef typename ViewTypeP::non_const_value_type p_value_type; static_assert(ViewTypeA::rank == 2, "A is not rank 2 view."); static_assert(ViewTypeP::rank == 1, "P is not rank 1 view."); @@ -41,6 +40,24 @@ template <> struct LU { return r_val; } + template + KOKKOS_INLINE_FUNCTION static int invoke(MemberType &member, const double tol, const ViewTypeA &A, const ViewTypeP &P) { + typedef typename ViewTypeA::non_const_value_type value_type; + + static_assert(ViewTypeA::rank == 2, "A is not rank 2 view."); + static_assert(ViewTypeP::rank == 1, "P is not rank 1 view."); + + TACHO_TEST_FOR_ABORT(P.extent(0) < 4 * A.extent(0), "P should be 4*A.extent(0) ."); + + int r_val(0); + const ordinal_type m = A.extent(0), n = A.extent(1); + if (m > 0 && n > 0) { + /// factorize LU + LapackTeam::getrf(member, tol, m, n, A.data(), A.stride_1(), P.data(), &r_val); + } + return r_val; + } + template KOKKOS_INLINE_FUNCTION static int modify(const MemberType &member, const ordinal_type m, const ViewTypeP &P) { static_assert(ViewTypeP::rank == 1, "P is not rank 1 view."); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp 
b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp index cde52b82693a..939ff6f240d8 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp @@ -231,13 +231,13 @@ template struct LapackTeam { template static KOKKOS_INLINE_FUNCTION void getrf(const MemberType &member, const int m, const int n, T *KOKKOS_RESTRICT A, const int as1, int *KOKKOS_RESTRICT ipiv, int *info) { + *info = 0; if (m <= 0 || n <= 0) return; using arith_traits = ArithTraits; using mag_type = typename arith_traits::mag_type; - - const T zero(0); + const mag_type zero(0); const int as0 = 1; for (int p = 0; p < m; ++p) { const int iend = m - p - 1, jend = n - p - 1; @@ -248,8 +248,9 @@ template struct LapackTeam { *KOKKOS_RESTRICT a12 = A + (p) * as0 + (p + 1) * as1, *KOKKOS_RESTRICT A22 = A + (p + 1) * as0 + (p + 1) * as1; + int idx(0); + mag_type val(0.0); { - int idx(0); using reducer_value_type = typename Kokkos::MaxLoc::value_type; reducer_value_type value; Kokkos::MaxLoc reducer_value(value); @@ -265,10 +266,11 @@ template struct LapackTeam { reducer_value); member.team_barrier(); idx = value.loc; + val = value.val; /// pivot Kokkos::single(Kokkos::PerThread(member), [&]() { - if (*info == 0 && *alpha11 == zero) { + if (*info == 0 && val == zero) { *info = 1+p; } ipiv[p] = p + idx + 1; @@ -279,9 +281,74 @@ template struct LapackTeam { member.team_barrier(); } } - + const T alpha = *alpha11; // swapped, so contains new pivot + if(val != zero) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, iend), [&](const int &i) { a21[i * as0] /= alpha; }); + member.team_barrier(); + } + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, jend), [&](const int &j) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, iend), + [&](const int &i) { A22[i * as0 + j * as1] -= a21[i * as0] * a12[j * as1]; }); + }); member.team_barrier(); - const T alpha = *alpha11; + } + } + + template + static 
KOKKOS_INLINE_FUNCTION void getrf(const MemberType &member, const double tol, const int m, const int n, T *KOKKOS_RESTRICT A, + const int as1, int *KOKKOS_RESTRICT ipiv, int *info) { + *info = 0; + if (m <= 0 || n <= 0) + return; + + using arith_traits = ArithTraits; + using mag_type = typename arith_traits::mag_type; + const mag_type zero(0); + //const mag_type tol = sqrt(arith_traits::epsilon()); + const int as0 = 1; + for (int p = 0; p < m; ++p) { + const int iend = m - p - 1, jend = n - p - 1; + T *KOKKOS_RESTRICT alpha11 = A + (p)*as0 + (p)*as1, // as0 & as1 are leading dimension for rows & cols + *KOKKOS_RESTRICT AB = A + (p) * as0, + *KOKKOS_RESTRICT ABR = alpha11, + *KOKKOS_RESTRICT a21 = A + (p + 1) * as0 + (p) * as1, + *KOKKOS_RESTRICT a12 = A + (p) * as0 + (p + 1) * as1, + *KOKKOS_RESTRICT A22 = A + (p + 1) * as0 + (p + 1) * as1; + + int idx(0); + mag_type val(0.0); + { + using reducer_value_type = typename Kokkos::MaxLoc::value_type; + reducer_value_type value; + Kokkos::MaxLoc reducer_value(value); + Kokkos::parallel_reduce( + Kokkos::TeamVectorRange(member, 1 + iend), + [&](const int &i, reducer_value_type &update) { + const mag_type val = arith_traits::abs(ABR[i * as0]); + if (val > update.val) { + update.val = val; + update.loc = i; + } + }, + reducer_value); + member.team_barrier(); + idx = value.loc; + val = value.val; + + /// pivot + Kokkos::single(Kokkos::PerThread(member), [&]() { + if (val < tol) { + ABR[idx * as0] = (arith_traits::real(ABR[idx * as0]) < zero ? 
-T(tol) : T(tol)); + } + ipiv[p] = p + idx + 1; + }); + if (idx) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), + [&](const int &j) { swap(AB[j * as1], AB[idx * as0 + j * as1]); }); + member.team_barrier(); + } + } + const T alpha = *alpha11; // swapped, so contains new pivot Kokkos::parallel_for(Kokkos::TeamVectorRange(member, iend), [&](const int &i) { a21[i * as0] /= alpha; }); member.team_barrier(); Kokkos::parallel_for(Kokkos::TeamThreadRange(member, jend), [&](const int &j) { diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Base.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Base.hpp index 312c2bfcefd9..5430e789a462 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Base.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Base.hpp @@ -24,6 +24,7 @@ namespace Tacho { template class NumericToolsBase { public: using value_type = ValueType; + using mag_type = typename ArithTraits::mag_type; using device_type = DeviceType; using exec_space = typename device_type::execution_space; using exec_memory_space = typename device_type::memory_space; @@ -243,7 +244,7 @@ template class NumericToolsBase { } } - inline virtual void factorize(const value_type_array &ax, const ordinal_type verbose = 0) { + inline virtual void factorize(const value_type_array &ax, const mag_type pivot_tol = 0.0, const ordinal_type verbose = 0) { TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, "The function should be overriden by derived classes"); } diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp index 18897036922a..334d514821b7 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp @@ -88,6 +88,7 @@ #endif #endif + namespace Tacho { template @@ -112,6 +113,7 @@ class 
NumericToolsLevelSet : public NumericToolsBase { using typename base_type::supernode_info_type; using typename base_type::supernode_type_array_host; using typename base_type::value_type; + using typename base_type::mag_type; using typename base_type::int_type_array; using typename base_type::value_type_array; using typename base_type::value_type_matrix; @@ -3669,7 +3671,7 @@ class NumericToolsLevelSet : public NumericToolsBase { Kokkos::parallel_for( policy, KOKKOS_LAMBDA(const ordinal_type &i) { buf_solve_nrhs_ptr(i) = nrhs * buf_solve_ptr(i); }); Kokkos::deep_copy(_h_buf_solve_nrhs_ptr, _buf_solve_nrhs_ptr); - _nrhs = nrhs; + _nrhs = nrhs; } } } @@ -4204,7 +4206,7 @@ class NumericToolsLevelSet : public NumericToolsBase { } } - inline void factorizeLU(const value_type_array &ax, const ordinal_type verbose) { + inline void factorizeLU(const value_type_array &ax, const mag_type pivot_tol, const ordinal_type verbose) { constexpr bool is_host = std::is_same::value; Kokkos::Timer timer; Kokkos::Timer tick; @@ -4278,7 +4280,12 @@ class NumericToolsLevelSet : public NumericToolsBase { team_policy_factor policy_factor(1, 1, 1); team_policy_update policy_update(1, 1, 1); functor_type functor(_info, _factorize_mode, _level_sids, _piv, _buf, &rval); - + if (pivot_tol > 0.0) { + using arith_traits = ArithTraits; + using mag_type = typename arith_traits::mag_type; + const mag_type tol = sqrt(arith_traits::epsilon()); + functor.setDiagPertubationTol(pivot_tol); + } // get max vector length const ordinal_type vmax = policy_factor.vector_length_max(); { @@ -4333,7 +4340,9 @@ class NumericToolsLevelSet : public NumericToolsBase { if (rval != 0) { TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "GETRF (team) returns non-zero error code."); } - + if (_status != 0) { + TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "GETRF (device) returns non-zero error code."); + } Kokkos::parallel_for("update factor", policy_update, functor); if (verbose) { Kokkos::fence(); time_update += 
tick.seconds(); @@ -4564,7 +4573,7 @@ class NumericToolsLevelSet : public NumericToolsBase { } } - inline void factorize(const value_type_array &ax, const ordinal_type verbose = 0) override { + inline void factorize(const value_type_array &ax, const mag_type pivot_tol = 0.0, const ordinal_type verbose = 0) override { Kokkos::deep_copy(_superpanel_buf, value_type(0)); switch (this->getSolutionMethod()) { case 1: { /// Cholesky @@ -4600,7 +4609,7 @@ class NumericToolsLevelSet : public NumericToolsBase { track_alloc(_piv.span() * sizeof(ordinal_type)); } } - factorizeLU(ax, verbose); + factorizeLU(ax, pivot_tol, verbose); break; } default: { diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Serial.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Serial.hpp index 584930b56525..86b65e7ef78f 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Serial.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Serial.hpp @@ -45,6 +45,7 @@ class NumericToolsSerial : public NumericToolsBase { using typename base_type::ordinal_type_array; using typename base_type::ordinal_type_array_host; using typename base_type::size_type_array; + using typename base_type::mag_type; using typename base_type::value_type; using typename base_type::value_type_array; using typename base_type::value_type_matrix; @@ -475,7 +476,7 @@ class NumericToolsSerial : public NumericToolsBase { /// /// main interface /// - inline void factorize(const value_type_array &ax, const ordinal_type verbose = 0) override { + inline void factorize(const value_type_array &ax, const mag_type pivot_tol = 0.0, const ordinal_type verbose = 0) override { { const bool test = !std::is_same::value; TACHO_TEST_FOR_EXCEPTION(test, std::logic_error, "Serial interface works on host device only"); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp 
index 3ad435b8e853..33caa7532fb0 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp @@ -34,6 +34,9 @@ template struct TeamFunctor_FactorizeLU { using value_type_array = typename supernode_info_type::value_type_array; using value_type_matrix = typename supernode_info_type::value_type_matrix; + using arith_traits = ArithTraits; + using mag_type = typename arith_traits::mag_type; + private: supernode_info_type _info; ordinal_type_array _compute_mode, _level_sids; @@ -44,6 +47,7 @@ template struct TeamFunctor_FactorizeLU { size_type_array _buf_ptr; value_type_array _buf; + mag_type _tol; int *_rval; public: @@ -54,7 +58,8 @@ template struct TeamFunctor_FactorizeLU { TeamFunctor_FactorizeLU(const supernode_info_type &info, const ordinal_type_array &compute_mode, const ordinal_type_array &level_sids, const ordinal_type_array &piv, const value_type_array buf, int *rval) - : _info(info), _compute_mode(compute_mode), _level_sids(level_sids), _piv(piv), _buf(buf), _rval(rval) {} + : _info(info), _compute_mode(compute_mode), _level_sids(level_sids), _piv(piv), _buf(buf), + _tol(0.0), _rval(rval) {} inline void setRange(const ordinal_type pbeg, const ordinal_type pend) { _pbeg = pbeg; @@ -62,6 +67,7 @@ template struct TeamFunctor_FactorizeLU { } inline void setBufferPtr(const size_type_array &buf_ptr) { _buf_ptr = buf_ptr; } + inline void setDiagPertubationTol(const mag_type tol) { _tol = tol; } /// /// Main functions @@ -78,7 +84,10 @@ template struct TeamFunctor_FactorizeLU { if (m > 0) { UnmanagedViewType AT(s.u_buf, m, n); - err = LU::invoke(member, AT, P); + if (_tol > 0.0) + err = LU::invoke(member, _tol, AT, P); + else + err = LU::invoke(member, AT, P); member.team_barrier(); if (err != 0) { Kokkos::atomic_add(_rval, 1); @@ -117,7 +126,10 @@ template struct TeamFunctor_FactorizeLU { if (m > 0) { UnmanagedViewType AT(s.u_buf, m, n); - err = 
LU::invoke(member, AT, P); + if (_tol > 0.0) + err = LU::invoke(member, _tol, AT, P); + else + err = LU::invoke(member, AT, P); member.team_barrier(); if (err != 0) { Kokkos::atomic_add(_rval, 1); @@ -178,7 +190,10 @@ template struct TeamFunctor_FactorizeLU { if (m > 0) { UnmanagedViewType AT(s.u_buf, m, n); - err = LU::invoke(member, AT, P); + if (_tol > 0.0) + err = LU::invoke(member, _tol, AT, P); + else + err = LU::invoke(member, AT, P); member.team_barrier(); if (err != 0) { Kokkos::atomic_add(_rval, 1); From 9990b4483b333331165a87d141dff32f3054ce61 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Fri, 8 Nov 2024 21:49:56 -0700 Subject: [PATCH 02/50] Tacho : compiler warnings Signed-off-by: iyamazaki Tacho : compiler warnings Signed-off-by: iyamazaki --- .../shylu/shylu_node/tacho/src/Tacho_Driver.hpp | 8 ++++---- .../tacho/src/impl/Tacho_Lapack_Team.hpp | 12 ++++++------ .../tacho/src/impl/Tacho_NumericTools_LevelSet.hpp | 3 --- .../src/impl/Tacho_TeamFunctor_ExtractCRS.hpp | 14 +++++++------- 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp b/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp index a8da49d93806..17f871051458 100644 --- a/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp +++ b/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp @@ -393,9 +393,9 @@ template struct Driver { if (blk_size > 1) { //condense graph before calling analyze const size_type nnz = ap(m); - size_type m_graph = m / blk_size; + ordinal_type m_graph = m / blk_size; size_type nnz_graph = nnz / (blk_size*blk_size); - TACHO_TEST_FOR_EXCEPTION((m != blk_size * m_graph || nnz != blk_size*blk_size * nnz_graph), + TACHO_TEST_FOR_EXCEPTION((m != blk_size * m_graph || nnz != size_type(blk_size*blk_size) * nnz_graph), std::logic_error, "Failed to initialize the condensed graph"); size_type_array_host ap_graph @@ -407,7 +407,7 @@ template struct Driver { // condense the graph nnz_graph = 0; ap_graph(0) = 0; - for 
(size_type i = 0; i < m; i += blk_size) { + for (ordinal_type i = 0; i < m; i += blk_size) { for (size_type k = ap(i); k < ap(i+1); k++) { if (aj(k)%blk_size == 0) { aj_graph(nnz_graph) = aj(k)/blk_size; @@ -417,7 +417,7 @@ template struct Driver { ap_graph((i/blk_size)+1) = nnz_graph; } } - TACHO_TEST_FOR_EXCEPTION((nnz != blk_size*blk_size * nnz_graph), + TACHO_TEST_FOR_EXCEPTION((nnz != size_type(blk_size*blk_size) * nnz_graph), std::logic_error, "Failed to condense graph"); return analyze(m, ap, aj, m_graph, ap_graph, aj_graph, aw_graph, duplicate); } else { diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp index 939ff6f240d8..f7308a444b94 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp @@ -257,9 +257,9 @@ template struct LapackTeam { Kokkos::parallel_reduce( Kokkos::TeamVectorRange(member, 1 + iend), [&](const int &i, reducer_value_type &update) { - const mag_type val = arith_traits::abs(ABR[i * as0]); - if (val > update.val) { - update.val = val; + const mag_type val_i = arith_traits::abs(ABR[i * as0]); + if (val_i > update.val) { + update.val = val_i; update.loc = i; } }, @@ -324,9 +324,9 @@ template struct LapackTeam { Kokkos::parallel_reduce( Kokkos::TeamVectorRange(member, 1 + iend), [&](const int &i, reducer_value_type &update) { - const mag_type val = arith_traits::abs(ABR[i * as0]); - if (val > update.val) { - update.val = val; + const mag_type val_i = arith_traits::abs(ABR[i * as0]); + if (val_i > update.val) { + update.val = val_i; update.loc = i; } }, diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp index 334d514821b7..535b117e4c9a 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp +++ 
b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp @@ -4281,9 +4281,6 @@ class NumericToolsLevelSet : public NumericToolsBase { team_policy_update policy_update(1, 1, 1); functor_type functor(_info, _factorize_mode, _level_sids, _piv, _buf, &rval); if (pivot_tol > 0.0) { - using arith_traits = ArithTraits; - using mag_type = typename arith_traits::mag_type; - const mag_type tol = sqrt(arith_traits::epsilon()); functor.setDiagPertubationTol(pivot_tol); } // get max vector length diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_ExtractCRS.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_ExtractCRS.hpp index 3970b8f9c213..5728d40b5f2f 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_ExtractCRS.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_ExtractCRS.hpp @@ -192,7 +192,7 @@ template struct TeamFunctor_ExtractCrs { [&](const int& i) { // diagonal block ordinal_type j; - for (ordinal_type j = i; j < s.m; j++) { + for (j = i; j < s.m; j++) { if (AT(i,j) != zero) { int nnz = _rowptr[i+offm]; _colind[nnz] = j+offm; @@ -202,8 +202,8 @@ template struct TeamFunctor_ExtractCrs { } // off-diagonal blocksa j = s.m; - for (ordinal_type id = s.sid_col_begin + 1; id < s.sid_col_end - 1; id++) { - for (ordinal_type k = _info.sid_block_colidx(id).second; k < _info.sid_block_colidx(id + 1).second; k++) { + for (ordinal_type blk_id = s.sid_col_begin + 1; blk_id < s.sid_col_end - 1; blk_id++) { + for (ordinal_type k = _info.sid_block_colidx(blk_id).second; k < _info.sid_block_colidx(blk_id + 1).second; k++) { if (AT(i,j) != zero) { int nnz = _rowptr[i+offm]; _colind[nnz] = _info.gid_colidx(k+offn); @@ -262,8 +262,8 @@ template struct TeamFunctor_ExtractCrs { } // off-diagonals (each thread extract col, needing atomic-add) ordinal_type i = s.m; - for (ordinal_type id = s.sid_col_begin + 1; id < s.sid_col_end - 1; id++) { - for (ordinal_type k = _info.sid_block_colidx(id).second; k 
< _info.sid_block_colidx(id + 1).second; k++) { + for (ordinal_type blk_id = s.sid_col_begin + 1; blk_id < s.sid_col_end - 1; blk_id++) { + for (ordinal_type k = _info.sid_block_colidx(blk_id).second; k < _info.sid_block_colidx(blk_id + 1).second; k++) { if (AL(i, j) != zero) { ordinal_type gid_i = _info.gid_colidx(k+offn); Kokkos::atomic_add(&(_rowptr[1+gid_i]), 1); @@ -329,8 +329,8 @@ template struct TeamFunctor_ExtractCrs { } // off-diagonals (each thread extract col, needing atomic-add) ordinal_type i = s.m; - for (ordinal_type id = s.sid_col_begin + 1; id < s.sid_col_end - 1; id++) { - for (ordinal_type k = _info.sid_block_colidx(id).second; k < _info.sid_block_colidx(id + 1).second; k++) { + for (ordinal_type blk_id = s.sid_col_begin + 1; blk_id < s.sid_col_end - 1; blk_id++) { + for (ordinal_type k = _info.sid_block_colidx(blk_id).second; k < _info.sid_block_colidx(blk_id + 1).second; k++) { if (AL(i, j) != zero) { ordinal_type gid_i = _info.gid_colidx(k+offn); ordinal_type nnz = Kokkos::atomic_fetch_add(&(_rowptr[gid_i]), 1); From 7cd69d1ee6fce4048677dc582fd462cc33643dfb Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Wed, 13 Nov 2024 17:33:15 -0700 Subject: [PATCH 03/50] Tacho : compile error with OpenMP (tol is used only by Team) Signed-off-by: iyamazaki --- .../shylu_dd/frosch/test/Thyra_Xpetra_Laplace/main.cpp | 1 - .../shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp | 6 ++++++ .../shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/packages/shylu/shylu_dd/frosch/test/Thyra_Xpetra_Laplace/main.cpp b/packages/shylu/shylu_dd/frosch/test/Thyra_Xpetra_Laplace/main.cpp index 3e0fbdb81231..18b413f02d96 100644 --- a/packages/shylu/shylu_dd/frosch/test/Thyra_Xpetra_Laplace/main.cpp +++ b/packages/shylu/shylu_dd/frosch/test/Thyra_Xpetra_Laplace/main.cpp @@ -270,7 +270,6 @@ int main(int argc, char *argv[]) } else { assert(false); } - writeMM("Laplace.mtx",KMonolithic); RCP > xSolution = 
MultiVectorFactory::Build(KMonolithic->getMap(),1); RCP > xRightHandSide = MultiVectorFactory::Build(KMonolithic->getMap(),1); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp index 32f48d3abc48..6304a7c84ec3 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp @@ -61,6 +61,12 @@ template <> struct LU { } } + template + KOKKOS_INLINE_FUNCTION static int invoke(MemberType &member, const double /*tol*/, const ViewTypeA &A, const ViewTypeP &P) { + // tol is not used, for now + return invoke(member, A, P); + } + template inline static int modify(const ordinal_type m, const ViewTypeP &P) { static constexpr bool runOnHost = run_tacho_on_host_v; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp index b0fa4c8d3885..e177dfa7c9c7 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp @@ -61,6 +61,12 @@ template <> struct LU { } } + template + inline static int invoke(MemberType &member, const double /*tol*/, const ViewTypeA &A, const ViewTypeP &P) { + // tol is not used, for now + return invoke(member, A, P); + } + template inline static int modify(const ordinal_type m, const ViewTypeP &P) { static constexpr bool runOnHost = run_tacho_on_host_v; From ead5778b2b3508b31cfd26f8a70365142d854ca4 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Wed, 20 Nov 2024 14:40:11 -0700 Subject: [PATCH 04/50] Tacho : check for # of streams > 0 Signed-off-by: iyamazaki --- .../tacho/src/impl/Tacho_NumericTools_LevelSet.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp index 
535b117e4c9a..4da7fc19dca7 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp @@ -410,7 +410,7 @@ class NumericToolsLevelSet : public NumericToolsBase { /// initialization / release /// inline void initialize(const ordinal_type device_level_cut, const ordinal_type device_factorize_thres, - const ordinal_type device_solve_thres, const int nstreams = 1, const ordinal_type verbose = 0) { + const ordinal_type device_solve_thres, const int nstreams_in = 1, const ordinal_type verbose = 0) { stat_level.n_device_factorize = 0; stat_level.n_device_solve = 0; stat_level.n_team_factorize = 0; @@ -419,6 +419,8 @@ class NumericToolsLevelSet : public NumericToolsBase { Kokkos::Timer timer; timer.reset(); + // # of streams needs to be at least 1 + const int nstreams = max(1, nstreams_in); /// /// level data structure @@ -792,6 +794,8 @@ class NumericToolsLevelSet : public NumericToolsBase { } inline void createStream(const ordinal_type nstreams, const ordinal_type verbose = 0) { + // # of streams needs to be at least 1 + if (nstreams <= 0) return; #if defined(KOKKOS_ENABLE_CUDA) _nstreams = nstreams; if (_streams.size() == size_t(nstreams)) return; From c957dc8f218fcdc9617f053e26054c0c60eda74a Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 23 Jul 2024 12:54:29 -0600 Subject: [PATCH 05/50] MueLu: Set "aggregation: deterministic" in several unit tests Signed-off-by: Christian Glusa --- packages/muelu/test/unit_tests/Aggregates.cpp | 2 ++ packages/muelu/test/unit_tests/Hierarchy.cpp | 2 ++ packages/muelu/test/unit_tests/UncoupledAggregationFactory.cpp | 1 + 3 files changed, 5 insertions(+) diff --git a/packages/muelu/test/unit_tests/Aggregates.cpp b/packages/muelu/test/unit_tests/Aggregates.cpp index 48020116bbda..444c6c297b74 100644 --- a/packages/muelu/test/unit_tests/Aggregates.cpp +++ b/packages/muelu/test/unit_tests/Aggregates.cpp @@ -75,6 +75,7 @@ class 
AggregateGenerator { aggFact->SetParameter("aggregation: max selected neighbors", Teuchos::ParameterEntry(0)); aggFact->SetParameter("aggregation: ordering", Teuchos::ParameterEntry(std::string("natural"))); aggFact->SetParameter("aggregation: allow user-specified singletons", Teuchos::ParameterEntry(true)); + aggFact->SetParameter("aggregation: deterministic", Teuchos::ParameterEntry(true)); aggFact->SetParameter("aggregation: enable phase 1", Teuchos::ParameterEntry(bPhase1)); aggFact->SetParameter("aggregation: enable phase 2a", Teuchos::ParameterEntry(bPhase2a)); @@ -155,6 +156,7 @@ class AggregateGenerator { aggFact->SetParameter("aggregation: min agg size", Teuchos::ParameterEntry(3)); aggFact->SetParameter("aggregation: max selected neighbors", Teuchos::ParameterEntry(0)); aggFact->SetParameter("aggregation: ordering", Teuchos::ParameterEntry(std::string("natural"))); + aggFact->SetParameter("aggregation: deterministic", Teuchos::ParameterEntry(true)); aggFact->SetParameter("aggregation: enable phase 1", Teuchos::ParameterEntry(true)); aggFact->SetParameter("aggregation: enable phase 2a", Teuchos::ParameterEntry(true)); aggFact->SetParameter("aggregation: enable phase 2b", Teuchos::ParameterEntry(true)); diff --git a/packages/muelu/test/unit_tests/Hierarchy.cpp b/packages/muelu/test/unit_tests/Hierarchy.cpp index 8ccea6f393d0..3e4e2c55e1c7 100644 --- a/packages/muelu/test/unit_tests/Hierarchy.cpp +++ b/packages/muelu/test/unit_tests/Hierarchy.cpp @@ -234,6 +234,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Hierarchy, Iterate, Scalar, LocalOrdinal, Glob UncoupledAggFact->SetMinNodesPerAggregate(3); UncoupledAggFact->SetMaxNeighAlreadySelected(0); UncoupledAggFact->SetOrdering("natural"); + UncoupledAggFact->SetParameter("aggregation: deterministic", Teuchos::ParameterEntry(true)); RCP cdFact; RCP TentPFact = rcp(new TentativePFactory()); @@ -443,6 +444,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Hierarchy, IterateWithImplicitRestriction, Sca 
UncoupledAggFact->SetMinNodesPerAggregate(3); UncoupledAggFact->SetMaxNeighAlreadySelected(0); UncoupledAggFact->SetOrdering("natural"); + UncoupledAggFact->SetParameter("aggregation: deterministic", Teuchos::ParameterEntry(true)); RCP cdFact; RCP TentPFact = rcp(new TentativePFactory()); diff --git a/packages/muelu/test/unit_tests/UncoupledAggregationFactory.cpp b/packages/muelu/test/unit_tests/UncoupledAggregationFactory.cpp index 983a132cf14c..3b3768199dca 100644 --- a/packages/muelu/test/unit_tests/UncoupledAggregationFactory.cpp +++ b/packages/muelu/test/unit_tests/UncoupledAggregationFactory.cpp @@ -108,6 +108,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(UncoupledAggregationFactory, Build_ML, Scalar, RCP aggFact = rcp(new UncoupledAggregationFactory()); + aggFact->SetParameter("aggregation: deterministic", Teuchos::ParameterEntry(true)); // Test the ML style options aggFact->SetParameter("aggregation: match ML phase2a", Teuchos::ParameterEntry(true)); aggFact->SetParameter("aggregation: match ML phase2b", Teuchos::ParameterEntry(true)); From 9fa67582fd4b9ae6cf0166a4821119436b17b71c Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Thu, 5 Sep 2024 15:48:46 -0600 Subject: [PATCH 06/50] MueLu: Fix bug in GetMatrixMaxMinusOffDiagonal Signed-off-by: Christian Glusa --- .../MueLu_CoalesceDropFactory_def.hpp | 2 +- .../src/Utils/MueLu_UtilitiesBase_decl.hpp | 4 +-- .../src/Utils/MueLu_UtilitiesBase_def.hpp | 26 ++++++++----------- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp index 6c8e857d6daf..11aa186788b3 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp @@ -439,7 +439,7 @@ void CoalesceDropFactory::Build(Level using MT = typename STS::magnitudeType; RCP 
ghostedDiag; ArrayRCP ghostedDiagVals; - ArrayRCP negMaxOffDiagonal; + ArrayRCP negMaxOffDiagonal; // RS style needs the max negative off-diagonal, SA style needs the diagonal if (useSignedClassicalRS) { if (ghostedBlockNumber.is_null()) { diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp index 8aeac791865d..ea6b672d36fe 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp @@ -127,9 +127,9 @@ class UtilitiesBase { * @ret: vector containing max_{i\not=k}(-a_ik) */ - static Teuchos::RCP> GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A); + static Teuchos::RCP GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A); - static Teuchos::RCP> GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A, const Xpetra::Vector& BlockNumber); + static Teuchos::RCP GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A, const Xpetra::Vector& BlockNumber); /*! @brief Return vector containing inverse of input vector * diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp index 6e181415b09b..03d855221f0a 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp @@ -584,12 +584,12 @@ UtilitiesBase:: } template -Teuchos::RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node>> +Teuchos::RCP> UtilitiesBase:: GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A) { // Get/Create distributed objects RCP rowMap = A.getRowMap(); - auto diag = Xpetra::VectorFactory::Build(rowMap, false); + auto diag = Xpetra::VectorFactory::Build(rowMap, false); // Implement using Kokkos using local_vector_type = typename Vector::dual_view_type::t_dev_um; @@ -597,10 +597,7 @@ UtilitiesBase:: using execution_space = typename local_vector_type::execution_space; using values_type = typename local_matrix_type::values_type; using scalar_type = 
typename values_type::non_const_value_type; - using mag_type = typename Kokkos::ArithTraits::mag_type; using KAT_S = typename Kokkos::ArithTraits; - using KAT_M = typename Kokkos::ArithTraits; - using size_type = typename local_matrix_type::non_const_size_type; auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll); auto local_mat_dev = A.getLocalMatrixDevice(); @@ -609,11 +606,12 @@ UtilitiesBase:: Kokkos::parallel_for( "GetMatrixMaxMinusOffDiagonal", my_policy, KOKKOS_LAMBDA(const LocalOrdinal rowIdx) { - auto mymax = KAT_M::zero(); - auto row = local_mat_dev.row(rowIdx); + auto mymax = KAT_S::zero(); + auto row = local_mat_dev.rowConst(rowIdx); for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) { if (rowIdx != row.colidx(entryIdx)) { - mymax = std::max(mymax, -KAT_S::magnitude(row.value(entryIdx))); + if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx))) + mymax = -KAT_S::real(row.value(entryIdx)); } } diag_dev(rowIdx, 0) = mymax; @@ -623,14 +621,14 @@ UtilitiesBase:: } template -Teuchos::RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node>> +Teuchos::RCP> UtilitiesBase:: GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A, const Xpetra::Vector& BlockNumber) { TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "GetMatrixMaxMinusOffDiagonal: BlockNumber must match's A's column map."); // Get/Create distributed objects RCP rowMap = A.getRowMap(); - auto diag = Xpetra::VectorFactory::Build(rowMap, false); + auto diag = Xpetra::VectorFactory::Build(rowMap, false); // Implement using Kokkos using local_vector_type = typename Vector::dual_view_type::t_dev_um; @@ -638,10 +636,7 @@ UtilitiesBase:: using execution_space = typename local_vector_type::execution_space; using values_type = typename local_matrix_type::values_type; using scalar_type = typename values_type::non_const_value_type; - using mag_type = typename Kokkos::ArithTraits::mag_type; using KAT_S = typename 
Kokkos::ArithTraits; - using KAT_M = typename Kokkos::ArithTraits; - using size_type = typename local_matrix_type::non_const_size_type; auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll); auto local_mat_dev = A.getLocalMatrixDevice(); @@ -651,11 +646,12 @@ UtilitiesBase:: Kokkos::parallel_for( "GetMatrixMaxMinusOffDiagonal", my_policy, KOKKOS_LAMBDA(const LocalOrdinal rowIdx) { - auto mymax = KAT_M::zero(); + auto mymax = KAT_S::zero(); auto row = local_mat_dev.row(rowIdx); for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) { if ((rowIdx != row.colidx(entryIdx)) && (local_block_dev(rowIdx, 0) == local_block_dev(row.colidx(entryIdx), 0))) { - mymax = std::max(mymax, -KAT_S::magnitude(row.value(entryIdx))); + if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx))) + mymax = -KAT_S::real(row.value(entryIdx)); } } diag_dev(rowIdx, 0) = mymax; From 39b93498209bc83dd357b187973acbe4a2e34367 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 23 Jul 2024 14:43:32 -0600 Subject: [PATCH 07/50] MueLu: Refactor CoalesceDropFactory_kokkos Signed-off-by: Christian Glusa --- .../MueLu_BoundaryDetection.hpp | 243 +++ .../MueLu_ClassicalDropping.hpp | 176 ++ .../MueLu_CoalesceDropFactory_kokkos_decl.hpp | 10 +- .../MueLu_CoalesceDropFactory_kokkos_def.hpp | 1693 +++++++++-------- .../MatrixTransformation/MueLu_CutDrop.hpp | 515 +++++ .../MueLu_DistanceLaplacianDropping.hpp | 178 ++ .../MueLu_DroppingCommon.hpp | 369 ++++ .../MueLu_MatrixConstruction.hpp | 1029 ++++++++++ .../CoalesceDropFactory_kokkos.cpp | 10 + 9 files changed, 3468 insertions(+), 755 deletions(-) create mode 100644 packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp create mode 100644 packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp create mode 100644 packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp create mode 100644 packages/muelu/src/Graph/MatrixTransformation/MueLu_DistanceLaplacianDropping.hpp create 
mode 100644 packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp create mode 100644 packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp new file mode 100644 index 000000000000..a000343db43a --- /dev/null +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp @@ -0,0 +1,243 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_BOUNDARYDETECTION_HPP +#define MUELU_BOUNDARYDETECTION_HPP + +#include +#include +#include "Kokkos_Core.hpp" +#include "Kokkos_ArithTraits.hpp" +#include "MueLu_LWGraph_kokkos.hpp" +#include "MueLu_Utilities.hpp" +#include "Teuchos_RCP.hpp" +#include "Xpetra_ConfigDefs.hpp" +#include "Xpetra_CrsGraph.hpp" +#include "Xpetra_MultiVector.hpp" + +namespace MueLu::BoundaryDetection { + +// These functors all assume that the boundaryNodes view has been initialized to false. + +// Marks rows as Dirichlet based on value threshold and number of off-diagonal entries. 
+template +class PointDirichletFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + boundary_nodes_view boundaryNodes; + magnitudeType dirichletThreshold; + local_ordinal_type dirichletNonzeroThreshold; + + public: + PointDirichletFunctor(local_matrix_type& A_, boundary_nodes_view boundaryNodes_, magnitudeType dirichletThreshold_, local_ordinal_type dirichletNonzeroThreshold_) + : A(A_) + , boundaryNodes(boundaryNodes_) + , dirichletThreshold(dirichletThreshold_) + , dirichletNonzeroThreshold(dirichletNonzeroThreshold_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + local_ordinal_type nnz = 0; + for (local_ordinal_type k = 0; k < row.length; ++k) { + local_ordinal_type clid = row.colidx(k); + scalar_type val = row.value(k); + if ((rlid != static_cast(clid)) && (ATS::magnitude(val) > dirichletThreshold)) { + ++nnz; + if (nnz == dirichletNonzeroThreshold) { + return; + } + } + } + boundaryNodes(rlid) = true; + } +}; + +// Marks rows as Dirichlet based on abs(rowsum) and abs(diag). 
+template +class RowSumFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using magATS = Kokkos::ArithTraits; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + boundary_nodes_view boundaryNodes; + magnitudeType rowSumTol; + + public: + RowSumFunctor(local_matrix_type& A_, boundary_nodes_view boundaryNodes_, magnitudeType rowSumTol_) + : A(A_) + , boundaryNodes(boundaryNodes_) + , rowSumTol(rowSumTol_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + scalar_type rowsum = ATS::zero(); + scalar_type diagval = ATS::zero(); + auto row = A.rowConst(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + auto val = row.value(k); + if (rlid == static_cast(clid)) + diagval = val; + rowsum += val; + } + if (ATS::magnitude(rowsum) > ATS::magnitude(diagval) * rowSumTol) { + boundaryNodes(rlid) = true; + } + } +}; + +template +class BoundaryFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + + local_matrix_type A; + functor_type_0 functor0; + functor_type_1 functor1; + functor_type_2 functor2; + functor_type_3 functor3; + + public: + BoundaryFunctor(local_matrix_type& A_) + : A(A_) + , functor0(0) + , functor1(0) + , functor2(0) + , functor3(0) {} + + BoundaryFunctor(local_matrix_type& A_, functor_type_0& functor0_) + : A(A_) + , functor0(functor0_) + , functor1(0) + , functor2(0) + , functor3(0) {} + + BoundaryFunctor(local_matrix_type& A_, functor_type_0& functor0_, functor_type_1& functor1_) + : A(A_) + , functor0(functor0_) + , functor1(functor1_) + , 
functor2(0) + , functor3(0) {} + + BoundaryFunctor(local_matrix_type& A_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_) + : A(A_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(0) {} + + BoundaryFunctor(local_matrix_type& A_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_) + : A(A_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) {} + + KOKKOS_INLINE_FUNCTION + void + operator()(const local_ordinal_type rlid) const { + if constexpr (!std::is_same_v) + functor0(rlid); + if constexpr (!std::is_same_v) + functor1(rlid); + if constexpr (!std::is_same_v) + functor2(rlid); + if constexpr (!std::is_same_v) + functor3(rlid); + } +}; + +// Marks rows as Dirichlet based on value threshold and number of off-diagonal entries. +// Marks blocks as Dirichlet when one row is Dirichlet (useGreedyDirichlet==true) or when all rows are Dirichlet (useGreedyDirichlet==false). 
+template +class VectorDirichletFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + local_ordinal_type blockSize; + boundary_nodes_view boundaryNodes; + magnitudeType dirichletThreshold; + local_ordinal_type dirichletNonzeroThreshold; + + public: + VectorDirichletFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, boundary_nodes_view boundaryNodes_, magnitudeType dirichletThreshold_, local_ordinal_type dirichletNonzeroThreshold_) + : A(A_) + , blockSize(blockSize_) + , boundaryNodes(boundaryNodes_) + , dirichletThreshold(dirichletThreshold_) + , dirichletNonzeroThreshold(dirichletNonzeroThreshold_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rblid) const { + for (local_ordinal_type rlid = rblid * blockSize; rlid < (rblid + 1) * blockSize; ++rlid) { + auto row = A.rowConst(rlid); + local_ordinal_type nnz = 0; + bool rowIsDirichlet = true; + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + auto val = row.value(k); + if ((rlid != static_cast(clid)) && (ATS::magnitude(val) > dirichletThreshold)) { + ++nnz; + if (nnz == dirichletNonzeroThreshold) { + rowIsDirichlet = false; + break; + } + } + } + if constexpr (useGreedyDirichlet) { + if (rowIsDirichlet) { + boundaryNodes(rblid) = true; + return; + } + } else { + if (!rowIsDirichlet) { + boundaryNodes(rblid) = false; + return; + } + } + } + if constexpr (useGreedyDirichlet) + boundaryNodes(rblid) = false; + else + boundaryNodes(rblid) = true; + } +}; + +} // namespace MueLu::BoundaryDetection + +#endif diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp 
b/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp new file mode 100644 index 000000000000..b0a7ed2f3554 --- /dev/null +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp @@ -0,0 +1,176 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_CLASSICALDROPPING_HPP +#define MUELU_CLASSICALDROPPING_HPP + +#include "MueLu_DroppingCommon.hpp" +#include "Kokkos_Core.hpp" +#include "Kokkos_ArithTraits.hpp" +#include "Xpetra_Matrix.hpp" +#include "MueLu_Utilities.hpp" + +namespace MueLu::ClassicalDropping { + +template +class AbsDropFunctor { + private: + using matrix_type = Xpetra::Matrix; + using diag_vec_type = Xpetra::MultiVector; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using diag_view_type = typename Kokkos::DualView::t_dev; + + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + Teuchos::RCP diagVec; + diag_view_type diag; // corresponds to overlapped diagonal + magnitudeType eps; + results_view results; + + public: + AbsDropFunctor(matrix_type& A_, magnitudeType threshold, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , eps(threshold) + , results(results_) { + diagVec = Utilities::GetMatrixOverlappedDiagonal(A_); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, 
Kokkos::ALL(), 0); + } + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + + auto val = row.value(k); + auto aiiajj = ATS::magnitude(diag(rlid)) * ATS::magnitude(diag(clid)); // |a_ii|*|a_jj| + auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + + results(offset + k) = Kokkos::max((aij2 <= eps * eps * aiiajj) ? DROP : KEEP, + results(offset + k)); + } + } +}; + +template +class SignedClassicalRSDropFunctor { + private: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using boundary_nodes_view = Kokkos::View; + + using diag_vec_type = Xpetra::MultiVector; + using diag_view_type = typename Kokkos::DualView::t_dev; + + local_matrix_type A; + Teuchos::RCP diagVec; + diag_view_type diag; // corresponds to overlapped diagonal + magnitudeType eps; + results_view results; + + public: + SignedClassicalRSDropFunctor(matrix_type& A_, magnitudeType threshold, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , eps(threshold) + , results(results_) { + diagVec = Utilities::GetMatrixMaxMinusOffDiagonal(A_); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto val = row.value(k); + 
auto neg_aij = -ATS::real(val); + auto max_neg_aik = eps * ATS::real(diag(rlid)); + results(offset + k) = Kokkos::max((neg_aij <= max_neg_aik) ? DROP : KEEP, + results(offset + k)); + } + } +}; + +template +class SignedClassicalSADropFunctor { + private: + using matrix_type = Xpetra::Matrix; + using diag_vec_type = Xpetra::MultiVector; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using diag_view_type = typename Kokkos::DualView::t_dev; + + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using mATS = Kokkos::ArithTraits; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + Teuchos::RCP diagVec; + diag_view_type diag; // corresponds to overlapped diagonal + magnitudeType eps; + results_view results; + + public: + SignedClassicalSADropFunctor(matrix_type& A_, magnitudeType threshold, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , eps(threshold) + , results(results_) { + // Construct ghosted matrix diagonal + diagVec = Utilities::GetMatrixOverlappedDiagonal(A_); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + + auto val = row.value(k); + auto aiiajj = ATS::magnitude(diag(rlid)) * ATS::magnitude(diag(clid)); // |a_ii|*|a_jj| + const bool is_nonpositive = ATS::real(val) <= mATS::zero(); + magnitudeType aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0 
+ if (is_nonpositive) + aij2 = -aij2; + results(offset + k) = Kokkos::max((aij2 <= eps * eps * aiiajj) ? DROP : KEEP, + results(offset + k)); + } + } +}; + +} // namespace MueLu::ClassicalDropping + +#endif diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp index 25da3f2f96bc..e2c1e1abb34d 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp @@ -20,7 +20,7 @@ #include "MueLu_AmalgamationInfo_fwd.hpp" #include "MueLu_Level_fwd.hpp" -#include "MueLu_LWGraph_kokkos_fwd.hpp" +#include "MueLu_LWGraph_kokkos_decl.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_Utilities_fwd.hpp" @@ -102,6 +102,8 @@ class CoalesceDropFactory_kokkos using node_type = Node; private: + using boundary_nodes_type = typename MueLu::LWGraph_kokkos::boundary_nodes_type; + // For compatibility #undef MUELU_COALESCEDROPFACTORY_KOKKOS_SHORT #include "MueLu_UseShortNames.hpp" @@ -128,6 +130,12 @@ class CoalesceDropFactory_kokkos //@} void Build(Level& currentLevel) const; + + std::tuple, RCP > GetBlockNumberMVs(Level& currentLevel) const; + + std::tuple BuildScalar(Level& currentLevel) const; + + std::tuple BuildVector(Level& currentLevel) const; }; } // namespace MueLu diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp index 8696993bde33..4d3d64c43745 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp @@ -12,6 +12,7 @@ #include #include +#include #include "Xpetra_Matrix.hpp" @@ -23,435 +24,682 @@ #include "MueLu_LWGraph_kokkos.hpp" 
#include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" -#include "MueLu_Utilities.hpp" + +// #define MUELU_COALESCE_DROP_DEBUG 1 + +#include "MueLu_BoundaryDetection.hpp" +#include "MueLu_ClassicalDropping.hpp" +#include "MueLu_CutDrop.hpp" +#include "MueLu_DroppingCommon.hpp" +#include "MueLu_DistanceLaplacianDropping.hpp" +#include "MueLu_MatrixConstruction.hpp" namespace MueLu { -namespace CoalesceDrop_Kokkos_Details { // anonymous +template +RCP CoalesceDropFactory_kokkos::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: drop tol"); + SET_VALID_ENTRY("aggregation: use ml scaling of drop tol"); + SET_VALID_ENTRY("aggregation: Dirichlet threshold"); + SET_VALID_ENTRY("aggregation: greedy Dirichlet"); + SET_VALID_ENTRY("aggregation: row sum drop tol"); + SET_VALID_ENTRY("aggregation: drop scheme"); + SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); + SET_VALID_ENTRY("aggregation: distance laplacian directional weights"); + SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); + SET_VALID_ENTRY("aggregation: distance laplacian algo"); + SET_VALID_ENTRY("aggregation: classical algo"); + SET_VALID_ENTRY("aggregation: coloring: localize color graph"); + + SET_VALID_ENTRY("filtered matrix: use lumping"); + SET_VALID_ENTRY("filtered matrix: reuse graph"); + SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); -template -class ScanFunctor { - public: - ScanFunctor(RowType rows_) - : rows(rows_) {} + SET_VALID_ENTRY("filtered matrix: use root stencil"); + SET_VALID_ENTRY("filtered matrix: use spread lumping"); + SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor"); + SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap"); + SET_VALID_ENTRY("filtered matrix: Dirichlet threshold"); - KOKKOS_INLINE_FUNCTION - void operator()(const LO i, LO& upd, const 
bool& final) const { - upd += rows(i); - if (final) - rows(i) = upd; - } +#undef SET_VALID_ENTRY + validParamList->set("lightweight wrap", true, "Experimental option for lightweight graph access"); + + // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values) + validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical")))); + validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple("default", "unscaled cut", "scaled cut", "scaled cut symmetric")))); + validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple("default", "unscaled cut", "scaled cut", "scaled cut symmetric")))); + + validParamList->set>("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); + validParamList->set>("Coordinates", Teuchos::null, "Generating factory for Coordinates"); + validParamList->set>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber"); + + return validParamList; +} - private: - RowType rows; -}; - -template -class ClassicalDropFunctor { - private: - typedef typename GhostedViewType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) - magnitudeType eps; - - public: - ClassicalDropFunctor(GhostedViewType ghostedDiag, magnitudeType threshold) - : diag(ghostedDiag) - , eps(threshold) {} - - // 
Return true if we drop, false if not - KOKKOS_FORCEINLINE_FUNCTION - bool operator()(LO row, LO col, SC val) const { - // We avoid square root by using squared values - auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| - auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 - - return (aij2 <= eps * eps * aiiajj); +template +void CoalesceDropFactory_kokkos::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "UnAmalgamationInfo"); + + const ParameterList& pL = GetParameterList(); + std::string algo = pL.get("aggregation: drop scheme"); + if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") { + Input(currentLevel, "Coordinates"); } -}; - -template -class DistanceFunctor { - private: - typedef typename CoordsType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - typedef SC value_type; - - public: - DistanceFunctor(CoordsType coords_) - : coords(coords_) {} - - KOKKOS_INLINE_FUNCTION - magnitudeType distance2(LO row, LO col) const { - SC d = ATS::zero(), s; - for (size_t j = 0; j < coords.extent(1); j++) { - s = coords(row, j) - coords(col, j); - d += s * s; - } - return ATS::magnitude(d); + if (algo == "signed classical sa") + ; + else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) { + Input(currentLevel, "BlockNumber"); } +} - private: - CoordsType coords; -}; - -template -class DistanceLaplacianDropFunctor { - private: - typedef typename GhostedViewType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - DistanceLaplacianDropFunctor(GhostedViewType ghostedLaplDiag, DistanceFunctor distFunctor_, magnitudeType threshold) - : diag(ghostedLaplDiag) - , distFunctor(distFunctor_) - , eps(threshold) {} - - // Return true if we drop, false if not - 
KOKKOS_INLINE_FUNCTION - bool operator()(LO row, LO col, SC /* val */) const { - // We avoid square root by using squared values - - // We ignore incoming value of val as we operate on an auxiliary - // distance Laplacian matrix - typedef typename DistanceFunctor::value_type dSC; - typedef Kokkos::ArithTraits dATS; - auto fval = dATS::one() / distFunctor.distance2(row, col); - - auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| - auto aij2 = ATS::magnitude(fval) * ATS::magnitude(fval); // |a_ij|^2 - - return (aij2 <= eps * eps * aiiajj); - } +template +void CoalesceDropFactory_kokkos:: + Build(Level& currentLevel) const { + auto A = Get>(currentLevel, "A"); + TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()"); + LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); - private: - GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) - DistanceFunctor distFunctor; - magnitudeType eps; -}; - -template -class ScalarFunctor { - private: - typedef typename MatrixType::StaticCrsGraphType graph_type; - typedef typename graph_type::row_map_type rows_type; - typedef typename graph_type::entries_type cols_type; - typedef typename MatrixType::values_type vals_type; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::val_type impl_Scalar; - typedef Kokkos::ArithTraits impl_ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - ScalarFunctor(MatrixType A_, BndViewType bndNodes_, DropFunctorType dropFunctor_, - typename rows_type::non_const_type rows_, - typename cols_type::non_const_type colsAux_, - typename vals_type::non_const_type valsAux_, - bool reuseGraph_, bool lumping_, SC /* threshold_ */, - bool aggregationMayCreateDirichlet_) - : A(A_) - , bndNodes(bndNodes_) - , dropFunctor(dropFunctor_) - , rows(rows_) - , colsAux(colsAux_) - , 
valsAux(valsAux_) - , reuseGraph(reuseGraph_) - , lumping(lumping_) - , aggregationMayCreateDirichlet(aggregationMayCreateDirichlet_) { - rowsA = A.graph.row_map; - zero = impl_ATS::zero(); - } + std::tuple results; + if (blkSize == 1) + results = BuildScalar(currentLevel); + else + results = BuildVector(currentLevel); - KOKKOS_INLINE_FUNCTION - void operator()(const LO row, LO& nnz) const { - auto rowView = A.rowConst(row); - auto length = rowView.length; - auto offset = rowsA(row); + if (GetVerbLevel() & Statistics1) { + GlobalOrdinal numDropped = std::get<0>(results); + auto boundaryNodes = std::get<1>(results); - impl_Scalar diag = zero; - LO rownnz = 0; - LO diagID = -1; - for (decltype(length) colID = 0; colID < length; colID++) { - LO col = rowView.colidx(colID); - impl_Scalar val = rowView.value(colID); + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; - if ((!bndNodes(row) && !dropFunctor(row, col, rowView.value(colID))) || row == col) { - colsAux(offset + rownnz) = col; + Kokkos::parallel_reduce( + "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)), + KOKKOS_LAMBDA(const LO i, GO& n) { + if (boundaryNodes(i)) + n++; + }, + numLocalBoundaryNodes); - LO valID = (reuseGraph ? 
colID : rownnz); - valsAux(offset + valID) = val; - if (row == col) - diagID = valID; + auto comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - rownnz++; + GO numGlobalTotal = A->getGlobalNumEntries(); + GO numGlobalDropped; + MueLu_sumAll(comm, numDropped, numGlobalDropped); - } else { - // Rewrite with zeros (needed for reuseGraph) - valsAux(offset + colID) = zero; - diag += val; - } + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + if (numGlobalTotal != 0) { + GetOStream(Statistics1) << "Number of dropped entries: " + << numGlobalDropped << "/" << numGlobalTotal + << " (" << 100 * Teuchos::as(numGlobalDropped) / Teuchos::as(numGlobalTotal) << "%)" << std::endl; } - // How to assert on the device? - // assert(diagIndex != -1); - rows(row + 1) = rownnz; - // if (lumping && diagID != -1) { - if (lumping) { - // Add diag to the diagonal + } +} - // NOTE_KOKKOS: valsAux was allocated with - // ViewAllocateWithoutInitializing. This is not a problem here - // because we explicitly set this value above. 
- valsAux(offset + diagID) += diag; - } +template +std::tuple>, Teuchos::RCP>> CoalesceDropFactory_kokkos:: + GetBlockNumberMVs(Level& currentLevel) const { + RCP BlockNumber = Get>(currentLevel, "BlockNumber"); + RCP ghostedBlockNumber; + GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl; + + // Ghost the column block numbers if we need to + auto A = Get>(currentLevel, "A"); + RCP importer = A->getCrsGraph()->getImporter(); + if (!importer.is_null()) { + SubFactoryMonitor m1(*this, "Block Number import", currentLevel); + ghostedBlockNumber = Xpetra::VectorFactory::Build(importer->getTargetMap()); + ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); + } else { + ghostedBlockNumber = BlockNumber; + } + return std::make_tuple(BlockNumber, ghostedBlockNumber); +} - // If the only element remaining after filtering is diagonal, mark node as boundary - // FIXME: this should really be replaced by the following - // if (indices.size() == 1 && indices[0] == row) - // boundaryNodes[row] = true; - // We do not do it this way now because there is no framework for distinguishing isolated - // and boundary nodes in the aggregation algorithms - bndNodes(row) |= (rownnz == 1 && aggregationMayCreateDirichlet); +template +std::tuple::boundary_nodes_type> CoalesceDropFactory_kokkos:: + BuildScalar(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + using MatrixType = Xpetra::CrsMatrix; + using GraphType = Xpetra::CrsGraph; + using local_matrix_type = typename MatrixType::local_matrix_type; + using local_graph_type = typename GraphType::local_graph_type; + using rowptr_type = typename local_graph_type::row_map_type::non_const_type; + using entries_type = typename local_graph_type::entries_type::non_const_type; + using values_type = typename local_matrix_type::values_type::non_const_type; + using device_type = typename Node::device_type; + using memory_space = typename 
device_type::memory_space; + + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType MT; + const MT zero = Teuchos::ScalarTraits::zero(); + + auto A = Get>(currentLevel, "A"); - nnz += rownnz; + ////////////////////////////////////////////////////////////////////// + // Process parameterlist + const ParameterList& pL = GetParameterList(); + + // Boundary detection + const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); + const typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); + const LocalOrdinal dirichletNonzeroThreshold = 1; + + // Dropping + const std::string algo = pL.get("aggregation: drop scheme"); + std::string classicalAlgoStr = pL.get("aggregation: classical algo"); + std::string distanceLaplacianAlgoStr = pL.get("aggregation: distance laplacian algo"); + MT threshold; + // If we're doing the ML-style halving of the drop tol at each level, we do that here. + if (pL.get("aggregation: use ml scaling of drop tol")) + threshold = pL.get("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID()); + else + threshold = as(pL.get("aggregation: drop tol")); + bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); + + // Fill + const bool lumping = pL.get("filtered matrix: use lumping"); + const bool reuseGraph = pL.get("filtered matrix: reuse graph"); + const bool reuseEigenvalue = pL.get("filtered matrix: reuse eigenvalue"); + + const bool useRootStencil = pL.get("filtered matrix: use root stencil"); + const bool useSpreadLumping = pL.get("filtered matrix: use spread lumping"); + TEUCHOS_ASSERT(!useRootStencil); + TEUCHOS_ASSERT(!useSpreadLumping); + + if (algo == "classical") + GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + else if (algo == "distance 
laplacian") + GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + else + GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + + if (((algo == "classical") && (classicalAlgoStr.find("scaled") != std::string::npos)) || ((algo == "distance laplacian") && (distanceLaplacianAlgoStr.find("scaled") != std::string::npos))) + TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0"); + + // FIXME: Non-Kokkos implementation does this, but this seems unnecessary. + if (algo == "distance laplacian") + aggregationMayCreateDirichlet = true; + + ////////////////////////////////////////////////////////////////////// + // We perform four sweeps over the rows of A: + // Pass 1: detection of boundary nodes + // Pass 2: diagonal extraction + // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix + // Pass 4: fill of the filtered matrix + // + // Pass 1 and 3 apply a sequence of criteria to each row of the matrix. + + // TODO: We could merge pass 1 and 2. 
+ + auto crsA = rcp_dynamic_cast(A, true)->getCrsMatrix(); + auto lclA = crsA->getLocalMatrixDevice(); + auto range = range_type(0, lclA.numRows()); + + ////////////////////////////////////////////////////////////////////// + // Pass 1: Detect boundary nodes + // + // The following criteria are available: + // - BoundaryDetection::PointDirichletFunctor + // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries + // - BoundaryDetection::RowSumFunctor + // Marks rows as Dirichlet bases on row-sum criterion + + // Dirichlet nodes + auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false + { + SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel); + + // macro that applies boundary detection functors +#define runBoundaryFunctor(...) \ + { \ + auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \ + Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \ } - private: - MatrixType A; - BndViewType bndNodes; - DropFunctorType dropFunctor; - - rows_type rowsA; - - typename rows_type::non_const_type rows; - typename cols_type::non_const_type colsAux; - typename vals_type::non_const_type valsAux; - - bool reuseGraph; - bool lumping; - bool aggregationMayCreateDirichlet; - impl_Scalar zero; -}; - -// collect number nonzeros of blkSize rows in nnz_(row+1) -template -class Stage1aVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - - public: - Stage1aVectorFunctor(MatrixType kokkosMatrix_, NnzType nnz_, blkSizeType blkSize_) - : kokkosMatrix(kokkosMatrix_) - , nnz(nnz_) - , blkSize(blkSize_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const LO row, LO& totalnnz) const { - // the following code is more or less what MergeRows is doing - // count nonzero entries in all dof rows associated with node row - LO nodeRowMaxNonZeros = 0; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(row * blkSize + j); - 
nodeRowMaxNonZeros += rowView.length; + auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold); + + if (rowSumTol <= 0.) { + runBoundaryFunctor(dirichlet_detection); + } else { + auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol); + runBoundaryFunctor(dirichlet_detection, + apply_rowsum); } - nnz(row + 1) = nodeRowMaxNonZeros; - totalnnz += nodeRowMaxNonZeros; +#undef runBoundaryFunctor } + // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true. + // Otherwise we're now done with it now. + + ////////////////////////////////////////////////////////////////////// + // Pass 2 & 3: Diagonal extraction and determine dropping and construct + // rowptr of filtered matrix + // + // The following criteria are available: + // - Misc::PointwiseDropBoundaryFunctor + // Drop all rows that have been marked as Dirichlet + // - Misc::DropOffRankFunctor + // Drop all entries that are off-rank + // - ClassicalDropping::AbsDropFunctor + // Classical dropping + // - ClassicalDropping::SignedClassicalRSDropFunctor + // Classical RS dropping + // - ClassicalDropping::SignedClassicalSADropFunctor + // Classical signed SA dropping + // - DistanceLaplacian::DropFunctor + // Distance Laplacian dropping + // - Misc::KeepDiagonalFunctor + // Mark diagonal as KEEP + // - Misc::MarkSingletonFunctor + // Mark singletons after dropping as Dirichlet + // - Misc::BlockDiagonalizeFunctor + // Drop coupling between blocks + // + // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms. 
+ + // rowptr of filtered A + auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1); + // Number of nonzeros of filtered A + LocalOrdinal nnz_filtered = 0; + // dropping decisions for each entry + auto results = Kokkos::View("results", lclA.nnz()); // initialized to UNDECIDED + { + SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel); - private: - MatrixType kokkosMatrix; //< local matrix part - NnzType nnz; //< View containing number of nonzeros for current row - blkSizeType blkSize; //< block size (or partial block size in strided maps) -}; - -// build the dof-based column map containing the local dof ids belonging to blkSize rows in matrix -// sort column ids -// translate them into (unique) node ids -// count the node column ids per node row -template -class Stage1bcVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - - private: - MatrixType kokkosMatrix; //< local matrix part - NnzType coldofnnz; //< view containing start and stop indices for subviews - blkSizeType blkSize; //< block size (or partial block size in strided maps) - ColDofType coldofs; //< view containing the local dof ids associated with columns for the blkSize rows (not sorted) - Dof2NodeTranslationType dof2node; //< view containing the local node id associated with the local dof id - NnzType colnodennz; //< view containing number of column nodes for each node row - BdryNodeTypeConst dirichletdof; //< view containing with num dofs booleans. True if dof (not necessarily entire node) is dirichlet boundardy dof. - BdryNodeType bdrynode; //< view containing with numNodes booleans. True if node is (full) dirichlet boundardy node. 
- boolType usegreedydirichlet; //< boolean for use of greedy Dirichlet (if any dof is Dirichlet, entire node is dirichlet) default false (need all dofs in node to be Dirichlet for node to be Dirichlet) - - public: - Stage1bcVectorFunctor(MatrixType kokkosMatrix_, - NnzType coldofnnz_, - blkSizeType blkSize_, - ColDofType coldofs_, - Dof2NodeTranslationType dof2node_, - NnzType colnodennz_, - BdryNodeTypeConst dirichletdof_, - BdryNodeType bdrynode_, - boolType usegreedydirichlet_) - : kokkosMatrix(kokkosMatrix_) - , coldofnnz(coldofnnz_) - , blkSize(blkSize_) - , coldofs(coldofs_) - , dof2node(dof2node_) - , colnodennz(colnodennz_) - , dirichletdof(dirichletdof_) - , bdrynode(bdrynode_) - , usegreedydirichlet(usegreedydirichlet_) { - } + std::string functorLabel = "MueLu::CoalesceDrop::CountEntries"; - KOKKOS_INLINE_FUNCTION - void operator()(const LO rowNode, LO& nnz) const { - LO pos = coldofnnz(rowNode); - if (usegreedydirichlet) { - bdrynode(rowNode) = false; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(rowNode * blkSize + j); - auto numIndices = rowView.length; - - // if any dof in the node is Dirichlet - if (dirichletdof(rowNode * blkSize + j)) - bdrynode(rowNode) = true; - - for (decltype(numIndices) k = 0; k < numIndices; k++) { - auto dofID = rowView.colidx(k); - coldofs(pos) = dofID; - pos++; + // macro that applied dropping functors +#if !defined(HAVE_MUELU_DEBUG) +#define runCountingFunctor(...) \ + { \ + auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__); \ + Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz_filtered); \ + } +#else +#define runCountingFunctor(...) 
\ + { \ + auto debug = Misc::DebugFunctor(lclA, results); \ + auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__, debug); \ + Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz_filtered); \ + } +#endif + + auto drop_boundaries = Misc::PointwiseDropBoundaryFunctor(lclA, boundaryNodes, results); + + if (threshold != zero) { + auto preserve_diagonals = Misc::KeepDiagonalFunctor(lclA, results); + auto mark_singletons_as_boundary = Misc::MarkSingletonFunctor(lclA, boundaryNodes, results); + + if (algo == "classical" || algo == "block diagonal classical") { + if (algo == "block diagonal classical") { + auto BlockNumbers = GetBlockNumberMVs(currentLevel); + auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results); + + if (classicalAlgoStr == "default") { + auto classical_dropping = ClassicalDropping::AbsDropFunctor(*A, threshold, results); + + if (aggregationMayCreateDirichlet) { + runCountingFunctor(block_diagonalize, + classical_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + } else { + runCountingFunctor(block_diagonalize, + classical_dropping, + drop_boundaries, + preserve_diagonals); + } + } else if (classicalAlgoStr == "unscaled cut") { + auto comparison = CutDrop::UnscaledComparison(*A, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + runCountingFunctor(block_diagonalize, + drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (classicalAlgoStr == "scaled cut") { + auto comparison = CutDrop::ScaledComparison(*A, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + runCountingFunctor(block_diagonalize, + drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (classicalAlgoStr == "scaled cut symmetric") { + auto comparison = CutDrop::ScaledComparison(*A, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, 
threshold); + + runCountingFunctor(block_diagonalize, + drop_boundaries, + preserve_diagonals, + cut_drop); + + auto symmetrize = Misc::SymmetrizeFunctor(lclA, results); + + runCountingFunctor(symmetrize); + + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\""); + } + } else { + if (classicalAlgoStr == "default") { + auto classical_dropping = ClassicalDropping::AbsDropFunctor(*A, threshold, results); + + if (aggregationMayCreateDirichlet) { + runCountingFunctor(classical_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + } else { + runCountingFunctor(classical_dropping, + drop_boundaries, + preserve_diagonals); + } + } else if (classicalAlgoStr == "unscaled cut") { + auto comparison = CutDrop::UnscaledComparison(*A, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + runCountingFunctor(drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (classicalAlgoStr == "scaled cut") { + auto comparison = CutDrop::ScaledComparison(*A, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + runCountingFunctor(drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (classicalAlgoStr == "scaled cut symmetric") { + auto comparison = CutDrop::ScaledComparison(*A, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + runCountingFunctor(drop_boundaries, + preserve_diagonals, + cut_drop); + + auto symmetrize = Misc::SymmetrizeFunctor(lclA, results); + + runCountingFunctor(symmetrize); + + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\""); + } } - } - } else { - bdrynode(rowNode) = true; - for (LO j = 0; j < blkSize; j++) { - auto rowView = 
kokkosMatrix.row(rowNode * blkSize + j); - auto numIndices = rowView.length; - - // if any dof in the node is not Dirichlet - if (dirichletdof(rowNode * blkSize + j) == false) - bdrynode(rowNode) = false; - - for (decltype(numIndices) k = 0; k < numIndices; k++) { - auto dofID = rowView.colidx(k); - coldofs(pos) = dofID; - pos++; + } else if (algo == "signed classical" || algo == "block diagonal signed classical" || algo == "block diagonal colored signed classical") { + auto signed_classical_rs_dropping = ClassicalDropping::SignedClassicalRSDropFunctor(*A, threshold, results); + + if (algo == "block diagonal signed classical" || algo == "block diagonal colored signed classical") { + auto BlockNumbers = GetBlockNumberMVs(currentLevel); + auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results); + + if (classicalAlgoStr == "default") { + if (aggregationMayCreateDirichlet) { + runCountingFunctor(block_diagonalize, + signed_classical_rs_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + + } else { + runCountingFunctor(block_diagonalize, + signed_classical_rs_dropping, + drop_boundaries, + preserve_diagonals); + } + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\""); + } + } else { + if (classicalAlgoStr == "default") { + if (aggregationMayCreateDirichlet) { + runCountingFunctor(signed_classical_rs_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + + } else { + runCountingFunctor(signed_classical_rs_dropping, + drop_boundaries, + preserve_diagonals); + } + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\""); + } } - } - } - - // sort coldofs - LO begin = coldofnnz(rowNode); - LO end = coldofnnz(rowNode + 1); - LO n = end - begin; 
- for (LO i = 0; i < (n - 1); i++) { - for (LO j = 0; j < (n - i - 1); j++) { - if (coldofs(j + begin) > coldofs(j + begin + 1)) { - LO temp = coldofs(j + begin); - coldofs(j + begin) = coldofs(j + begin + 1); - coldofs(j + begin + 1) = temp; + } else if (algo == "signed classical sa") { + if (classicalAlgoStr == "default") { + auto signed_classical_sa_dropping = ClassicalDropping::SignedClassicalSADropFunctor(*A, threshold, results); + + if (aggregationMayCreateDirichlet) { + runCountingFunctor(signed_classical_sa_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + + } else { + runCountingFunctor(signed_classical_sa_dropping, + drop_boundaries, + preserve_diagonals); + } + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\""); } + } else if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") { + using doubleMultiVector = Xpetra::MultiVector::magnitudeType, LO, GO, NO>; + auto coords = Get>(currentLevel, "Coordinates"); + + auto dist2 = DistanceLaplacian::DistanceFunctor(*A, coords); + + if (algo == "block diagonal distance laplacian") { + auto BlockNumbers = GetBlockNumberMVs(currentLevel); + auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results); + + if (distanceLaplacianAlgoStr == "default") { + auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results); + + if (aggregationMayCreateDirichlet) { + runCountingFunctor(block_diagonalize, + dist_laplacian_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + } else { + runCountingFunctor(block_diagonalize, + dist_laplacian_dropping, + drop_boundaries, + preserve_diagonals); + } + } else if (distanceLaplacianAlgoStr == "unscaled cut") { + auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results); + auto 
cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + runCountingFunctor(block_diagonalize, + drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (distanceLaplacianAlgoStr == "scaled cut") { + auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + runCountingFunctor(block_diagonalize, + drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") { + auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + runCountingFunctor(block_diagonalize, + drop_boundaries, + cut_drop, + preserve_diagonals); + + auto symmetrize = Misc::SymmetrizeFunctor(lclA, results); + + runCountingFunctor(symmetrize); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\""); + } + } else { + if (distanceLaplacianAlgoStr == "default") { + auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results); + + if (aggregationMayCreateDirichlet) { + runCountingFunctor(dist_laplacian_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + } else { + runCountingFunctor(dist_laplacian_dropping, + drop_boundaries, + preserve_diagonals); + } + } else if (distanceLaplacianAlgoStr == "unscaled cut") { + auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + runCountingFunctor(drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (distanceLaplacianAlgoStr == "scaled cut") { + auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results); + auto cut_drop = 
CutDrop::CutDropFunctor(comparison, threshold); + + runCountingFunctor(drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") { + auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + runCountingFunctor(drop_boundaries, + preserve_diagonals, + cut_drop); + + auto symmetrize = Misc::SymmetrizeFunctor(lclA, results); + + runCountingFunctor(symmetrize); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\""); + } + } + } else if (algo == "block diagonal") { + auto BlockNumbers = GetBlockNumberMVs(currentLevel); + auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results); + + runCountingFunctor(block_diagonalize); + } else { + TEUCHOS_ASSERT(false); } + } else { + Kokkos::deep_copy(results, KEEP); + // FIXME: This seems inconsistent + // runCountingFunctor(drop_boundaries); + auto no_op = Misc::NoOpFunctor(); + runCountingFunctor(no_op); } - size_t cnt = 0; - LO lastNodeID = -1; - for (LO i = 0; i < n; i++) { - LO dofID = coldofs(begin + i); - LO nodeID = dof2node(dofID); - if (nodeID != lastNodeID) { - lastNodeID = nodeID; - coldofs(begin + cnt) = nodeID; - cnt++; - } - } - colnodennz(rowNode + 1) = cnt; - nnz += cnt; - } -}; - -// fill column node id view -template -class Stage1dVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - typedef typename MatrixType::value_type SC; - - private: - ColDofType coldofs; //< view containing mixed node and dof indices (only input) - ColDofNnzType coldofnnz; //< view containing the start and stop indices for subviews (dofs) - ColNodeType colnodes; //< view containing the local node ids associated with columns - 
ColNodeNnzType colnodennz; //< view containing start and stop indices for subviews - - public: - Stage1dVectorFunctor(ColDofType coldofs_, ColDofNnzType coldofnnz_, ColNodeType colnodes_, ColNodeNnzType colnodennz_) - : coldofs(coldofs_) - , coldofnnz(coldofnnz_) - , colnodes(colnodes_) - , colnodennz(colnodennz_) { +#undef runCountingFunctor } + GO numDropped = lclA.nnz() - nnz_filtered; + // We now know the number of entries of filtered A and have the final rowptr. + + ////////////////////////////////////////////////////////////////////// + // Pass 4: Create local matrix for filtered A + // + // Dropped entries are optionally lumped to the diagonal. - KOKKOS_INLINE_FUNCTION - void operator()(const LO rowNode) const { - auto dofbegin = coldofnnz(rowNode); - auto nodebegin = colnodennz(rowNode); - auto nodeend = colnodennz(rowNode + 1); - auto n = nodeend - nodebegin; + RCP filteredA; + RCP graph; + { + SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel); - for (decltype(nodebegin) i = 0; i < n; i++) { - colnodes(nodebegin + i) = coldofs(dofbegin + i); + local_matrix_type lclFilteredA; + local_graph_type lclGraph; + if (reuseGraph) { + filteredA = MatrixFactory::BuildCopy(A); + lclFilteredA = filteredA->getLocalMatrixDevice(); + + auto colidx = entries_type("entries", nnz_filtered); + lclGraph = local_graph_type(colidx, filtered_rowptr); + } else { + auto colidx = entries_type("entries", nnz_filtered); + auto values = values_type("values", nnz_filtered); + lclFilteredA = local_matrix_type("filteredA", + lclA.numRows(), lclA.numCols(), + nnz_filtered, + values, filtered_rowptr, colidx); } - } -}; -} // namespace CoalesceDrop_Kokkos_Details + if (lumping) { + if (reuseGraph) { + auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor(lclA, results, lclFilteredA, lclGraph); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor); + } else { + auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor(lclA, 
results, lclFilteredA); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor); + } + } else { + if (reuseGraph) { + auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor(lclA, results, lclFilteredA, lclGraph); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor); + } else { + auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor(lclA, results, lclFilteredA); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor); + } + } -template -RCP CoalesceDropFactory_kokkos::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); + if (!reuseGraph) + filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap()); + filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: drop tol"); - SET_VALID_ENTRY("aggregation: Dirichlet threshold"); - SET_VALID_ENTRY("aggregation: drop scheme"); - SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); - SET_VALID_ENTRY("aggregation: greedy Dirichlet"); - SET_VALID_ENTRY("filtered matrix: use lumping"); - SET_VALID_ENTRY("filtered matrix: reuse graph"); - SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); - SET_VALID_ENTRY("aggregation: use ml scaling of drop tol"); - { - validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple("classical", "distance laplacian")))); - } -#undef SET_VALID_ENTRY - validParamList->set>("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); - validParamList->set>("Coordinates", Teuchos::null, "Generating factory for Coordinates"); + if (reuseEigenvalue) { + // Reuse max eigenvalue from A + // It is unclear what 
eigenvalue is the best for the smoothing, but we already may have + // the D^{-1}A estimate in A, may as well use it. + // NOTE: ML does that too + filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); + } else { + filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); + } - return validParamList; -} + if (!reuseGraph) { + // Use graph of filteredA as graph. + lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice(); + } + graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A")); + graph->SetBoundaryNodeMap(boundaryNodes); + } -template -void CoalesceDropFactory_kokkos::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "UnAmalgamationInfo"); + LO dofsPerNode = 1; + Set(currentLevel, "DofsPerNode", dofsPerNode); + Set(currentLevel, "Graph", graph); + Set(currentLevel, "A", filteredA); - const ParameterList& pL = GetParameterList(); - if (pL.get("aggregation: drop scheme") == "distance laplacian") - Input(currentLevel, "Coordinates"); + return std::make_tuple(numDropped, boundaryNodes); } template -void CoalesceDropFactory_kokkos:: - Build(Level& currentLevel) const { +std::tuple::boundary_nodes_type> CoalesceDropFactory_kokkos:: + BuildVector(Level& currentLevel) const { FactoryMonitor m(*this, "Build", currentLevel); + using MatrixType = Xpetra::CrsMatrix; + using GraphType = Xpetra::CrsGraph; + using local_matrix_type = typename MatrixType::local_matrix_type; + using local_graph_type = typename GraphType::local_graph_type; + using rowptr_type = typename local_graph_type::row_map_type::non_const_type; + using entries_type = typename local_graph_type::entries_type::non_const_type; + using values_type = typename local_matrix_type::values_type::non_const_type; + using device_type = typename Node::device_type; + using memory_space = typename device_type::memory_space; + typedef Teuchos::ScalarTraits STS; typedef typename STS::magnitudeType 
MT; const MT zero = Teuchos::ScalarTraits::zero(); @@ -480,276 +728,339 @@ void CoalesceDropFactory_kokkos:: auto amalInfo = Get>(currentLevel, "UnAmalgamationInfo"); - const ParameterList& pL = GetParameterList(); - - // Sanity Checking: ML drop tol scaling is not supported in UncoupledAggregation_Kokkos - TEUCHOS_TEST_FOR_EXCEPTION(pL.get("aggregation: use ml scaling of drop tol"), std::invalid_argument, "Option: 'aggregation: use ml scaling of drop tol' is not supported in the Kokkos version of CoalesceDroPFactory"); - - std::string algo = pL.get("aggregation: drop scheme"); - - double threshold = pL.get("aggregation: drop tol"); - GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold - << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - - const typename STS::magnitudeType dirichletThreshold = - STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); - - GO numDropped = 0, numTotal = 0; - - RCP graph; - LO dofsPerNode = -1; - - typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type; - boundary_nodes_type boundaryNodes; - - RCP filteredA; - if (blkSize == 1 && threshold == zero) { - // Scalar problem without dropping - - // Detect and record rows that correspond to Dirichlet boundary conditions - boundaryNodes = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); - - // Trivial LWGraph construction - graph = rcp(new LWGraph_kokkos(A->getCrsGraph()->getLocalGraphDevice(), A->getRowMap(), A->getColMap(), "graph of A")); - graph->SetBoundaryNodeMap(boundaryNodes); - - numTotal = A->getLocalNumEntries(); - dofsPerNode = 1; - - filteredA = A; - - } else if (blkSize == 1 && threshold != zero) { - // Scalar problem with dropping - - // Detect and record rows that correspond to Dirichlet boundary conditions - boundaryNodes = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); - - typedef typename Matrix::local_matrix_type local_matrix_type; - typedef typename 
LWGraph_kokkos::local_graph_type kokkos_graph_type; - typedef typename kokkos_graph_type::row_map_type::non_const_type rows_type; - typedef typename kokkos_graph_type::entries_type::non_const_type cols_type; - typedef typename local_matrix_type::values_type::non_const_type vals_type; - - LO numRows = A->getLocalNumRows(); - local_matrix_type kokkosMatrix = A->getLocalMatrixDevice(); - auto nnzA = kokkosMatrix.nnz(); - auto rowsA = kokkosMatrix.graph.row_map; - - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::val_type impl_Scalar; - typedef Kokkos::ArithTraits impl_ATS; - - bool reuseGraph = pL.get("filtered matrix: reuse graph"); - bool lumping = pL.get("filtered matrix: use lumping"); - if (lumping) - GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; + const RCP rowMap = A->getRowMap(); + const RCP colMap = A->getColMap(); + + // build a node row map (uniqueMap = non-overlapping) and a node column map + // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation + // stored in the AmalgamationInfo class container contain the local node id + // given a local dof id. The data is calculated in the AmalgamationFactory and + // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo) + const RCP uniqueMap = amalInfo->getNodeRowMap(); + const RCP nonUniqueMap = amalInfo->getNodeColMap(); + Array rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View? 
+ Array colTranslationArray = *(amalInfo->getColTranslation()); + + Kokkos::View + rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size()); + Kokkos::View + colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size()); + + // get number of local nodes + LO numNodes = Teuchos::as(uniqueMap->getLocalNumElements()); + typedef typename Kokkos::View id_translation_type; + id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size()); + id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size()); + Kokkos::deep_copy(rowTranslation, rowTranslationView); + Kokkos::deep_copy(colTranslation, colTranslationView); + + // extract striding information + blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) + LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map + LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size) + if (A->IsView("stridedMaps") == true) { + const RCP myMap = A->getRowMap("stridedMaps"); + const RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap"); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + blkId = strMap->getStridedBlockId(); + if (blkId > -1) + blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); + } - const bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); + TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize); - // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + setting a single value - rows_type rows("FA_rows", numRows + 1); - cols_type 
colsAux(Kokkos::ViewAllocateWithoutInitializing("FA_aux_cols"), nnzA); - vals_type valsAux; - if (reuseGraph) { - SubFactoryMonitor m2(*this, "CopyMatrix", currentLevel); - - // Share graph with the original matrix - filteredA = MatrixFactory::Build(A->getCrsGraph()); + ////////////////////////////////////////////////////////////////////// + // Process parameterlist + const ParameterList& pL = GetParameterList(); - // Do a no-op fill-complete - RCP fillCompleteParams(new ParameterList); - fillCompleteParams->set("No Nonlocal Changes", true); - filteredA->fillComplete(fillCompleteParams); + // Boundary detection + const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); + const typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); + const LocalOrdinal dirichletNonzeroThreshold = 1; + const bool useGreedyDirichlet = pL.get("aggregation: greedy Dirichlet"); + TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems."); + + // Dropping + const std::string algo = pL.get("aggregation: drop scheme"); + std::string classicalAlgoStr = pL.get("aggregation: classical algo"); + std::string distanceLaplacianAlgoStr = pL.get("aggregation: distance laplacian algo"); + MT threshold; + // If we're doing the ML-style halving of the drop tol at each level, we do that here. 
+ if (pL.get("aggregation: use ml scaling of drop tol")) + threshold = pL.get("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID()); + else + threshold = as(pL.get("aggregation: drop tol")); + bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); + + // Fill + const bool lumping = pL.get("filtered matrix: use lumping"); + const bool reuseGraph = pL.get("filtered matrix: reuse graph"); + const bool reuseEigenvalue = pL.get("filtered matrix: reuse eigenvalue"); + + const bool useRootStencil = pL.get("filtered matrix: use root stencil"); + const bool useSpreadLumping = pL.get("filtered matrix: use spread lumping"); + TEUCHOS_ASSERT(!useRootStencil); + TEUCHOS_ASSERT(!useSpreadLumping); + + if (algo == "classical") { + GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + } else if (algo == "distance laplacian") { + GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + } else + GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + + // FIXME: Non-Kokkos implementation does this, but this seems unnecessary. + if (algo == "distance laplacian") + aggregationMayCreateDirichlet = true; + + ////////////////////////////////////////////////////////////////////// + // We perform four sweeps over the rows of A: + // Pass 1: detection of boundary nodes + // Pass 2: diagonal extraction + // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix + // Pass 4: fill of the filtered matrix + // + // Pass 1 and 3 apply a sequence of criteria to each row of the matrix. + + // TODO: We could merge pass 1 and 2. 
+ + auto crsA = rcp_dynamic_cast(A, true)->getCrsMatrix(); + auto lclA = crsA->getLocalMatrixDevice(); + auto range = range_type(0, numNodes); + + ////////////////////////////////////////////////////////////////////// + // Pass 1: Detect boundary nodes + // + // The following criteria are available: + // - BoundaryDetection::VectorDirichletFunctor + // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries + + // Dirichlet nodes + auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false + { + SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel); - // No need to reuseFill, just modify in place - valsAux = filteredA->getLocalMatrixDevice().values; +#define runBoundaryFunctor(...) \ + { \ + auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \ + Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \ + } + if (useGreedyDirichlet) { + auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold); + runBoundaryFunctor(dirichlet_detection); } else { - // Need an extra array to compress - valsAux = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_aux_vals"), nnzA); + auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold); + runBoundaryFunctor(dirichlet_detection); } +#undef runBoundaryFunctor + } + // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true. + // Otherwise we're now done with it now. 
+ + ////////////////////////////////////////////////////////////////////// + // Pass 2 & 3: Diagonal extraction and determine dropping and construct + // rowptr of filtered matrix + // + // The following criteria are available: + // - Misc::VectorDropBoundaryFunctor + // Drop all rows that have been marked as Dirichlet + // - Misc::DropOffRankFunctor + // Drop all entries that are off-rank + // - ClassicalDropping::AbsDropFunctor + // Classical dropping + // - ClassicalDropping::SignedClassicalRSDropFunctor + // Classical RS dropping + // - ClassicalDropping::SignedClassicalSADropFunctor + // Classical signed SA dropping + // - DistanceLaplacian::DropFunctor + // Distance Laplacian dropping + // - Misc::KeepDiagonalFunctor + // Mark diagonal as KEEP + // - Misc::MarkSingletonFunctor + // Mark singletons after dropping as Dirichlet + + // rowptr of filtered A + auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1); + auto graph_rowptr = rowptr_type("rowptr", numNodes + 1); + // Number of nonzeros of filtered A and graph + Kokkos::pair nnz = {0, 0}; + + // dropping decisions for each entry + auto results = Kokkos::View("results", lclA.nnz()); // initialized to UNDECIDED + { + SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel); - LO nnzFA = 0; - { - if (algo == "classical") { - // Construct overlapped matrix diagonal - RCP ghostedDiag; - { - kokkosMatrix = local_matrix_type(); - SubFactoryMonitor m2(*this, "Ghosted diag construction", currentLevel); - ghostedDiag = Utilities::GetMatrixOverlappedDiagonal(*A); - kokkosMatrix = A->getLocalMatrixDevice(); - } - - // Filter out entries - { - SubFactoryMonitor m2(*this, "MainLoop", currentLevel); - - auto ghostedDiagView = ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); - - CoalesceDrop_Kokkos_Details::ClassicalDropFunctor dropFunctor(ghostedDiagView, threshold); - CoalesceDrop_Kokkos_Details::ScalarFunctor - scalarFunctor(kokkosMatrix, boundaryNodes, dropFunctor, rows, colsAux, 
valsAux, reuseGraph, lumping, threshold, aggregationMayCreateDirichlet); + std::string functorLabel = "MueLu::CoalesceDrop::CountEntries"; - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0, numRows), - scalarFunctor, nnzFA); - } +#if !defined(HAVE_MUELU_DEBUG) +#define runCountingFunctor(...) \ + { \ + auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__); \ + Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz); \ + } +#else +#define runCountingFunctor(...) \ + { \ + auto debug = Misc::DebugFunctor(lclA, results); \ + auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__, debug); \ + Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz); \ + } +#endif - } else if (algo == "distance laplacian") { - typedef Xpetra::MultiVector::magnitudeType, LO, GO, NO> doubleMultiVector; - auto coords = Get>(currentLevel, "Coordinates"); + auto drop_boundaries = Misc::VectorDropBoundaryFunctor(lclA, rowTranslation, boundaryNodes, results); - auto uniqueMap = A->getRowMap(); - auto nonUniqueMap = A->getColMap(); + if (threshold != zero) { + auto preserve_diagonals = Misc::KeepDiagonalFunctor(lclA, results); + auto mark_singletons_as_boundary = Misc::MarkSingletonVectorFunctor(lclA, rowTranslation, boundaryNodes, results); - // Construct ghosted coordinates - RCP importer; - { - SubFactoryMonitor m2(*this, "Coords Import construction", currentLevel); - importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + if (algo == "classical") { + if (classicalAlgoStr == "default") { + auto classical_dropping = ClassicalDropping::AbsDropFunctor(*A, threshold, results); + + if (aggregationMayCreateDirichlet) { + runCountingFunctor(classical_dropping, + // drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + } else 
{ + runCountingFunctor(classical_dropping, + // drop_boundaries, + preserve_diagonals); + } + } else if (classicalAlgoStr == "unscaled cut") { + TEUCHOS_ASSERT(false); + } else if (classicalAlgoStr == "scaled cut") { + TEUCHOS_ASSERT(false); + } else if (classicalAlgoStr == "scaled cut symmetric") { + TEUCHOS_ASSERT(false); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\""); } - RCP ghostedCoords; - { - SubFactoryMonitor m2(*this, "Ghosted coords construction", currentLevel); - ghostedCoords = Xpetra::MultiVectorFactory::magnitudeType, LO, GO, NO>::Build(nonUniqueMap, coords->getNumVectors()); - ghostedCoords->doImport(*coords, *importer, Xpetra::INSERT); + } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") { + auto signed_classical_rs_dropping = ClassicalDropping::SignedClassicalRSDropFunctor(*A, threshold, results); + + if (aggregationMayCreateDirichlet) { + runCountingFunctor(signed_classical_rs_dropping, + // drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + + } else { + runCountingFunctor(signed_classical_rs_dropping, + // drop_boundaries, + preserve_diagonals); } - - auto ghostedCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadWrite); - CoalesceDrop_Kokkos_Details::DistanceFunctor distFunctor(ghostedCoordsView); - - // Construct Laplacian diagonal - RCP localLaplDiag; - { - SubFactoryMonitor m2(*this, "Local Laplacian diag construction", currentLevel); - - localLaplDiag = VectorFactory::Build(uniqueMap); - - auto localLaplDiagView = localLaplDiag->getDeviceLocalView(Xpetra::Access::OverwriteAll); - auto kokkosGraph = kokkosMatrix.graph; - - Kokkos::parallel_for( - "MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag", range_type(0, numRows), - KOKKOS_LAMBDA(const LO row) { 
- auto rowView = kokkosGraph.rowConst(row); - auto length = rowView.length; - - impl_Scalar d = impl_ATS::zero(); - for (decltype(length) colID = 0; colID < length; colID++) { - auto col = rowView(colID); - if (row != col) - d += impl_ATS::one() / distFunctor.distance2(row, col); - } - localLaplDiagView(row, 0) = d; - }); + } else if (algo == "signed classical sa") { + auto signed_classical_sa_dropping = ClassicalDropping::SignedClassicalSADropFunctor(*A, threshold, results); + + if (aggregationMayCreateDirichlet) { + runCountingFunctor(signed_classical_sa_dropping, + // drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + + } else { + runCountingFunctor(signed_classical_sa_dropping, + // drop_boundaries, + preserve_diagonals); } - - // Construct ghosted Laplacian diagonal - RCP ghostedLaplDiag; - { - SubFactoryMonitor m2(*this, "Ghosted Laplacian diag construction", currentLevel); - ghostedLaplDiag = VectorFactory::Build(nonUniqueMap); - ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT); - } - - // Filter out entries - { - SubFactoryMonitor m2(*this, "MainLoop", currentLevel); - - auto ghostedLaplDiagView = ghostedLaplDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); - - CoalesceDrop_Kokkos_Details::DistanceLaplacianDropFunctor - dropFunctor(ghostedLaplDiagView, distFunctor, threshold); - CoalesceDrop_Kokkos_Details::ScalarFunctor - scalarFunctor(kokkosMatrix, boundaryNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, true); - - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0, numRows), - scalarFunctor, nnzFA); + } else if (algo == "distance laplacian") { + using doubleMultiVector = Xpetra::MultiVector::magnitudeType, LO, GO, NO>; + auto coords = Get>(currentLevel, "Coordinates"); + + auto dist2 = DistanceLaplacian::DistanceFunctor(*A, coords); + + if (distanceLaplacianAlgoStr == "default") { + auto dist_laplacian_dropping = 
DistanceLaplacian::DropFunctor(*A, threshold, dist2, results); + + if (aggregationMayCreateDirichlet) { + runCountingFunctor(dist_laplacian_dropping, + // drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + } else { + runCountingFunctor(dist_laplacian_dropping, + // drop_boundaries, + preserve_diagonals); + } + } else if (distanceLaplacianAlgoStr == "unscaled cut") { + TEUCHOS_ASSERT(false); + } else if (distanceLaplacianAlgoStr == "scaled cut") { + TEUCHOS_ASSERT(false); + } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") { + TEUCHOS_ASSERT(false); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\""); } + } else { + TEUCHOS_ASSERT(false); } + } else { + Kokkos::deep_copy(results, KEEP); + // runCountingFunctor(drop_boundaries); + auto no_op = Misc::NoOpFunctor(); + runCountingFunctor(no_op); } - numDropped = nnzA - nnzFA; +#undef runCountingFunctor + } + LocalOrdinal nnz_filtered = nnz.first; + LocalOrdinal nnz_graph = nnz.second; + GO numTotal = lclA.nnz(); + GO numDropped = numTotal - nnz_filtered; + // We now know the number of entries of filtered A and have the final rowptr. - { - SubFactoryMonitor m2(*this, "CompressRows", currentLevel); - - // parallel_scan (exclusive) - Kokkos::parallel_scan( - "MueLu:CoalesceDropF:Build:scalar_filter:compress_rows", range_type(0, numRows + 1), - KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) { - update += rows(i); - if (final_pass) - rows(i) = update; - }); - } + ////////////////////////////////////////////////////////////////////// + // Pass 4: Create local matrix for filtered A + // + // Dropped entries are optionally lumped to the diagonal. 
+ + RCP filteredA; + RCP graph; + { + SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel); - // Compress cols (and optionally vals) - // We use a trick here: we moved all remaining elements to the beginning - // of the original row in the main loop, so we don't need to check for - // INVALID here, and just stop when achieving the new number of elements - // per row. - cols_type cols(Kokkos::ViewAllocateWithoutInitializing("FA_cols"), nnzFA); - vals_type vals; + local_matrix_type lclFilteredA; if (reuseGraph) { - GetOStream(Runtime1) << "reuse matrix graph for filtering (compress matrix columns only)" << std::endl; - // Only compress cols - SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); - - Kokkos::parallel_for( - "MueLu:TentativePF:Build:compress_cols", range_type(0, numRows), - KOKKOS_LAMBDA(const LO i) { - // Is there Kokkos memcpy? - LO rowStart = rows(i); - LO rowAStart = rowsA(i); - size_t rownnz = rows(i + 1) - rows(i); - for (size_t j = 0; j < rownnz; j++) - cols(rowStart + j) = colsAux(rowAStart + j); - }); + lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols()); } else { - // Compress cols and vals - GetOStream(Runtime1) << "new matrix graph for filtering (compress matrix columns and values)" << std::endl; - SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); - - vals = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_vals"), nnzFA); - - Kokkos::parallel_for( - "MueLu:TentativePF:Build:compress_cols", range_type(0, numRows), - KOKKOS_LAMBDA(const LO i) { - LO rowStart = rows(i); - LO rowAStart = rowsA(i); - size_t rownnz = rows(i + 1) - rows(i); - for (size_t j = 0; j < rownnz; j++) { - cols(rowStart + j) = colsAux(rowAStart + j); - vals(rowStart + j) = valsAux(rowAStart + j); - } - }); + auto colidx = entries_type("entries", nnz_filtered); + auto values = values_type("values", nnz_filtered); + lclFilteredA = local_matrix_type("filteredA", + lclA.numRows(), lclA.numCols(), + 
nnz_filtered, + values, filtered_rowptr, colidx); } - kokkos_graph_type kokkosGraph(cols, rows); - + local_graph_type lclGraph; { - SubFactoryMonitor m2(*this, "LWGraph construction", currentLevel); - - graph = rcp(new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(), "filtered graph of A")); - graph->SetBoundaryNodeMap(boundaryNodes); + auto colidx = entries_type("entries", nnz_graph); + lclGraph = local_graph_type(colidx, graph_rowptr); } - numTotal = A->getLocalNumEntries(); - - dofsPerNode = 1; - - if (!reuseGraph) { - SubFactoryMonitor m2(*this, "LocalMatrix+FillComplete", currentLevel); - - local_matrix_type localFA = local_matrix_type("A", numRows, A->getLocalMatrixDevice().numCols(), nnzFA, vals, rows, cols); - auto filteredACrs = CrsMatrixFactory::Build(localFA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap(), - A->getCrsGraph()->getImporter(), A->getCrsGraph()->getExporter()); - filteredA = rcp(new CrsMatrixWrap(filteredACrs)); + if (lumping) { + if (reuseGraph) { + auto fillFunctor = MatrixConstruction::VectorFillFunctor(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor); + } else { + auto fillFunctor = MatrixConstruction::VectorFillFunctor(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor); + } + } else { + if (reuseGraph) { + auto fillFunctor = MatrixConstruction::VectorFillFunctor(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor); + } else { + auto fillFunctor = MatrixConstruction::VectorFillFunctor(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor); + } } - filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); + 
filteredA = Xpetra::MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap()); + filteredA->SetFixedBlockSize(blkSize); - if (pL.get("filtered matrix: reuse eigenvalue")) { + if (reuseEigenvalue) { // Reuse max eigenvalue from A // It is unclear what eigenvalue is the best for the smoothing, but we already may have // the D^{-1}A estimate in A, may as well use it. @@ -759,144 +1070,18 @@ void CoalesceDropFactory_kokkos:: filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); } - } else if (blkSize > 1 && threshold == zero) { - // Case 3: block problem without filtering - // - // FIXME_KOKKOS: this code is completely unoptimized. It really should do - // a very simple thing: merge rows and produce nodal graph. But the code - // seems very complicated. Can we do better? - - TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiply of " << blkSize); - - const RCP rowMap = A->getRowMap(); - const RCP colMap = A->getColMap(); - - // build a node row map (uniqueMap = non-overlapping) and a node column map - // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation - // stored in the AmalgamationInfo class container contain the local node id - // given a local dof id. The data is calculated in the AmalgamationFactory and - // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo) - const RCP uniqueMap = amalInfo->getNodeRowMap(); - const RCP nonUniqueMap = amalInfo->getNodeColMap(); - Array rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View? 
- Array colTranslationArray = *(amalInfo->getColTranslation()); - - Kokkos::View - rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size()); - Kokkos::View - colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size()); - - // get number of local nodes - LO numNodes = Teuchos::as(uniqueMap->getLocalNumElements()); - typedef typename Kokkos::View id_translation_type; - id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size()); - id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size()); - Kokkos::deep_copy(rowTranslation, rowTranslationView); - Kokkos::deep_copy(colTranslation, colTranslationView); - - // extract striding information - blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) - LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map - LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size) - if (A->IsView("stridedMaps") == true) { - const RCP myMap = A->getRowMap("stridedMaps"); - const RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap"); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - blkId = strMap->getStridedBlockId(); - if (blkId > -1) - blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); - } - auto kokkosMatrix = A->getLocalMatrixDevice(); // access underlying kokkos data - - // - typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; - typedef typename kokkos_graph_type::row_map_type row_map_type; - // typedef typename row_map_type::HostMirror row_map_type_h; - typedef typename kokkos_graph_type::entries_type entries_type; - - // Stage 1c: get number of dof-nonzeros per blkSize node rows - typename row_map_type::non_const_type dofNnz("nnz_map", numNodes + 1); - LO numDofCols = 0; - 
CoalesceDrop_Kokkos_Details::Stage1aVectorFunctor stage1aFunctor(kokkosMatrix, dofNnz, blkPartSize); - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1a", range_type(0, numNodes), stage1aFunctor, numDofCols); - // parallel_scan (exclusive) - CoalesceDrop_Kokkos_Details::ScanFunctor scanFunctor(dofNnz); - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0, numNodes + 1), scanFunctor); - - // Detect and record dof rows that correspond to Dirichlet boundary conditions - boundary_nodes_type singleEntryRows = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); - - typename entries_type::non_const_type dofcols("dofcols", numDofCols /*dofNnz(numNodes)*/); // why does dofNnz(numNodes) work? should be a parallel reduce, i guess - - // we have dofcols and dofids from Stage1dVectorFunctor - LO numNodeCols = 0; - typename row_map_type::non_const_type rows("nnz_nodemap", numNodes + 1); - typename boundary_nodes_type::non_const_type bndNodes("boundaryNodes", numNodes); - - CoalesceDrop_Kokkos_Details::Stage1bcVectorFunctor stage1bcFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols, colTranslation, rows, singleEntryRows, bndNodes, pL.get("aggregation: greedy Dirichlet")); - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0, numNodes), stage1bcFunctor, numNodeCols); - - // parallel_scan (exclusive) - CoalesceDrop_Kokkos_Details::ScanFunctor scanNodeFunctor(rows); - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0, numNodes + 1), scanNodeFunctor); - - // create column node view - typename entries_type::non_const_type cols("nodecols", numNodeCols); - - CoalesceDrop_Kokkos_Details::Stage1dVectorFunctor stage1dFunctor(dofcols, dofNnz, cols, rows); - Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0, numNodes), stage1dFunctor); - kokkos_graph_type kokkosGraph(cols, rows); - - // create LW graph - graph 
= rcp(new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A")); - - boundaryNodes = bndNodes; + graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A")); graph->SetBoundaryNodeMap(boundaryNodes); - numTotal = A->getLocalNumEntries(); - - dofsPerNode = blkSize; - - filteredA = A; - - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu: CoalesceDropFactory_kokkos: Block filtering is not implemented"); } - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - - Kokkos::parallel_reduce( - "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)), - KOKKOS_LAMBDA(const LO i, GO& n) { - if (boundaryNodes(i)) - n++; - }, - numLocalBoundaryNodes); - - auto comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } - - if ((GetVerbLevel() & Statistics1) && threshold != zero) { - auto comm = A->getRowMap()->getComm(); - - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); - - if (numGlobalTotal != 0) { - GetOStream(Statistics1) << "Number of dropped entries: " - << numGlobalDropped << "/" << numGlobalTotal - << " (" << 100 * Teuchos::as(numGlobalDropped) / Teuchos::as(numGlobalTotal) << "%)" << std::endl; - } - } + LO dofsPerNode = blkSize; Set(currentLevel, "DofsPerNode", dofsPerNode); Set(currentLevel, "Graph", graph); Set(currentLevel, "A", filteredA); + + return std::make_tuple(numDropped, boundaryNodes); } + } // namespace MueLu #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp new file mode 100644 index 000000000000..e5068974586c --- /dev/null +++ 
b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp @@ -0,0 +1,515 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_CUTDROP_HPP +#define MUELU_CUTDROP_HPP + +#include "Kokkos_Core.hpp" +#include "Kokkos_ArithTraits.hpp" +#include "MueLu_DroppingCommon.hpp" +#include "MueLu_Utilities.hpp" +#include "Xpetra_Matrix.hpp" +#include "Xpetra_MultiVector.hpp" +#include "MueLu_DistanceLaplacianDropping.hpp" + +namespace MueLu::CutDrop { + +enum decisionAlgoType { defaultAlgo, + unscaled_cut, + scaled_cut, + scaled_cut_symmetric }; + +template +class UnscaledComparison { + public: + using matrix_type = Xpetra::Matrix; + + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + private: + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + public: + UnscaledComparison(matrix_type& A_, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , results(results_) {} + + template + struct Comparator { + private: + using scalar_type = typename local_matrix_type2::value_type; + using local_ordinal_type = typename local_matrix_type2::ordinal_type; + using memory_space = typename local_matrix_type2::memory_space; + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + const local_matrix_type2 A; + const local_ordinal_type offset; + const results_view 
results; + + public: + KOKKOS_INLINE_FUNCTION + Comparator(const local_matrix_type2& A_, local_ordinal_type rlid_, const results_view& results_) + : A(A_) + , offset(A_.graph.row_map(rlid_)) + , results(results_) {} + + KOKKOS_INLINE_FUNCTION + magnitudeType get_value(size_t x) const { + return ATS::magnitude(A.values(offset + x) * A.values(offset + x)); + } + + KOKKOS_INLINE_FUNCTION + bool operator()(size_t x, size_t y) const { + if (results(offset + x) != UNDECIDED) { + if (results(offset + y) != UNDECIDED) { + // does not matter + return (x < y); + } else { + // sort undecided to the right + return true; + } + } else { + if (results(offset + y) != UNDECIDED) { + // sort undecided to the right + return false; + } else { + return get_value(x) > get_value(y); + } + } + } + }; + + using comparator_type = Comparator; + + KOKKOS_INLINE_FUNCTION + comparator_type getComparator(local_ordinal_type rlid) const { + return comparator_type(A, rlid, results); + } +}; + +template +class ScaledComparison { + public: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using diag_vec_type = Xpetra::MultiVector; + using diag_view_type = typename Kokkos::DualView::t_dev; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + private: + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + Teuchos::RCP diagVec; + diag_view_type diag; + + public: + ScaledComparison(matrix_type& A_, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , results(results_) { + diagVec = Utilities::GetMatrixOverlappedDiagonal(A_); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + template + 
struct Comparator { + private: + using scalar_type = typename local_matrix_type2::value_type; + using local_ordinal_type = typename local_matrix_type2::ordinal_type; + using memory_space = typename local_matrix_type2::memory_space; + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + const local_matrix_type2 A; + const diag_view_type2 diag; + const local_ordinal_type rlid; + const local_ordinal_type offset; + const results_view results; + + public: + KOKKOS_INLINE_FUNCTION + Comparator(const local_matrix_type2& A_, const diag_view_type2& diag_, const local_ordinal_type rlid_, const results_view& results_) + : A(A_) + , diag(diag_) + , rlid(rlid_) + , offset(A_.graph.row_map(rlid_)) + , results(results_) {} + + KOKKOS_INLINE_FUNCTION + magnitudeType get_value(size_t x) const { + auto x_aij = ATS::magnitude(A.values(offset + x) * A.values(offset + x)); + auto x_aiiajj = ATS::magnitude(diag(rlid) * diag(A.graph.entries(offset + x))); + return (x_aij / x_aiiajj); + } + + KOKKOS_INLINE_FUNCTION + bool operator()(size_t x, size_t y) const { + if (results(offset + x) != UNDECIDED) { + if (results(offset + y) != UNDECIDED) { + // does not matter + return (x < y); + } else { + // sort undecided to the right + return true; + } + } else { + if (results(offset + y) != UNDECIDED) { + // sort undecided to the right + return false; + } else { + return get_value(x) > get_value(y); + } + } + } + }; + + using comparator_type = Comparator; + + KOKKOS_INLINE_FUNCTION + comparator_type getComparator(local_ordinal_type rlid) const { + return comparator_type(A, diag, rlid, results); + } +}; + +template +class UnscaledDistanceLaplacianComparison { + public: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using 
memory_space = typename local_matrix_type::memory_space; + using diag_vec_type = Xpetra::MultiVector; + using diag_view_type = typename Kokkos::DualView::t_dev; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + private: + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + Teuchos::RCP diagVec; + diag_view_type diag; + DistanceFunctorType dist2; + + public: + UnscaledDistanceLaplacianComparison(matrix_type& A_, DistanceFunctorType& dist2_, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , results(results_) + , dist2(dist2_) { + // Construct ghosted distance Laplacian diagonal + diagVec = DistanceLaplacian::getDiagonal(A_, dist2); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + template + struct Comparator { + private: + using scalar_type = typename local_matrix_type2::value_type; + using local_ordinal_type = typename local_matrix_type2::ordinal_type; + using memory_space = typename local_matrix_type2::memory_space; + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + const local_matrix_type2 A; + const diag_view_type2 diag; + const DistanceFunctorType2* dist2; + const local_ordinal_type rlid; + const local_ordinal_type offset; + const results_view results; + + const scalar_type one = ATS::one(); + + public: + KOKKOS_INLINE_FUNCTION + Comparator(const local_matrix_type2& A_, const diag_view_type2& diag_, const DistanceFunctorType2* dist2_, local_ordinal_type rlid_, const results_view& results_) + : A(A_) + , diag(diag_) + , dist2(dist2_) + , rlid(rlid_) + , offset(A_.graph.row_map(rlid_)) + , results(results_) {} + + KOKKOS_INLINE_FUNCTION + magnitudeType get_value(size_t x) const { + auto clid = A.graph.entries(offset + x); + scalar_type val; + if (rlid != clid) { + val = one / dist2->distance2(rlid, clid); + } 
else { + val = diag(rlid); + } + auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + return aij2; + } + + KOKKOS_INLINE_FUNCTION + bool operator()(size_t x, size_t y) const { + if (results(offset + x) != UNDECIDED) { + if (results(offset + y) != UNDECIDED) { + // does not matter + return (x < y); + } else { + // sort undecided to the right + return true; + } + } else { + if (results(offset + y) != UNDECIDED) { + // sort undecided to the right + return false; + } else { + return get_value(x) > get_value(y); + } + } + } + }; + + using comparator_type = Comparator; + + KOKKOS_INLINE_FUNCTION + comparator_type getComparator(local_ordinal_type rlid) const { + return comparator_type(A, diag, &dist2, rlid, results); + } +}; + +template +class ScaledDistanceLaplacianComparison { + public: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using diag_vec_type = Xpetra::MultiVector; + using diag_view_type = typename Kokkos::DualView::t_dev; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + private: + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + Teuchos::RCP diagVec; + diag_view_type diag; + DistanceFunctorType dist2; + + public: + ScaledDistanceLaplacianComparison(matrix_type& A_, DistanceFunctorType& dist2_, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , results(results_) + , dist2(dist2_) { + // Construct ghosted distance Laplacian diagonal + diagVec = DistanceLaplacian::getDiagonal(A_, dist2); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + template + struct Comparator { + private: + using scalar_type = typename 
local_matrix_type2::value_type; + using local_ordinal_type = typename local_matrix_type2::ordinal_type; + using memory_space = typename local_matrix_type2::memory_space; + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + const local_matrix_type2 A; + const diag_view_type2 diag; + const DistanceFunctorType2* dist2; + const local_ordinal_type rlid; + const local_ordinal_type offset; + const results_view results; + + const scalar_type one = ATS::one(); + + public: + KOKKOS_INLINE_FUNCTION + Comparator(const local_matrix_type2& A_, const diag_view_type2& diag_, const DistanceFunctorType2* dist2_, local_ordinal_type rlid_, const results_view& results_) + : A(A_) + , diag(diag_) + , dist2(dist2_) + , rlid(rlid_) + , offset(A_.graph.row_map(rlid_)) + , results(results_) {} + + KOKKOS_INLINE_FUNCTION + magnitudeType get_value(size_t x) const { + auto clid = A.graph.entries(offset + x); + scalar_type val; + if (rlid != clid) { + val = one / dist2->distance2(rlid, clid); + } else { + val = diag(rlid); + } + auto aiiajj = ATS::magnitude(diag(rlid)) * ATS::magnitude(diag(clid)); // |a_ii|*|a_jj| + auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + return (aij2 / aiiajj); + } + + KOKKOS_INLINE_FUNCTION + bool operator()(size_t x, size_t y) const { + if (results(offset + x) != UNDECIDED) { + if (results(offset + y) != UNDECIDED) { + // does not matter + return (x < y); + } else { + // sort undecided to the right + return true; + } + } else { + if (results(offset + y) != UNDECIDED) { + // sort undecided to the right + return false; + } else { + return get_value(x) > get_value(y); + } + } + } + }; + + using comparator_type = Comparator; + + KOKKOS_INLINE_FUNCTION + comparator_type getComparator(local_ordinal_type rlid) const { + return comparator_type(A, diag, &dist2, rlid, results); + } +}; + +template +KOKKOS_INLINE_FUNCTION void serialHeapSort(view_type& v, comparator_type 
comparator) { + auto N = v.extent(0); + size_t start = N / 2; + size_t end = N; + while (end > 1) { + if (start > 0) + start = start - 1; + else { + end = end - 1; + auto temp = v(0); + v(0) = v(end); + v(end) = temp; + } + size_t root = start; + while (2 * root + 1 < end) { + size_t child = 2 * root + 1; + if ((child + 1 < end) and (comparator(v(child), v(child + 1)))) + ++child; + + if (comparator(v(root), v(child))) { + auto temp = v(root); + v(root) = v(child); + v(child) = temp; + root = child; + } else + break; + } + } +} + +template +class CutDropFunctor { + private: + using local_matrix_type = typename comparison_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + comparison_type comparison; + magnitudeType eps; + results_view results; + Kokkos::View index; + + public: + CutDropFunctor(comparison_type& comparison_, magnitudeType threshold) + : A(comparison_.A) + , comparison(comparison_) + , eps(threshold) + , results(comparison_.results) { + index = Kokkos::View("indices", A.nnz()); + } + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type& rlid) const { + auto row = A.rowConst(rlid); + size_t nnz = row.length; + + auto drop_view = Kokkos::subview(results, Kokkos::make_pair(A.graph.row_map(rlid), A.graph.row_map(rlid + 1))); + auto row_permutation = Kokkos::subview(index, Kokkos::make_pair(A.graph.row_map(rlid), A.graph.row_map(rlid + 1))); + + auto comparator = comparison.getComparator(rlid); + + for (size_t i = 0; i < nnz; ++i) { + row_permutation(i) = i; + } + serialHeapSort(row_permutation, comparator); + + size_t keepStart = 0; + size_t dropStart = nnz; + // find index 
where dropping starts + for (size_t i = 1; i < nnz; ++i) { + auto const& x = row_permutation(i - 1); + auto const& y = row_permutation(i); + if ((drop_view(x) != UNDECIDED) && (drop_view(y) == UNDECIDED)) + keepStart = i; + if ((drop_view(x) != UNDECIDED) || (drop_view(y) != UNDECIDED)) + continue; + magnitudeType x_aij = comparator.get_value(x); + magnitudeType y_aij = comparator.get_value(y); + if (eps * eps * x_aij > y_aij) { + if (i < dropStart) { + dropStart = i; + } + } + } + + // drop everything to the right of where values stop passing threshold + for (size_t i = keepStart; i < nnz; ++i) { + drop_view(row_permutation(i)) = Kokkos::max(dropStart <= i ? DROP : KEEP, drop_view(row_permutation(i))); + } + } +}; + +} // namespace MueLu::CutDrop + +#endif diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DistanceLaplacianDropping.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DistanceLaplacianDropping.hpp new file mode 100644 index 000000000000..454a177956eb --- /dev/null +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DistanceLaplacianDropping.hpp @@ -0,0 +1,178 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_DISTANCELAPLACIANDROPPING_HPP +#define MUELU_DISTANCELAPLACIANDROPPING_HPP + +#include "MueLu_DroppingCommon.hpp" +#include "Kokkos_Core.hpp" +#include "Kokkos_ArithTraits.hpp" +#include "Teuchos_RCP.hpp" +#include "Xpetra_Matrix.hpp" +#include "Xpetra_MultiVector.hpp" +#include "Xpetra_MultiVectorFactory.hpp" + +namespace MueLu::DistanceLaplacian { + +template +class DistanceFunctor { + private: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = LocalOrdinal; + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using magATS = Kokkos::ArithTraits; + using coords_type = Xpetra::MultiVector; + using local_coords_type = typename coords_type::dual_view_type_const::t_dev; + + Teuchos::RCP coordsMV; + Teuchos::RCP ghostedCoordsMV; + + local_coords_type coords; + local_coords_type ghostedCoords; + + public: + DistanceFunctor(matrix_type& A, Teuchos::RCP& coords_) { + coordsMV = coords_; + auto importer = A.getCrsGraph()->getImporter(); + if (!importer.is_null()) { + ghostedCoordsMV = Xpetra::MultiVectorFactory::Build(importer->getTargetMap(), coordsMV->getNumVectors()); + ghostedCoordsMV->doImport(*coordsMV, *importer, Xpetra::INSERT); + coords = coordsMV->getDeviceLocalView(Xpetra::Access::ReadOnly); + ghostedCoords = ghostedCoordsMV->getDeviceLocalView(Xpetra::Access::ReadOnly); + } else { + coords = coordsMV->getDeviceLocalView(Xpetra::Access::ReadOnly); + ghostedCoords = coords; + } + } + + KOKKOS_FORCEINLINE_FUNCTION + magnitudeType distance2(const local_ordinal_type row, const local_ordinal_type col) const { + magnitudeType d = magATS::zero(); + magnitudeType s; + for (size_t j = 0; j < coords.extent(1); ++j) { + s = coords(row, j) - 
ghostedCoords(col, j); + d += s * s; + } + return d; + } +}; + +template +Teuchos::RCP > +getDiagonal(Xpetra::Matrix& A, + DistanceFunctorType& distFunctor) { + using scalar_type = Scalar; + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using node_type = Node; + using ATS = Kokkos::ArithTraits; + using impl_scalar_type = typename ATS::val_type; + using implATS = Kokkos::ArithTraits; + using magnitudeType = typename implATS::magnitudeType; + using execution_space = typename Node::execution_space; + using range_type = Kokkos::RangePolicy; + + auto diag = Xpetra::MultiVectorFactory::Build(A.getRowMap(), 1); + { + auto lclA = A.getLocalMatrixDevice(); + auto lclDiag = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll); + + Kokkos::parallel_for( + "MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag", + range_type(0, lclA.numRows()), + KOKKOS_LAMBDA(const local_ordinal_type& row) { + auto rowView = lclA.rowConst(row); + auto length = rowView.length; + + magnitudeType d; + impl_scalar_type d2 = implATS::zero(); + for (local_ordinal_type colID = 0; colID < length; colID++) { + auto col = rowView.colidx(colID); + if (row != col) { + d = distFunctor.distance2(row, col); + d2 += implATS::one() / d; + } + } + lclDiag(row, 0) = d2; + }); + } + auto importer = A.getCrsGraph()->getImporter(); + if (!importer.is_null()) { + auto ghostedDiag = Xpetra::MultiVectorFactory::Build(A.getColMap(), 1); + ghostedDiag->doImport(*diag, *importer, Xpetra::INSERT); + return ghostedDiag; + } else { + return diag; + } +} + +template +class DropFunctor { + private: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using diag_vec_type = Xpetra::MultiVector; + using diag_view_type = typename 
Kokkos::DualView::t_dev; + + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + magnitudeType eps; + Teuchos::RCP diagVec; + diag_view_type diag; // corresponds to overlapped diagonal + DistanceFunctorType dist2; + results_view results; + const scalar_type one = ATS::one(); + + public: + DropFunctor(matrix_type& A_, magnitudeType threshold, DistanceFunctorType& dist2_, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , eps(threshold) + , dist2(dist2_) + , results(results_) { + diagVec = getDiagonal(A_, dist2); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + + scalar_type val; + if (rlid != clid) { + val = one / dist2.distance2(rlid, clid); + } else { + val = diag(rlid); + } + auto aiiajj = ATS::magnitude(diag(rlid)) * ATS::magnitude(diag(clid)); // |a_ii|*|a_jj| + auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + + results(offset + k) = Kokkos::max((aij2 <= eps * eps * aiiajj) ? 
DROP : KEEP, + results(offset + k)); + } + } +}; + +} // namespace MueLu::DistanceLaplacian + +#endif diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp new file mode 100644 index 000000000000..deba71e88665 --- /dev/null +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp @@ -0,0 +1,369 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_DROPPINGCOMMON_HPP +#define MUELU_DROPPINGCOMMON_HPP + +#include "Kokkos_Core.hpp" +#include "Kokkos_ArithTraits.hpp" +#include "Xpetra_Access.hpp" +#include "Xpetra_Matrix.hpp" + +namespace MueLu { + +enum DecisionType { + UNDECIDED = 0, // no decision has been taken yet, used for initialization + KEEP = 1, // keep the entry + DROP = 2, // drop it + BOUNDARY = 3 // entry is a boundary +}; + +namespace Misc { + +template +class NoOpFunctor { + public: + NoOpFunctor() {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + } +}; + +template +class PointwiseDropBoundaryFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + boundary_nodes_view boundaryNodes; + results_view results; + + public: + PointwiseDropBoundaryFunctor(local_matrix_type& A_, boundary_nodes_view boundaryNodes_, results_view& results_) + : A(A_) + , boundaryNodes(boundaryNodes_) + , results(results_) {} + + 
KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + const bool isBoundaryRow = boundaryNodes(rlid); + if (isBoundaryRow) { + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + results(offset + k) = Kokkos::max(rlid == clid ? KEEP : DROP, + results(offset + k)); + } + } + } +}; + +template +class VectorDropBoundaryFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using boundary_nodes_view = Kokkos::View; + using block_indices_view_type = Kokkos::View; + + local_matrix_type A; + block_indices_view_type point_to_block; + boundary_nodes_view boundaryNodes; + results_view results; + + public: + VectorDropBoundaryFunctor(local_matrix_type& A_, block_indices_view_type point_to_block_, boundary_nodes_view boundaryNodes_, results_view& results_) + : A(A_) + , point_to_block(point_to_block_) + , boundaryNodes(boundaryNodes_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + const bool isBoundaryRow = boundaryNodes(point_to_block(rlid)); + if (isBoundaryRow) { + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + results(offset + k) = Kokkos::max(rlid == clid ? 
KEEP : DROP, + results(offset + k)); + } + } + } +}; + +template +class KeepDiagonalFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + public: + KeepDiagonalFunctor(local_matrix_type& A_, results_view& results_) + : A(A_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if ((rlid == clid) && (results(offset + k) != BOUNDARY)) { + results(offset + k) = KEEP; + break; + } + } + } +}; + +template +class DropOffRankFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + public: + DropOffRankFunctor(local_matrix_type& A_, results_view& results_) + : A(A_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if (clid >= A.numRows()) { + results(offset + k) = Kokkos::max(DROP, results(offset + k)); + } + } + } +}; + +template +class MarkSingletonFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + using boundary_nodes_view = 
Kokkos::View; + + local_matrix_type A; + boundary_nodes_view boundaryNodes; + results_view results; + + public: + MarkSingletonFunctor(local_matrix_type& A_, boundary_nodes_view boundaryNodes_, results_view& results_) + : A(A_) + , boundaryNodes(boundaryNodes_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if ((results(offset + k) == KEEP) && (rlid != clid)) + return; + } + boundaryNodes(rlid) = true; + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if (rlid == clid) + results(offset + k) = KEEP; + else + results(offset + k) = BOUNDARY; + } + } +}; + +template +class MarkSingletonVectorFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using block_indices_view_type = Kokkos::View; + + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + block_indices_view_type point_to_block; + boundary_nodes_view boundaryNodes; + results_view results; + + public: + MarkSingletonVectorFunctor(local_matrix_type& A_, block_indices_view_type point_to_block_, boundary_nodes_view boundaryNodes_, results_view& results_) + : A(A_) + , point_to_block(point_to_block_) + , boundaryNodes(boundaryNodes_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if ((results(offset + k) == KEEP) && (rlid != clid)) + return; + } + auto brlid = point_to_block(rlid); + boundaryNodes(brlid) = true; + for 
(local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if (rlid == clid) + results(offset + k) = KEEP; + else + results(offset + k) = BOUNDARY; + } + } +}; + +template +class BlockDiagonalizeFunctor { + private: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + using block_indices_type = Xpetra::MultiVector; + using local_block_indices_view_type = typename block_indices_type::dual_view_type_const::t_dev; + + local_matrix_type A; + local_block_indices_view_type point_to_block; + local_block_indices_view_type ghosted_point_to_block; + results_view results; + + public: + BlockDiagonalizeFunctor(matrix_type& A_, block_indices_type& point_to_block_, block_indices_type& ghosted_point_to_block_, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , point_to_block(point_to_block_.getDeviceLocalView(Xpetra::Access::ReadOnly)) + , ghosted_point_to_block(ghosted_point_to_block_.getDeviceLocalView(Xpetra::Access::ReadOnly)) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if (point_to_block(rlid, 0) == ghosted_point_to_block(clid, 0)) { + results(offset + k) = Kokkos::max(KEEP, results(offset + k)); + } else { + results(offset + k) = Kokkos::max(DROP, results(offset + k)); + } + } + } +}; + +template +class DebugFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + 
using results_view = Kokkos::View; + + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + public: + DebugFunctor(local_matrix_type& A_, results_view& results_) + : A(A_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + if (results(offset + k) == UNDECIDED) { + Kokkos::printf("No dropping decision was taken for entry (%d, %d)\n", rlid, row.colidx(k)); + assert(false); + } + } + } +}; + +template +class SymmetrizeFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + public: + SymmetrizeFunctor(local_matrix_type& A_, results_view& results_) + : A(A_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + if (results(offset + k) == KEEP) { + auto clid = row.colidx(k); + if (clid >= A.numRows()) + continue; + auto row2 = A.rowConst(clid); + const size_t offset2 = A.graph.row_map(clid); + for (local_ordinal_type k2 = 0; k2 < row2.length; ++k2) { + auto clid2 = row2.colidx(k2); + if (clid2 == rlid) { + if (results(offset2 + k2) == DROP) + results(offset2 + k2) = KEEP; + break; + } + } + } + } + } +}; + +} // namespace Misc + +} // namespace MueLu + +#endif diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp new file mode 100644 index 000000000000..cee43f651dfb --- /dev/null +++ 
b/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp @@ -0,0 +1,1029 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_MATRIXCONSTRUCTION_HPP +#define MUELU_MATRIXCONSTRUCTION_HPP + +#include "Kokkos_Core.hpp" +#include "Kokkos_ArithTraits.hpp" + +#include "MueLu_DroppingCommon.hpp" + +#ifdef MUELU_COALESCE_DROP_DEBUG +// For demangling function names +#include +#endif + +namespace MueLu::MatrixConstruction { + +template +class PointwiseCountingFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + using rowptr_type = typename local_matrix_type::row_map_type::non_const_type; + + local_matrix_type A; + results_view results; + rowptr_type rowptr; + functor_type_0 functor0; + functor_type_1 functor1; + functor_type_2 functor2; + functor_type_3 functor3; + functor_type_4 functor4; + functor_type_5 functor5; + functor_type_6 functor6; + functor_type_7 functor7; + functor_type_8 functor8; + functor_type_9 functor9; + + std::vector functorNames; + + void init() { +#ifdef MUELU_COALESCE_DROP_DEBUG + functorNames = std::vector(); + +#define DEBUG_PREP(functor, functor_number) \ + { \ + if constexpr (!std::is_same_v) { \ + std::string functorName = typeid(decltype(functor)).name(); \ + int status = 0; \ + char* demangledFunctorName = 0; \ + demangledFunctorName = abi::__cxa_demangle(functorName.c_str(), 0, 0, &status); \ + functorNames.push_back(demangledFunctorName); \ + } \ + } + DEBUG_PREP(functor0, 0); + DEBUG_PREP(functor1, 1); + 
DEBUG_PREP(functor2, 2); + DEBUG_PREP(functor3, 3); + DEBUG_PREP(functor4, 4); + DEBUG_PREP(functor5, 5); + DEBUG_PREP(functor6, 6); + DEBUG_PREP(functor7, 7); + DEBUG_PREP(functor8, 8); + DEBUG_PREP(functor9, 9); +#undef DEBUG_PREP +#endif + } + + public: + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor0(0) + , functor1(0) + , functor2(0) + , functor3(0) + , functor4(0) + , functor5(0) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor0(functor0_) + , functor1(0) + , functor2(0) + , functor3(0) + , functor4(0) + , functor5(0) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(0) + , functor3(0) + , functor4(0) + , functor5(0) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(0) + , functor4(0) + , functor5(0) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_) + : A(A_) + , 
results(results_) + , rowptr(rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(0) + , functor5(0) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, + functor_type_4& functor4_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(functor4_) + , functor5(0) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, + functor_type_4& functor4_, functor_type_5& functor5_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(functor4_) + , functor5(functor5_) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, + functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(functor4_) + , functor5(functor5_) + , functor6(functor6_) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, 
functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, + functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_, functor_type_7& functor7_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(functor4_) + , functor5(functor5_) + , functor6(functor6_) + , functor7(functor7_) + , functor8(0) + , functor9(0) { + init(); + } + + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, + functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_, functor_type_7& functor7_, functor_type_8& functor8_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(functor4_) + , functor5(functor5_) + , functor6(functor6_) + , functor7(functor7_) + , functor8(functor8_) + , functor9(0) { + init(); + } + + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, + functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_, functor_type_7& functor7_, functor_type_8& functor8_, functor_type_9& functor9_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(functor4_) + , functor5(functor5_) + , functor6(functor6_) + , functor7(functor7_) + , functor8(functor8_) + , functor9(functor9_) { + init(); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type rlid, local_ordinal_type& nnz, const bool& final) const { +#ifdef MUELU_COALESCE_DROP_DEBUG + { + Kokkos::printf("\nStarting 
on row %d\n", rlid); + + auto row = A.rowConst(rlid); + + Kokkos::printf("indices: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + Kokkos::printf("%5d ", clid); + } + Kokkos::printf("\n"); + + Kokkos::printf("values: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto val = row.value(k); + Kokkos::printf("%5f ", val); + } + Kokkos::printf("\n"); + } +#endif + +#ifdef MUELU_COALESCE_DROP_DEBUG +#define APPLY_FUNCTOR(functor, functor_number) \ + { \ + if constexpr (!std::is_same_v) { \ + functor(rlid); \ + { \ + Kokkos::printf("%s\n", functorNames[functor_number].c_str()); \ + \ + auto row = A.rowConst(rlid); \ + const size_t offset = A.graph.row_map(rlid); \ + \ + Kokkos::printf("decisions: "); \ + for (local_ordinal_type k = 0; k < row.length; ++k) { \ + Kokkos::printf("%5d ", results(offset + k)); \ + } \ + Kokkos::printf("\n"); \ + } \ + } \ + } +#else +#define APPLY_FUNCTOR(functor, functor_number) \ + { \ + if constexpr (!std::is_same_v) { \ + functor(rlid); \ + } \ + } +#endif + APPLY_FUNCTOR(functor0, 0); + APPLY_FUNCTOR(functor1, 1); + APPLY_FUNCTOR(functor2, 2); + APPLY_FUNCTOR(functor3, 3); + APPLY_FUNCTOR(functor4, 4); + APPLY_FUNCTOR(functor5, 5); + APPLY_FUNCTOR(functor6, 6); + APPLY_FUNCTOR(functor7, 7); + APPLY_FUNCTOR(functor8, 8); + APPLY_FUNCTOR(functor9, 9); + +#undef APPLY_FUNCTOR + +#ifdef MUELU_COALESCE_DROP_DEBUG + { + Kokkos::printf("Done with row %d\n", rlid); + } +#endif + + size_t start = A.graph.row_map(rlid); + size_t end = A.graph.row_map(rlid + 1); + for (size_t i = start; i < end; ++i) { + if (results(i) == KEEP) { + ++nnz; + } + } + if (final) + rowptr(rlid + 1) = nnz; + } +}; + +template +class PointwiseFillReuseFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using ATS 
= Kokkos::ArithTraits; + + local_matrix_type A; + results_view results; + local_matrix_type filteredA; + local_graph_type graph; + const scalar_type zero = ATS::zero(); + + public: + PointwiseFillReuseFunctor(local_matrix_type& A_, results_view& results_, local_matrix_type& filteredA_, local_graph_type& graph_) + : A(A_) + , results(results_) + , filteredA(filteredA_) + , graph(graph_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + auto rowA = A.row(rlid); + size_t K = A.graph.row_map(rlid); + auto rowFilteredA = filteredA.row(rlid); + local_ordinal_type j = 0; + local_ordinal_type jj = 0; + local_ordinal_type graph_offset = graph.row_map(rlid); + scalar_type diagCorrection = zero; + local_ordinal_type diagOffset = -1; + for (local_ordinal_type k = 0; k < rowA.length; ++k) { + if constexpr (lumping) { + local_ordinal_type clid = rowA.colidx(k); + if (rlid == clid) { + diagOffset = j; + } + } + if (results(K + k) == KEEP) { + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = rowA.value(k); + ++j; + graph.entries(graph_offset + jj) = rowA.colidx(k); + ++jj; + } else if constexpr (lumping) { + diagCorrection += rowA.value(k); + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = zero; + ++j; + } else { + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = zero; + ++j; + } + } + if constexpr (lumping) { + rowFilteredA.value(diagOffset) += diagCorrection; + } + } +}; + +template +class PointwiseFillNoReuseFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using ATS = Kokkos::ArithTraits; + + local_matrix_type A; + results_view results; + local_matrix_type filteredA; + const scalar_type zero = ATS::zero(); + + public: + PointwiseFillNoReuseFunctor(local_matrix_type& A_, 
results_view& results_, local_matrix_type& filteredA_) + : A(A_) + , results(results_) + , filteredA(filteredA_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + auto rowA = A.row(rlid); + size_t K = A.graph.row_map(rlid); + auto rowFilteredA = filteredA.row(rlid); + local_ordinal_type j = 0; + scalar_type diagCorrection = zero; + local_ordinal_type diagOffset = -1; + for (local_ordinal_type k = 0; k < rowA.length; ++k) { + if constexpr (lumping) { + local_ordinal_type clid = rowA.colidx(k); + if (rlid == clid) { + diagOffset = j; + } + } + if (results(K + k) == KEEP) { + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = rowA.value(k); + ++j; + } else if constexpr (lumping) { + diagCorrection += rowA.value(k); + } + } + if constexpr (lumping) { + rowFilteredA.value(diagOffset) += diagCorrection; + } + } +}; + +template +class VectorCountingFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using block_indices_view_type = Kokkos::View; + + using rowptr_type = typename local_matrix_type::row_map_type::non_const_type; + using ATS = Kokkos::ArithTraits; + + local_matrix_type A; + local_ordinal_type blockSize; + block_indices_view_type ghosted_point_to_block; + results_view results; + rowptr_type filtered_rowptr; + rowptr_type graph_rowptr; + + functor_type_0 functor0; + functor_type_1 functor1; + functor_type_2 functor2; + functor_type_3 functor3; + functor_type_4 functor4; + functor_type_5 functor5; + functor_type_6 functor6; + functor_type_7 functor7; + functor_type_8 functor8; + functor_type_9 functor9; + + std::vector functorNames; + + void init() { +#ifdef MUELU_COALESCE_DROP_DEBUG + functorNames = std::vector(); + +#define DEBUG_PREP(functor, functor_number) \ + { \ + if constexpr (!std::is_same_v) { \ + 
std::string functorName = typeid(decltype(functor)).name(); \ + int status = 0; \ + char* demangledFunctorName = 0; \ + demangledFunctorName = abi::__cxa_demangle(functorName.c_str(), 0, 0, &status); \ + functorNames.push_back(demangledFunctorName); \ + } \ + } + DEBUG_PREP(functor0, 0); + DEBUG_PREP(functor1, 1); + DEBUG_PREP(functor2, 2); + DEBUG_PREP(functor3, 3); + DEBUG_PREP(functor4, 4); + DEBUG_PREP(functor5, 5); + DEBUG_PREP(functor6, 6); + DEBUG_PREP(functor7, 7); + DEBUG_PREP(functor8, 8); + DEBUG_PREP(functor9, 9); +#undef DEBUG_PREP +#endif + } + + public: + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor0(0) + , functor1(0) + , functor2(0) + , functor3(0) + , functor4(0) + , functor5(0) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, + functor_type_0& functor0_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor0(functor0_) + , functor1(0) + , functor2(0) + , functor3(0) + , functor4(0) + , functor5(0) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, + functor_type_0& functor0_, 
functor_type_1& functor1_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(0) + , functor3(0) + , functor4(0) + , functor5(0) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, + functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(0) + , functor4(0) + , functor5(0) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, + functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(0) + , functor5(0) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, + 
functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, + functor_type_4& functor4_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(functor4_) + , functor5(0) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, + functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, + functor_type_4& functor4_, functor_type_5& functor5_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(functor4_) + , functor5(functor5_) + , functor6(0) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, + functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, + functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , 
functor4(functor4_) + , functor5(functor5_) + , functor6(functor6_) + , functor7(0) + , functor8(0) + , functor9(0) { + init(); + } + + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, + functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, + functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_, functor_type_7& functor7_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(functor4_) + , functor5(functor5_) + , functor6(functor6_) + , functor7(functor7_) + , functor8(0) + , functor9(0) { + init(); + } + + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, + functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, + functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_, functor_type_7& functor7_, functor_type_8& functor8_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(functor4_) + , functor5(functor5_) + , functor6(functor6_) + , functor7(functor7_) + , functor8(functor8_) + , functor9(0) { + init(); + } + + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, 
results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, + functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, + functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_, functor_type_7& functor7_, functor_type_8& functor8_, functor_type_9& functor9_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor0(functor0_) + , functor1(functor1_) + , functor2(functor2_) + , functor3(functor3_) + , functor4(functor4_) + , functor5(functor5_) + , functor6(functor6_) + , functor7(functor7_) + , functor8(functor8_) + , functor9(functor9_) { + init(); + } + + KOKKOS_INLINE_FUNCTION + void join(Kokkos::pair& dest, const Kokkos::pair& src) const { + dest.first += src.first; + dest.second += src.second; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type brlid, Kokkos::pair& nnz, const bool& final) const { + auto nnz_filtered = &nnz.first; + auto nnz_graph = &nnz.second; +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("\nStarting on block row %d\n", brlid); +#endif + for (local_ordinal_type rlid = blockSize * brlid; rlid < blockSize * (brlid + 1); ++rlid) { +#ifdef MUELU_COALESCE_DROP_DEBUG + { + Kokkos::printf("\nStarting on row %d\n", rlid); + + auto row = A.rowConst(rlid); + + Kokkos::printf("indices: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + Kokkos::printf("%5d ", clid); + } + Kokkos::printf("\n"); + + Kokkos::printf("values: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto val = row.value(k); + Kokkos::printf("%5f ", val); + } + Kokkos::printf("\n"); + } +#endif + +#ifdef MUELU_COALESCE_DROP_DEBUG +#define APPLY_FUNCTOR(functor, functor_number) \ + { \ + if constexpr (!std::is_same_v) { \ + functor(rlid); \ + { \ + Kokkos::printf("%s\n", 
functorNames[functor_number].c_str()); \ + \ + auto row = A.rowConst(rlid); \ + const size_t offset = A.graph.row_map(rlid); \ + \ + Kokkos::printf("decisions: "); \ + for (local_ordinal_type k = 0; k < row.length; ++k) { \ + Kokkos::printf("%5d ", results(offset + k)); \ + } \ + Kokkos::printf("\n"); \ + } \ + } \ + } +#else +#define APPLY_FUNCTOR(functor, functor_number) \ + { \ + if constexpr (!std::is_same_v) { \ + functor(rlid); \ + } \ + } +#endif + + APPLY_FUNCTOR(functor0, 0); + APPLY_FUNCTOR(functor1, 1); + APPLY_FUNCTOR(functor2, 2); + APPLY_FUNCTOR(functor3, 3); + APPLY_FUNCTOR(functor4, 4); + APPLY_FUNCTOR(functor5, 5); + APPLY_FUNCTOR(functor6, 6); + APPLY_FUNCTOR(functor7, 7); + APPLY_FUNCTOR(functor8, 8); + APPLY_FUNCTOR(functor9, 9); + +#undef APPLY_FUNCTOR + +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("Done with row %d\n", rlid); +#endif + + size_t start = A.graph.row_map(rlid); + size_t end = A.graph.row_map(rlid + 1); + for (size_t i = start; i < end; ++i) { + if (results(i) == KEEP) { + ++(*nnz_filtered); + } + } + if (final) + filtered_rowptr(rlid + 1) = *nnz_filtered; + } + +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("Done with block row %d\nGraph indices ", brlid); +#endif + + local_ordinal_type* nextIndices = new local_ordinal_type[blockSize]; + for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { + nextIndices[block_index] = 0; + } + local_ordinal_type prev_bclid = -1; + while (true) { + local_ordinal_type min_block_index = -1; + local_ordinal_type min_clid = ATS::max(); + local_ordinal_type min_offset = -1; + for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { + auto rlid = blockSize * brlid + block_index; + auto offset = A.graph.row_map(rlid) + nextIndices[block_index]; + if (offset == A.graph.row_map(rlid + 1)) + continue; + auto clid = A.graph.entries(offset); + if (clid < min_clid) { + min_block_index = block_index; + min_clid = clid; + min_offset = offset; + } 
+ } + if (min_block_index == -1) + break; + ++nextIndices[min_block_index]; + auto bclid = ghosted_point_to_block(min_clid); + if (prev_bclid < bclid) { + if (results(min_offset) == KEEP) { + ++(*nnz_graph); +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("%5d ", bclid); +#endif + prev_bclid = bclid; + } + } + } +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("\n"); +#endif + if (final) + graph_rowptr(brlid + 1) = *nnz_graph; + } +}; + +template +class VectorFillFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using ATS = Kokkos::ArithTraits; + using OTS = Kokkos::ArithTraits; + using block_indices_view_type = Kokkos::View; + + local_matrix_type A; + local_ordinal_type blockSize; + block_indices_view_type ghosted_point_to_block; + results_view results; + local_matrix_type filteredA; + local_graph_type graph; + const scalar_type zero = ATS::zero(); + + public: + VectorFillFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, local_matrix_type& filteredA_, local_graph_type& graph_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filteredA(filteredA_) + , graph(graph_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type brlid) const { + for (local_ordinal_type rlid = blockSize * brlid; rlid < blockSize * (brlid + 1); ++rlid) { + auto rowA = A.row(rlid); + size_t K = A.graph.row_map(rlid); + auto rowFilteredA = filteredA.row(rlid); + local_ordinal_type j = 0; + scalar_type diagCorrection = zero; + local_ordinal_type diagOffset = -1; + for (local_ordinal_type k = 0; k < rowA.length; ++k) { + if constexpr (lumping) { + 
local_ordinal_type clid = rowA.colidx(k); + if (rlid == clid) { + diagOffset = j; + } + } + if (results(K + k) == KEEP) { + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = rowA.value(k); + ++j; + } else if constexpr (lumping) { + diagCorrection += rowA.value(k); + if constexpr (reuse) { + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = zero; + ++j; + } + } else if constexpr (reuse) { + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = zero; + ++j; + } + } + if constexpr (lumping) { + rowFilteredA.value(diagOffset) += diagCorrection; + } + } + + local_ordinal_type* nextIndices = new local_ordinal_type[blockSize]; + for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { + nextIndices[block_index] = 0; + } + local_ordinal_type prev_bclid = -1; + + local_ordinal_type j = graph.row_map(brlid); + while (true) { + local_ordinal_type min_block_index = -1; + local_ordinal_type min_clid = OTS::max(); + local_ordinal_type min_offset = -1; + for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { + auto rlid = blockSize * brlid + block_index; + auto offset = A.graph.row_map(rlid) + nextIndices[block_index]; + if (offset == A.graph.row_map(rlid + 1)) + continue; + auto clid = A.graph.entries(offset); + if (clid < min_clid) { + min_block_index = block_index; + min_clid = clid; + min_offset = offset; + } + } + if (min_block_index == -1) + break; + ++nextIndices[min_block_index]; + auto bclid = ghosted_point_to_block(min_clid); + if (prev_bclid < bclid) { + if (results(min_offset) == KEEP) { + graph.entries(j) = bclid; + ++j; + prev_bclid = bclid; + } + } + } + } +}; + +} // namespace MueLu::MatrixConstruction + +#endif diff --git a/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp b/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp index c856c755ec50..b58389aed22f 100644 --- 
a/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp +++ b/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp @@ -919,6 +919,16 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, 2x2, Scalar, Local expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, 0.0, 0.0, 2.0, reuseGraph)); expectedBoundaryNodesVector.push_back({true, true}); + + // test case 9 + Teuchos::ParameterList params9 = Teuchos::ParameterList(params0); + params9.set("aggregation: drop scheme", "classical"); + params9.set("aggregation: classical algo", "unscaled cut"); + params9.set("aggregation: drop tol", 3.6); + params.push_back(params9); + expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, -1.0, + -1.5, 2.0, reuseGraph)); + expectedBoundaryNodesVector.push_back({false, false}); } for (size_t testNo = 0; testNo < params.size(); ++testNo) { From aefc932e15d64e3c199fbfb3353cfc5ebcadf1d9 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Thu, 5 Sep 2024 09:47:52 -0600 Subject: [PATCH 08/50] MueLu: Extend CoalesceDropFactory_kokkos unit tests Signed-off-by: Christian Glusa --- .../CoalesceDropFactory_kokkos.cpp | 1248 ++++++++++++++++- .../MueLu_TestHelpers_kokkos.hpp | 58 +- 2 files changed, 1297 insertions(+), 9 deletions(-) diff --git a/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp b/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp index b58389aed22f..e4d206f2fdeb 100644 --- a/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp +++ b/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp @@ -13,12 +13,15 @@ #include "MueLu_TestHelpers_kokkos.hpp" #include "MueLu_Version.hpp" +#include #include "MueLu_CoalesceDropFactory.hpp" #include "MueLu_FilteredAFactory.hpp" #include "MueLu_CoalesceDropFactory_kokkos.hpp" #include "MueLu_AmalgamationFactory.hpp" #include "MueLu_LWGraph_kokkos.hpp" +#include + namespace MueLuTests { 
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, Constructor, Scalar, LocalOrdinal, GlobalOrdinal, Node) { @@ -33,6 +36,1215 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, Constructor, Scala out << *coalesceDropFact << std::endl; } +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, Build, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! 
+ const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); +} // Build + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, DistanceLaplacian, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. 
+ // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 40); + +} // DistanceLaplacian + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, DistanceLaplacianScaledCut, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + 
TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + // Now we doctor the coordinates so that the off-diagonal pair row 0 will want to keep (0,1) and row 1 will want to drop (1,0) + if (comm->getRank() == 0) { + auto vals = coordinates->getDataNonConst(0); + vals[0] = vals[0] - 2000 * 36; + } + + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. NOTE(review): this description looks copied from the DistanceLaplacian test; this test uses the "scaled cut" algorithm with tol 1/8 and expects 105 edges, so presumably not every interior off-diagonal entry is dropped - confirm. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 8.0)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); + coalesceDropFact.SetParameter("aggregation: distance laplacian algo", Teuchos::ParameterEntry(std::string("scaled cut"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! 
+ const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 105); + +} // DistanceLaplacianScaledCut + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, DistanceLaplacianUnscaledCut, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + + // Now we doctor the coordinates so that the off-diagonal pair row 0 will want to keep (0,1) and row 1 will want to drop (1,0) + if (!comm->getRank()) { + auto vals = coordinates->getDataNonConst(0); + vals[0] = vals[0] - 2000 * 36; + } + + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. 
NOTE(review): this description looks copied from the DistanceLaplacian test; this test uses the "unscaled cut" algorithm with tol 1/8 and expects 105 edges, so presumably not every interior off-diagonal entry is dropped - confirm. 
+ // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 8.0)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); + coalesceDropFact.SetParameter("aggregation: distance laplacian algo", Teuchos::ParameterEntry(std::string("unscaled cut"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 105); + +} // DistanceLaplacianUnscaledCut + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, DistanceLaplacianCutSym, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << 
MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + + // Now we doctor the coordinates so that the off-diagonal pair row 0 will want to keep (0,1) and row 1 will want to drop (1,0) + if (!comm->getRank()) { + auto vals = coordinates->getDataNonConst(0); + vals[0] = vals[0] - 2000 * 36; + } + + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.5)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); + coalesceDropFact.SetParameter("aggregation: distance laplacian algo", Teuchos::ParameterEntry(std::string("scaled cut symmetric"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! 
+ const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 106); + +} // DistanceLaplacianCutSym + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicalScaledCut, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + // Change entry (1,0) + auto crsA = Teuchos::rcp_dynamic_cast(A, true)->getCrsMatrix(); + crsA->resumeFill(); + if (comm->getRank() == 0) { + Teuchos::Array cols(3); + Teuchos::Array vals(3); + size_t numEntries; + crsA->getGlobalRowCopy(1, cols, vals, numEntries); + vals[0] = 0.5; + crsA->replaceGlobalValues(1, cols, vals); + } + crsA->fillComplete(); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping 
all the interior off-diagonal entries. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("classical"))); + coalesceDropFact.SetParameter("aggregation: classical algo", Teuchos::ParameterEntry(std::string("scaled cut"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 105); + +} // ClassicalScaledCut + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicalUnScaledCut, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << 
MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + // Change entry (1,0) + auto crsA = Teuchos::rcp_dynamic_cast(A, true)->getCrsMatrix(); + crsA->resumeFill(); + if (comm->getRank() == 0) { + Teuchos::Array cols(3); + Teuchos::Array vals(3); + size_t numEntries; + crsA->getGlobalRowCopy(1, cols, vals, numEntries); + vals[0] = 0.5; + crsA->replaceGlobalValues(1, cols, vals); + } + crsA->fillComplete(); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("classical"))); + coalesceDropFact.SetParameter("aggregation: classical algo", Teuchos::ParameterEntry(std::string("unscaled cut"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! 
+ const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 105); + +} // ClassicalUnScaledCut + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicalCutSym, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + // Change entry (1,0) + auto crsA = Teuchos::rcp_dynamic_cast(A, true)->getCrsMatrix(); + crsA->resumeFill(); + if (comm->getRank() == 0) { + Teuchos::Array cols(3); + Teuchos::Array vals(3); + size_t numEntries; + crsA->getGlobalRowCopy(1, cols, vals, numEntries); + vals[0] = 0.5; + crsA->replaceGlobalValues(1, cols, vals); + } + crsA->fillComplete(); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + + 
coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("classical"))); + coalesceDropFact.SetParameter("aggregation: classical algo", Teuchos::ParameterEntry(std::string("scaled cut symmetric"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 106); + +} // ClassicalCutSym + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, SignedClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + 
TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + // Change entry (1,0) + auto crsA = Teuchos::rcp_dynamic_cast(A, true)->getCrsMatrix(); + crsA->resumeFill(); + if (comm->getRank() == 0) { + Teuchos::Array cols(3); + Teuchos::Array vals(3); + size_t numEntries; + crsA->getGlobalRowCopy(1, cols, vals, numEntries); + vals[0] *= 2; + crsA->replaceGlobalValues(1, cols, vals); + } + crsA->fillComplete(); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // A_10 = -2 + // A_ij = -1 + // A_ii = 2 + // criterion for dropping is + // -Re(L_ij) <= tol * max_{k\neq i} Re(-L_ik) + // -> We drop entry (1,2). + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("signed classical"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! 
+ const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 105); + +} // SignedClassical + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, SignedScaledCutClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. NOTE(review): Build() is expected to throw MueLu::Exceptions::RuntimeError for this parameter combination (see TEST_THROW below), so this dropping description looks copied from another test - confirm. 
+ // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("signed classical"))); + coalesceDropFact.SetParameter("aggregation: classical algo", Teuchos::ParameterEntry(std::string("scaled cut"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + TEST_THROW(coalesceDropFact.Build(fineLevel), MueLu::Exceptions::RuntimeError); + + // RCP graph = fineLevel.Get >("Graph", &coalesceDropFact); + // LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + // TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + // const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + // const RCP myDomainMap = graph->GetDomainMap(); + + // TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + // TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + // TEST_EQUALITY(myImportMap->getMinLocalIndex(),0); + // TEST_EQUALITY(myImportMap->getGlobalNumElements(),Teuchos::as(36 + (comm->getSize()-1)*2)); + + // TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + // TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + // TEST_EQUALITY(myDomainMap->getMinLocalIndex(),0); + // TEST_EQUALITY(myDomainMap->getGlobalNumElements(),36); + + // TEST_EQUALITY(graph->GetGlobalNumEdges(),36); + +} // SignedScaledCutClassical + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, SignedUnscaledCutClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + 
out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("signed classical"))); + coalesceDropFact.SetParameter("aggregation: classical algo", Teuchos::ParameterEntry(std::string("unscaled cut"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + TEST_THROW(coalesceDropFact.Build(fineLevel), MueLu::Exceptions::RuntimeError); + + // RCP graph = fineLevel.Get >("Graph", &coalesceDropFact); + // LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + // TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + // const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! 
+ // const RCP myDomainMap = graph->GetDomainMap(); + + // TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + // TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + // TEST_EQUALITY(myImportMap->getMinLocalIndex(),0); + // TEST_EQUALITY(myImportMap->getGlobalNumElements(),Teuchos::as(36 + (comm->getSize()-1)*2)); + + // TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + // TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + // TEST_EQUALITY(myDomainMap->getMinLocalIndex(),0); + // TEST_EQUALITY(myDomainMap->getGlobalNumElements(),36); + + // TEST_EQUALITY(graph->GetGlobalNumEdges(),36); + +} // SignedUnScaledCutClassical + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalColoredSignedClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. 
+ // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal colored signed classical"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 36); + +} // BlockDiagonalColoredSignedClassical + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalNoColoredSignedClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + // this 
test is only compatible with rank higher than 1 + if (comm->getSize() == 1) { + return; + } + + // Default is Laplace1D with nx = 8748. + // It's a nice size for 1D and perfect aggregation. (6561 = 3^8) + // Nice size for 1D and perfect aggregation on small numbers of processors. (8748 = 4*3^7) + Teuchos::CommandLineProcessor clp(false); + Galeri::Xpetra::Parameters matrixParameters(clp, 8748); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); + + RCP map = MapFactory::Build(xpetraParameters.GetLib(), matrixParameters.GetNumGlobalElements(), 0, comm); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + // getCrsGraph()->getImporter() + RCP importer = ImportFactory::Build(A->getRowMap(), map); + fineLevel.Set("Importer", importer); + auto importerTest = A->getCrsGraph()->getImporter(); // NULL + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. 
+ // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal colored signed classical"))); + coalesceDropFact.SetParameter("aggregation: coloring: localize color graph", Teuchos::ParameterEntry(false)); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + // Need an importer + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 36); + +} // BlockDiagonalNoColoredSignedClassical + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalSignedClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, 
GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal signed classical"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! 
+ const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 36); + +} // BlockDiagonalSignedClassical + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonal, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + RCP A = TestHelpers_kokkos::TestFactory::BuildBlockMatrixAsPoint(matrixList, lib); + A->SetFixedBlockSize(1); // So we can block diagonalize + Level fineLevel; + fineLevel.Set("A", A); + + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), matrixList); + fineLevel.Set("Coordinates", coordinates); + + RCP ibFact = rcp(new InitialBlockNumberFactory()); + Teuchos::ParameterList ibList; + ibList.set("aggregation: block diagonal: interleaved blocksize", 3); + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + 
coalesceDropFact.SetFactory("BlockNumber", ibFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 8.0)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal"))); + coalesceDropFact.SetParameter("aggregation: block diagonal: interleaved blocksize", Teuchos::ParameterEntry(3)); + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + RCP A = TestHelpers_kokkos::TestFactory::BuildBlockMatrixAsPoint(matrixList, lib); + A->SetFixedBlockSize(1); // So we can block diagonalize + Level fineLevel; + fineLevel.Set("A", A); + + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), matrixList); + fineLevel.Set("Coordinates", coordinates); + + RCP ibFact = rcp(new InitialBlockNumberFactory()); + Teuchos::ParameterList ibList; + ibList.set("aggregation: block diagonal: interleaved blocksize", 3); + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos 
coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetFactory("BlockNumber", ibFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 8.0)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal classical"))); + coalesceDropFact.SetParameter("aggregation: block diagonal: interleaved blocksize", Teuchos::ParameterEntry(3)); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalDistanceLaplacian, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + RCP A = TestHelpers_kokkos::TestFactory::BuildBlockMatrixAsPoint(matrixList, lib); + A->SetFixedBlockSize(1); // So we can block diagonalize + Level fineLevel; + fineLevel.Set("A", A); + + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), matrixList); + fineLevel.Set("Coordinates", coordinates); + + RCP ibFact = rcp(new InitialBlockNumberFactory()); + Teuchos::ParameterList ibList; + ibList.set("aggregation: block diagonal: interleaved 
blocksize", 3); + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetFactory("BlockNumber", ibFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.025)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal distance laplacian"))); + coalesceDropFact.SetParameter("aggregation: block diagonal: interleaved blocksize", Teuchos::ParameterEntry(3)); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalDistanceDifferentCoordinatesLaplacian, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + RCP A = TestHelpers_kokkos::TestFactory::BuildBlockMatrixAsPoint(matrixList, lib); + A->SetFixedBlockSize(1); // So we can block diagonalize + Level fineLevel; + fineLevel.Set("A", A); + + GO bnx = 15 * comm->getSize(); + Teuchos::ParameterList bMatrixList; + matrixList.set("bnx", bnx); + RCP B = 
TestHelpers_kokkos::TestFactory::BuildBlockMatrixAsPoint(bMatrixList, lib); + + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", B->getRowMap(), matrixList); + fineLevel.Set("Coordinates", coordinates); + + RCP ibFact = rcp(new InitialBlockNumberFactory()); + Teuchos::ParameterList ibList; + ibList.set("aggregation: block diagonal: interleaved blocksize", 3); + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetFactory("BlockNumber", ibFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.025)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal distance laplacian"))); + coalesceDropFact.SetParameter("aggregation: block diagonal: interleaved blocksize", Teuchos::ParameterEntry(3)); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalDistanceLaplacianWeighted, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + 
matrixList.set("ny", (GO)10); + matrixList.set("nz", (GO)10); + matrixList.set("matrixType", "Laplace3D"); + RCP A = TestHelpers_kokkos::TestFactory::BuildBlockMatrixAsPoint(matrixList, lib); + A->SetFixedBlockSize(1); // So we can block diagonalize + Level fineLevel; + fineLevel.Set("A", A); + + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", A->getRowMap(), matrixList); + fineLevel.Set("Coordinates", coordinates); + + RCP ibFact = rcp(new InitialBlockNumberFactory()); + Teuchos::ParameterList ibList; + ibList.set("aggregation: block diagonal: interleaved blocksize", 3); + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetFactory("BlockNumber", ibFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.025)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal distance laplacian"))); + coalesceDropFact.SetParameter("aggregation: block diagonal: interleaved blocksize", Teuchos::ParameterEntry(3)); + std::vector weights_v{100.0, 1.0, 1.0, 1.0, 100, 1.0, 1.0, 1.0, 100.0}; + Teuchos::Array weights(weights_v); + coalesceDropFact.SetParameter("aggregation: distance laplacian directional weights", Teuchos::ParameterEntry(weights)); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, DistanceLaplacianWeighted, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename 
STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + matrixList.set("ny", (GO)10); + matrixList.set("nz", (GO)10); + matrixList.set("matrixType", "Laplace3D"); + RCP A = TestHelpers_kokkos::TestFactory::BuildMatrix(matrixList, lib); + + Level fineLevel; + fineLevel.Set("A", A); + + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", A->getRowMap(), matrixList); + fineLevel.Set("Coordinates", coordinates); + + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.025)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); + std::vector weights_v{100.0, 1.0, 1.0}; + Teuchos::Array weights(weights_v); + coalesceDropFact.SetParameter("aggregation: distance laplacian directional weights", Teuchos::ParameterEntry(weights)); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, SignedClassicalSA, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename 
STS::magnitudeType real_type; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + matrixList.set("ny", (GO)10); + matrixList.set("nz", (GO)10); + matrixList.set("matrixType", "Laplace3D"); + RCP A = TestHelpers_kokkos::TestFactory::BuildMatrix(matrixList, lib); + + Level fineLevel; + fineLevel.Set("A", A); + + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.0)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("signed classical sa"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicScalarWithoutFiltering, Scalar, LocalOrdinal, GlobalOrdinal, Node) { #include "MueLu_UseShortNames.hpp" MUELU_TESTING_SET_OSTREAM; @@ -363,7 +1575,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicBlockWithFi dropFact.Build(fineLevel); - auto graph = fineLevel.Get >("Graph", &dropFact); + auto graph_d = fineLevel.Get >("Graph", &dropFact); + auto graph = graph_d->copyToHost(); auto myDofsPerNode = fineLevel.Get ("DofsPerNode", &dropFact); TEST_EQUALITY(as(myDofsPerNode) == 1, true); @@ -924,7 +2137,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, 2x2, 
Scalar, Local Teuchos::ParameterList params9 = Teuchos::ParameterList(params0); params9.set("aggregation: drop scheme", "classical"); params9.set("aggregation: classical algo", "unscaled cut"); - params9.set("aggregation: drop tol", 3.6); + params9.set("aggregation: drop tol", 1.0 / 3.6); params.push_back(params9); expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, -1.0, -1.5, 2.0, reuseGraph)); @@ -1017,12 +2230,31 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, 2x2, Scalar, Local } } -#define MUELU_ETI_GROUP(SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, Constructor, SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicScalarWithoutFiltering, SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicScalarWithFiltering, SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicBlockWithoutFiltering, SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, AggresiveDroppingIsMarkedAsBoundary, SC, LO, GO, NO) \ +#define MUELU_ETI_GROUP(SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, Constructor, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, Build, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, DistanceLaplacian, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, DistanceLaplacianScaledCut, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, DistanceLaplacianUnscaledCut, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, DistanceLaplacianCutSym, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicalScaledCut, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicalUnScaledCut, SC, 
LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicalCutSym, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, SignedClassical, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, SignedScaledCutClassical, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, SignedUnscaledCutClassical, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, BlockDiagonalColoredSignedClassical, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, BlockDiagonalNoColoredSignedClassical, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, BlockDiagonalSignedClassical, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, BlockDiagonal, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, BlockDiagonalDistanceLaplacian, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, BlockDiagonalDistanceLaplacianWeighted, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, DistanceLaplacianWeighted, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, SignedClassicalSA, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicScalarWithoutFiltering, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicScalarWithFiltering, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicBlockWithoutFiltering, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, AggresiveDroppingIsMarkedAsBoundary, SC, LO, GO, NO) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, 2x2, SC, LO, GO, NO) // TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, 
ClassicBlockWithFiltering, SC, LO, GO, NO) // not implemented yet diff --git a/packages/muelu/test/unit_tests_kokkos/MueLu_TestHelpers_kokkos.hpp b/packages/muelu/test/unit_tests_kokkos/MueLu_TestHelpers_kokkos.hpp index ff400f9ca3cb..9d8b51bcdfa8 100644 --- a/packages/muelu/test/unit_tests_kokkos/MueLu_TestHelpers_kokkos.hpp +++ b/packages/muelu/test/unit_tests_kokkos/MueLu_TestHelpers_kokkos.hpp @@ -674,7 +674,63 @@ class TestFactory { } #endif #endif -}; // class TestFactory + + // Build the matrix described by matrixList, then expand every entry into a dense blocksize x blocksize (here 3x3) block of a point matrix whose fixed block size is set accordingly + static RCP BuildBlockMatrixAsPoint(Teuchos::ParameterList& matrixList, Xpetra::UnderlyingLib lib) { + RCP > comm = TestHelpers_kokkos::Parameters::getDefaultComm(); + GO GO_INVALID = Teuchos::OrdinalTraits::invalid(); + RCP Op; + + if (lib == Xpetra::NotSpecified) + lib = TestHelpers_kokkos::Parameters::getLib(); + + // Make the base graph + RCP old_matrix = TestHelpers_kokkos::TestFactory::BuildMatrix(matrixList, lib); + RCP old_graph = old_matrix->getCrsGraph(); + RCP old_rowmap = old_graph->getRowMap(); + RCP old_colmap = old_graph->getColMap(); + int blocksize = 3; + + // Block Map + LO orig_num_rows = (LO)old_graph->getRowMap()->getLocalNumElements(); + Teuchos::Array owned_rows(blocksize * orig_num_rows); + for (LO i = 0; i < orig_num_rows; i++) { + GO old_gid = old_rowmap->getGlobalElement(i); + for (int j = 0; j < blocksize; j++) { + owned_rows[i * blocksize + j] = old_gid * blocksize + j; + } + } + RCP new_map = Xpetra::MapFactory::Build(lib, GO_INVALID, owned_rows(), 0, comm); + if (new_map.is_null()) throw std::runtime_error("BuildBlockMatrixAsPoint: Map constructor failed"); + + // Block Graph / Matrix + RCP new_matrix = Xpetra::CrsMatrixFactory::Build(new_map, blocksize * old_graph->getLocalMaxNumRowEntries()); + if (new_matrix.is_null()) throw std::runtime_error("BuildBlockMatrixAsPoint: Matrix constructor failed"); + for (LO i = 0; i < orig_num_rows; i++) { + Teuchos::ArrayView old_indices; + Teuchos::ArrayView
old_values; + Teuchos::Array new_indices(1); + Teuchos::Array new_values(1); + old_matrix->getLocalRowView(i, old_indices, old_values); + for (int ii = 0; ii < blocksize; ii++) { + GO GRID = new_map->getGlobalElement(i * blocksize + ii); + for (LO j = 0; j < (LO)old_indices.size(); j++) { + for (int jj = 0; jj < blocksize; jj++) { + new_indices[0] = old_colmap->getGlobalElement(old_indices[j]) * blocksize + jj; + new_values[0] = old_values[j] * (SC)((ii == jj && i == old_indices[j]) ? blocksize * blocksize : 1); + new_matrix->insertGlobalValues(GRID, new_indices(), new_values); + } + } + } + } + new_matrix->fillComplete(); + Op = rcp(new CrsMatrixWrap(new_matrix)); + if (Op.is_null()) throw std::runtime_error("BuildBlockMatrixAsPoint: CrsMatrixWrap constructor failed"); + Op->SetFixedBlockSize(blocksize); + + return Op; + } // BuildBlockMatrixAsPoint() +}; // class TestFactory // Helper class which has some Tpetra specific code inside // We put this into an extra helper class as we need partial specializations and From 50541b1b205699fc3f5234ff6857d511a3e78e81 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Mon, 9 Sep 2024 15:54:38 -0600 Subject: [PATCH 09/50] MueLu: Set "use kokkos refactor" = false for BlockCrs tests Signed-off-by: Christian Glusa --- .../ParameterList/ParameterListInterpreter.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/packages/muelu/test/unit_tests/ParameterList/ParameterListInterpreter.cpp b/packages/muelu/test/unit_tests/ParameterList/ParameterListInterpreter.cpp index 2e4b8da1073f..e29fe020ea25 100644 --- a/packages/muelu/test/unit_tests/ParameterList/ParameterListInterpreter.cpp +++ b/packages/muelu/test/unit_tests/ParameterList/ParameterListInterpreter.cpp @@ -82,7 +82,12 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(ParameterListInterpreter, BlockCrs, Scalar, Lo if (found == std::string::npos) continue; out << "Processing file: " << fileList[i] << std::endl; - ParameterListInterpreter
mueluFactory("ParameterList/ParameterListInterpreter/" + fileList[i], *comm); + + Teuchos::RCP mueluList = rcp(new Teuchos::ParameterList()); + Teuchos::updateParametersFromXmlFileAndBroadcast("ParameterList/ParameterListInterpreter/" + fileList[i], mueluList.ptr(), *comm); + mueluList->set("use kokkos refactor", false); + + ParameterListInterpreter mueluFactory(*mueluList, comm); RCP H = mueluFactory.CreateHierarchy(); H->GetLevel(0)->Set("A", A); @@ -168,14 +173,18 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(ParameterListInterpreter, PointCrs_vs_BlockCrs out << "Processing file: " << fileList[i] << std::endl; + Teuchos::RCP mueluList = rcp(new Teuchos::ParameterList()); + Teuchos::updateParametersFromXmlFileAndBroadcast("ParameterList/ParameterListInterpreter/" + fileList[i], mueluList.ptr(), *comm); + mueluList->set("use kokkos refactor", false); + // Point Hierarchy - ParameterListInterpreter mueluFactory1("ParameterList/ParameterListInterpreter/" + fileList[i], *comm); + ParameterListInterpreter mueluFactory1(*mueluList, comm); RCP PointH = mueluFactory1.CreateHierarchy(); PointH->GetLevel(0)->Set("A", PointA); mueluFactory1.SetupHierarchy(*PointH); // Block Hierachy - ParameterListInterpreter mueluFactory2("ParameterList/ParameterListInterpreter/" + fileList[i], *comm); + ParameterListInterpreter mueluFactory2(*mueluList, comm); RCP BlockH = mueluFactory2.CreateHierarchy(); BlockH->GetLevel(0)->Set("A", BlockA); mueluFactory2.SetupHierarchy(*BlockH); From d5771ad9d0338054d53468135f912d396846c5bf Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Mon, 9 Sep 2024 16:01:37 -0600 Subject: [PATCH 10/50] MueLu: Rebase gold files Signed-off-by: Christian Glusa --- .../interface/kokkos/Output/MLaux_tpetra.gold | 7 ++--- .../kokkos/Output/MLcoarse1_tpetra.gold | 18 +++++------ .../kokkos/Output/MLcoarse2_tpetra.gold | 30 +++++++++---------- .../kokkos/Output/MLcoarse3_tpetra.gold | 24 +++++++-------- .../kokkos/Output/MLcoarse4_tpetra.gold | 24 +++++++-------- 
.../kokkos/Output/MLcoarse5_tpetra.gold | 12 ++++---- .../kokkos/Output/MLpgamg1_tpetra.gold | 24 +++++++-------- .../Output/MLrepartitioning1_tpetra.gold | 30 +++++++++---------- .../Output/MLrepartitioning2_tpetra.gold | 24 +++++++-------- .../Output/MLrepartitioning3_tpetra.gold | 24 +++++++-------- .../kokkos/Output/MLsmoother1_tpetra.gold | 24 +++++++-------- .../kokkos/Output/MLsmoother2_tpetra.gold | 24 +++++++-------- .../kokkos/Output/MLsmoother3_tpetra.gold | 24 +++++++-------- .../kokkos/Output/MLsmoother4_tpetra.gold | 24 +++++++-------- .../kokkos/Output/MLunsmoothed1_tpetra.gold | 24 +++++++-------- .../kokkos/Output/aggregation1_tpetra.gold | 12 ++++---- .../kokkos/Output/aggregation3_tpetra.gold | 14 ++++----- .../kokkos/Output/aggregation4_tpetra.gold | 14 ++++----- .../kokkos/Output/coarse1_tpetra.gold | 18 +++++------ .../kokkos/Output/coarse2_tpetra.gold | 30 +++++++++---------- .../kokkos/Output/coarse3_tpetra.gold | 12 ++++---- .../kokkos/Output/default_e3d_tpetra.gold | 4 +-- .../kokkos/Output/default_mhd_np4_tpetra.gold | 12 ++++---- .../kokkos/Output/default_mhd_tpetra.gold | 12 ++++---- .../kokkos/Output/default_p2d_tpetra.gold | 12 ++++---- .../kokkos/Output/default_p3d_tpetra.gold | 12 ++++---- .../kokkos/Output/default_pg_np4_tpetra.gold | 12 ++++---- .../kokkos/Output/default_pg_tpetra.gold | 12 ++++---- .../Output/driver_drekar1_np4_tpetra.gold | 24 +++++++-------- .../kokkos/Output/driver_drekar1_tpetra.gold | 24 +++++++-------- .../Output/driver_drekar2_np4_tpetra.gold | 21 ++++++------- .../kokkos/Output/driver_drekar2_tpetra.gold | 21 ++++++------- .../interface/kokkos/Output/emin1_tpetra.gold | 12 ++++---- .../interface/kokkos/Output/emin2_tpetra.gold | 12 ++++---- .../interface/kokkos/Output/emin3_tpetra.gold | 12 ++++---- .../interface/kokkos/Output/empty_tpetra.gold | 12 ++++---- .../Output/operator_solve_1_np1_tpetra.gold | 18 +++++------ .../Output/operator_solve_1_np4_tpetra.gold | 18 +++++------ 
.../Output/operator_solve_5_np1_tpetra.gold | 12 ++++---- .../Output/operator_solve_5_np4_tpetra.gold | 12 ++++---- .../Output/operator_solve_6_np1_tpetra.gold | 12 ++++---- .../Output/operator_solve_6_np4_tpetra.gold | 12 ++++---- .../interface/kokkos/Output/pg1_tpetra.gold | 12 ++++---- .../interface/kokkos/Output/pg2_tpetra.gold | 12 ++++---- .../Output/repartition1_np4_tpetra.gold | 12 ++++---- .../kokkos/Output/repartition1_tpetra.gold | 12 ++++---- .../Output/repartition3_np4_tpetra.gold | 12 ++++---- .../kokkos/Output/repartition3_tpetra.gold | 12 ++++---- .../Output/repartition4_np4_tpetra.gold | 12 ++++---- .../kokkos/Output/repartition4_tpetra.gold | 12 ++++---- .../kokkos/Output/reuse-RAP-1_np4_tpetra.gold | 12 ++++---- .../kokkos/Output/reuse-RAP-1_tpetra.gold | 12 ++++---- .../kokkos/Output/reuse-RAP-2_np4_tpetra.gold | 12 ++++---- .../kokkos/Output/reuse-RAP-2_tpetra.gold | 12 ++++---- .../kokkos/Output/reuse-RP-2_np4_tpetra.gold | 12 ++++---- .../kokkos/Output/reuse-RP-2_tpetra.gold | 12 ++++---- .../kokkos/Output/reuse-S-1_np4_tpetra.gold | 24 +++++++-------- .../kokkos/Output/reuse-S-1_tpetra.gold | 24 +++++++-------- .../Output/reuse-full-1_np4_tpetra.gold | 12 ++++---- .../kokkos/Output/reuse-full-1_tpetra.gold | 12 ++++---- .../kokkos/Output/reuse-none_np4_tpetra.gold | 24 +++++++-------- .../kokkos/Output/reuse-none_tpetra.gold | 24 +++++++-------- .../kokkos/Output/reuse-tP-1_np4_tpetra.gold | 20 +++++-------- .../kokkos/Output/reuse-tP-1_tpetra.gold | 20 +++++-------- .../kokkos/Output/reuse-tP-2_np4_tpetra.gold | 26 +++++++--------- .../kokkos/Output/reuse-tP-2_tpetra.gold | 26 +++++++--------- .../kokkos/Output/reuse-tP-3_np4_tpetra.gold | 22 +++++++------- .../kokkos/Output/reuse-tP-3_tpetra.gold | 22 +++++++------- .../kokkos/Output/smoother10_tpetra.gold | 12 ++++---- .../kokkos/Output/smoother11_tpetra.gold | 12 ++++---- .../kokkos/Output/smoother12_tpetra.gold | 30 +++++++++---------- .../kokkos/Output/smoother13_tpetra.gold | 12 
++++---- .../kokkos/Output/smoother1_tpetra.gold | 12 ++++---- .../kokkos/Output/smoother2_tpetra.gold | 12 ++++---- .../kokkos/Output/smoother3_tpetra.gold | 12 ++++---- .../kokkos/Output/smoother4_tpetra.gold | 12 ++++---- .../kokkos/Output/smoother5_tpetra.gold | 12 ++++---- .../kokkos/Output/smoother6_tpetra.gold | 12 ++++---- .../kokkos/Output/smoother9_tpetra.gold | 12 ++++---- .../interface/kokkos/Output/sync1_tpetra.gold | 12 ++++---- .../kokkos/Output/transpose1_tpetra.gold | 12 ++++---- .../kokkos/Output/transpose2_np4_tpetra.gold | 12 ++++---- .../kokkos/Output/transpose2_tpetra.gold | 12 ++++---- .../kokkos/Output/transpose3_np4_tpetra.gold | 12 ++++---- .../kokkos/Output/transpose3_tpetra.gold | 12 ++++---- .../kokkos/Output/unsmoothed1_tpetra.gold | 12 ++++---- .../kokkos/Output/unsmoothed2_tpetra.gold | 12 ++++---- 87 files changed, 685 insertions(+), 720 deletions(-) diff --git a/packages/muelu/test/interface/kokkos/Output/MLaux_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLaux_tpetra.gold index 0f01f3917fe2..326fbeecded2 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLaux_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLaux_tpetra.gold @@ -12,10 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.01, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.01, blocksize = 1 aggregation: drop tol = 0.01 aggregation: drop scheme = distance laplacian Build (MueLu::TentativePFactory_kokkos) @@ -26,6 +23,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory 
(MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLcoarse1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLcoarse1_tpetra.gold index a59513adfcff..9c00a4283072 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLcoarse1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLcoarse1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ 
-83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLcoarse2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLcoarse2_tpetra.gold index 9086c4e5f506..07d8543bfbed 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLcoarse2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLcoarse2_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build 
(MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) 
BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -155,9 +155,7 @@ smoother -> Level 5 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -166,6 +164,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLcoarse3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLcoarse3_tpetra.gold index d308a4bc6af2..870b0445b626 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLcoarse3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLcoarse3_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) 
aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, 
blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLcoarse4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLcoarse4_tpetra.gold index 6d6f4cc04a6b..24007fde4aa4 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLcoarse4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLcoarse4_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, 
blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) 
Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLcoarse5_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLcoarse5_tpetra.gold index cac568f06868..1bf1ac6facd2 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLcoarse5_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLcoarse5_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git 
a/packages/muelu/test/interface/kokkos/Output/MLpgamg1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLpgamg1_tpetra.gold index 44b6284e32d9..522a4d33c19a 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLpgamg1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLpgamg1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -52,9 +52,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -63,6 +61,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -93,9 +93,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build 
(MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -104,6 +102,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -134,9 +134,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -145,6 +143,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLrepartitioning1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLrepartitioning1_tpetra.gold index 5073aefe576d..ac01d63b4a50 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLrepartitioning1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLrepartitioning1_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build 
(MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -63,9 +63,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -114,9 +114,7 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -125,6 +123,8 
@@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -165,9 +165,7 @@ Level 4 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -176,6 +174,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -216,9 +216,7 @@ Level 5 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -227,6 +225,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git 
a/packages/muelu/test/interface/kokkos/Output/MLrepartitioning2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLrepartitioning2_tpetra.gold index 3eeaf2a6528e..6429fba26897 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLrepartitioning2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLrepartitioning2_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -65,9 +65,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -76,6 +74,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -118,9 +118,7 
@@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -129,6 +127,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -171,9 +171,7 @@ Level 4 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -182,6 +180,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLrepartitioning3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLrepartitioning3_tpetra.gold index 69572cc64d41..816a2cbc7698 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLrepartitioning3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLrepartitioning3_tpetra.gold @@ -12,9 +12,7 
@@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -66,9 +66,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -77,6 +75,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -120,9 +120,7 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) 
Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -131,6 +129,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -174,9 +174,7 @@ Level 4 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -185,6 +183,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLsmoother1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLsmoother1_tpetra.gold index d308a4bc6af2..870b0445b626 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLsmoother1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLsmoother1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) 
BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> 
Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLsmoother2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLsmoother2_tpetra.gold index bb50b5fdb518..969a8a8bfac9 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLsmoother2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLsmoother2_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build 
(MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) 
BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLsmoother3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLsmoother3_tpetra.gold index 835095475959..778384215364 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLsmoother3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLsmoother3_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) 
aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLsmoother4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLsmoother4_tpetra.gold index 3757339a77df..a0e7ad721409 100644 --- 
a/packages/muelu/test/interface/kokkos/Output/MLsmoother4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLsmoother4_tpetra.gold @@ -12,9 +12,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -49,9 +49,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -60,6 +58,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -86,9 +86,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = 
"default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -97,6 +95,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -123,9 +123,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -134,6 +132,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLunsmoothed1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLunsmoothed1_tpetra.gold index 66cce4e4b90b..4f7d5d9e2c95 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLunsmoothed1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLunsmoothed1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build 
(MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = 
Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/aggregation1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/aggregation1_tpetra.gold index 3e08856bd6cc..07de05bb7aea 100644 --- a/packages/muelu/test/interface/kokkos/Output/aggregation1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/aggregation1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 
Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/aggregation3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/aggregation3_tpetra.gold index 1e9cdc73774e..d42eadfdc6b8 100644 --- a/packages/muelu/test/interface/kokkos/Output/aggregation3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/aggregation3_tpetra.gold @@ -11,10 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = classical Build (MueLu::TentativePFactory_kokkos) @@ -24,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,10 +46,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build 
(MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = classical Build (MueLu::TentativePFactory_kokkos) @@ -60,6 +56,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/aggregation4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/aggregation4_tpetra.gold index 57cf2ec15f45..26b3a933d761 100644 --- a/packages/muelu/test/interface/kokkos/Output/aggregation4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/aggregation4_tpetra.gold @@ -11,10 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.05, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.05, blocksize = 1 aggregation: drop tol = 0.05 aggregation: drop scheme = distance laplacian Build (MueLu::TentativePFactory_kokkos) @@ -24,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -50,10 +49,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build 
(MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.05, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.05, blocksize = 1 aggregation: drop tol = 0.05 aggregation: drop scheme = distance laplacian Build (MueLu::TentativePFactory_kokkos) @@ -63,6 +59,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/coarse1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/coarse1_tpetra.gold index b289d035dda9..c8c2fb4e0d13 100644 --- a/packages/muelu/test/interface/kokkos/Output/coarse1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/coarse1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build 
(MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -77,9 +77,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -87,6 +85,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/coarse2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/coarse2_tpetra.gold index 2226b1e34413..986986f65002 100644 --- a/packages/muelu/test/interface/kokkos/Output/coarse2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/coarse2_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = 
"classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -77,9 +77,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -87,6 +85,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] 
Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -110,9 +110,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -120,6 +118,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -143,9 +143,7 @@ smoother -> Level 5 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -153,6 +151,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/coarse3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/coarse3_tpetra.gold index bed828c7df82..fc02273c3c0c 100644 --- a/packages/muelu/test/interface/kokkos/Output/coarse3_tpetra.gold +++ 
b/packages/muelu/test/interface/kokkos/Output/coarse3_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/default_e3d_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_e3d_tpetra.gold index 591dfd5fe59e..11f55d4ccc24 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_e3d_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_e3d_tpetra.gold @@ -12,7 +12,7 @@ 
Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) Build (MueLu::AmalgamationFactory) [empty list] -algorithm = "classical": threshold = 0, blocksize = 3 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 3 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -44,7 +44,7 @@ Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) Build (MueLu::AmalgamationFactory) [empty list] -algorithm = "classical": threshold = 0, blocksize = 3 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 3 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) diff --git a/packages/muelu/test/interface/kokkos/Output/default_mhd_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_mhd_np4_tpetra.gold index 9c279a7330a1..4ec58304e539 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_mhd_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_mhd_np4_tpetra.gold @@ -18,14 +18,14 @@ Level 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -53,14 +53,14 @@ Level 2 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build 
(MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/default_mhd_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_mhd_tpetra.gold index 2725d881cea0..2d1901c6ce6a 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_mhd_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_mhd_tpetra.gold @@ -18,14 +18,14 @@ Level 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -53,14 +53,14 @@ Level 2 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 
BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/default_p2d_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_p2d_tpetra.gold index a3f2ccc308e9..2e0353b4237f 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_p2d_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_p2d_tpetra.gold @@ -10,9 +10,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -20,6 +18,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,9 +42,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -52,6 +50,8 @@ BuildAggregatesRandom (Phase 1 (main)) 
BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/default_p3d_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_p3d_tpetra.gold index a3f2ccc308e9..2e0353b4237f 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_p3d_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_p3d_tpetra.gold @@ -10,9 +10,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -20,6 +18,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,9 +42,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -52,6 +50,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) 
BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/default_pg_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_pg_np4_tpetra.gold index bd7153246283..e131cc4fc673 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_pg_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_pg_np4_tpetra.gold @@ -11,14 +11,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,14 +42,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) 
Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/default_pg_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_pg_tpetra.gold index f926861f5ace..d4a3486fd56e 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_pg_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_pg_tpetra.gold @@ -11,14 +11,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,14 +42,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/driver_drekar1_np4_tpetra.gold 
b/packages/muelu/test/interface/kokkos/Output/driver_drekar1_np4_tpetra.gold index c56ac4a46dcb..b5df153ebf4f 100644 --- a/packages/muelu/test/interface/kokkos/Output/driver_drekar1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/driver_drekar1_np4_tpetra.gold @@ -15,11 +15,9 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0, blocksize = 1 +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0, blocksize = 1 aggregation: drop scheme = distance laplacian -filtered matrix: use lumping = 1 [unused] +filtered matrix: use lumping = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -27,6 +25,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -97,11 +97,9 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0, blocksize = 1 +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0, blocksize = 1 aggregation: drop scheme = distance laplacian -filtered matrix: use lumping = 1 [unused] +filtered matrix: use lumping = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -109,6 +107,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b 
(expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -154,11 +154,9 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0, blocksize = 1 +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0, blocksize = 1 aggregation: drop scheme = distance laplacian -filtered matrix: use lumping = 1 [unused] +filtered matrix: use lumping = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -166,6 +164,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/driver_drekar1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/driver_drekar1_tpetra.gold index c8225b994621..3c6c53c67518 100644 --- a/packages/muelu/test/interface/kokkos/Output/driver_drekar1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/driver_drekar1_tpetra.gold @@ -15,11 +15,9 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0, blocksize = 1 +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0, blocksize = 1 aggregation: drop scheme = distance laplacian -filtered matrix: use lumping = 1 [unused] +filtered 
matrix: use lumping = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -27,6 +25,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -72,11 +72,9 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0, blocksize = 1 +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0, blocksize = 1 aggregation: drop scheme = distance laplacian -filtered matrix: use lumping = 1 [unused] +filtered matrix: use lumping = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -84,6 +82,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -129,11 +129,9 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0, blocksize = 1 +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0, blocksize = 1 aggregation: drop scheme = distance laplacian -filtered matrix: use lumping = 1 [unused] +filtered matrix: use lumping = 1 Build (MueLu::TentativePFactory_kokkos) Build 
(MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -141,6 +139,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/driver_drekar2_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/driver_drekar2_np4_tpetra.gold index e195f2265097..1294cbc43d2d 100644 --- a/packages/muelu/test/interface/kokkos/Output/driver_drekar2_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/driver_drekar2_np4_tpetra.gold @@ -15,10 +15,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = distance laplacian filtered matrix: use lumping = 1 @@ -29,6 +26,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -99,10 +98,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian 
algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = distance laplacian filtered matrix: use lumping = 1 @@ -113,6 +109,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -158,10 +156,7 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = distance laplacian filtered matrix: use lumping = 1 @@ -172,6 +167,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/driver_drekar2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/driver_drekar2_tpetra.gold index d9166a4a3f3a..2e79c854a41b 100644 --- a/packages/muelu/test/interface/kokkos/Output/driver_drekar2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/driver_drekar2_tpetra.gold @@ -15,10 +15,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.02, blocksize = 1 
-Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = distance laplacian filtered matrix: use lumping = 1 @@ -29,6 +26,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -74,10 +73,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = distance laplacian filtered matrix: use lumping = 1 @@ -88,6 +84,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -133,10 +131,7 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = distance laplacian filtered matrix: use lumping = 1 @@ -147,6 +142,8 @@ BuildAggregatesRandom 
(Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/emin1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/emin1_tpetra.gold index 87d98e6bbd71..558c631efc00 100644 --- a/packages/muelu/test/interface/kokkos/Output/emin1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/emin1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator minimization (MueLu::EminPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator minimization (MueLu::EminPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -57,6 +55,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) 
BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/emin2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/emin2_tpetra.gold index d3d700a672ee..b0a6f7083c6f 100644 --- a/packages/muelu/test/interface/kokkos/Output/emin2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/emin2_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator minimization (MueLu::EminPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -49,9 +49,7 @@ smoother -> Level 2 Prolongator minimization (MueLu::EminPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -59,6 +57,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace 
factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/emin3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/emin3_tpetra.gold index beb9069dcbef..1c426729b397 100644 --- a/packages/muelu/test/interface/kokkos/Output/emin3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/emin3_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator minimization (MueLu::EminPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -49,9 +49,7 @@ smoother -> Level 2 Prolongator minimization (MueLu::EminPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -59,6 +57,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff 
--git a/packages/muelu/test/interface/kokkos/Output/empty_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/empty_tpetra.gold index 3e08856bd6cc..07de05bb7aea 100644 --- a/packages/muelu/test/interface/kokkos/Output/empty_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/empty_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np1_tpetra.gold 
b/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np1_tpetra.gold index 186ca4496970..180f89d95e9a 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -45,9 +45,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -56,6 +54,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -79,9 +79,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build 
(MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -90,6 +88,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np4_tpetra.gold index df9604a89b67..8259cc104ae8 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np4_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -45,9 +45,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build 
(MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -56,6 +54,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -79,9 +79,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -90,6 +88,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np1_tpetra.gold index 0b51e98970e5..aed4bdcd30f8 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np1_tpetra.gold @@ -22,9 +22,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build 
(MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -33,6 +31,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Build (MueLu::CoarseMapFactory) matrixmatrix: kernel params -> [empty list] @@ -54,9 +54,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -65,6 +63,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np4_tpetra.gold index baf6c048c339..eee74662e1a3 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np4_tpetra.gold @@ -22,9 +22,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build 
(MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -33,6 +31,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Build (MueLu::CoarseMapFactory) matrixmatrix: kernel params -> [empty list] @@ -54,9 +54,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -65,6 +63,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np1_tpetra.gold index 49df428e7ac1..8abd2dd54f54 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np1_tpetra.gold @@ -26,9 +26,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] 
-algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -37,6 +35,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Build (MueLu::CoarseMapFactory) matrixmatrix: kernel params -> [empty list] @@ -59,9 +59,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -70,6 +68,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np4_tpetra.gold index 9fb6a3101a8b..50662700a85d 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np4_tpetra.gold @@ -26,9 +26,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, 
blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -37,6 +35,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Build (MueLu::CoarseMapFactory) matrixmatrix: kernel params -> [empty list] @@ -59,9 +59,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -70,6 +68,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/pg1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/pg1_tpetra.gold index 5cd915b67772..3993d8aa3e64 100644 --- a/packages/muelu/test/interface/kokkos/Output/pg1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/pg1_tpetra.gold @@ -13,14 +13,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" 
classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -46,14 +46,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/pg2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/pg2_tpetra.gold index 5cd915b67772..3993d8aa3e64 100644 --- a/packages/muelu/test/interface/kokkos/Output/pg2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/pg2_tpetra.gold @@ -13,14 +13,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a 
(secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -46,14 +46,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/repartition1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/repartition1_np4_tpetra.gold index bb19eb33551c..1eb2e9c6fcbb 100644 --- a/packages/muelu/test/interface/kokkos/Output/repartition1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/repartition1_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) 
BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -60,9 +60,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -70,6 +68,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/repartition1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/repartition1_tpetra.gold index 412bfdae6761..d849a7220890 100644 --- a/packages/muelu/test/interface/kokkos/Output/repartition1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/repartition1_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 
(cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -60,9 +60,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -70,6 +68,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/repartition3_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/repartition3_np4_tpetra.gold index 40cd4782bdd0..d01d4cca3d90 100644 --- a/packages/muelu/test/interface/kokkos/Output/repartition3_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/repartition3_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build 
(MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -61,9 +61,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -71,6 +69,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/repartition3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/repartition3_tpetra.gold index 8848a605bf3d..0ded152ab26c 100644 --- a/packages/muelu/test/interface/kokkos/Output/repartition3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/repartition3_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) 
+[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -61,9 +61,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -71,6 +69,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/repartition4_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/repartition4_np4_tpetra.gold index ef7d15a75400..d58c71ed6aa0 100644 --- a/packages/muelu/test/interface/kokkos/Output/repartition4_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/repartition4_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] 
Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -65,9 +65,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -75,6 +73,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/repartition4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/repartition4_tpetra.gold index 53f8ec14732c..a34e807605c5 100644 --- a/packages/muelu/test/interface/kokkos/Output/repartition4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/repartition4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory 
(MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -65,9 +65,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -75,6 +73,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_np4_tpetra.gold index 6bd1e19b0132..6b8e82af56eb 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_np4_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory 
(MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_tpetra.gold index ddb6f2cda9c7..c1825742b828 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level 
nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_np4_tpetra.gold index 35f645ce54df..e9be96c407cb 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_np4_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace 
Build (MueLu::CoarseMapFactory) @@ -58,9 +58,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -68,6 +66,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_tpetra.gold index 4c6bec2de826..2e276c11f43b 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ 
-58,9 +58,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -68,6 +66,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_np4_tpetra.gold index f7b528b12da7..eb674410b28a 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_np4_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -58,9 +58,7 @@ Level 2 
Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -68,6 +66,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_tpetra.gold index c22abad73532..f41fd267e41c 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -58,9 +58,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) 
Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -68,6 +66,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-S-1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-S-1_np4_tpetra.gold index 0ce74043c745..d8594818fb07 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-S-1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-S-1_np4_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing 
(MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -138,9 +138,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -148,6 +146,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -189,9 +189,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - 
(Dirichlet)) @@ -199,6 +197,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-S-1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-S-1_tpetra.gold index 66b3846fcb50..1439c208a74b 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-S-1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-S-1_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ 
BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -145,9 +145,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -155,6 +153,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -196,9 +196,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -206,6 +204,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git 
a/packages/muelu/test/interface/kokkos/Output/reuse-full-1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-full-1_np4_tpetra.gold index 3e15b97cf0e3..4c007cccf69c 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-full-1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-full-1_np4_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git 
a/packages/muelu/test/interface/kokkos/Output/reuse-full-1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-full-1_tpetra.gold index df57cead5b10..3317c950e7c3 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-full-1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-full-1_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git 
a/packages/muelu/test/interface/kokkos/Output/reuse-none_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-none_np4_tpetra.gold index c567d971cec7..353184cc8755 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-none_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-none_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -60,9 +60,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -70,6 +68,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -133,9 +133,7 @@ Level 1 Build 
(MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -143,6 +141,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -181,9 +181,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -191,6 +189,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-none_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-none_tpetra.gold index 4898e6dcfb70..202736e00075 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-none_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-none_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) 
Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -60,9 +60,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -70,6 +68,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -137,9 +137,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates 
(Phase - (Dirichlet)) @@ -147,6 +145,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -185,9 +185,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -195,6 +193,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_np4_tpetra.gold index 931773684060..6c75ad79682b 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_np4_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - 
(Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -140,9 +140,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Reusing previous AP data matrixmatrix: kernel params -> [empty list] @@ -178,9 +176,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Reusing previous AP data matrixmatrix: kernel params -> [empty list] 
diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_tpetra.gold index 283a59d925f1..df8c695af7ae 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -145,9 +145,7 @@ Level 1 Build 
(MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Reusing previous AP data matrixmatrix: kernel params -> [empty list] @@ -183,9 +181,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Reusing previous AP data matrixmatrix: kernel params -> [empty list] diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_np4_tpetra.gold index 99ad1a23b744..a041df517103 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_np4_tpetra.gold @@ -12,10 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) @@ -24,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -65,10 +64,7 @@ 
Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) @@ -77,6 +73,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -142,10 +140,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 matrixmatrix: kernel params -> [empty list] @@ -179,10 +174,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) @@ -191,6 +183,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] 
Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_tpetra.gold index 0e209db92aa3..51e717fb85e2 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_tpetra.gold @@ -12,10 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) @@ -24,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -65,10 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) @@ -77,6 +73,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty 
list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -146,10 +144,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 matrixmatrix: kernel params -> [empty list] @@ -183,10 +178,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) @@ -195,6 +187,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_np4_tpetra.gold index ba9828cca88e..ad14ee9d5d04 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, 
blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -63,9 +63,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -73,6 +71,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -138,9 +138,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Reusing previous AP data matrixmatrix: kernel params -> [empty list] @@ -176,9 +174,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] 
-algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -186,6 +182,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_tpetra.gold index 41e831b05ba1..0d822dd954f3 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -63,9 +63,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, 
blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -73,6 +71,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -142,9 +142,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Reusing previous AP data matrixmatrix: kernel params -> [empty list] @@ -180,9 +178,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -190,6 +186,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother10_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother10_tpetra.gold index 454794b53db2..6963611ce60b 100644 --- 
a/packages/muelu/test/interface/kokkos/Output/smoother10_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother10_tpetra.gold @@ -10,9 +10,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -20,6 +18,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,9 +42,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -52,6 +50,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother11_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother11_tpetra.gold index 23fd32664480..4818fdac660d 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother11_tpetra.gold +++ 
b/packages/muelu/test/interface/kokkos/Output/smoother11_tpetra.gold @@ -15,9 +15,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -25,6 +23,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -52,9 +52,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -62,6 +60,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother12_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother12_tpetra.gold index 04ab2a78cbab..5ff00c17a796 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother12_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother12_tpetra.gold @@ -10,9 +10,7 @@ smoother -> 
Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -20,6 +18,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,9 +42,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -52,6 +50,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -74,9 +74,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -84,6 +82,8 @@ 
BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -106,9 +106,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -116,6 +114,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -138,9 +138,7 @@ smoother -> Level 5 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -148,6 +146,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother13_tpetra.gold 
b/packages/muelu/test/interface/kokkos/Output/smoother13_tpetra.gold index 69e737694215..5bd1ac2ab59a 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother13_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother13_tpetra.gold @@ -13,9 +13,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -48,9 +48,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother1_tpetra.gold index 
3e08856bd6cc..07de05bb7aea 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother2_tpetra.gold index 170df5b0a425..cad2c1d4b2b2 100644 --- 
a/packages/muelu/test/interface/kokkos/Output/smoother2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother2_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother3_tpetra.gold index 68e878d15b39..fc4d2af803e1 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother3_tpetra.gold +++ 
b/packages/muelu/test/interface/kokkos/Output/smoother3_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother4_tpetra.gold index 72e5138c42a8..592fcfd8912b 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother4_tpetra.gold @@ -5,9 +5,7 @@ Level 0 Level 1 
Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -15,6 +13,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -32,9 +32,7 @@ matrixmatrix: kernel params -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -42,6 +40,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother5_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother5_tpetra.gold index a3f2ccc308e9..2e0353b4237f 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother5_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother5_tpetra.gold @@ -10,9 +10,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) 
-Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -20,6 +18,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,9 +42,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -52,6 +50,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother6_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother6_tpetra.gold index 010f9135f9ac..0048ce280929 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother6_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother6_tpetra.gold @@ -8,9 +8,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 
+algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -18,6 +16,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -38,9 +38,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -48,6 +46,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother9_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother9_tpetra.gold index 2a697355b847..572bc3e1a866 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother9_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother9_tpetra.gold @@ -13,9 +13,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build 
(MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -48,9 +48,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/sync1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/sync1_tpetra.gold index 3e08856bd6cc..07de05bb7aea 100644 --- a/packages/muelu/test/interface/kokkos/Output/sync1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/sync1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ 
-21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/transpose1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/transpose1_tpetra.gold index 7bee44217696..a4d7d7de12af 100644 --- a/packages/muelu/test/interface/kokkos/Output/transpose1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/transpose1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) 
BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,9 +42,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -52,6 +50,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/transpose2_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/transpose2_np4_tpetra.gold index 37971dbc7010..b75a9ec83e86 100644 --- a/packages/muelu/test/interface/kokkos/Output/transpose2_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/transpose2_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) 
BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -55,9 +55,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -65,6 +63,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/transpose2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/transpose2_tpetra.gold index 97aa95528902..985be2cd47c5 100644 --- a/packages/muelu/test/interface/kokkos/Output/transpose2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/transpose2_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) 
+Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -55,9 +55,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -65,6 +63,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/transpose3_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/transpose3_np4_tpetra.gold index 823740eaa78e..d621f56e3626 100644 --- a/packages/muelu/test/interface/kokkos/Output/transpose3_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/transpose3_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build 
(MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -56,9 +56,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -66,6 +64,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/transpose3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/transpose3_tpetra.gold index 899fdd29a9f5..6927436a6f9c 100644 --- a/packages/muelu/test/interface/kokkos/Output/transpose3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/transpose3_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] 
Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -56,9 +56,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -66,6 +64,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/unsmoothed1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/unsmoothed1_tpetra.gold index 039847881f42..ec9647a3dbc1 100644 --- a/packages/muelu/test/interface/kokkos/Output/unsmoothed1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/unsmoothed1_tpetra.gold @@ -12,14 +12,14 @@ Level 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -43,14 +43,14 @@ Level 2 
Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/unsmoothed2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/unsmoothed2_tpetra.gold index 861f191ab2a4..d2d9cc6349d2 100644 --- a/packages/muelu/test/interface/kokkos/Output/unsmoothed2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/unsmoothed2_tpetra.gold @@ -12,14 +12,14 @@ Level 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,14 +44,14 @@ Level 2 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 
+algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) From ab9b19d0ecdeec18be72a914fad0ea67e9f11e2e Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Thu, 12 Sep 2024 13:15:15 -0600 Subject: [PATCH 11/50] MueLu Regression: Add +1 for initialization of drop results Signed-off-by: Christian Glusa --- packages/muelu/test/unit_tests_kokkos/Regression.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/muelu/test/unit_tests_kokkos/Regression.cpp b/packages/muelu/test/unit_tests_kokkos/Regression.cpp index 46f4f88777ce..ccfbce86b07e 100644 --- a/packages/muelu/test/unit_tests_kokkos/Regression.cpp +++ b/packages/muelu/test/unit_tests_kokkos/Regression.cpp @@ -98,12 +98,12 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Regression, H2D, Scalar, LocalOrdinal, GlobalO } #ifdef KOKKOS_HAS_SHARED_SPACE else { - size_t targetNumDeepCopies = kkNativeDeepCopies + (std::is_same_v ? 19 : 31); + size_t targetNumDeepCopies = kkNativeDeepCopies + (std::is_same_v ? 20 : 32); TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), targetNumDeepCopies); } #else else { - TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), kkNativeDeepCopies + 31); + TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), kkNativeDeepCopies + 32); } #endif // KOKKOS_HAS_SHARED_SPACE @@ -175,12 +175,12 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Regression, Aggregation, Scalar, LocalOrdinal, } #ifdef KOKKOS_HAS_SHARED_SPACE else { - size_t targetNumDeepCopies = std::is_same_v ? 
17 : 16; + size_t targetNumDeepCopies = std::is_same_v ? 18 : 17; TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), targetNumDeepCopies); } #else else { - TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), 16); + TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), 17); } #endif From 080afbdcb906aa22a0617ffa7d19029f96fa20b8 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Thu, 10 Oct 2024 16:12:17 -0600 Subject: [PATCH 12/50] Xpetra: Fix return types in TpetraOperator Signed-off-by: Christian Glusa --- packages/xpetra/src/Operator/Xpetra_EpetraOperator.hpp | 4 ++-- packages/xpetra/src/Operator/Xpetra_TpetraOperator.hpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/xpetra/src/Operator/Xpetra_EpetraOperator.hpp b/packages/xpetra/src/Operator/Xpetra_EpetraOperator.hpp index 411c33cb176f..ab75af2e267e 100644 --- a/packages/xpetra/src/Operator/Xpetra_EpetraOperator.hpp +++ b/packages/xpetra/src/Operator/Xpetra_EpetraOperator.hpp @@ -151,13 +151,13 @@ class EpetraInverseOperator : public Operator > getDomainMap() const { + virtual const Teuchos::RCP > getDomainMap() const { XPETRA_MONITOR("EpetraOperator::getDomainMap()"); return toXpetra(op_->OperatorDomainMap()); } //! The Map associated with the range of this operator, which must be compatible with Y.getMap(). - virtual Teuchos::RCP > getRangeMap() const { + virtual const Teuchos::RCP > getRangeMap() const { XPETRA_MONITOR("EpetraOperator::getRangeMap()"); return toXpetra(op_->OperatorRangeMap()); } diff --git a/packages/xpetra/src/Operator/Xpetra_TpetraOperator.hpp b/packages/xpetra/src/Operator/Xpetra_TpetraOperator.hpp index f279c037d15c..ad33f6fa4d78 100644 --- a/packages/xpetra/src/Operator/Xpetra_TpetraOperator.hpp +++ b/packages/xpetra/src/Operator/Xpetra_TpetraOperator.hpp @@ -128,12 +128,12 @@ class TpetraOperator //@{ //! 
The Map associated with the domain of this operator, which must be compatible with X.getMap(). - virtual Teuchos::RCP > getDomainMap() const { + virtual const Teuchos::RCP > getDomainMap() const { return Teuchos::null; } //! The Map associated with the range of this operator, which must be compatible with Y.getMap(). - virtual Teuchos::RCP > getRangeMap() const { + virtual const Teuchos::RCP > getRangeMap() const { return Teuchos::null; } @@ -199,12 +199,12 @@ class TpetraOperator //@{ //! The Map associated with the domain of this operator, which must be compatible with X.getMap(). - virtual Teuchos::RCP > getDomainMap() const { + virtual const Teuchos::RCP > getDomainMap() const { return Teuchos::null; } //! The Map associated with the range of this operator, which must be compatible with Y.getMap(). - virtual Teuchos::RCP > getRangeMap() const { + virtual const Teuchos::RCP > getRangeMap() const { return Teuchos::null; } From 7398324313c353f691f7116b7761483bf642f2eb Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Mon, 4 Nov 2024 15:36:17 -0700 Subject: [PATCH 13/50] MueLu: Refactor constructors to avoid copy-pasta Signed-off-by: Christian Glusa --- .../MueLu_BoundaryDetection.hpp | 213 ++--- .../MueLu_ClassicalDropping.hpp | 27 + .../MatrixTransformation/MueLu_CutDrop.hpp | 17 + .../MueLu_DistanceLaplacianDropping.hpp | 17 + .../MueLu_DroppingCommon.hpp | 37 + .../MueLu_MatrixConstruction.hpp | 888 +++++++----------- 6 files changed, 539 insertions(+), 660 deletions(-) diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp index a000343db43a..f79983f3cb43 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp @@ -23,9 +23,13 @@ namespace MueLu::BoundaryDetection { -// These functors all assume that the boundaryNodes view has been initialized 
to false. +/*! + @class PointDirichletFunctor + @brief Functor for marking nodes as Dirichlet. -// Marks rows as Dirichlet based on value threshold and number of off-diagonal entries. + A row is marked as Dirichlet boundary if fewer than dirichletNonzeroThreshold entries are larger in absolute value than dirichletThreshold. + It is assumed that boundaryNodes was initialized to false. +*/ template class PointDirichletFunctor { private: @@ -67,116 +71,15 @@ class PointDirichletFunctor { } }; -// Marks rows as Dirichlet based on abs(rowsum) and abs(diag). -template -class RowSumFunctor { - private: - using scalar_type = typename local_matrix_type::value_type; - using local_ordinal_type = typename local_matrix_type::ordinal_type; - using memory_space = typename local_matrix_type::memory_space; - - using ATS = Kokkos::ArithTraits; - using magnitudeType = typename ATS::magnitudeType; - using magATS = Kokkos::ArithTraits; - using boundary_nodes_view = Kokkos::View; - - local_matrix_type A; - boundary_nodes_view boundaryNodes; - magnitudeType rowSumTol; +/*! + @class VectorDirichletFunctor + @brief Functor for marking nodes as Dirichlet in a block operator. 
- public: - RowSumFunctor(local_matrix_type& A_, boundary_nodes_view boundaryNodes_, magnitudeType rowSumTol_) - : A(A_) - , boundaryNodes(boundaryNodes_) - , rowSumTol(rowSumTol_) {} - - KOKKOS_FORCEINLINE_FUNCTION - void operator()(const local_ordinal_type rlid) const { - scalar_type rowsum = ATS::zero(); - scalar_type diagval = ATS::zero(); - auto row = A.rowConst(rlid); - for (local_ordinal_type k = 0; k < row.length; ++k) { - auto clid = row.colidx(k); - auto val = row.value(k); - if (rlid == static_cast(clid)) - diagval = val; - rowsum += val; - } - if (ATS::magnitude(rowsum) > ATS::magnitude(diagval) * rowSumTol) { - boundaryNodes(rlid) = true; - } - } -}; - -template -class BoundaryFunctor { - private: - using scalar_type = typename local_matrix_type::value_type; - using local_ordinal_type = typename local_matrix_type::ordinal_type; - using memory_space = typename local_matrix_type::memory_space; - - local_matrix_type A; - functor_type_0 functor0; - functor_type_1 functor1; - functor_type_2 functor2; - functor_type_3 functor3; - - public: - BoundaryFunctor(local_matrix_type& A_) - : A(A_) - , functor0(0) - , functor1(0) - , functor2(0) - , functor3(0) {} - - BoundaryFunctor(local_matrix_type& A_, functor_type_0& functor0_) - : A(A_) - , functor0(functor0_) - , functor1(0) - , functor2(0) - , functor3(0) {} - - BoundaryFunctor(local_matrix_type& A_, functor_type_0& functor0_, functor_type_1& functor1_) - : A(A_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(0) - , functor3(0) {} - - BoundaryFunctor(local_matrix_type& A_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_) - : A(A_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(0) {} - - BoundaryFunctor(local_matrix_type& A_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_) - : A(A_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , 
functor3(functor3_) {} - - KOKKOS_INLINE_FUNCTION - void - operator()(const local_ordinal_type rlid) const { - if constexpr (!std::is_same_v) - functor0(rlid); - if constexpr (!std::is_same_v) - functor1(rlid); - if constexpr (!std::is_same_v) - functor2(rlid); - if constexpr (!std::is_same_v) - functor3(rlid); - } -}; - -// Marks rows as Dirichlet based on value threshold and number of off-diagonal entries. -// Marks blocks as Dirichlet when one row is Dirichlet (useGreedyDirichlet==true) or when all rows are Dirichlet (useGreedyDirichlet==false). + Assumes a single fixed block size specified by blockSize. + Marks blocks as Dirichlet when one row is Dirichlet (useGreedyDirichlet==true) or when all rows are Dirichlet (useGreedyDirichlet==false). + A row is marked as Dirichlet boundary if fewer than dirichletNonzeroThreshold entries are larger in absolute value than dirichletThreshold. + It is assumed that boundaryNodes was initialized to false. +*/ template class VectorDirichletFunctor { private: @@ -238,6 +141,94 @@ class VectorDirichletFunctor { } }; +/*! + @class RowSumFunctor + @brief Functor for marking nodes as Dirichlet based on rowsum. + + A row is marked as Dirichlet boundary if the row sum (including the diagonal entry) is larger in absolute value than the absolute value of the diagonal multiplied by the threshold rowSumTol. + It is assumed that boundaryNodes was initialized to false. 
+*/ +template +class RowSumFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using magATS = Kokkos::ArithTraits; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + boundary_nodes_view boundaryNodes; + magnitudeType rowSumTol; + + public: + RowSumFunctor(local_matrix_type& A_, boundary_nodes_view boundaryNodes_, magnitudeType rowSumTol_) + : A(A_) + , boundaryNodes(boundaryNodes_) + , rowSumTol(rowSumTol_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + scalar_type rowsum = ATS::zero(); + scalar_type diagval = ATS::zero(); + auto row = A.rowConst(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + auto val = row.value(k); + if (rlid == static_cast(clid)) + diagval = val; + rowsum += val; + } + if (ATS::magnitude(rowsum) > ATS::magnitude(diagval) * rowSumTol) { + boundaryNodes(rlid) = true; + } + } +}; + +/*! + @class BoundaryFunctor + @brief Functor that serially applies sub-functors to rows. +*/ +template +class BoundaryFunctor { + private: + using local_ordinal_type = typename local_matrix_type::ordinal_type; + + Functor functor; + BoundaryFunctor remainingFunctors; + + public: + BoundaryFunctor(local_matrix_type& A_, Functor& functor_, RemainingFunctors&... remainingFunctors_) + : functor(functor_) + , remainingFunctors(A_, remainingFunctors_...) 
{} + + KOKKOS_FUNCTION void operator()(const local_ordinal_type rlid) const { + functor(rlid); + remainingFunctors(rlid); + } +}; + +template +class BoundaryFunctor { + private: + using local_ordinal_type = typename local_matrix_type::ordinal_type; + + local_matrix_type A; + Functor functor; + + public: + BoundaryFunctor(local_matrix_type& A_, Functor& functor_) + : A(A_) + , functor(functor_) {} + + KOKKOS_FUNCTION void operator()(const local_ordinal_type rlid) const { + functor(rlid); + } +}; + } // namespace MueLu::BoundaryDetection #endif diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp index b0a7ed2f3554..68c98915a0bf 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp @@ -18,6 +18,15 @@ namespace MueLu::ClassicalDropping { +/*! + @class AbsDropFunctor + @brief Classical dropping criterion + + Evaluates the dropping criterion + \f[ + \frac{|A_{ij}|^2}{|A_{ii}| |A_{jj}|} \le \theta^2 + \f] +*/ template class AbsDropFunctor { private: @@ -68,6 +77,15 @@ class AbsDropFunctor { } }; +/*! + @class SignedClassicalRSDropFunctor + @brief Signed classical Ruge-Stueben dropping criterion + + Evaluates the dropping criterion + \f[ + \frac{-\operatorname{Re}A_{ij}}{|A_{ii}|} \le \theta + \f] +*/ template class SignedClassicalRSDropFunctor { private: @@ -116,6 +134,15 @@ class SignedClassicalRSDropFunctor { } }; +/*! 
+ @class SignedClassicalSADropFunctor + @brief Signed classical smoothed aggregation dropping criterion + + Evaluates the dropping criterion + \f[ + \frac{-\operatorname{sign}(A_{ij}) |A_{ij}|^2}{|A_{ii}| |A_{jj}|} \le \theta^2 + \f] +*/ template class SignedClassicalSADropFunctor { private: diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp index e5068974586c..1bb2fa1b1648 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp @@ -20,11 +20,16 @@ namespace MueLu::CutDrop { +/*! Cut drop algorithm options*/ enum decisionAlgoType { defaultAlgo, unscaled_cut, scaled_cut, scaled_cut_symmetric }; +/*! + @class UnscaledComparison + @brief Orders entries of row \f$i\f$ by \f$|A_{ij}|^2\f$. +*/ template class UnscaledComparison { public: @@ -104,6 +109,10 @@ class UnscaledComparison { } }; +/*! + @class ScaledComparison + @brief Orders entries of row \f$i\f$ by \f$\frac{|A_{ij}|^2}{|A_{ii}| |A_{jj}|}\f$. +*/ template class ScaledComparison { public: @@ -303,6 +312,10 @@ class UnscaledDistanceLaplacianComparison { } }; +/*! + @class ScaledDistanceLaplacianComparison + @brief Orders entries of row \f$i\f$ by \f$\frac{|d_{ij}|^2}{|d_{ii}| |d_{jj}|}\f$ where \f$d_{ij}\f$ is the distance Laplacian. +*/ template class ScaledDistanceLaplacianComparison { public: @@ -441,6 +454,10 @@ KOKKOS_INLINE_FUNCTION void serialHeapSort(view_type& v, comparator_type compara } } +/*! + @class CutDropFunctor + @brief Order each row by a criterion, compare the ratio of values and drop all entries once the ratio is below the threshold.
+*/ template class CutDropFunctor { private: diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DistanceLaplacianDropping.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DistanceLaplacianDropping.hpp index 454a177956eb..12161d0d11d9 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DistanceLaplacianDropping.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DistanceLaplacianDropping.hpp @@ -20,6 +20,10 @@ namespace MueLu::DistanceLaplacian { +/*! +@class DistanceFunctor +@brief Computes the unscaled distance Laplacian. +*/ template class DistanceFunctor { private: @@ -66,6 +70,9 @@ class DistanceFunctor { } }; +/*! +Method to compute ghosted distance Laplacian diagonal. +*/ template Teuchos::RCP > getDiagonal(Xpetra::Matrix& A, @@ -115,6 +122,16 @@ getDiagonal(Xpetra::Matrix& A, } } +/*! +@class DropFunctor +@brief Drops entries based on the unscaled distance Laplacian. + +Evaluates the dropping criterion +\f[ +\frac{|d_{ij}|^2}{|d_{ii}| |d_{jj}|} \le \theta^2 +\f] +where \f$d_{ij}\f$ is a distance metric. +*/ template class DropFunctor { private: diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp index deba71e88665..65cb55fc7e43 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp @@ -17,6 +17,7 @@ namespace MueLu { +/*! Possible decision for a single entry */ enum DecisionType { UNDECIDED = 0, // no decision has been taken yet, used for initialization KEEP = 1, // keeep the entry @@ -36,6 +37,10 @@ class NoOpFunctor { } }; +/*! + @class PointwiseDropBoundaryFunctor + @brief Functor that drops boundary nodes for a blockSize == 1 problem. +*/ template class PointwiseDropBoundaryFunctor { private: @@ -70,6 +75,10 @@ class PointwiseDropBoundaryFunctor { } }; +/*!
+ @class VectorDropBoundaryFunctor + @brief Functor that drops boundary nodes for a blockSize > 1 problem. +*/ template class VectorDropBoundaryFunctor { private: @@ -107,6 +116,10 @@ class VectorDropBoundaryFunctor { } }; +/*! +@class KeepDiagonalFunctor +@brief Functor that marks diagonal entries as kept, unless they are already marked as boundary. +*/ template class KeepDiagonalFunctor { private: @@ -137,6 +150,10 @@ class KeepDiagonalFunctor { } }; +/*! +@class DropOffRankFunctor +@brief Functor that drops off-rank entries +*/ template class DropOffRankFunctor { private: @@ -166,6 +183,10 @@ class DropOffRankFunctor { } }; +/*! +@class MarkSingletonFunctor +@brief Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary. +*/ template class MarkSingletonFunctor { private: @@ -206,6 +227,10 @@ class MarkSingletonFunctor { } }; +/*! +@class MarkSingletonVectorFunctor +@brief Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary. +*/ template class MarkSingletonVectorFunctor { private: @@ -250,6 +275,10 @@ class MarkSingletonVectorFunctor { } }; +/*! +@class BlockDiagonalizeFunctor +@brief Functor that drops all entries that are not on the block diagonal. +*/ template class BlockDiagonalizeFunctor { private: @@ -291,6 +320,10 @@ class BlockDiagonalizeFunctor { } }; +/*! +@class DebugFunctor +@brief Functor that checks that all entries have been marked. +*/ template class DebugFunctor { private: @@ -322,6 +355,10 @@ class DebugFunctor { } }; +/*! +@class SymmetrizeFunctor +@brief Functor that symmetrizes the dropping decisions.
+*/ template class SymmetrizeFunctor { private: diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp index cee43f651dfb..0abbc6724b91 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp @@ -21,18 +21,17 @@ #endif namespace MueLu::MatrixConstruction { - -template +/*! + @class PointwiseCountingFunctor + @brief Functor that executes a sequence of sub-functors on each row for a problem with blockSize == 1. + + The functor applies a series of functors to each row of the matrix. + Each sub-functor can modify the decision to drop or keep any matrix entry in the given row. + These decisions are applied to the results_view. + Once a row has been processed by all sub-functors, the number of entries in the row after dropping is determined. + The result is saved as offsets in rowptr. 
+*/ +template class PointwiseCountingFunctor { private: using scalar_type = typename local_matrix_type::value_type; @@ -45,245 +44,147 @@ class PointwiseCountingFunctor { local_matrix_type A; results_view results; rowptr_type rowptr; - functor_type_0 functor0; - functor_type_1 functor1; - functor_type_2 functor2; - functor_type_3 functor3; - functor_type_4 functor4; - functor_type_5 functor5; - functor_type_6 functor6; - functor_type_7 functor7; - functor_type_8 functor8; - functor_type_9 functor9; + functor_type functor; + PointwiseCountingFunctor remainingFunctors; + bool firstFunctor; - std::vector functorNames; - - void init() { #ifdef MUELU_COALESCE_DROP_DEBUG - functorNames = std::vector(); - -#define DEBUG_PREP(functor, functor_number) \ - { \ - if constexpr (!std::is_same_v) { \ - std::string functorName = typeid(decltype(functor)).name(); \ - int status = 0; \ - char* demangledFunctorName = 0; \ - demangledFunctorName = abi::__cxa_demangle(functorName.c_str(), 0, 0, &status); \ - functorNames.push_back(demangledFunctorName); \ - } \ - } - DEBUG_PREP(functor0, 0); - DEBUG_PREP(functor1, 1); - DEBUG_PREP(functor2, 2); - DEBUG_PREP(functor3, 3); - DEBUG_PREP(functor4, 4); - DEBUG_PREP(functor5, 5); - DEBUG_PREP(functor6, 6); - DEBUG_PREP(functor7, 7); - DEBUG_PREP(functor8, 8); - DEBUG_PREP(functor9, 9); -#undef DEBUG_PREP + std::string functorName; #endif - } public: - PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_) + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type& functor_, remaining_functor_types&... remainingFunctors_) : A(A_) , results(results_) , rowptr(rowptr_) - , functor0(0) - , functor1(0) - , functor2(0) - , functor3(0) - , functor4(0) - , functor5(0) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); + , functor(functor_) + , remainingFunctors(A_, results_, rowptr_, false, remainingFunctors_...) 
+ , firstFunctor(true) { +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string mangledFunctorName = typeid(decltype(functor)).name(); + int status = 0; + char* demangledFunctorName = 0; + demangledFunctorName = abi::__cxa_demangle(functorName.c_str(), 0, 0, &status); + functorName = demangledFunctorName; +#endif } - PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_) + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, bool firstFunctor_, functor_type& functor_, remaining_functor_types&... remainingFunctors_) : A(A_) , results(results_) , rowptr(rowptr_) - , functor0(functor0_) - , functor1(0) - , functor2(0) - , functor3(0) - , functor4(0) - , functor5(0) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); + , functor(functor_) + , remainingFunctors(A_, results_, rowptr_, false, remainingFunctors_...) + , firstFunctor(firstFunctor_) { +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string mangledFunctorName = typeid(decltype(functor)).name(); + int status = 0; + char* demangledFunctorName = 0; + demangledFunctorName = abi::__cxa_demangle(functorName.c_str(), 0, 0, &status); + functorName = demangledFunctorName; +#endif } - PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_) - : A(A_) - , results(results_) - , rowptr(rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(0) - , functor3(0) - , functor4(0) - , functor5(0) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); - } + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type rlid, local_ordinal_type& nnz, const bool& final) const { +#ifdef MUELU_COALESCE_DROP_DEBUG + if (firstFunctor) { + Kokkos::printf("\nStarting on row %d\n", rlid); - PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, 
functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_) - : A(A_) - , results(results_) - , rowptr(rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(0) - , functor4(0) - , functor5(0) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); - } + auto row = A.rowConst(rlid); - PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_) - : A(A_) - , results(results_) - , rowptr(rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(0) - , functor5(0) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); - } + Kokkos::printf("indices: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + Kokkos::printf("%5d ", clid); + } + Kokkos::printf("\n"); - PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, - functor_type_4& functor4_) - : A(A_) - , results(results_) - , rowptr(rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(functor4_) - , functor5(0) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); - } + Kokkos::printf("values: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto val = row.value(k); + Kokkos::printf("%5f ", val); + } + Kokkos::printf("\n"); + } +#endif - PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, - functor_type_4& functor4_, functor_type_5& functor5_) - : A(A_) - , results(results_) - , 
rowptr(rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(functor4_) - , functor5(functor5_) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); - } + functor(rlid); - PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, - functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_) - : A(A_) - , results(results_) - , rowptr(rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(functor4_) - , functor5(functor5_) - , functor6(functor6_) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); - } +#ifdef MUELU_COALESCE_DROP_DEBUG + { + Kokkos::printf("%s\n", functorName.c_str()); - PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, - functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_, functor_type_7& functor7_) - : A(A_) - , results(results_) - , rowptr(rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(functor4_) - , functor5(functor5_) - , functor6(functor6_) - , functor7(functor7_) - , functor8(0) - , functor9(0) { - init(); + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + + Kokkos::printf("decisions: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + Kokkos::printf("%5d ", results(offset + k)); + } + Kokkos::printf("\n"); + } +#endif + + remainingFunctors(rlid, nnz, final); } +}; + +template +class PointwiseCountingFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename 
local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + using rowptr_type = typename local_matrix_type::row_map_type::non_const_type; + + local_matrix_type A; + results_view results; + rowptr_type rowptr; + functor_type functor; + bool firstFunctor; - PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, - functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_, functor_type_7& functor7_, functor_type_8& functor8_) +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string functorName; +#endif + + public: + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type& functor_) : A(A_) , results(results_) , rowptr(rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(functor4_) - , functor5(functor5_) - , functor6(functor6_) - , functor7(functor7_) - , functor8(functor8_) - , functor9(0) { - init(); + , functor(functor_) + , firstFunctor(true) { +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string mangledFunctorName = typeid(decltype(functor)).name(); + int status = 0; + char* demangledFunctorName = 0; + demangledFunctorName = abi::__cxa_demangle(functorName.c_str(), 0, 0, &status); + functorName = demangledFunctorName; +#endif } - PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, - functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_, functor_type_7& functor7_, functor_type_8& functor8_, functor_type_9& functor9_) + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, bool firstFunctor_, functor_type& 
functor_) : A(A_) , results(results_) , rowptr(rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(functor4_) - , functor5(functor5_) - , functor6(functor6_) - , functor7(functor7_) - , functor8(functor8_) - , functor9(functor9_) { - init(); + , functor(functor_) + , firstFunctor(firstFunctor_) { +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string mangledFunctorName = typeid(decltype(functor)).name(); + int status = 0; + char* demangledFunctorName = 0; + demangledFunctorName = abi::__cxa_demangle(functorName.c_str(), 0, 0, &status); + functorName = demangledFunctorName; +#endif } KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type rlid, local_ordinal_type& nnz, const bool& final) const { #ifdef MUELU_COALESCE_DROP_DEBUG - { + if (firstFunctor) { Kokkos::printf("\nStarting on row %d\n", rlid); auto row = A.rowConst(rlid); @@ -304,50 +205,21 @@ class PointwiseCountingFunctor { } #endif -#ifdef MUELU_COALESCE_DROP_DEBUG -#define APPLY_FUNCTOR(functor, functor_number) \ - { \ - if constexpr (!std::is_same_v) { \ - functor(rlid); \ - { \ - Kokkos::printf("%s\n", functorNames[functor_number].c_str()); \ - \ - auto row = A.rowConst(rlid); \ - const size_t offset = A.graph.row_map(rlid); \ - \ - Kokkos::printf("decisions: "); \ - for (local_ordinal_type k = 0; k < row.length; ++k) { \ - Kokkos::printf("%5d ", results(offset + k)); \ - } \ - Kokkos::printf("\n"); \ - } \ - } \ - } -#else -#define APPLY_FUNCTOR(functor, functor_number) \ - { \ - if constexpr (!std::is_same_v) { \ - functor(rlid); \ - } \ - } -#endif - APPLY_FUNCTOR(functor0, 0); - APPLY_FUNCTOR(functor1, 1); - APPLY_FUNCTOR(functor2, 2); - APPLY_FUNCTOR(functor3, 3); - APPLY_FUNCTOR(functor4, 4); - APPLY_FUNCTOR(functor5, 5); - APPLY_FUNCTOR(functor6, 6); - APPLY_FUNCTOR(functor7, 7); - APPLY_FUNCTOR(functor8, 8); - APPLY_FUNCTOR(functor9, 9); - -#undef APPLY_FUNCTOR + functor(rlid); #ifdef MUELU_COALESCE_DROP_DEBUG - { - 
Kokkos::printf("Done with row %d\n", rlid); + Kokkos::printf("%s\n", functorName); + + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + + Kokkos::printf("decisions: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + Kokkos::printf("%5d ", results(offset + k)); } + + Kokkos::printf("\n"); + Kokkos::printf("Done with row %d\n", rlid); #endif size_t start = A.graph.row_map(rlid); @@ -362,6 +234,14 @@ class PointwiseCountingFunctor { } }; +/*! + @class PointwiseFillReuseFunctor + @brief Functor that fills the filtered matrix while reusing the graph of the matrix before dropping, blockSize == 1. + + The dropped graph is built from scratch. + The filtered matrix reuses the graph of the matrix before dropping. + Lumps dropped entries to the diagonal if lumping==true. +*/ template class PointwiseFillReuseFunctor { private: @@ -424,6 +304,13 @@ class PointwiseFillReuseFunctor { } }; +/*! + @class PointwiseFillNoReuseFunctor + @brief Functor does not reuse the graph of the matrix for a problem with blockSize == 1. + + The dropped graph and the filtered matrix are built from scratch. + Lumps dropped entries to the diagonal if lumping==true. +*/ template class PointwiseFillNoReuseFunctor { private: @@ -473,17 +360,19 @@ class PointwiseFillNoReuseFunctor { } }; +/*! + @class VectorCountingFunctor + @brief Functor that executes a sequence of sub-functors on each block of rows. + + The functor applies a series of functors to each row of the matrix. + Each sub-functor can modify the decision to drop or keep any matrix entry in the given row. + These decisions are applied to the results_view. + Once a row has been processed by all sub-functors, the number of entries in the row after dropping is determined. + The result is saved as offsets in rowptr. +*/ template + class functor_type, + class... 
remaining_functor_types> class VectorCountingFunctor { private: using scalar_type = typename local_matrix_type::value_type; @@ -502,282 +391,194 @@ class VectorCountingFunctor { rowptr_type filtered_rowptr; rowptr_type graph_rowptr; - functor_type_0 functor0; - functor_type_1 functor1; - functor_type_2 functor2; - functor_type_3 functor3; - functor_type_4 functor4; - functor_type_5 functor5; - functor_type_6 functor6; - functor_type_7 functor7; - functor_type_8 functor8; - functor_type_9 functor9; + functor_type functor; + VectorCountingFunctor remainingFunctors; std::vector functorNames; - void init() { -#ifdef MUELU_COALESCE_DROP_DEBUG - functorNames = std::vector(); - -#define DEBUG_PREP(functor, functor_number) \ - { \ - if constexpr (!std::is_same_v) { \ - std::string functorName = typeid(decltype(functor)).name(); \ - int status = 0; \ - char* demangledFunctorName = 0; \ - demangledFunctorName = abi::__cxa_demangle(functorName.c_str(), 0, 0, &status); \ - functorNames.push_back(demangledFunctorName); \ - } \ - } - DEBUG_PREP(functor0, 0); - DEBUG_PREP(functor1, 1); - DEBUG_PREP(functor2, 2); - DEBUG_PREP(functor3, 3); - DEBUG_PREP(functor4, 4); - DEBUG_PREP(functor5, 5); - DEBUG_PREP(functor6, 6); - DEBUG_PREP(functor7, 7); - DEBUG_PREP(functor8, 8); - DEBUG_PREP(functor9, 9); -#undef DEBUG_PREP -#endif - } - public: - VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_) + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, functor_type& functor_, remaining_functor_types&... 
remainingFunctors_) : A(A_) , blockSize(blockSize_) , ghosted_point_to_block(ghosted_point_to_block_) , results(results_) , filtered_rowptr(filtered_rowptr_) , graph_rowptr(graph_rowptr_) - , functor0(0) - , functor1(0) - , functor2(0) - , functor3(0) - , functor4(0) - , functor5(0) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); + , functor(functor_) + , remainingFunctors(A_, blockSize_, ghosted_point_to_block_, results_, filtered_rowptr_, graph_rowptr_, remainingFunctors_...) { +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string mangledFunctorName = typeid(decltype(functor)).name(); + int status = 0; + char* demangledFunctorName = 0; + demangledFunctorName = abi::__cxa_demangle(mangledFunctorName.c_str(), 0, 0, &status); + functorName = demangledFunctorName; +#endif } - VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, - functor_type_0& functor0_) - : A(A_) - , blockSize(blockSize_) - , ghosted_point_to_block(ghosted_point_to_block_) - , results(results_) - , filtered_rowptr(filtered_rowptr_) - , graph_rowptr(graph_rowptr_) - , functor0(functor0_) - , functor1(0) - , functor2(0) - , functor3(0) - , functor4(0) - , functor5(0) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); + KOKKOS_INLINE_FUNCTION + void join(Kokkos::pair& dest, const Kokkos::pair& src) const { + dest.first += src.first; + dest.second += src.second; } - VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, - functor_type_0& functor0_, functor_type_1& functor1_) - : A(A_) - , blockSize(blockSize_) - , ghosted_point_to_block(ghosted_point_to_block_) - , results(results_) - , filtered_rowptr(filtered_rowptr_) - , graph_rowptr(graph_rowptr_) - 
, functor0(functor0_) - , functor1(functor1_) - , functor2(0) - , functor3(0) - , functor4(0) - , functor5(0) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); + KOKKOS_INLINE_FUNCTION + void operatorRow(const local_ordinal_type rlid) const { + functor(rlid); + remainingFunctors.operatorRow(rlid); } - VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, - functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_) - : A(A_) - , blockSize(blockSize_) - , ghosted_point_to_block(ghosted_point_to_block_) - , results(results_) - , filtered_rowptr(filtered_rowptr_) - , graph_rowptr(graph_rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(0) - , functor4(0) - , functor5(0) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); - } + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type brlid, Kokkos::pair& nnz, const bool& final) const { + auto nnz_filtered = &nnz.first; + auto nnz_graph = &nnz.second; - VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, - functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_) - : A(A_) - , blockSize(blockSize_) - , ghosted_point_to_block(ghosted_point_to_block_) - , results(results_) - , filtered_rowptr(filtered_rowptr_) - , graph_rowptr(graph_rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(0) - , functor5(0) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); - } +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("\nStarting on block row %d\n", brlid); +#endif + for 
(local_ordinal_type rlid = blockSize * brlid; rlid < blockSize * (brlid + 1); ++rlid) { +#ifdef MUELU_COALESCE_DROP_DEBUG + { + Kokkos::printf("\nStarting on row %d\n", rlid); - VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, - functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, - functor_type_4& functor4_) - : A(A_) - , blockSize(blockSize_) - , ghosted_point_to_block(ghosted_point_to_block_) - , results(results_) - , filtered_rowptr(filtered_rowptr_) - , graph_rowptr(graph_rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(functor4_) - , functor5(0) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); - } + auto row = A.rowConst(rlid); - VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, - functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, - functor_type_4& functor4_, functor_type_5& functor5_) - : A(A_) - , blockSize(blockSize_) - , ghosted_point_to_block(ghosted_point_to_block_) - , results(results_) - , filtered_rowptr(filtered_rowptr_) - , graph_rowptr(graph_rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(functor4_) - , functor5(functor5_) - , functor6(0) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); - } + Kokkos::printf("indices: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + Kokkos::printf("%5d ", clid); + } + Kokkos::printf("\n"); - VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, 
block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, - functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, - functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_) - : A(A_) - , blockSize(blockSize_) - , ghosted_point_to_block(ghosted_point_to_block_) - , results(results_) - , filtered_rowptr(filtered_rowptr_) - , graph_rowptr(graph_rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(functor4_) - , functor5(functor5_) - , functor6(functor6_) - , functor7(0) - , functor8(0) - , functor9(0) { - init(); - } + Kokkos::printf("values: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto val = row.value(k); + Kokkos::printf("%5f ", val); + } + Kokkos::printf("\n"); + } +#endif - VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, - functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, - functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_, functor_type_7& functor7_) - : A(A_) - , blockSize(blockSize_) - , ghosted_point_to_block(ghosted_point_to_block_) - , results(results_) - , filtered_rowptr(filtered_rowptr_) - , graph_rowptr(graph_rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(functor4_) - , functor5(functor5_) - , functor6(functor6_) - , functor7(functor7_) - , functor8(0) - , functor9(0) { - init(); - } + functor(rlid); + remainingFunctors.operatorRow(rlid); - VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& 
filtered_rowptr_, rowptr_type& graph_rowptr_, - functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, - functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_, functor_type_7& functor7_, functor_type_8& functor8_) - : A(A_) - , blockSize(blockSize_) - , ghosted_point_to_block(ghosted_point_to_block_) - , results(results_) - , filtered_rowptr(filtered_rowptr_) - , graph_rowptr(graph_rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(functor4_) - , functor5(functor5_) - , functor6(functor6_) - , functor7(functor7_) - , functor8(functor8_) - , functor9(0) { - init(); +#ifdef MUELU_COALESCE_DROP_DEBUG + { + Kokkos::printf("%s\n", functorName.c_str()); + + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + + Kokkos::printf("decisions: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + Kokkos::printf("%5d ", results(offset + k)); + } + Kokkos::printf("\n"); + } +#endif + +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("Done with row %d\n", rlid); +#endif + + size_t start = A.graph.row_map(rlid); + size_t end = A.graph.row_map(rlid + 1); + for (size_t i = start; i < end; ++i) { + if (results(i) == KEEP) { + ++(*nnz_filtered); + } + } + if (final) + filtered_rowptr(rlid + 1) = *nnz_filtered; + } + +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("Done with block row %d\nGraph indices ", brlid); +#endif + + local_ordinal_type* nextIndices = new local_ordinal_type[blockSize]; + for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { + nextIndices[block_index] = 0; + } + local_ordinal_type prev_bclid = -1; + while (true) { + local_ordinal_type min_block_index = -1; + local_ordinal_type min_clid = ATS::max(); + local_ordinal_type min_offset = -1; + for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { + auto rlid = blockSize * brlid + 
block_index; + auto offset = A.graph.row_map(rlid) + nextIndices[block_index]; + if (offset == A.graph.row_map(rlid + 1)) + continue; + auto clid = A.graph.entries(offset); + if (clid < min_clid) { + min_block_index = block_index; + min_clid = clid; + min_offset = offset; + } + } + if (min_block_index == -1) + break; + ++nextIndices[min_block_index]; + auto bclid = ghosted_point_to_block(min_clid); + if (prev_bclid < bclid) { + if (results(min_offset) == KEEP) { + ++(*nnz_graph); +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("%5d ", bclid); +#endif + prev_bclid = bclid; + } + } + } +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("\n"); +#endif + if (final) + graph_rowptr(brlid + 1) = *nnz_graph; } +}; + +template +class VectorCountingFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using block_indices_view_type = Kokkos::View; + + using rowptr_type = typename local_matrix_type::row_map_type::non_const_type; + using ATS = Kokkos::ArithTraits; + + local_matrix_type A; + local_ordinal_type blockSize; + block_indices_view_type ghosted_point_to_block; + results_view results; + rowptr_type filtered_rowptr; + rowptr_type graph_rowptr; + + bool firstFunctor; + functor_type functor; + + std::vector functorNames; - VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, - functor_type_0& functor0_, functor_type_1& functor1_, functor_type_2& functor2_, functor_type_3& functor3_, - functor_type_4& functor4_, functor_type_5& functor5_, functor_type_6& functor6_, functor_type_7& functor7_, functor_type_8& functor8_, functor_type_9& functor9_) + public: + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type 
blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, functor_type& functor_) : A(A_) , blockSize(blockSize_) , ghosted_point_to_block(ghosted_point_to_block_) , results(results_) , filtered_rowptr(filtered_rowptr_) , graph_rowptr(graph_rowptr_) - , functor0(functor0_) - , functor1(functor1_) - , functor2(functor2_) - , functor3(functor3_) - , functor4(functor4_) - , functor5(functor5_) - , functor6(functor6_) - , functor7(functor7_) - , functor8(functor8_) - , functor9(functor9_) { - init(); + , functor(functor_) { +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string mangledFunctorName = typeid(decltype(functor)).name(); + int status = 0; + char* demangledFunctorName = 0; + demangledFunctorName = abi::__cxa_demangle(mangledFunctorName.c_str(), 0, 0, &status); + functorName = demangledFunctorName; +#endif } KOKKOS_INLINE_FUNCTION @@ -786,10 +587,16 @@ class VectorCountingFunctor { dest.second += src.second; } + KOKKOS_INLINE_FUNCTION + void operatorRow(const local_ordinal_type rlid) const { + functor(rlid); + } + KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type brlid, Kokkos::pair& nnz, const bool& final) const { auto nnz_filtered = &nnz.first; auto nnz_graph = &nnz.second; + #ifdef MUELU_COALESCE_DROP_DEBUG Kokkos::printf("\nStarting on block row %d\n", brlid); #endif @@ -816,46 +623,22 @@ class VectorCountingFunctor { } #endif + functor(rlid); + #ifdef MUELU_COALESCE_DROP_DEBUG -#define APPLY_FUNCTOR(functor, functor_number) \ - { \ - if constexpr (!std::is_same_v) { \ - functor(rlid); \ - { \ - Kokkos::printf("%s\n", functorNames[functor_number].c_str()); \ - \ - auto row = A.rowConst(rlid); \ - const size_t offset = A.graph.row_map(rlid); \ - \ - Kokkos::printf("decisions: "); \ - for (local_ordinal_type k = 0; k < row.length; ++k) { \ - Kokkos::printf("%5d ", results(offset + k)); \ - } \ - Kokkos::printf("\n"); \ - } \ - } \ - } -#else -#define 
APPLY_FUNCTOR(functor, functor_number) \ - { \ - if constexpr (!std::is_same_v) { \ - functor(rlid); \ - } \ - } -#endif + { + Kokkos::printf("%s\n", functorName.c_str()); - APPLY_FUNCTOR(functor0, 0); - APPLY_FUNCTOR(functor1, 1); - APPLY_FUNCTOR(functor2, 2); - APPLY_FUNCTOR(functor3, 3); - APPLY_FUNCTOR(functor4, 4); - APPLY_FUNCTOR(functor5, 5); - APPLY_FUNCTOR(functor6, 6); - APPLY_FUNCTOR(functor7, 7); - APPLY_FUNCTOR(functor8, 8); - APPLY_FUNCTOR(functor9, 9); + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); -#undef APPLY_FUNCTOR + Kokkos::printf("decisions: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + Kokkos::printf("%5d ", results(offset + k)); + } + Kokkos::printf("\n"); + } +#endif #ifdef MUELU_COALESCE_DROP_DEBUG Kokkos::printf("Done with row %d\n", rlid); @@ -919,6 +702,13 @@ class VectorCountingFunctor { } }; +/*! + @class VectorFillNoReuseFunctor + @brief Functor does not reuse the graph of the matrix for a problem with blockSize>1. + + The dropped graph and the filtered matrix are built from scratch. + Lumps dropped entries to the diagonal if lumping==true. 
+*/ template class VectorFillFunctor { private: From 3b791214788772177d05f8ea71baf0718662cac9 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Wed, 20 Nov 2024 09:29:12 -0700 Subject: [PATCH 14/50] MueLu ClassicalDropping: Rename functors Signed-off-by: Christian Glusa --- .../MueLu_ClassicalDropping.hpp | 20 +++++++------- .../MueLu_CoalesceDropFactory_kokkos_def.hpp | 26 +++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp index 68c98915a0bf..1ba7039a5129 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp @@ -19,8 +19,8 @@ namespace MueLu::ClassicalDropping { /*! - @class AbsDropFunctor - @brief Classical dropping criterion + @class SAFunctor + @brief Classical smoothed aggregation dropping criterion Evaluates the dropping criterion \f[ @@ -28,7 +28,7 @@ namespace MueLu::ClassicalDropping { \f] */ template -class AbsDropFunctor { +class SAFunctor { private: using matrix_type = Xpetra::Matrix; using diag_vec_type = Xpetra::MultiVector; @@ -51,7 +51,7 @@ class AbsDropFunctor { results_view results; public: - AbsDropFunctor(matrix_type& A_, magnitudeType threshold, results_view& results_) + SAFunctor(matrix_type& A_, magnitudeType threshold, results_view& results_) : A(A_.getLocalMatrixDevice()) , eps(threshold) , results(results_) { @@ -78,7 +78,7 @@ class AbsDropFunctor { }; /*! 
- @class SignedClassicalRSDropFunctor + @class SignedRSFunctor @brief Signed classical Ruge-Stueben dropping criterion Evaluates the dropping criterion @@ -87,7 +87,7 @@ class AbsDropFunctor { \f] */ template -class SignedClassicalRSDropFunctor { +class SignedRSFunctor { private: using matrix_type = Xpetra::Matrix; using local_matrix_type = typename matrix_type::local_matrix_type; @@ -111,7 +111,7 @@ class SignedClassicalRSDropFunctor { results_view results; public: - SignedClassicalRSDropFunctor(matrix_type& A_, magnitudeType threshold, results_view& results_) + SignedRSFunctor(matrix_type& A_, magnitudeType threshold, results_view& results_) : A(A_.getLocalMatrixDevice()) , eps(threshold) , results(results_) { @@ -135,7 +135,7 @@ class SignedClassicalRSDropFunctor { }; /*! - @class SignedClassicalSADropFunctor + @class SignedSAFunctor @brief Signed classical smoothed aggregation dropping criterion Evaluates the dropping criterion @@ -144,7 +144,7 @@ class SignedClassicalRSDropFunctor { \f] */ template -class SignedClassicalSADropFunctor { +class SignedSAFunctor { private: using matrix_type = Xpetra::Matrix; using diag_vec_type = Xpetra::MultiVector; @@ -168,7 +168,7 @@ class SignedClassicalSADropFunctor { results_view results; public: - SignedClassicalSADropFunctor(matrix_type& A_, magnitudeType threshold, results_view& results_) + SignedSAFunctor(matrix_type& A_, magnitudeType threshold, results_view& results_) : A(A_.getLocalMatrixDevice()) , eps(threshold) , results(results_) { diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp index 4d3d64c43745..7f3f724a14d4 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp @@ -286,11 +286,11 @@ std::tuple(BlockNumbers), *std::get<1>(BlockNumbers), 
results); if (classicalAlgoStr == "default") { - auto classical_dropping = ClassicalDropping::AbsDropFunctor(*A, threshold, results); + auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results); if (aggregationMayCreateDirichlet) { runCountingFunctor(block_diagonalize, @@ -390,7 +390,7 @@ std::tuple Date: Wed, 20 Nov 2024 09:51:05 -0700 Subject: [PATCH 15/50] MueLu CoalesceDropFactory_kokkos: Rename variables in MatrixConstruction Signed-off-by: Christian Glusa --- .../MatrixTransformation/MueLu_MatrixConstruction.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp index 0abbc6724b91..1a5f2729c72e 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp @@ -267,7 +267,7 @@ class PointwiseFillReuseFunctor { KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type rlid) const { auto rowA = A.row(rlid); - size_t K = A.graph.row_map(rlid); + size_t row_start = A.graph.row_map(rlid); auto rowFilteredA = filteredA.row(rlid); local_ordinal_type j = 0; local_ordinal_type jj = 0; @@ -281,7 +281,7 @@ class PointwiseFillReuseFunctor { diagOffset = j; } } - if (results(K + k) == KEEP) { + if (results(row_start + k) == KEEP) { rowFilteredA.colidx(j) = rowA.colidx(k); rowFilteredA.value(j) = rowA.value(k); ++j; @@ -742,7 +742,7 @@ class VectorFillFunctor { void operator()(const local_ordinal_type brlid) const { for (local_ordinal_type rlid = blockSize * brlid; rlid < blockSize * (brlid + 1); ++rlid) { auto rowA = A.row(rlid); - size_t K = A.graph.row_map(rlid); + size_t row_start = A.graph.row_map(rlid); auto rowFilteredA = filteredA.row(rlid); local_ordinal_type j = 0; scalar_type diagCorrection = zero; @@ -754,7 +754,7 @@ class VectorFillFunctor { diagOffset = j; } 
} - if (results(K + k) == KEEP) { + if (results(row_start + k) == KEEP) { rowFilteredA.colidx(j) = rowA.colidx(k); rowFilteredA.value(j) = rowA.value(k); ++j; From 443878f51bd788848d0095aa565d4f9ab94c1cb9 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Wed, 20 Nov 2024 09:53:51 -0700 Subject: [PATCH 16/50] MueLu CoalesceDropFactory_kokkos: Namespace macros Signed-off-by: Christian Glusa --- .../MueLu_CoalesceDropFactory_kokkos_def.hpp | 298 +++++++++--------- 1 file changed, 149 insertions(+), 149 deletions(-) diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp index 7f3f724a14d4..3f9d157701d6 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp @@ -257,7 +257,7 @@ std::tuple(BlockNumbers), *std::get<1>(BlockNumbers), results); - runCountingFunctor(block_diagonalize); + MueLu_runDroppingFunctors(block_diagonalize); } else { TEUCHOS_ASSERT(false); } } else { Kokkos::deep_copy(results, KEEP); // FIXME: This seems inconsistent - // runCountingFunctor(drop_boundaries); + // MueLu_runDroppingFunctors(drop_boundaries); auto no_op = Misc::NoOpFunctor(); - runCountingFunctor(no_op); + MueLu_runDroppingFunctors(no_op); } -#undef runCountingFunctor +#undef MueLu_runDroppingFunctors } GO numDropped = lclA.nnz() - nnz_filtered; // We now know the number of entries of filtered A and have the final rowptr. 
@@ -841,7 +841,7 @@ std::tuple(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold); - runBoundaryFunctor(dirichlet_detection); + MueLu_runBoundaryFunctors(dirichlet_detection); } else { auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold); - runBoundaryFunctor(dirichlet_detection); + MueLu_runBoundaryFunctors(dirichlet_detection); } -#undef runBoundaryFunctor +#undef MueLu_runBoundaryFunctors } // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true. // Otherwise we're now done with it now. @@ -895,13 +895,13 @@ std::tuple::magnitudeType, LO, GO, NO>; @@ -976,14 +976,14 @@ std::tuple(); - runCountingFunctor(no_op); + MueLu_runDroppingFunctors(no_op); } -#undef runCountingFunctor +#undef MueLu_runDroppingFunctors } LocalOrdinal nnz_filtered = nnz.first; LocalOrdinal nnz_graph = nnz.second; From b234230724160fb45a263d128dfa3d793a764503 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Wed, 20 Nov 2024 09:58:31 -0700 Subject: [PATCH 17/50] MueLu DroppingCommon: Add doc Signed-off-by: Christian Glusa --- .../src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp index 65cb55fc7e43..dd371c124fcd 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp @@ -17,7 +17,10 @@ namespace MueLu { -/*! Possible decision for a single entry */ +/*! Possible decision for a single entry. + Once we are done with dropping, we should have no UNDECIDED entries left. + Normally, both DROP and BOUNDARY entries will be dropped, but we distinguish them in case we want to keep boundaries. 
+ */ enum DecisionType { UNDECIDED = 0, // no decision has been taken yet, used for initialization KEEP = 1, // keeep the entry From d3cb92dd6e31f7201783c5c1acddfb5d5c0effd7 Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Mon, 25 Nov 2024 18:20:10 -0600 Subject: [PATCH 18/50] Add PR script argument to skip packageEnables.cmake creation Add argument that can be passed downstream to skip the creation of the packageEnables.cmake file to the main entry point of our PR script, PullRequestLinuxDriverTest.py. Signed-off-by: Anderson Chauphan --- packages/framework/pr_tools/PullRequestLinuxDriverTest.py | 7 +++++++ .../pr_tools/unittests/test_PullRequestLinuxDriverTest.py | 1 + 2 files changed, 8 insertions(+) diff --git a/packages/framework/pr_tools/PullRequestLinuxDriverTest.py b/packages/framework/pr_tools/PullRequestLinuxDriverTest.py index 73caa76a1c62..7fa3724dfdc1 100755 --- a/packages/framework/pr_tools/PullRequestLinuxDriverTest.py +++ b/packages/framework/pr_tools/PullRequestLinuxDriverTest.py @@ -196,6 +196,13 @@ def parse_args(): default=default_filename_packageenables, help="{} Default={}".format(desc_package_enables, default_filename_packageenables)) + optional.add_argument('--skip-create-packageenables', + dest="skip_create_packageenables", + action="store_true", + help="Skip the creation of the packageEnables.cmake fragment file generated by " + \ + "the TriBITS infrastructure indicating which packages are to be enabled based on file " + \ + "changes between a source and target branch. Default=") + desc_subprojects_file = "The subprojects_file is used by the testing infrastructure. This parameter " + \ "allows the default, generated file, to be overridden. Generally this should " + \ "not be changed from the defaults." 
diff --git a/packages/framework/pr_tools/unittests/test_PullRequestLinuxDriverTest.py b/packages/framework/pr_tools/unittests/test_PullRequestLinuxDriverTest.py index 91e8e7068d71..a87993834afc 100755 --- a/packages/framework/pr_tools/unittests/test_PullRequestLinuxDriverTest.py +++ b/packages/framework/pr_tools/unittests/test_PullRequestLinuxDriverTest.py @@ -80,6 +80,7 @@ def setUp(self): workspace_dir='/dev/null/Trilinos_clone', filename_packageenables='../packageEnables.cmake', filename_subprojects='../package_subproject_list.cmake', + skip_create_packageenables=False, test_mode='standard', req_mem_per_core=3.0, max_cores_allowed=12, From 0d8c4dc65c2907a0546ba61baba9d49cf8d4d411 Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Mon, 25 Nov 2024 18:22:08 -0600 Subject: [PATCH 19/50] Specify argument to skip packageEnables creation in AT1 AutoTester1 begins its PR script from LaunchDriver.sh -> PullRequestLinuxDriver.sh where arguments are propagated to the main PR script, PullRequestLinuxDriverTest.py. 
Signed-off-by: Anderson Chauphan --- packages/framework/pr_tools/PullRequestLinuxDriver.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/framework/pr_tools/PullRequestLinuxDriver.sh b/packages/framework/pr_tools/PullRequestLinuxDriver.sh index 01679dae2cdb..391be53a7e75 100755 --- a/packages/framework/pr_tools/PullRequestLinuxDriver.sh +++ b/packages/framework/pr_tools/PullRequestLinuxDriver.sh @@ -270,6 +270,11 @@ then test_cmd_options+=( "--use-explicit-cachefile ") fi +if [[ ${GENCONFIG_BUILD_NAME} == *"framework"* ]] +then + test_cmd_options+=( "--skip-create-packageenables ") +fi + test_cmd="${PYTHON_EXE:?} ${REPO_ROOT:?}/packages/framework/pr_tools/PullRequestLinuxDriverTest.py ${test_cmd_options[@]}" # Call the script to launch the tests From 0ac49bf74fca7f633fbfc4fc44fbd5b91eeccb56 Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Mon, 25 Nov 2024 19:27:55 -0600 Subject: [PATCH 20/50] Add logic to skip create_package_enables_file Added the logic that skips the creation of the packageEnables.cmake file in Trilinos containing all the packages with changes that need to be enabled for PR testing. The implementation of current packageEnables generation writes to a hard-coded file that does not take into consideration the existing `filename_packageenables` argument. Further changes should be made such that the creation of the packageEnables file depends on the value of the `filename_packageenables` instead of this `skip-create-packageenables` flag, but here we are. 
Signed-off-by: Anderson Chauphan --- .../TrilinosPRConfigurationBase.py | 31 +++++++++++++------ .../test_TrilinosPRConfigurationBase.py | 17 ++++++++++ ...est_TrilinosPRConfigurationInstallation.py | 1 + .../test_TrilinosPRConfigurationStandard.py | 1 + 4 files changed, 41 insertions(+), 9 deletions(-) diff --git a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py index 5ca1499c2dfa..02b3a9f22fe6 100644 --- a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py +++ b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py @@ -267,6 +267,13 @@ def arg_filename_packageenables(self): """ return self.args.filename_packageenables + @property + def arg_skip_create_packageenables(self): + """ + This property controls whether the creation of a packageEnables.cmake fragment file + should be skipped. + """ + return self.args.skip_create_packageenables @property def arg_workspace_dir(self): @@ -501,7 +508,7 @@ def pullrequest_build_name(self): elif self.arg_dashboard_build_name != "__UNKNOWN__": output = self.arg_dashboard_build_name else: - output = self.arg_pr_genconfig_job_name + output = self.arg_pr_genconfig_job_name return output @@ -799,16 +806,22 @@ def prepare_test(self): self.message("| E N V I R O N M E N T S E T U P C O M P L E T E") self.message("+" + "-"*68 + "+") - self.message("+" + "-"*68 + "+") - self.message("| G e n e r a t e `packageEnables.cmake` S T A R T I N G") - self.message("+" + "-"*68 + "+") + if self.arg_skip_create_packageenables: + self.message("+" + "-"*68 + "+") + self.message("| S K I P P I N G `packageEnables.cmake` G E N E R A T I O N") + self.message("+" + "-"*68 + "+") - self.create_package_enables_file(dryrun=self.args.dry_run) + else: + self.message("+" + "-"*68 + "+") + self.message("| G e n e r a t e `packageEnables.cmake` S T A R T I N G") + self.message("+" + "-"*68 + "+") - 
self.message("+" + "-"*68 + "+") - self.message("| G e n e r a t e `packageEnables.cmake` C O M P L E T E D") - self.message("+" + "-"*68 + "+") - self.message("") + self.create_package_enables_file(dryrun=self.args.dry_run) + + self.message("+" + "-"*68 + "+") + self.message("| G e n e r a t e `packageEnables.cmake` C O M P L E T E D") + self.message("+" + "-"*68 + "+") + self.message("") return 0 diff --git a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py index b323abfe1943..da3d44a382da 100755 --- a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py +++ b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py @@ -225,6 +225,7 @@ def dummy_args(self): ctest_drop_site="testing.sandia.gov", filename_packageenables="../packageEnables.cmake", filename_subprojects="../package_subproject_list.cmake", + skip_create_packageenables=False, mode="standard", req_mem_per_core=3.0, max_cores_allowed=12, @@ -697,6 +698,22 @@ def test_TrilinosPRConfigurationBase_prepare_test(self): self.assertEqual(ret, 0) + def test_TrilinosPRConfigurationBase_prepare_test_skip_create_package_enables_file(self): + """ + Test that the prepare_test method does not call the member function create_package_enables_file + when skip_create_packageenables is True + """ + args = self.dummy_args() + args.skip_create_packageenables = True + pr_config = trilinosprhelpers.TrilinosPRConfigurationBase(args) + + with patch('trilinosprhelpers.TrilinosPRConfigurationBase.create_package_enables_file') as m_call: + pr_config.prepare_test() + + expected_call_count = 0 + self.assertEqual(m_call.call_count, expected_call_count) + + def test_TrilinosPRConfigurationBase_prepare_test_FAIL(self): """ Test the prepare_test method where it would fail due to diff --git 
a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationInstallation.py b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationInstallation.py index 4eac6b0ceeda..551a57aff301 100755 --- a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationInstallation.py +++ b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationInstallation.py @@ -168,6 +168,7 @@ def dummy_args(self): ctest_drop_site="testint.sandia.gov", filename_packageenables="../packageEnables.cmake", filename_subprojects="../package_subproject_list.cmake", + skip_create_packageenables=False, mode="standard", req_mem_per_core=3.0, max_cores_allowed=12, diff --git a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationStandard.py b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationStandard.py index 9a722b30cce8..47586711a32d 100755 --- a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationStandard.py +++ b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationStandard.py @@ -167,6 +167,7 @@ def dummy_args(self): ctest_drop_site="testing.sandia.gov", filename_packageenables="../packageEnables.cmake", filename_subprojects="../package_subproject_list.cmake", + skip_create_packageenables=False, mode="standard", req_mem_per_core=3.0, max_cores_allowed=12, From 899922222d5c7bac8a46e236dc4a32a62b7245f2 Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Mon, 25 Nov 2024 19:37:13 -0600 Subject: [PATCH 21/50] Add --skip-create-packageenables arg to AT2 Framework tests Add argument to skip the creation of the packageEnables.cmake fragment file for the Framework AT2 job. 
This job should not run any other tests than Framework unittests, which was what it was doing before due to the packageEnables.cmake file always being generated along with the test being launched from the ctest-driver. Signed-off-by: Anderson Chauphan --- .github/workflows/AT2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index 66d648bcd716..f9c0107293bd 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -384,7 +384,7 @@ jobs: --ctest-driver /home/runner/_work/Trilinos/Trilinos/cmake/SimpleTesting/cmake/ctest-driver.cmake \ --ctest-drop-site sems-cdash-son.sandia.gov/cdash \ --filename-subprojects ./package_subproject_list.cmake \ - --filename-packageenables ./packageEnables.cmake \ + --skip-create-packageenables \ - name: Summary if: ${{ !cancelled() }} shell: bash -l {0} From 773da12b1c79fd548e9d81428eb74db3ad54553e Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 26 Nov 2024 11:07:55 -0700 Subject: [PATCH 22/50] MueLu BoundaryDetection: Do not overwrite previously detected boundary nodes Signed-off-by: Christian Glusa --- .../Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp index f79983f3cb43..d70534974a00 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp @@ -129,14 +129,11 @@ class VectorDirichletFunctor { } } else { if (!rowIsDirichlet) { - boundaryNodes(rblid) = false; return; } } } - if constexpr (useGreedyDirichlet) - boundaryNodes(rblid) = false; - else + if constexpr (!useGreedyDirichlet) boundaryNodes(rblid) = true; } }; From 5b8cf0a2dc78c189394a005cb2a56fd10412a3ff Mon Sep 17 00:00:00 2001 From: iyamazaki 
Date: Thu, 29 Aug 2024 17:20:45 -0600 Subject: [PATCH 23/50] Amesos2 : option to use Zoltan2 in SimpleSolve_File, and fixes in CssMKL interface to deal with permuted GIDs Signed-off-by: iyamazaki --- packages/amesos2/cmake/Amesos2_config.h.in | 6 + packages/amesos2/cmake/Dependencies.cmake | 2 +- packages/amesos2/example/SimpleSolve_File.cpp | 150 +++++++++-- packages/amesos2/src/Amesos2_CssMKL_decl.hpp | 9 +- packages/amesos2/src/Amesos2_CssMKL_def.hpp | 245 ++++++++++++++---- .../amesos2/src/Amesos2_MatrixAdapter_def.hpp | 5 +- .../amesos2/src/Amesos2_SolverCore_def.hpp | 65 +++-- packages/amesos2/src/Amesos2_Timers.hpp | 6 + ...os2_TpetraCrsMatrix_MatrixAdapter_decl.hpp | 5 + ...sos2_TpetraCrsMatrix_MatrixAdapter_def.hpp | 13 + 10 files changed, 401 insertions(+), 105 deletions(-) diff --git a/packages/amesos2/cmake/Amesos2_config.h.in b/packages/amesos2/cmake/Amesos2_config.h.in index fc004c7c8fe4..4a6e76ce8f67 100644 --- a/packages/amesos2/cmake/Amesos2_config.h.in +++ b/packages/amesos2/cmake/Amesos2_config.h.in @@ -110,3 +110,9 @@ /* Define to 1 if SuperLU's *gssvx and *gsisx routines need a GlobalLU_t argument. 
*/ #cmakedefine HAVE_AMESOS2_SUPERLU5_API + +#cmakedefine HAVE_AMESOS2_XPETRA +#cmakedefine HAVE_AMESOS2_ZOLTAN2CORE +#ifdef HAVE_AMESOS2_ZOLTAN2CORE +# define HAVE_AMESOS2_ZOLTAN2 +#endif diff --git a/packages/amesos2/cmake/Dependencies.cmake b/packages/amesos2/cmake/Dependencies.cmake index 879d21293c3f..9c5d753ee8ee 100644 --- a/packages/amesos2/cmake/Dependencies.cmake +++ b/packages/amesos2/cmake/Dependencies.cmake @@ -5,7 +5,7 @@ SET(LIB_REQUIRED_DEP_PACKAGES Teuchos Tpetra TrilinosSS Kokkos) SET(LIB_OPTIONAL_DEP_PACKAGES Epetra EpetraExt ShyLU_NodeBasker ShyLU_NodeTacho) SET(TEST_REQUIRED_DEP_PACKAGES) -SET(TEST_OPTIONAL_DEP_PACKAGES ShyLU_NodeBasker ShyLU_NodeTacho Kokkos TrilinosSS) +SET(TEST_OPTIONAL_DEP_PACKAGES ShyLU_NodeBasker ShyLU_NodeTacho Kokkos TrilinosSS Xpetra Zoltan2Core) # SET(LIB_REQUIRED_DEP_TPLS SuperLU) SET(LIB_REQUIRED_DEP_TPLS ) SET(LIB_OPTIONAL_DEP_TPLS MPI SuperLU SuperLUMT SuperLUDist LAPACK UMFPACK PARDISO_MKL CSS_MKL ParMETIS METIS Cholmod MUMPS STRUMPACK CUSPARSE CUSOLVER) diff --git a/packages/amesos2/example/SimpleSolve_File.cpp b/packages/amesos2/example/SimpleSolve_File.cpp index 350d755ec402..bfaab5ee3214 100644 --- a/packages/amesos2/example/SimpleSolve_File.cpp +++ b/packages/amesos2/example/SimpleSolve_File.cpp @@ -12,6 +12,9 @@ #include #include #include +#include +#include +#include #include #include @@ -22,8 +25,16 @@ #include #include -#include "Amesos2.hpp" -#include "Amesos2_Version.hpp" +#include +#include + +#if defined(HAVE_AMESOS2_XPETRA) && defined(HAVE_AMESOS2_ZOLTAN2) +# include +# include +# include +# include +# include +#endif int main(int argc, char *argv[]) { @@ -32,7 +43,9 @@ int main(int argc, char *argv[]) { typedef Tpetra::CrsMatrix<>::scalar_type Scalar; typedef Tpetra::Map<>::local_ordinal_type LO; typedef Tpetra::Map<>::global_ordinal_type GO; + typedef Tpetra::Map<>::node_type NO; + typedef Tpetra::RowGraph Graph; typedef Tpetra::CrsMatrix MAT; typedef Tpetra::MultiVector MV; @@ -51,24 +64,32 @@ 
int main(int argc, char *argv[]) { Teuchos::oblackholestream blackhole; - bool printMatrix = false; - bool printSolution = false; - bool checkSolution = false; - bool printTiming = false; - bool allprint = false; + bool printMatrix = false; + bool printSolution = false; + bool checkSolution = false; + bool printTiming = false; + bool useStackedTimer = false; + bool allprint = false; bool verbose = (myRank==0); + bool useZoltan2 = false; + bool useParMETIS = false; std::string mat_filename("arc130.mtx"); std::string rhs_filename(""); std::string solvername("Superlu"); + std::string xml_filename(""); Teuchos::CommandLineProcessor cmdp(false,true); cmdp.setOption("verbose","quiet",&verbose,"Print messages and results."); cmdp.setOption("filename",&mat_filename,"Filename for Matrix-Market test matrix."); cmdp.setOption("rhs_filename",&rhs_filename,"Filename for Matrix-Market right-hand-side."); cmdp.setOption("solvername",&solvername,"Name of solver."); + cmdp.setOption("xml_filename",&xml_filename,"XML Filename for Solver parameters."); cmdp.setOption("print-matrix","no-print-matrix",&printMatrix,"Print the full matrix after reading it."); cmdp.setOption("print-solution","no-print-solution",&printSolution,"Print solution vector after solve."); cmdp.setOption("check-solution","no-check-solution",&checkSolution,"Check solution vector after solve."); + cmdp.setOption("use-zoltan2","no-zoltan2",&useZoltan2,"Use Zoltan2 (Hypergraph) for repartitioning"); + cmdp.setOption("use-parmetis","no-parmetis",&useParMETIS,"Use ParMETIS for repartitioning"); cmdp.setOption("print-timing","no-print-timing",&printTiming,"Print solver timing statistics"); + cmdp.setOption("use-stacked-timer","no-stacked-timer",&useStackedTimer,"Use StackedTimer to print solver timing statistics"); cmdp.setOption("all-print","root-print",&allprint,"All processors print to out"); if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { return -1; @@ -82,23 +103,13 @@ int main(int 
argc, char *argv[]) { const size_t numVectors = 1; - RCP A = Tpetra::MatrixMarket::Reader::readSparseFile(mat_filename, comm); - if( printMatrix ){ - A->describe(*fos, Teuchos::VERB_EXTREME); - } - else if( verbose ){ - std::cout << std::endl << A->description() << std::endl << std::endl; - } + // Read matrix + RCP A = Tpetra::MatrixMarket::Reader::readSparseFile(mat_filename, comm); - // get the maps - RCP > dmnmap = A->getDomainMap(); + // get the map (Range Map used for both X & B) RCP > rngmap = A->getRangeMap(); - - GO nrows = dmnmap->getGlobalNumElements(); - RCP > root_map - = rcp( new Map(nrows,myRank == 0 ? nrows : 0,0,comm) ); - RCP Xhat = rcp( new MV(root_map,numVectors) ); - RCP > importer = rcp( new Import(dmnmap,root_map) ); + RCP > dmnmap = A->getDomainMap(); + GO nrows = A->getGlobalNumRows(); // Create random X RCP X = rcp(new MV(dmnmap,numVectors)); @@ -122,6 +133,69 @@ int main(int argc, char *argv[]) { B = Tpetra::MatrixMarket::Reader::readDenseFile (rhs_filename, comm, rngmap); } + if (useZoltan2 || useParMETIS) { +#if defined(HAVE_AMESOS2_XPETRA) && defined(HAVE_AMESOS2_ZOLTAN2) + // Specify partitioning parameters + Teuchos::ParameterList zoltan_params; + zoltan_params.set("partitioning_approach", "partition"); + // + if (useParMETIS) { + if (comm->getRank() == 0) { + std::cout << "Using Zoltan2(ParMETIS)" << std::endl; + } + zoltan_params.set("algorithm", "parmetis"); + zoltan_params.set("symmetrize_input", "transpose"); + zoltan_params.set("partitioning_objective", "minimize_cut_edge_weight"); + } else { + if (comm->getRank() == 0) { + std::cout << "Using Zoltan2(HyperGraph)" << std::endl; + } + zoltan_params.set("algorithm", "phg"); + } + + // Create an input adapter for the Tpetra matrix. 
+ Zoltan2::TpetraRowGraphAdapter + zoltan_graph(A->getGraph()); + + // Create and solve partitioning problem + Zoltan2::PartitioningProblem> + problem(&zoltan_graph, &zoltan_params); + problem.solve(); + + // Redistribute matrix + RCP zoltan_A; + Zoltan2::TpetraCrsMatrixAdapter zoltan_matrix(A); + zoltan_matrix.applyPartitioningSolution (*A, zoltan_A, problem.getSolution()); + // Set it as coefficient matrix, and update range map + A = zoltan_A; + rngmap = A->getRangeMap(); + + // Redistribute RHS + RCP zoltan_b; + Zoltan2::XpetraMultiVectorAdapter adapterRHS(rcpFromRef (*B)); + adapterRHS.applyPartitioningSolution (*B, zoltan_b, problem.getSolution()); + // Set it as RHS + B = zoltan_b; + + // Redistribute Sol + RCP zoltan_x; + Zoltan2::XpetraMultiVectorAdapter adapterSol(rcpFromRef (*X)); + adapterSol.applyPartitioningSolution (*X, zoltan_x, problem.getSolution()); + // Set it as Sol + X = zoltan_x; +#else + TEUCHOS_TEST_FOR_EXCEPTION( + useZoltan2, std::invalid_argument, + "Both Xpetra and Zoltan2 are needed to use Zoltan2."); +#endif + } + if( printMatrix ){ + A->describe(*fos, Teuchos::VERB_EXTREME); + } + else if( verbose ){ + std::cout << std::endl << A->description() << std::endl << std::endl; + } + // Constructor from Factory RCP > solver; if( !Amesos2::query(solvername) ){ @@ -130,11 +204,30 @@ int main(int argc, char *argv[]) { } solver = Amesos2::create(solvername, A, X, B); + if (xml_filename != "") { + Teuchos::ParameterList test_params = + Teuchos::ParameterXMLFileReader(xml_filename).getParameters(); + Teuchos::ParameterList& amesos2_params = test_params.sublist("Amesos2"); + *fos << amesos2_params.currentParametersString() << std::endl; + solver->setParameters( Teuchos::rcpFromRef(amesos2_params) ); + } + RCP stackedTimer; + if(useStackedTimer) { + stackedTimer = rcp(new Teuchos::StackedTimer("Amesos2 SimpleSolve-File")); + Teuchos::TimeMonitor::setStackedTimer(stackedTimer); + } solver->symbolicFactorization().numericFactorization().solve(); + 
if(useStackedTimer) { + stackedTimer->stopBaseTimer(); + } if( printSolution ){ // Print the solution + RCP > root_map + = rcp( new Map(nrows,myRank == 0 ? nrows : 0,0,comm) ); + RCP Xhat = rcp( new MV(root_map,numVectors) ); + RCP > importer = rcp( new Import(rngmap,root_map) ); if( allprint ){ if( myRank == 0 ) *fos << "Solution :" << std::endl; Xhat->describe(*fos,Teuchos::VERB_EXTREME); @@ -167,11 +260,20 @@ int main(int argc, char *argv[]) { if (myRank == 0) *fos << std::endl; } - if( printTiming ){ + if(useStackedTimer) { + Teuchos::StackedTimer::OutputOptions options; + options.num_histogram=3; + options.print_warnings = false; + options.output_histogram = true; + options.output_fraction=true; + options.output_minmax = true; + stackedTimer->report(std::cout, comm, options); + } else if( printTiming ){ // Print some timing statistics solver->printTiming(*fos); + } else { + Teuchos::TimeMonitor::summarize(); } - Teuchos::TimeMonitor::summarize(); // We are done. return 0; diff --git a/packages/amesos2/src/Amesos2_CssMKL_decl.hpp b/packages/amesos2/src/Amesos2_CssMKL_decl.hpp index de968c111e7d..b158db3e5e52 100644 --- a/packages/amesos2/src/Amesos2_CssMKL_decl.hpp +++ b/packages/amesos2/src/Amesos2_CssMKL_decl.hpp @@ -268,16 +268,19 @@ namespace Amesos2 { /// number of righthand-side vectors mutable int_t nrhs_; + bool use_zoltan2_; + bool use_parmetis_; + bool css_initialized_; bool is_contiguous_; + /// The messaging level. Set to 1 if you wish for Pardiso MKL to print statistical info + int_t msglvl_; + /// CssMKL parameter vector. Note that the documentation uses /// 1-based indexing, but our interface must use 0-based indexing int_t iparm_[64]; - /// The messaging level. 
Set to 1 if you wish for Pardiso MKL to print statistical info - static const int_t msglvl_; - // We will deal with 1 factor at a time static const int_t maxfct_; static const int_t mnum_; diff --git a/packages/amesos2/src/Amesos2_CssMKL_def.hpp b/packages/amesos2/src/Amesos2_CssMKL_def.hpp index ea043e663c2b..6cca7dc8cbf3 100644 --- a/packages/amesos2/src/Amesos2_CssMKL_def.hpp +++ b/packages/amesos2/src/Amesos2_CssMKL_def.hpp @@ -24,6 +24,7 @@ #include #include #include +#include #include "Amesos2_SolverCore_def.hpp" #include "Amesos2_CssMKL_decl.hpp" @@ -44,25 +45,34 @@ namespace Amesos2 { , n_(Teuchos::as(this->globalNumRows_)) , perm_(this->globalNumRows_) , nrhs_(0) + , use_zoltan2_(false) + , use_parmetis_(false) , css_initialized_(false) , is_contiguous_(true) + , msglvl_(0) { + // Matrix info + Teuchos::RCP > matComm = this->matrixA_->getComm (); + const global_ordinal_type indexBase = this->matrixA_->getRowMap ()->getIndexBase (); + const local_ordinal_type nrows = this->matrixA_->getLocalNumRows(); + + // rowmap for loadA (to have locally contiguous) + css_rowmap_ = + Teuchos::rcp (new map_type (this->globalNumRows_, nrows, indexBase, matComm)); + // set the default matrix type set_css_mkl_matrix_type(); set_css_mkl_default_parameters(pt_, iparm_); // index base - const global_ordinal_type indexBase = this->matrixA_->getRowMap ()->getIndexBase (); iparm_[34] = (indexBase == 0 ? 1 : 0); /* Use one or zero-based indexing */ - // 1D block-row distribution - auto frow = this->matrixA_->getRowMap()->getMinGlobalIndex(); - auto nrows = this->matrixA_->getLocalNumRows(); + // 1D block-row distribution (using Contiguous map) + auto frow = css_rowmap_->getMinGlobalIndex(); iparm_[39] = 2; /* Matrix input format. */ iparm_[40] = frow; /* > Beginning of input domain. */ iparm_[41] = frow+nrows-1; /* > End of input domain. 
*/ // get MPI Comm - Teuchos::RCP > matComm = this->matrixA_->getComm (); TEUCHOS_TEST_FOR_EXCEPTION( matComm.is_null (), std::logic_error, "Amesos2::CssMKL " "constructor: The matrix's communicator is null!"); @@ -81,10 +91,6 @@ namespace Amesos2 { "MPI_COMM_NULL."); MPI_Comm CssComm = *(matMpiComm->getRawMpiComm ()); CssComm_ = MPI_Comm_c2f(CssComm); - - // rowmap for loadA (to have locally contiguous) - css_rowmap_ = - Teuchos::rcp (new map_type (this->globalNumRows_, nrows, indexBase, matComm)); } @@ -125,6 +131,45 @@ namespace Amesos2 { int CssMKL::symbolicFactorization_impl() { +#if 1 + if (use_zoltan2_) { + char filename[200]; + int nprocs = -1; + int rank = -1; + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + sprintf(filename,"metis_%d.dat", this->globalNumRows_); + if (rank == 0) printf( " filename : %s\n",filename ); + + FILE *fp = fopen(filename,"r"); + if (fp) { + if (rank == 0) printf( " - perm from filename : %s\n\n",filename ); + for (int i=0; iglobalNumRows_; i++) { + int row; + fscanf(fp, "%d", &row); + //perm_[i] = row; + perm_[row] = i; + } + /*if (rank == 0) { + printf("perm=[\n"); + for (int i=0; iglobalNumRows_; i++) { + printf( " %d\n",perm_[i] ); + } + printf("];\n"); + }*/ + } else { + if (rank == 0) printf( " - natural perm\n\n" ); + for (int i=0; iglobalNumRows_; i++) { + perm_[i] = i; + } + } + iparm_[4] = 1; /* 0: ignore perm, 1: use perm, 2: return perm */ + } +#endif + if (msglvl_ > 0 && this->matrixA_->getComm()->getRank() == 0) { + std::cout << " CssMKL::symbolicFactorization:\n" << std::endl; + for (int i=0; i < 64; i++) std::cout << " * IPARM[" << i << "] = " << iparm_[i] << std::endl; + } int_t error = 0; { #ifdef HAVE_AMESOS2_TIMERS @@ -141,13 +186,24 @@ namespace Amesos2 { const_cast(&msglvl_), &bdummy, &xdummy, &CssComm, &error ); } check_css_mkl_error(Amesos2::SYMBFACT, error); + if (msglvl_ > 0 && this->matrixA_->getComm()->getRank() == 0) { + std::cout << " CssMKL::symbolicFactorization 
done:" << std::endl; + std::cout << " * Time : " << this->timers_.symFactTime_.totalElapsedTime() << std::endl; + } // Pardiso only lets you retrieve the total number of factor // non-zeros, not for each individually. We should document how // such a situation is reported. this->setNnzLU(iparm_[17]); css_initialized_ = true; - + /*{ + int rank = -1; MPI_Comm_rank(MPI_COMM_WORLD, &rank); + char filename[200]; + sprintf(filename,"perm_%d.dat", rank); + FILE *fp = fopen(filename,"w"); + for (int i=0; i::numericFactorization_impl() { + if (msglvl_ > 0 && this->matrixA_->getComm()->getRank() == 0) { + std::cout << " CssMKL::numericFactorization:\n" << std::endl; + } int_t error = 0; { #ifdef HAVE_AMESOS2_TIMERS @@ -173,6 +232,10 @@ namespace Amesos2 { const_cast(&msglvl_), &bdummy, &xdummy, &CssComm, &error ); } check_css_mkl_error(Amesos2::NUMFACT, error); + if (msglvl_ > 0 && this->matrixA_->getComm()->getRank() == 0) { + std::cout << " CssMKL::numericFactorization done:" << std::endl; + std::cout << " Time : " << this->timers_.numFactTime_.totalElapsedTime() << std::endl; + } return( 0 ); } @@ -202,8 +265,7 @@ namespace Amesos2 { MultiVecAdapter, solver_scalar_type>::do_get(B, bvals_(), as(ld_rhs), - DISTRIBUTED_NO_OVERLAP, - this->rowIndexBase_); + Teuchos::ptrInArg(*css_rowmap_)); } int_t error = 0; @@ -242,7 +304,7 @@ namespace Amesos2 { MultiVecAdapter, solver_scalar_type>::do_put(X, xvals_(), as(ld_rhs), - DISTRIBUTED_NO_OVERLAP); + Teuchos::ptrInArg(*css_rowmap_)); } return( 0 ); @@ -268,7 +330,7 @@ namespace Amesos2 { RCP valid_params = getValidParameters_impl(); - // Fill-in reordering: 0 = minimum degree, 2 = METIS 4.0.1 (default), 3 = METIS 5.1, 4 = AMD, + // 2: Fill-in reordering from METIS, 3: thread dissection, 10: MPI version of the nested dissection if( parameterList->isParameter("IPARM(2)") ) { RCP fillin_validator = valid_params->getEntry("IPARM(2)").validator(); @@ -318,10 +380,30 @@ namespace Amesos2 { 
parameterList->getEntry("IPARM(18)").setValidator(report_validator); iparm_[17] = getIntegralValue(*parameterList, "IPARM(18)"); } + + // Check input matrix is sorted + if( parameterList->isParameter("IPARM(28)") ) + { + RCP report_validator = valid_params->getEntry("IPARM(28)").validator(); + parameterList->getEntry("IPARM(28)").setValidator(report_validator); + iparm_[27] = getIntegralValue(*parameterList, "IPARM(28)"); + } + if( parameterList->isParameter("useZoltan2") ){ + use_zoltan2_ = parameterList->get("useZoltan2"); + } + + if( parameterList->isParameter("useParMETIS") ){ + use_parmetis_ = parameterList->get("useParMETIS"); + } + if( parameterList->isParameter("IsContiguous") ){ is_contiguous_ = parameterList->get("IsContiguous"); } + + if( parameterList->isParameter("verbose") ){ + msglvl_ = parameterList->get("verbose"); + } } @@ -406,8 +488,16 @@ CssMKL::getValidParameters_impl() const pl->set("IPARM(18)", as(iparm_temp[17]), "Report the number of non-zero elements in the factors", anyNumberParameterEntryValidator(preferred_int, accept_int)); + pl->set("IPARM(28)", as(iparm_temp[27]), "Check input matrix is sorted", + anyNumberParameterEntryValidator(preferred_int, accept_int)); + + pl->set("useZoltan2", false, "Use Zoltan2 for re-distribution"); + pl->set("useParMETIS", false, "Use ParMETIS for re-distribution"); + pl->set("IsContiguous", true, "Whether GIDs contiguous"); + pl->set("verbose", 0, "Verbosity Message Level"); + valid_params = pl; } @@ -427,42 +517,92 @@ CssMKL::loadA_impl(EPhase current_phase) // CssMKL does not need matrix data in the pre-ordering phase if( current_phase == PREORDERING ) return( false ); + // is_contiguous : input is contiguous + // CONTIGUOUS_AND_ROOTED : input is not contiguous, so make output contiguous EDistribution dist_option = (iparm_[39] != 0 ? DISTRIBUTED_NO_OVERLAP : ((is_contiguous_ == true) ? 
ROOTED : CONTIGUOUS_AND_ROOTED)); - if (current_phase == SYMBFACT) { - if (dist_option == DISTRIBUTED_NO_OVERLAP) { - Kokkos::resize(nzvals_temp_, this->matrixA_->getLocalNNZ()); - Kokkos::resize(nzvals_view_, this->matrixA_->getLocalNNZ()); - Kokkos::resize(colind_view_, this->matrixA_->getLocalNNZ()); - Kokkos::resize(rowptr_view_, this->matrixA_->getLocalNumRows() + 1); - } else { - if( this->root_ ) { - Kokkos::resize(nzvals_temp_, this->matrixA_->getGlobalNNZ()); - Kokkos::resize(nzvals_view_, this->matrixA_->getGlobalNNZ()); - Kokkos::resize(colind_view_, this->matrixA_->getGlobalNNZ()); - Kokkos::resize(rowptr_view_, this->matrixA_->getGlobalNumRows() + 1); + if (dist_option == DISTRIBUTED_NO_OVERLAP && !is_contiguous_) { + // Neeed to form contiguous matrix + #if 0 + int myRank = this->matrixA_->getComm()->getRank(); + Teuchos::oblackholestream blackhole; + std::ostream& out = ( myRank == 0 ? std::cout : blackhole ); + RCP fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(out)); + + if (myRank == 0) printf( " %d Distributed and contiguous\n",myRank ); fflush(stdout); + this->matrixA_->describe(*fos, Teuchos::VERB_EXTREME); + if (myRank == 0) printf( " %d done print\n\n\n",myRank ); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); + #endif + Teuchos::RCP > contig_mat = this->matrixA_->get(ptrInArg(*css_rowmap_)); + #if 0 + contig_mat->describe(*fos, Teuchos::VERB_EXTREME); + if (myRank == 0) printf( " %d done re-print\n\n\n",myRank ); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); + + printf( " %d: (%d,%d, %d,%d) -> (%d,%d, %d,%d)\n",myRank, + this->matrixA_->getGlobalNumRows(),this->matrixA_->getGlobalNNZ(), this->matrixA_->getLocalNumRows(),this->matrixA_->getLocalNNZ(), + contig_mat->getGlobalNumRows(),contig_mat->getGlobalNNZ(), contig_mat->getLocalNumRows(),contig_mat->getLocalNNZ()); + #endif + // Copy into local views + if (current_phase == SYMBFACT) { + Kokkos::resize(nzvals_temp_, contig_mat->getLocalNNZ()); + Kokkos::resize(nzvals_view_, 
contig_mat->getLocalNNZ()); + Kokkos::resize(colind_view_, contig_mat->getLocalNNZ()); + Kokkos::resize(rowptr_view_, contig_mat->getLocalNumRows() + 1); + } + int_t nnz_ret = 0; + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ ); +#endif + Util::get_crs_helper_kokkos_view, + host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get( + contig_mat.ptr(), + nzvals_temp_, colind_view_, rowptr_view_, + nnz_ret, + ptrInArg(*css_rowmap_), + #if 1 + DISTRIBUTED_NO_OVERLAP, + #else + ROOTED, + #endif + SORTED_INDICES); + Kokkos::deep_copy(nzvals_view_, nzvals_temp_); + } + } else { + if (current_phase == SYMBFACT) { + if (dist_option == DISTRIBUTED_NO_OVERLAP) { + Kokkos::resize(nzvals_temp_, this->matrixA_->getLocalNNZ()); + Kokkos::resize(nzvals_view_, this->matrixA_->getLocalNNZ()); + Kokkos::resize(colind_view_, this->matrixA_->getLocalNNZ()); + Kokkos::resize(rowptr_view_, this->matrixA_->getLocalNumRows() + 1); } else { - Kokkos::resize(nzvals_temp_, 0); - Kokkos::resize(nzvals_view_, 0); - Kokkos::resize(colind_view_, 0); - Kokkos::resize(rowptr_view_, 0); + if( this->root_ ) { + Kokkos::resize(nzvals_temp_, this->matrixA_->getGlobalNNZ()); + Kokkos::resize(nzvals_view_, this->matrixA_->getGlobalNNZ()); + Kokkos::resize(colind_view_, this->matrixA_->getGlobalNNZ()); + Kokkos::resize(rowptr_view_, this->matrixA_->getGlobalNumRows() + 1); + } else { + Kokkos::resize(nzvals_temp_, 0); + Kokkos::resize(nzvals_view_, 0); + Kokkos::resize(colind_view_, 0); + Kokkos::resize(rowptr_view_, 0); + } } } - } - - { + int_t nnz_ret = 0; + { #ifdef HAVE_AMESOS2_TIMERS - Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ ); + Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ ); #endif - int_t nnz_ret = 0; - Util::get_crs_helper_kokkos_view, - host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get( - this->matrixA_.ptr(), - nzvals_temp_, colind_view_, 
rowptr_view_, - nnz_ret, - Teuchos::ptrInArg(*css_rowmap_), - dist_option, - SORTED_INDICES); - Kokkos::deep_copy(nzvals_view_, nzvals_temp_); + Util::get_crs_helper_kokkos_view, + host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get( + this->matrixA_.ptr(), + nzvals_temp_, colind_view_, rowptr_view_, + nnz_ret, + Teuchos::ptrInArg(*css_rowmap_), + dist_option, + SORTED_INDICES); + Kokkos::deep_copy(nzvals_view_, nzvals_temp_); + } } return( true ); } @@ -564,14 +704,22 @@ CssMKL::set_css_mkl_default_parameters(void* pt[], int_t iparm[]) // Reset some of the default parameters iparm[1] = 10; /* 2: Fill-in reordering from METIS, 3: thread dissection, 10: MPI version of the nested dissection and symbolic factorization*/ iparm[7] = 0; /* Max numbers of iterative refinement steps */ - iparm[9] = 13; /* Perturb the pivot elements with 1E-13 */ iparm[10] = 0; /* Disable nonsymmetric permutation and scaling MPS */ iparm[11] = 0; /* Normal solve (0), or a transpose solve (1) */ iparm[12] = 0; /* Do not use (non-)symmetric matchings */ iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ - iparm[20] = -1; /* Pivoting for symmetric indefinite matrices */ + iparm[20] = 1; /* Pivoting for symmetric indefinite matrices */ iparm[26] = 1; /* Check input matrix is sorted */ + // diagonal pertubation + if (mtype_ == -2 || mtype_ == -4) { + // symmetric indefinite + iparm[9] = 8; /* Perturb the pivot elements with 1E-8 */ + } else { + // non-symmetric + iparm[9] = 13; /* Perturb the pivot elements with 1E-13 */ + } + // set single or double precision if constexpr ( std::is_same_v ) { iparm[27] = 1; // single-precision @@ -581,12 +729,9 @@ CssMKL::set_css_mkl_default_parameters(void* pt[], int_t iparm[]) iparm[34] = 1; /* Use zero-based indexing */ } -template -const char* CssMKL::name = "CSSMKL"; template -const typename CssMKL::int_t -CssMKL::msglvl_ = 0; // set to be one, for more CSS messages +const char* CssMKL::name = "CSSMKL"; template 
const typename CssMKL::int_t diff --git a/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp index 7cf8a28c0b97..e6a1ddbadd23 100644 --- a/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp @@ -176,7 +176,10 @@ namespace Amesos2 { void MatrixAdapter::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const - {} + { + // TODO : Make sure to implement in all specialization (Only in Tpetra::CrsMatrix) + return static_cast(this)->describe(out, verbLevel); + } template < class Matrix > template < class KV > diff --git a/packages/amesos2/src/Amesos2_SolverCore_def.hpp b/packages/amesos2/src/Amesos2_SolverCore_def.hpp index 6907cc93a708..cd214b210aae 100644 --- a/packages/amesos2/src/Amesos2_SolverCore_def.hpp +++ b/packages/amesos2/src/Amesos2_SolverCore_def.hpp @@ -92,19 +92,23 @@ SolverCore::symbolicFactorization() Teuchos::TimeMonitor LocalTimer1(timers_.totalTime_); #endif - if( !status_.preOrderingDone() ){ - preOrdering(); - if( !matrix_loaded_ ) loadA(SYMBFACT); - } else { - loadA(SYMBFACT); - } + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor LocalTimer2(timers_.coreSymFactTime_); +#endif + if( !status_.preOrderingDone() ){ + preOrdering(); + if( !matrix_loaded_ ) loadA(SYMBFACT); + } else { + loadA(SYMBFACT); + } - int error_code = static_cast(this)->symbolicFactorization_impl(); - if (error_code == EXIT_SUCCESS){ - ++status_.numSymbolicFact_; - status_.last_phase_ = SYMBFACT; + int error_code = static_cast(this)->symbolicFactorization_impl(); + if (error_code == EXIT_SUCCESS){ + ++status_.numSymbolicFact_; + status_.last_phase_ = SYMBFACT; + } } - return *this; } @@ -116,18 +120,22 @@ SolverCore::numericFactorization() #ifdef HAVE_AMESOS2_TIMERS Teuchos::TimeMonitor LocalTimer1(timers_.totalTime_); #endif + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor LocalTimer2(timers_.coreNumFactTime_); +#endif + if( 
!status_.symbolicFactorizationDone() ){ + symbolicFactorization(); + if( !matrix_loaded_ ) loadA(NUMFACT); + } else { + loadA(NUMFACT); + } - if( !status_.symbolicFactorizationDone() ){ - symbolicFactorization(); - if( !matrix_loaded_ ) loadA(NUMFACT); - } else { - loadA(NUMFACT); - } - - int error_code = static_cast(this)->numericFactorization_impl(); - if (error_code == EXIT_SUCCESS){ - ++status_.numNumericFact_; - status_.last_phase_ = NUMFACT; + int error_code = static_cast(this)->numericFactorization_impl(); + if (error_code == EXIT_SUCCESS){ + ++status_.numNumericFact_; + status_.last_phase_ = NUMFACT; + } } return *this; @@ -189,10 +197,15 @@ SolverCore::solve(const Teuchos::Ptr X, const_cast(*this).numericFactorization(); } - int error_code = static_cast(this)->solve_impl(Teuchos::outArg(*x), Teuchos::ptrInArg(*b)); - if (error_code == EXIT_SUCCESS){ - ++status_.numSolve_; - status_.last_phase_ = SOLVE; + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor LocalTimer2(timers_.coreSolveTime_); +#endif + int error_code = static_cast(this)->solve_impl(Teuchos::outArg(*x), Teuchos::ptrInArg(*b)); + if (error_code == EXIT_SUCCESS){ + ++status_.numSolve_; + status_.last_phase_ = SOLVE; + } } } diff --git a/packages/amesos2/src/Amesos2_Timers.hpp b/packages/amesos2/src/Amesos2_Timers.hpp index 1f53727d3154..f4468e8b09ba 100644 --- a/packages/amesos2/src/Amesos2_Timers.hpp +++ b/packages/amesos2/src/Amesos2_Timers.hpp @@ -36,6 +36,9 @@ struct Timers { , symFactTime_(*(Teuchos::TimeMonitor::getNewTimer("Time for symbolic factorization"))) , numFactTime_(*(Teuchos::TimeMonitor::getNewTimer("Time for numeric factorization"))) , solveTime_(*(Teuchos::TimeMonitor::getNewTimer("Time for solve"))) + , coreSymFactTime_(*(Teuchos::TimeMonitor::getNewTimer("SolverCore::symbolicFactorization"))) + , coreNumFactTime_(*(Teuchos::TimeMonitor::getNewTimer("SolverCore::numericFactorization"))) + , coreSolveTime_(*(Teuchos::TimeMonitor::getNewTimer("SolverCore::solve"))) , 
totalTime_(*(Teuchos::TimeMonitor::getNewTimer("Total Time in Amesos2 interface"))) {} @@ -47,6 +50,9 @@ struct Timers { Teuchos::Time symFactTime_; Teuchos::Time numFactTime_; Teuchos::Time solveTime_; + Teuchos::Time coreSymFactTime_; + Teuchos::Time coreNumFactTime_; + Teuchos::Time coreSolveTime_; Teuchos::Time totalTime_; }; diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp index 1ed32235e915..3b519bffae49 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp @@ -88,6 +88,11 @@ namespace Amesos2 { RCP > get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; + //! Print a description of this adapter to the given output stream + void + describe (Teuchos::FancyOStream& os, + const Teuchos::EVerbosityLevel verbLevel = + Teuchos::Describable::verbLevel_default) const; }; } // end namespace Amesos2 diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp index 4d6ea994dcc0..853abca35a6b 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp @@ -84,6 +84,19 @@ namespace Amesos2 { return rcp (new ConcreteMatrixAdapter (t_mat)); } + + template + void + ConcreteMatrixAdapter< + Tpetra::CrsMatrix + >::describe (Teuchos::FancyOStream& os, + const Teuchos::EVerbosityLevel verbLevel) const + { + this->mat_->describe(os, verbLevel); + } } // end namespace Amesos2 #endif // AMESOS2_TPETRACRSMATRIX_MATRIXADAPTER_DEF_HPP From 15146998c37a40f8094e7bde99da9fdca5a930cf Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Tue, 10 Sep 2024 14:54:26 -0600 Subject: [PATCH 24/50] Amesos2 : use GO for CSS Signed-off-by: iyamazaki --- packages/amesos2/example/SimpleSolve_File.cpp | 2 
+- packages/amesos2/src/Amesos2_CssMKL_decl.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/amesos2/example/SimpleSolve_File.cpp b/packages/amesos2/example/SimpleSolve_File.cpp index bfaab5ee3214..347908857fed 100644 --- a/packages/amesos2/example/SimpleSolve_File.cpp +++ b/packages/amesos2/example/SimpleSolve_File.cpp @@ -99,7 +99,7 @@ int main(int argc, char *argv[]) { RCP fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(out)); // Say hello - out << myRank << " : " << Amesos2::version() << std::endl << std::endl; + out << myRank << " : " << Amesos2::version() << " on " << comm->getSize() << " MPIs" << std::endl << std::endl; const size_t numVectors = 1; diff --git a/packages/amesos2/src/Amesos2_CssMKL_decl.hpp b/packages/amesos2/src/Amesos2_CssMKL_decl.hpp index b158db3e5e52..74cafe9eacec 100644 --- a/packages/amesos2/src/Amesos2_CssMKL_decl.hpp +++ b/packages/amesos2/src/Amesos2_CssMKL_decl.hpp @@ -76,7 +76,7 @@ namespace Amesos2 { // This may be PMKL::_INTEGER_t or long long int depending on the // mapping and input ordinal - typedef typename TypeMap::type int_t; + typedef typename TypeMap::type int_t; /* For CssMKL we dispatch based on the integer type instead of * the scalar type: From aafc549d77eb5d569da2702eb3d1770749ea3d16 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Wed, 25 Sep 2024 12:36:43 -0600 Subject: [PATCH 25/50] Amesos2 : CSS reindexing Signed-off-by: iyamazaki --- packages/amesos2/src/Amesos2_CssMKL_decl.hpp | 3 - packages/amesos2/src/Amesos2_CssMKL_def.hpp | 86 +++---------------- .../src/Amesos2_MatrixAdapter_decl.hpp | 1 + .../amesos2/src/Amesos2_MatrixAdapter_def.hpp | 8 ++ ...os2_TpetraCrsMatrix_MatrixAdapter_decl.hpp | 1 + ...sos2_TpetraCrsMatrix_MatrixAdapter_def.hpp | 59 +++++++++++++ ...raRowMatrix_AbstractMatrixAdapter_decl.hpp | 1 + ...traRowMatrix_AbstractMatrixAdapter_def.hpp | 15 ++++ 8 files changed, 96 insertions(+), 78 deletions(-) diff --git a/packages/amesos2/src/Amesos2_CssMKL_decl.hpp 
b/packages/amesos2/src/Amesos2_CssMKL_decl.hpp index 74cafe9eacec..4a9f7db120e3 100644 --- a/packages/amesos2/src/Amesos2_CssMKL_decl.hpp +++ b/packages/amesos2/src/Amesos2_CssMKL_decl.hpp @@ -268,9 +268,6 @@ namespace Amesos2 { /// number of righthand-side vectors mutable int_t nrhs_; - bool use_zoltan2_; - bool use_parmetis_; - bool css_initialized_; bool is_contiguous_; diff --git a/packages/amesos2/src/Amesos2_CssMKL_def.hpp b/packages/amesos2/src/Amesos2_CssMKL_def.hpp index 6cca7dc8cbf3..423797332e7c 100644 --- a/packages/amesos2/src/Amesos2_CssMKL_def.hpp +++ b/packages/amesos2/src/Amesos2_CssMKL_def.hpp @@ -29,7 +29,6 @@ #include "Amesos2_SolverCore_def.hpp" #include "Amesos2_CssMKL_decl.hpp" - namespace Amesos2 { namespace PMKL { @@ -45,8 +44,6 @@ namespace Amesos2 { , n_(Teuchos::as(this->globalNumRows_)) , perm_(this->globalNumRows_) , nrhs_(0) - , use_zoltan2_(false) - , use_parmetis_(false) , css_initialized_(false) , is_contiguous_(true) , msglvl_(0) @@ -131,41 +128,6 @@ namespace Amesos2 { int CssMKL::symbolicFactorization_impl() { -#if 1 - if (use_zoltan2_) { - char filename[200]; - int nprocs = -1; - int rank = -1; - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - sprintf(filename,"metis_%d.dat", this->globalNumRows_); - if (rank == 0) printf( " filename : %s\n",filename ); - - FILE *fp = fopen(filename,"r"); - if (fp) { - if (rank == 0) printf( " - perm from filename : %s\n\n",filename ); - for (int i=0; iglobalNumRows_; i++) { - int row; - fscanf(fp, "%d", &row); - //perm_[i] = row; - perm_[row] = i; - } - /*if (rank == 0) { - printf("perm=[\n"); - for (int i=0; iglobalNumRows_; i++) { - printf( " %d\n",perm_[i] ); - } - printf("];\n"); - }*/ - } else { - if (rank == 0) printf( " - natural perm\n\n" ); - for (int i=0; iglobalNumRows_; i++) { - perm_[i] = i; - } - } - iparm_[4] = 1; /* 0: ignore perm, 1: use perm, 2: return perm */ - } -#endif if (msglvl_ > 0 && this->matrixA_->getComm()->getRank() == 0) { 
std::cout << " CssMKL::symbolicFactorization:\n" << std::endl; for (int i=0; i < 64; i++) std::cout << " * IPARM[" << i << "] = " << iparm_[i] << std::endl; @@ -196,14 +158,6 @@ namespace Amesos2 { // such a situation is reported. this->setNnzLU(iparm_[17]); css_initialized_ = true; - /*{ - int rank = -1; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - char filename[200]; - sprintf(filename,"perm_%d.dat", rank); - FILE *fp = fopen(filename,"w"); - for (int i=0; i, solver_scalar_type>::do_get(B, bvals_(), as(ld_rhs), - Teuchos::ptrInArg(*css_rowmap_)); + Teuchos::ptrInArg(*css_rowmap_)); } int_t error = 0; @@ -304,7 +258,7 @@ namespace Amesos2 { MultiVecAdapter, solver_scalar_type>::do_put(X, xvals_(), as(ld_rhs), - Teuchos::ptrInArg(*css_rowmap_)); + Teuchos::ptrInArg(*css_rowmap_)); } return( 0 ); @@ -389,14 +343,6 @@ namespace Amesos2 { iparm_[27] = getIntegralValue(*parameterList, "IPARM(28)"); } - if( parameterList->isParameter("useZoltan2") ){ - use_zoltan2_ = parameterList->get("useZoltan2"); - } - - if( parameterList->isParameter("useParMETIS") ){ - use_parmetis_ = parameterList->get("useParMETIS"); - } - if( parameterList->isParameter("IsContiguous") ){ is_contiguous_ = parameterList->get("IsContiguous"); } @@ -522,24 +468,14 @@ CssMKL::loadA_impl(EPhase current_phase) EDistribution dist_option = (iparm_[39] != 0 ? DISTRIBUTED_NO_OVERLAP : ((is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED)); if (dist_option == DISTRIBUTED_NO_OVERLAP && !is_contiguous_) { // Neeed to form contiguous matrix - #if 0 - int myRank = this->matrixA_->getComm()->getRank(); - Teuchos::oblackholestream blackhole; - std::ostream& out = ( myRank == 0 ? 
std::cout : blackhole ); - RCP fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(out)); - - if (myRank == 0) printf( " %d Distributed and contiguous\n",myRank ); fflush(stdout); - this->matrixA_->describe(*fos, Teuchos::VERB_EXTREME); - if (myRank == 0) printf( " %d done print\n\n\n",myRank ); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); - #endif + #if 1 + // Only reinex GIDs + css_rowmap_ = this->matrixA_->getRowMap(); // use original map to redistribute vectors in solve + Teuchos::RCP > contig_mat = this->matrixA_->reindex(); + #else + // Redistribued matrixA into contiguous GIDs Teuchos::RCP > contig_mat = this->matrixA_->get(ptrInArg(*css_rowmap_)); - #if 0 - contig_mat->describe(*fos, Teuchos::VERB_EXTREME); - if (myRank == 0) printf( " %d done re-print\n\n\n",myRank ); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); - - printf( " %d: (%d,%d, %d,%d) -> (%d,%d, %d,%d)\n",myRank, - this->matrixA_->getGlobalNumRows(),this->matrixA_->getGlobalNNZ(), this->matrixA_->getLocalNumRows(),this->matrixA_->getLocalNNZ(), - contig_mat->getGlobalNumRows(),contig_mat->getGlobalNNZ(), contig_mat->getLocalNumRows(),contig_mat->getLocalNNZ()); + //css_rowmap_ = contig_mat->getRowMap(); // use new map to redistribute vectors in solve #endif // Copy into local views if (current_phase == SYMBFACT) { @@ -558,7 +494,7 @@ CssMKL::loadA_impl(EPhase current_phase) contig_mat.ptr(), nzvals_temp_, colind_view_, rowptr_view_, nnz_ret, - ptrInArg(*css_rowmap_), + ptrInArg(*(contig_mat->getRowMap())), #if 1 DISTRIBUTED_NO_OVERLAP, #else @@ -598,7 +534,7 @@ CssMKL::loadA_impl(EPhase current_phase) this->matrixA_.ptr(), nzvals_temp_, colind_view_, rowptr_view_, nnz_ret, - Teuchos::ptrInArg(*css_rowmap_), + ptrInArg(*(this->matrixA_->getRowMap())), dist_option, SORTED_INDICES); Kokkos::deep_copy(nzvals_view_, nzvals_temp_); diff --git a/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp index 723837b8c9a4..45ca54790741 100644 --- 
a/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp @@ -215,6 +215,7 @@ namespace Amesos2 { } Teuchos::RCP get(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; + Teuchos::RCP reindex() const; /// Returns a short description of this Solver std::string description() const; diff --git a/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp index e6a1ddbadd23..ccca346f9cd5 100644 --- a/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp @@ -514,6 +514,14 @@ namespace Amesos2 { } + template < class Matrix > + Teuchos::RCP > + MatrixAdapter::reindex() const + { + return static_cast(this)->reindex_impl(); + } + + template Teuchos::RCP > createMatrixAdapter(Teuchos::RCP m){ diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp index 3b519bffae49..5bac57193d64 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp @@ -87,6 +87,7 @@ namespace Amesos2 { ConcreteMatrixAdapter(RCP m); RCP > get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; + RCP > reindex_impl() const; //! 
Print a description of this adapter to the given output stream void diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp index 853abca35a6b..d7bdfa649e68 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp @@ -85,6 +85,64 @@ namespace Amesos2 { } + + template + Teuchos::RCP > > + ConcreteMatrixAdapter< + Tpetra::CrsMatrix + >::reindex_impl() const + { + typedef Kokkos::DefaultHostExecutionSpace HostExecSpaceType; + typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type; + auto rowMap = this->mat_->getRowMap(); + auto colMap = this->mat_->getColMap(); + auto rowComm = rowMap->getComm(); + auto colComm = colMap->getComm(); + + global_ordinal_t indexBase = rowMap->getIndexBase(); + global_ordinal_t numDoFs = this->mat_->getGlobalNumRows(); + local_ordinal_t nRows = this->mat_->getLocalNumRows(); + local_ordinal_t nCols = colMap->getLocalNumElements(); + + auto tmpMap = rcp (new contiguous_map_type (numDoFs, nRows, indexBase, rowComm)); + global_ordinal_t frow = tmpMap->getMinGlobalIndex(); + + // Create new GID list for RowMap + Kokkos::View rowIndexList ("indexList", nRows); + for (local_ordinal_t k = 0; k < nRows; k++) { + rowIndexList(k) = frow+k; + } + // Create new GID list for ColMap + Kokkos::View colIndexList ("indexList", nCols); + typedef Tpetra::MultiVector gid_mv_t; + Teuchos::ArrayView rowIndexArray(rowIndexList.data(), nRows); + Teuchos::ArrayView colIndexArray(colIndexList.data(), nCols); + gid_mv_t row_mv (rowMap, rowIndexArray, nRows, 1); + gid_mv_t col_mv (colMap, colIndexArray, nCols, 1); + typedef Tpetra::Import import_t; + RCP importer = rcp (new import_t (rowMap, colMap)); + col_mv.doImport (row_mv, *importer, Tpetra::INSERT); + { + auto col_view = col_mv.getLocalViewHost(Tpetra::Access::ReadOnly); + for(int i=0; i newRowMap 
= rcp (new contiguous_map_type (numDoFs, rowIndexList.data(), nRows, indexBase, rowComm)); + Teuchos::RCP newColMap = rcp (new contiguous_map_type (numDoFs, colIndexList.data(), nCols, indexBase, colComm)); + + // Build Matrix with new Maps, + auto lclMatrix = this->mat_->getLocalMatrixDevice(); + RCP contiguous_t_mat = rcp( new matrix_t(newRowMap, newColMap, lclMatrix)); + + return rcp (new ConcreteMatrixAdapter (contiguous_t_mat)); + } + template mat_->describe(os, verbLevel); + Tpetra::MatrixMarket::Writer::writeSparseFile ("matA.dat", this->mat_); } } // end namespace Amesos2 diff --git a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_decl.hpp index 9eb848429c07..62381e188743 100644 --- a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_decl.hpp @@ -146,6 +146,7 @@ namespace Amesos2 { // different (cf subclasses of Tpetra::CrsMatrix), this method // hands off implementation to the adapter for the subclass RCP get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; + RCP reindex_impl() const; template void getSparseRowPtr_kokkos_view(KV & view) const { diff --git a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp index ec6cc767f8da..f74a3dd8753b 100644 --- a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp @@ -329,6 +329,21 @@ namespace Amesos2 { #endif } + + template + RCP > + AbstractConcreteMatrixAdapter< + Tpetra::RowMatrix, DerivedMat + >::reindex_impl() const + { +#ifdef __CUDACC__ + // NVCC doesn't seem to like the static_cast, even though it is valid + return dynamic_cast*>(this)->reindex_impl(map, distribution); +#else + return 
static_cast*>(this)->reindex_impl(map, distribution); +#endif + } + } // end namespace Amesos2 #endif // AMESOS2_TPETRAROWMATRIX_ABSTRACTMATRIXADAPTER_DEF_HPP From 0cb1a4f07eebe908fb23536a66dedfa668c5ee99 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Wed, 25 Sep 2024 12:53:42 -0600 Subject: [PATCH 26/50] Amesos2 : cleanup Signed-off-by: iyamazaki --- packages/amesos2/src/Amesos2_CssMKL_def.hpp | 2 +- .../amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/amesos2/src/Amesos2_CssMKL_def.hpp b/packages/amesos2/src/Amesos2_CssMKL_def.hpp index 423797332e7c..2a760521dbd0 100644 --- a/packages/amesos2/src/Amesos2_CssMKL_def.hpp +++ b/packages/amesos2/src/Amesos2_CssMKL_def.hpp @@ -24,11 +24,11 @@ #include #include #include -#include #include "Amesos2_SolverCore_def.hpp" #include "Amesos2_CssMKL_decl.hpp" + namespace Amesos2 { namespace PMKL { diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp index d7bdfa649e68..1e3dd2309def 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp @@ -154,7 +154,6 @@ namespace Amesos2 { const Teuchos::EVerbosityLevel verbLevel) const { this->mat_->describe(os, verbLevel); - Tpetra::MatrixMarket::Writer::writeSparseFile ("matA.dat", this->mat_); } } // end namespace Amesos2 From fb9603ade2e83e0c3941864a097353ab668e2c37 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Mon, 30 Sep 2024 16:09:40 -0600 Subject: [PATCH 27/50] Amesos2 : reuse maps for reindexing Signed-off-by: iyamazaki --- packages/amesos2/src/Amesos2_CssMKL_decl.hpp | 2 + packages/amesos2/src/Amesos2_CssMKL_def.hpp | 6 +- .../src/Amesos2_MatrixAdapter_decl.hpp | 15 ++-- .../amesos2/src/Amesos2_MatrixAdapter_def.hpp | 24 +++--- ...os2_TpetraCrsMatrix_MatrixAdapter_decl.hpp | 5 +- 
...sos2_TpetraCrsMatrix_MatrixAdapter_def.hpp | 83 ++++++++++--------- ...raRowMatrix_AbstractMatrixAdapter_decl.hpp | 2 +- ...traRowMatrix_AbstractMatrixAdapter_def.hpp | 6 +- 8 files changed, 78 insertions(+), 65 deletions(-) diff --git a/packages/amesos2/src/Amesos2_CssMKL_decl.hpp b/packages/amesos2/src/Amesos2_CssMKL_decl.hpp index 4a9f7db120e3..4b91a84e892a 100644 --- a/packages/amesos2/src/Amesos2_CssMKL_decl.hpp +++ b/packages/amesos2/src/Amesos2_CssMKL_decl.hpp @@ -289,6 +289,8 @@ namespace Amesos2 { MPI_Fint CssComm_; Teuchos::RCP css_rowmap_; + Teuchos::RCP css_contig_rowmap_; + Teuchos::RCP css_contig_colmap_; }; // End class CssMKL diff --git a/packages/amesos2/src/Amesos2_CssMKL_def.hpp b/packages/amesos2/src/Amesos2_CssMKL_def.hpp index 2a760521dbd0..8bd402816e6e 100644 --- a/packages/amesos2/src/Amesos2_CssMKL_def.hpp +++ b/packages/amesos2/src/Amesos2_CssMKL_def.hpp @@ -56,7 +56,9 @@ namespace Amesos2 { // rowmap for loadA (to have locally contiguous) css_rowmap_ = Teuchos::rcp (new map_type (this->globalNumRows_, nrows, indexBase, matComm)); - + css_contig_rowmap_ = Teuchos::rcp (new map_type (0, 0, indexBase, matComm)); + css_contig_colmap_ = Teuchos::rcp (new map_type (0, 0, indexBase, matComm)); + // set the default matrix type set_css_mkl_matrix_type(); set_css_mkl_default_parameters(pt_, iparm_); @@ -471,7 +473,7 @@ CssMKL::loadA_impl(EPhase current_phase) #if 1 // Only reinex GIDs css_rowmap_ = this->matrixA_->getRowMap(); // use original map to redistribute vectors in solve - Teuchos::RCP > contig_mat = this->matrixA_->reindex(); + Teuchos::RCP > contig_mat = this->matrixA_->reindex(css_contig_rowmap_, css_contig_colmap_); #else // Redistribued matrixA into contiguous GIDs Teuchos::RCP > contig_mat = this->matrixA_->get(ptrInArg(*css_rowmap_)); diff --git a/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp index 45ca54790741..625498165410 100644 --- 
a/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp @@ -52,6 +52,7 @@ namespace Amesos2 { typedef Matrix matrix_t; typedef MatrixAdapter type; typedef ConcreteMatrixAdapter adapter_t; + typedef Tpetra::Map map_t; typedef typename MatrixTraits::global_host_idx_type global_host_idx_t; typedef typename MatrixTraits::global_host_val_type global_host_val_t; @@ -100,7 +101,7 @@ namespace Amesos2 { KV_GO & colind, KV_GS & rowptr, global_size_t& nnz, - const Teuchos::Ptr > rowmap, + const Teuchos::Ptr rowmap, EStorage_Ordering ordering=ARBITRARY, EDistribution distribution=ROOTED) const; // This was placed as last argument to preserve API @@ -151,7 +152,7 @@ namespace Amesos2 { KV_GO & rowind, KV_GS & colptr, global_size_t& nnz, - const Teuchos::Ptr > colmap, + const Teuchos::Ptr colmap, EStorage_Ordering ordering=ARBITRARY, EDistribution distribution=ROOTED) const; // This was placed as last argument to preserve API @@ -199,23 +200,23 @@ namespace Amesos2 { /// Get the local number of non-zeros on this processor size_t getLocalNNZ() const; - Teuchos::RCP > + Teuchos::RCP getMap() const { return static_cast(this)->getMap_impl(); } - Teuchos::RCP > + Teuchos::RCP getRowMap() const { return row_map_; } - Teuchos::RCP > + Teuchos::RCP getColMap() const { return col_map_; } - Teuchos::RCP get(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; - Teuchos::RCP reindex() const; + Teuchos::RCP get(const Teuchos::Ptr map, EDistribution distribution = ROOTED) const; + Teuchos::RCP reindex(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap) const; /// Returns a short description of this Solver std::string description() const; diff --git a/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp index ccca346f9cd5..7d8d92c48328 100644 --- a/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp @@ -41,7 
+41,7 @@ namespace Amesos2 { KV_GO & colind, KV_GS & rowptr, typename MatrixAdapter::global_size_t& nnz, - const Teuchos::Ptr > rowmap, + const Teuchos::Ptr rowmap, EStorage_Ordering ordering, EDistribution distribution) const { @@ -60,7 +60,7 @@ namespace Amesos2 { EDistribution distribution, EStorage_Ordering ordering) const { - const Teuchos::RCP > rowmap + const Teuchos::RCP rowmap = Util::getDistributionMap(distribution, this->getGlobalNumRows(), this->getComm()); @@ -74,7 +74,7 @@ namespace Amesos2 { KV_GO & rowind, KV_GS & colptr, typename MatrixAdapter::global_size_t& nnz, - const Teuchos::Ptr > colmap, + const Teuchos::Ptr colmap, EStorage_Ordering ordering, EDistribution distribution) const { @@ -93,7 +93,7 @@ namespace Amesos2 { EDistribution distribution, EStorage_Ordering ordering) const { - const Teuchos::RCP > colmap + const Teuchos::RCP colmap = Util::getDistributionMap(distribution, this->getGlobalNumCols(), this->getComm()); @@ -212,7 +212,7 @@ namespace Amesos2 { KV_GO & colind, KV_GS & rowptr, typename MatrixAdapter::global_size_t& nnz, - const Teuchos::Ptr > rowmap, + const Teuchos::Ptr rowmap, EDistribution distribution, EStorage_Ordering ordering, no_special_impl nsi) const @@ -232,7 +232,7 @@ namespace Amesos2 { KV_GO & colind, KV_GS & rowptr, typename MatrixAdapter::global_size_t& nnz, - const Teuchos::Ptr > rowmap, + const Teuchos::Ptr rowmap, EDistribution distribution, EStorage_Ordering ordering, row_access ra) const @@ -273,7 +273,7 @@ namespace Amesos2 { // TODO: There may be some more checking between the row map // compatibility, but things are working fine now. 
- RCP > rmap = get_mat->getRowMap(); + RCP rmap = get_mat->getRowMap(); ArrayView node_elements = rmap->getLocalElementList(); //if( node_elements.size() == 0 ) return; // no more contribution typename ArrayView::iterator row_it, row_end; @@ -387,7 +387,7 @@ namespace Amesos2 { KV_GO & rowind, KV_GS & colptr, typename MatrixAdapter::global_size_t& nnz, - const Teuchos::Ptr > colmap, + const Teuchos::Ptr colmap, EDistribution distribution, EStorage_Ordering ordering, no_special_impl nsi) const @@ -407,7 +407,7 @@ namespace Amesos2 { KV_GO & rowind, KV_GS & colptr, typename MatrixAdapter::global_size_t& nnz, - const Teuchos::Ptr > colmap, + const Teuchos::Ptr colmap, EDistribution distribution, EStorage_Ordering ordering, row_access ra) const @@ -508,7 +508,7 @@ namespace Amesos2 { template < class Matrix > Teuchos::RCP > - MatrixAdapter::get(const Teuchos::Ptr > map, EDistribution distribution) const + MatrixAdapter::get(const Teuchos::Ptr map, EDistribution distribution) const { return static_cast(this)->get_impl(map, distribution); } @@ -516,9 +516,9 @@ namespace Amesos2 { template < class Matrix > Teuchos::RCP > - MatrixAdapter::reindex() const + MatrixAdapter::reindex(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap) const { - return static_cast(this)->reindex_impl(); + return static_cast(this)->reindex_impl(contigRowMap, contigColMap); } diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp index 5bac57193d64..9718c89e1025 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp @@ -83,11 +83,12 @@ namespace Amesos2 { typedef typename super_t::global_size_t global_size_t; typedef ConcreteMatrixAdapter type; + typedef Tpetra::Map map_t; ConcreteMatrixAdapter(RCP m); - RCP > get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; - RCP > 
reindex_impl() const; + RCP > get_impl(const Teuchos::Ptr map, EDistribution distribution = ROOTED) const; + RCP > reindex_impl(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap) const; //! Print a description of this adapter to the given output stream void diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp index 1e3dd2309def..bd0fa513be99 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp @@ -44,7 +44,7 @@ namespace Amesos2 { Teuchos::RCP > > ConcreteMatrixAdapter< Tpetra::CrsMatrix - >::get_impl(const Teuchos::Ptr > map, EDistribution distribution) const + >::get_impl(const Teuchos::Ptr map, EDistribution distribution) const { using Teuchos::RCP; using Teuchos::rcp; @@ -68,11 +68,11 @@ namespace Amesos2 { const size_t local_num_contiguous_entries = (myRank == 0) ? t_mat->getGlobalNumRows() : 0; //create maps - typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type; - RCP contiguousRowMap = rcp( new contiguous_map_type(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); - RCP contiguousColMap = rcp( new contiguous_map_type(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); - RCP contiguousDomainMap = rcp( new contiguous_map_type(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); - RCP contiguousRangeMap = rcp( new contiguous_map_type(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); + //typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type; + RCP contiguousRowMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); + RCP contiguousColMap = rcp( new map_t(global_num_contiguous_entries, 
local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); + RCP contiguousDomainMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); + RCP contiguousRangeMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); RCP contiguous_t_mat = rcp( new matrix_t(contiguousRowMap, contiguousColMap, local_matrix) ); contiguous_t_mat->resumeFill(); @@ -93,52 +93,59 @@ namespace Amesos2 { Teuchos::RCP > > ConcreteMatrixAdapter< Tpetra::CrsMatrix - >::reindex_impl() const + >::reindex_impl(Teuchos::RCP &contigRowMap, + Teuchos::RCP &contigColMap) const { - typedef Kokkos::DefaultHostExecutionSpace HostExecSpaceType; - typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type; + typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type; auto rowMap = this->mat_->getRowMap(); auto colMap = this->mat_->getColMap(); auto rowComm = rowMap->getComm(); auto colComm = colMap->getComm(); +#ifdef HAVE_AMESOS2_TIMERS + auto reindexTimer = Teuchos::TimeMonitor::getNewTimer("Time to re-index matrix gids"); + Teuchos::TimeMonitor ReindexTimer(*reindexTimer); +#endif + global_ordinal_t indexBase = rowMap->getIndexBase(); global_ordinal_t numDoFs = this->mat_->getGlobalNumRows(); local_ordinal_t nRows = this->mat_->getLocalNumRows(); local_ordinal_t nCols = colMap->getLocalNumElements(); - auto tmpMap = rcp (new contiguous_map_type (numDoFs, nRows, indexBase, rowComm)); - global_ordinal_t frow = tmpMap->getMinGlobalIndex(); - - // Create new GID list for RowMap - Kokkos::View rowIndexList ("indexList", nRows); - for (local_ordinal_t k = 0; k < nRows; k++) { - rowIndexList(k) = frow+k; + if (contigRowMap->getGlobalNumElements() != numDoFs || contigColMap->getGlobalNumElements() != numDoFs) { + auto tmpMap = rcp (new contiguous_map_type (numDoFs, nRows, indexBase, rowComm)); + global_ordinal_t frow = tmpMap->getMinGlobalIndex(); + + // 
Create new GID list for RowMap + typedef Kokkos::DefaultHostExecutionSpace HostExecSpaceType; + Kokkos::View rowIndexList ("indexList", nRows); + for (local_ordinal_t k = 0; k < nRows; k++) { + rowIndexList(k) = frow+k; + } + // Create new GID list for ColMap + Kokkos::View colIndexList ("indexList", nCols); + typedef Tpetra::MultiVector gid_mv_t; + Teuchos::ArrayView rowIndexArray(rowIndexList.data(), nRows); + Teuchos::ArrayView colIndexArray(colIndexList.data(), nCols); + gid_mv_t row_mv (rowMap, rowIndexArray, nRows, 1); + gid_mv_t col_mv (colMap, colIndexArray, nCols, 1); + typedef Tpetra::Import import_t; + RCP importer = rcp (new import_t (rowMap, colMap)); + col_mv.doImport (row_mv, *importer, Tpetra::INSERT); + { + auto col_view = col_mv.getLocalViewHost(Tpetra::Access::ReadOnly); + for(int i=0; i colIndexList ("indexList", nCols); - typedef Tpetra::MultiVector gid_mv_t; - Teuchos::ArrayView rowIndexArray(rowIndexList.data(), nRows); - Teuchos::ArrayView colIndexArray(colIndexList.data(), nCols); - gid_mv_t row_mv (rowMap, rowIndexArray, nRows, 1); - gid_mv_t col_mv (colMap, colIndexArray, nCols, 1); - typedef Tpetra::Import import_t; - RCP importer = rcp (new import_t (rowMap, colMap)); - col_mv.doImport (row_mv, *importer, Tpetra::INSERT); - { - auto col_view = col_mv.getLocalViewHost(Tpetra::Access::ReadOnly); - for(int i=0; i newRowMap = rcp (new contiguous_map_type (numDoFs, rowIndexList.data(), nRows, indexBase, rowComm)); - Teuchos::RCP newColMap = rcp (new contiguous_map_type (numDoFs, colIndexList.data(), nCols, indexBase, colComm)); - // Build Matrix with new Maps, auto lclMatrix = this->mat_->getLocalMatrixDevice(); - RCP contiguous_t_mat = rcp( new matrix_t(newRowMap, newColMap, lclMatrix)); + RCP contiguous_t_mat = rcp( new matrix_t(contigRowMap, contigColMap, lclMatrix)); return rcp (new ConcreteMatrixAdapter (contiguous_t_mat)); } diff --git a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_decl.hpp 
b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_decl.hpp index 62381e188743..a2313e10e1a1 100644 --- a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_decl.hpp @@ -146,7 +146,7 @@ namespace Amesos2 { // different (cf subclasses of Tpetra::CrsMatrix), this method // hands off implementation to the adapter for the subclass RCP get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; - RCP reindex_impl() const; + RCP reindex_impl(Teuchos::RCP> &contigRowMap, Teuchos::RCP> &contigColMap) const; template void getSparseRowPtr_kokkos_view(KV & view) const { diff --git a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp index f74a3dd8753b..9b8a0ec16d68 100644 --- a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp @@ -334,13 +334,13 @@ namespace Amesos2 { RCP > AbstractConcreteMatrixAdapter< Tpetra::RowMatrix, DerivedMat - >::reindex_impl() const + >::reindex_impl(Teuchos::RCP> &contigRowMap, Teuchos::RCP> &contigColMap) const { #ifdef __CUDACC__ // NVCC doesn't seem to like the static_cast, even though it is valid - return dynamic_cast*>(this)->reindex_impl(map, distribution); + return dynamic_cast*>(this)->reindex_impl(contigRowMap, contigColMap); #else - return static_cast*>(this)->reindex_impl(map, distribution); + return static_cast*>(this)->reindex_impl(contigRowMap, contigColMap); #endif } From ba7433acc0f62011734a8a3d55d683575d1083fc Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Wed, 2 Oct 2024 07:29:48 -0600 Subject: [PATCH 28/50] Amesos2 : reuse importer & exporter for reindexing Signed-off-by: iyamazaki --- .../Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 
deletions(-) diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp index bd0fa513be99..670c54719851 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp @@ -112,6 +112,7 @@ namespace Amesos2 { local_ordinal_t nRows = this->mat_->getLocalNumRows(); local_ordinal_t nCols = colMap->getLocalNumElements(); + RCP contiguous_t_mat; if (contigRowMap->getGlobalNumElements() != numDoFs || contigColMap->getGlobalNumElements() != numDoFs) { auto tmpMap = rcp (new contiguous_map_type (numDoFs, nRows, indexBase, rowComm)); global_ordinal_t frow = tmpMap->getMinGlobalIndex(); @@ -142,11 +143,17 @@ namespace Amesos2 { // Create new Row & Col Maps contigRowMap = rcp (new contiguous_map_type (numDoFs, rowIndexList.data(), nRows, indexBase, rowComm)); contigColMap = rcp (new contiguous_map_type (numDoFs, colIndexList.data(), nCols, indexBase, colComm)); - } - // Build Matrix with new Maps, - auto lclMatrix = this->mat_->getLocalMatrixDevice(); - RCP contiguous_t_mat = rcp( new matrix_t(contigRowMap, contigColMap, lclMatrix)); + // Create contiguous Matrix + auto lclMatrix = this->mat_->getLocalMatrixDevice(); + contiguous_t_mat = rcp( new matrix_t(contigRowMap, contigColMap, lclMatrix)); + } else { + // Build Matrix with contiguous Maps + auto lclMatrix = this->mat_->getLocalMatrixDevice(); + auto importer = this->mat_->getCrsGraph()->getImporter(); + auto exporter = this->mat_->getCrsGraph()->getExporter(); + contiguous_t_mat = rcp( new matrix_t(lclMatrix, contigRowMap, contigColMap, contigRowMap, contigColMap, importer,exporter)); + } return rcp (new ConcreteMatrixAdapter (contiguous_t_mat)); } From c3e954ea64885ab76483cded1b35bf95f775b6ba Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Tue, 3 Dec 2024 22:18:06 -0700 Subject: [PATCH 29/50] Amesos2 : reindexing with Epetra (and extend testing 
with gapped indexes) Signed-off-by: iyamazaki --- .../amesos2/example/GappedMtxGIDs-1proc.cpp | 20 +++++--- .../amesos2/example/quick_solve_epetra.cpp | 50 ++++++++++++++++--- ...os2_EpetraCrsMatrix_MatrixAdapter_decl.hpp | 17 ++++++- ...sos2_EpetraCrsMatrix_MatrixAdapter_def.hpp | 37 ++++++++++++++ 4 files changed, 110 insertions(+), 14 deletions(-) diff --git a/packages/amesos2/example/GappedMtxGIDs-1proc.cpp b/packages/amesos2/example/GappedMtxGIDs-1proc.cpp index 4493ee0558b0..49238e16d120 100644 --- a/packages/amesos2/example/GappedMtxGIDs-1proc.cpp +++ b/packages/amesos2/example/GappedMtxGIDs-1proc.cpp @@ -160,8 +160,10 @@ int main(int argc, char *argv[]) { "does not result in the same Map."); } - if ( myRank == 0 && verbose ) { - *fos << "\nrowMap->describe output:" << endl; + if ( verbose ) { + if ( myRank == 0 ) { + *fos << "\nrowMap->describe output:" << endl; + } rowMap->describe(*fos, Teuchos::VERB_EXTREME); } @@ -186,16 +188,20 @@ int main(int argc, char *argv[]) { A = readCrsMatrixFromFile (mtx_name, fos, rowMap, domainMap, rangeMap, convert_mtx_to_zero_base, num_header_lines); } - if ( myRank == 0 && verbose ) { - *fos << "A->describe" << endl; + if ( verbose ) { + if ( myRank == 0 ) { + *fos << "A->describe" << endl; + } A->describe(*fos, Teuchos::VERB_EXTREME); } RCP RHS; RHS = Tpetra::MatrixMarket::Reader::readDenseFile (rhs_name, comm, rangeMap); - if ( myRank == 0 && verbose ) { - *fos << "RHS->describe" << endl; + if ( verbose ) { + if ( myRank == 0 ) { + *fos << "RHS->describe" << endl; + } RHS->describe(*fos, Teuchos::VERB_EXTREME); } @@ -410,6 +416,8 @@ readCrsMatrixFromFile (const std::string& matrixFilename, for (typename Teuchos::Array::size_type i=0; iinsertGlobalValues (gblRowInds[i], gblColInds(i,1), vals(i,1)); } + } else { + A = Teuchos::rcp(new MAT(rowMap, 0)); } A->fillComplete (domainMap, rangeMap); diff --git a/packages/amesos2/example/quick_solve_epetra.cpp b/packages/amesos2/example/quick_solve_epetra.cpp index 
6367330d1106..ffe95f2a1a1a 100644 --- a/packages/amesos2/example/quick_solve_epetra.cpp +++ b/packages/amesos2/example/quick_solve_epetra.cpp @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -38,18 +39,24 @@ #include "Amesos2.hpp" #include "Amesos2_Version.hpp" +#include "Amesos2_Util.hpp" int main(int argc, char *argv[]) { Teuchos::GlobalMPISession mpiSession(&argc,&argv); typedef Epetra_CrsMatrix MAT; typedef Epetra_MultiVector MV; + typedef Tpetra::CrsMatrix<> TpetraMAT; + typedef Tpetra::Map<>::local_ordinal_type LO; + typedef Tpetra::Map<>::global_ordinal_type GO; + typedef Tpetra::Map<>::node_type NO; using Tpetra::global_size_t; using Teuchos::tuple; using Teuchos::RCP; using Teuchos::rcp; + #ifdef HAVE_MPI const Epetra_MpiComm comm (MPI_COMM_WORLD); #else @@ -68,15 +75,21 @@ int main(int argc, char *argv[]) { std::string solver_name = "SuperLU"; std::string filedir = "../test/matrices/"; std::string filename = "arc130.mtx"; + std::string map_filename = ""; + bool make_contiguous = false; Teuchos::CommandLineProcessor cmdp(false,true); cmdp.setOption("verbose","quiet",&verbose,"Print messages and results."); cmdp.setOption("filedir",&filedir,"Directory where matrix-market files are located"); cmdp.setOption("filename",&filename,"Filename for Matrix-Market test matrix."); + cmdp.setOption("map_filename",&map_filename,"Filename for rowMap of test matrix."); cmdp.setOption("print-matrix","no-print-matrix",&printMatrix,"Print the full matrix after reading it."); cmdp.setOption("print-solution","no-print-solution",&printSolution,"Print solution vector after solve."); cmdp.setOption("print-timing","no-print-timing",&printTiming,"Print solver timing statistics"); cmdp.setOption("solver", &solver_name, "Which TPL solver library to use."); + cmdp.setOption("makeContiguous","isContiguous",&make_contiguous, "Set this option to makeContiguous if matrix has gapped row ids"); + if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) 
{ + std::cerr << solver_name << " failed to process command-line args. Exiting..." << std::endl; return -1; } @@ -92,10 +105,20 @@ int main(int argc, char *argv[]) { std::string mat_pathname = filedir + filename; MAT* A; - int ret = EpetraExt::MatrixMarketFileToCrsMatrix(mat_pathname.c_str(), comm, A, false, false); - if( ret == -1 ){ - *fos << "error reading matrix file from disk, aborting..." << std::endl; - return EXIT_FAILURE; + if (map_filename != "") { + auto rowTMap = Tpetra::MatrixMarket::Reader< TpetraMAT >::readMapFile(map_filename, Tpetra::getDefaultComm()); + auto rowEMap = Amesos2::Util::tpetra_map_to_epetra_map(*(rowTMap.getRawPtr())); + int ret = EpetraExt::MatrixMarketFileToCrsMatrix(mat_pathname.c_str(), *rowEMap, A, false, verbose); + if( ret == -1 ){ + *fos << "error reading matrix file (" << mat_pathname << ") with map (" << map_filename << ") from disk, aborting..." << std::endl; + return EXIT_FAILURE; + } + } else { + int ret = EpetraExt::MatrixMarketFileToCrsMatrix(mat_pathname.c_str(), comm, A, false, verbose); + if( ret == -1 ){ + *fos << "error reading matrix file from disk, aborting..." 
<< std::endl; + return EXIT_FAILURE; + } } if( printMatrix ){ @@ -122,16 +145,29 @@ int main(int argc, char *argv[]) { return 0; } + Teuchos::ParameterList amesos2_params("Amesos2"); + if ( make_contiguous ) { + if( myRank == 0 ) { *fos << " set IsContigous==false in solver parameter list" << endl; } + amesos2_params.sublist(solver->name()).set("IsContiguous", false, "Are GIDs Contiguous"); + } #ifdef HAVE_AMESOS2_SHYLU_NODEBASKER - if( Amesos2::query("shylubasker") ) { - Teuchos::ParameterList amesos2_params("Amesos2"); + if( Amesos2::query("shylubasker") && solver->name() == "ShyLUBasker") { amesos2_params.sublist(solver_name).set("num_threads", 1, "Number of threads"); - solver->setParameters( Teuchos::rcpFromRef(amesos2_params) ); } #endif + solver->setParameters( Teuchos::rcpFromRef(amesos2_params) ); solver->solve(); + { + double nrmR, nrmB; + RCP R = rcp(new MV(rngmap,numVectors)); + A->Apply(*X, *R); + R->Update(1.0, *B, -1.0); + R->Norm2(&nrmR); + B->Norm2(&nrmB); + if( myRank == 0 ) { *fos << std::endl << nrmR << " / " << nrmB << " = " << nrmR/nrmB << std::endl << std::endl; } + } if( printSolution ){ // Print the solution X->Print(*(fos->getOStream())); diff --git a/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_decl.hpp index 0f376c01bb41..c75d3c42c035 100644 --- a/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_decl.hpp @@ -24,6 +24,9 @@ #include "Amesos2_config.h" #include +#ifdef HAVE_AMESOS2_EPETRAEXT +#include +#endif #include "Amesos2_EpetraRowMatrix_AbstractMatrixAdapter_decl.hpp" #include "Amesos2_MatrixAdapter_decl.hpp" @@ -69,7 +72,19 @@ namespace Amesos2 { ConcreteMatrixAdapter(RCP m); RCP > get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; - + RCP > reindex_impl(Teuchos::RCP> &contigRowMap, + Teuchos::RCP> &contigColMap) const; + + //! 
Print a description of this adapter to the given output stream + void + describe (Teuchos::FancyOStream& os, + const Teuchos::EVerbosityLevel verbLevel = + Teuchos::Describable::verbLevel_default) const; +#ifdef HAVE_AMESOS2_EPETRAEXT + private: + mutable RCP StdIndex_; + mutable RCP ContigMat_; +#endif }; } // end namespace Amesos2 diff --git a/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_def.hpp index d29a00991eb3..0e07e7abc81c 100644 --- a/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_def.hpp @@ -46,6 +46,43 @@ namespace Amesos2 { return( rcp(new ConcreteMatrixAdapter(t_mat)) ); } + Teuchos::RCP > + ConcreteMatrixAdapter::reindex_impl(Teuchos::RCP > &contigRowMap, + Teuchos::RCP > &contigColMap) const + { + #if defined(HAVE_AMESOS2_EPETRAEXT) + using Teuchos::RCP; + using Teuchos::rcp; + using Teuchos::rcpFromRef; + auto CrsMatrix = const_cast(this->mat_.getRawPtr()); + if(!CrsMatrix) { + TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Amesos2_EpetraCrsMatrix_MatrixAdapter requires CsrMatrix to reindex matrices."); + } + + // Map + RCP OriginalMap = rcpFromRef(CrsMatrix->RowMap()); + int NumGlobalElements = OriginalMap->NumGlobalElements(); + int NumMyElements = OriginalMap->NumMyElements(); + auto ReindexMap = rcp( new Epetra_Map( NumGlobalElements, NumMyElements, 0, OriginalMap->Comm() ) ); + + // Matrix + StdIndex_ = rcp( new EpetraExt::CrsMatrix_Reindex( *ReindexMap ) ); + ContigMat_ = rcpFromRef((*StdIndex_)( *CrsMatrix )); + if(!ContigMat_) { + TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Amesos2_EpetraCrsMatrix_MatrixAdapter reindexing failed."); + } + return rcp(new ConcreteMatrixAdapter(ContigMat_)); + #else + TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "ConcreteMatrixAdapter requires EpetraExt to reindex matrices."); + #endif + } + + void + 
ConcreteMatrixAdapter::describe (Teuchos::FancyOStream& os, + const Teuchos::EVerbosityLevel verbLevel) const + { + this->mat_->Print(*(os.getOStream())); + } } // end namespace Amesos2 #endif // AMESOS2_EPETRACRSMATRIX_MATRIXADAPTER_DEF_HPP From 353e695cd0cf4405b489570be22e9d2f237d9e11 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Wed, 4 Dec 2024 08:33:48 -0700 Subject: [PATCH 30/50] Amesos2 : a compile error Signed-off-by: iyamazaki --- packages/amesos2/example/quick_solve_epetra.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/amesos2/example/quick_solve_epetra.cpp b/packages/amesos2/example/quick_solve_epetra.cpp index ffe95f2a1a1a..dfb3c4301b79 100644 --- a/packages/amesos2/example/quick_solve_epetra.cpp +++ b/packages/amesos2/example/quick_solve_epetra.cpp @@ -147,7 +147,7 @@ int main(int argc, char *argv[]) { Teuchos::ParameterList amesos2_params("Amesos2"); if ( make_contiguous ) { - if( myRank == 0 ) { *fos << " set IsContigous==false in solver parameter list" << endl; } + if( myRank == 0 ) { *fos << " set IsContigous==false in solver parameter list" << std::endl; } amesos2_params.sublist(solver->name()).set("IsContiguous", false, "Are GIDs Contiguous"); } #ifdef HAVE_AMESOS2_SHYLU_NODEBASKER From a6ee4029fc560a2385f7f3408b5bd99b47f40695 Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Wed, 4 Dec 2024 17:16:46 -0600 Subject: [PATCH 31/50] Explicitly define default case for "store_true" argparse Argparse "store_true" action implicitly defaults to false. Explicitly define false as the default case and update the help message. 
Signed-off-by: Anderson Chauphan --- packages/framework/pr_tools/PullRequestLinuxDriverTest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/framework/pr_tools/PullRequestLinuxDriverTest.py b/packages/framework/pr_tools/PullRequestLinuxDriverTest.py index 7fa3724dfdc1..40f63e275df1 100755 --- a/packages/framework/pr_tools/PullRequestLinuxDriverTest.py +++ b/packages/framework/pr_tools/PullRequestLinuxDriverTest.py @@ -199,9 +199,10 @@ def parse_args(): optional.add_argument('--skip-create-packageenables', dest="skip_create_packageenables", action="store_true", + default=False, help="Skip the creation of the packageEnables.cmake fragment file generated by " + \ "the TriBITS infrastructure indicating which packages are to be enabled based on file " + \ - "changes between a source and target branch. Default=") + "changes between a source and target branch. Default=False") desc_subprojects_file = "The subprojects_file is used by the testing infrastructure. This parameter " + \ "allows the default, generated file, to be overridden. Generally this should " + \ From 0e996a775c866230ac5c4e1edc21d96398386378 Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Wed, 4 Dec 2024 17:17:16 -0600 Subject: [PATCH 32/50] Add creation of empty packageEnables and subprojects Add the creation of empty packageEnables.cmake and package_subproject_list.cmake files when `--skip-create-packageenables` argument is passed. This is due to SimpleTesting CTest drivers requiring these files to exist. packageEnables.cmake and package_subproject_list.cmake are created by get-changed-trilinos-packages.sh script which we do not always want to generate for cases such as Framework test line, where we do not want to spend extra compute resources building packages from changed files. We let the other PR builds handle that. 
Signed-off-by: Anderson Chauphan --- .../trilinosprhelpers/TrilinosPRConfigurationStandard.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py index 401824ea8b6d..69f1ad653523 100644 --- a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py +++ b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py @@ -54,6 +54,15 @@ def execute_test(self): if not self.args.dry_run: gc.write_cmake_fragment() + if self.arg_skip_create_packageenables: + print("Optional --skip_create_packageenables found. " + + "Creating dummy packageEnables.cmake and package_subproject_list.cmake " + + "for CTest drivers.") + with open(self.arg_filename_packageenables, 'w'): + pass + with open(self.arg_filename_subprojects, 'w'): + pass + # Execute the call to ctest. verbosity_flag = "-VV" if "BUILD_NUMBER" in os.environ: From 76b9512123ef8fa7e8d3c5b8ad9f1b3d95d19eb5 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Thu, 5 Dec 2024 12:31:58 -0700 Subject: [PATCH 33/50] Amesos2 : remove parameters, not used any longer Signed-off-by: iyamazaki --- packages/amesos2/src/Amesos2_CssMKL_def.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/amesos2/src/Amesos2_CssMKL_def.hpp b/packages/amesos2/src/Amesos2_CssMKL_def.hpp index 8bd402816e6e..b0191f7c7921 100644 --- a/packages/amesos2/src/Amesos2_CssMKL_def.hpp +++ b/packages/amesos2/src/Amesos2_CssMKL_def.hpp @@ -439,9 +439,6 @@ CssMKL::getValidParameters_impl() const pl->set("IPARM(28)", as(iparm_temp[27]), "Check input matrix is sorted", anyNumberParameterEntryValidator(preferred_int, accept_int)); - pl->set("useZoltan2", false, "Use Zoltan2 for re-distribution"); - pl->set("useParMETIS", false, "Use ParMETIS for re-distribution"); - pl->set("IsContiguous", true, "Whether GIDs contiguous"); pl->set("verbose", 0, 
"Verbosity Message Level"); From 39244f170b89f21ab9586f780ecea7623e52c81a Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Thu, 5 Dec 2024 12:50:18 -0700 Subject: [PATCH 34/50] Amesos2 : update comment Signed-off-by: iyamazaki --- packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp index 7d8d92c48328..14fa5dd3305a 100644 --- a/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp @@ -177,7 +177,7 @@ namespace Amesos2 { MatrixAdapter::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const { - // TODO : Make sure to implement in all specialization (Only in Tpetra::CrsMatrix) + // (implemented for Epetra::CrsMatrix & Tpetra::CrsMatrix) return static_cast(this)->describe(out, verbLevel); } From cd9ef979bbb18d1375b9f2b58e8bbbad28078c96 Mon Sep 17 00:00:00 2001 From: Anderson Date: Thu, 5 Dec 2024 14:00:37 -0700 Subject: [PATCH 35/50] Fix skip_create_package_enables test Using patch for the desired member function on the version of anaconda used on our AT1 Framework build lined did not work as it could not find the `create_package_enables_file` attribute in `TrilinosPRConfigurationBase`. Loading an older sems-anaconda version allowed the behavior and found the `trilinosprhelpers.TrilinosPRConfigurationBase.create_package_enables_file` attribute. In order to keep using our current version of aue/anaconda, use the implemented work around for mocking the member function. 
Signed-off-by: Anderson --- .../unittests/test_TrilinosPRConfigurationBase.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py index da3d44a382da..ffce49219b97 100755 --- a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py +++ b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py @@ -707,11 +707,10 @@ def test_TrilinosPRConfigurationBase_prepare_test_skip_create_package_enables_fi args.skip_create_packageenables = True pr_config = trilinosprhelpers.TrilinosPRConfigurationBase(args) - with patch('trilinosprhelpers.TrilinosPRConfigurationBase.create_package_enables_file') as m_call: - pr_config.prepare_test() + trilinosprhelpers.TrilinosPRConfigurationBase.create_package_enables_file = Mock() + pr_config.prepare_test() - expected_call_count = 0 - self.assertEqual(m_call.call_count, expected_call_count) + pr_config.create_package_enables_file.assert_not_called() def test_TrilinosPRConfigurationBase_prepare_test_FAIL(self): From 49e08eb04dd302f464500485bbab48de6bbeefba Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Thu, 5 Dec 2024 16:54:31 -0600 Subject: [PATCH 36/50] Re-scope mocked method Mocked a method at too large of scope and it affected other tests. 
Signed-off-by: Anderson Chauphan --- .../pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py | 2 +- .../unittests/test_TrilinosPRConfigurationBase.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py index 02b3a9f22fe6..616b23861268 100644 --- a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py +++ b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py @@ -630,7 +630,6 @@ def create_package_enables_file(self, dryrun=False): job_name = self.arg_pr_jenkins_job_name enable_map_entry = self.get_multi_property_from_config("ENABLE_MAP", job_name, delimeter=" ") - # Generate files using ATDM/TriBiTS Scripts if enable_map_entry is None: cmd = [os.path.join( self.arg_workspace_dir, @@ -741,6 +740,7 @@ def prepare_test(self): self.message("--- arg_ctest_driver = {}".format(self.arg_ctest_driver)) self.message("--- arg_ctest_drop_site = {}".format(self.arg_ctest_drop_site)) self.message("--- arg_ccache_enable = {}".format(self.arg_ccache_enable)) + self.message("--- arg_skip_create_packageenables = {}".format(self.arg_skip_create_packageenables)) self.message("") self.message("--- concurrency_build = {}".format(self.concurrency_build)) self.message("--- concurrency_test = {}".format(self.concurrency_test)) diff --git a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py index ffce49219b97..b5673ff8901d 100755 --- a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py +++ b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py @@ -707,7 +707,7 @@ def test_TrilinosPRConfigurationBase_prepare_test_skip_create_package_enables_fi 
args.skip_create_packageenables = True pr_config = trilinosprhelpers.TrilinosPRConfigurationBase(args) - trilinosprhelpers.TrilinosPRConfigurationBase.create_package_enables_file = Mock() + pr_config.create_package_enables_file = Mock() pr_config.prepare_test() pr_config.create_package_enables_file.assert_not_called() From 3654747763b2b935a0781c8cd80a8be9c6523efb Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Mon, 9 Dec 2024 08:55:20 -0700 Subject: [PATCH 37/50] Panzer: fix for changes in kokkos 4.5 for MI300A Signed-off-by: Roger Pawlowski --- .../disc-fe/src/Panzer_BasisValues2_impl.hpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/panzer/disc-fe/src/Panzer_BasisValues2_impl.hpp b/packages/panzer/disc-fe/src/Panzer_BasisValues2_impl.hpp index b036a7f7fba2..bbcea92f0315 100644 --- a/packages/panzer/disc-fe/src/Panzer_BasisValues2_impl.hpp +++ b/packages/panzer/disc-fe/src/Panzer_BasisValues2_impl.hpp @@ -1120,7 +1120,7 @@ getBasisValues(const bool weighted, // while create_mirror_view creates views in UVMSpace or // HIPSpace. These are not "assignable" in kokkos. We do an // inefficient copy if UVM or UNIFIED_MEMORY is enabled. -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) #ifdef KOKKOS_ENABLE_CUDA if constexpr (std::is_same::value) { #else @@ -1174,7 +1174,7 @@ getBasisValues(const bool weighted, } else if(element_space == PureBasis::HGRAD || element_space == PureBasis::CONST) { fst::HGRADtransformVALUE(s_aux,s_ref); } -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) } #endif PHX::Device().fence(); @@ -1292,7 +1292,7 @@ getVectorBasisValues(const bool weighted, // while create_mirror_view creates views in UVMSpace or // HIPSpace. These are not "assignable" in kokkos. 
We do an // inefficient copy if UVM or UNIFIED_MEMORY is enabled. -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) #ifdef KOKKOS_ENABLE_CUDA if constexpr (std::is_same::value) { #else @@ -1352,7 +1352,7 @@ getVectorBasisValues(const bool weighted, auto s_jac_det = Kokkos::subview(cubature_jacobian_determinant_.get_view(), cell_range, Kokkos::ALL()); fst::HDIVtransformVALUE(s_aux,s_jac, s_jac_det, s_ref); } -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) } #endif PHX::Device().fence(); @@ -1456,7 +1456,7 @@ getGradBasisValues(const bool weighted, // while create_mirror_view creates views in UVMSpace or // HIPSpace. These are not "assignable" in kokkos. We do an // inefficient copy if UVM or UNIFIED_MEMORY is enabled. -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) #ifdef KOKKOS_ENABLE_CUDA if constexpr (std::is_same::value) { #else @@ -1506,7 +1506,7 @@ getGradBasisValues(const bool weighted, // Apply transformation using fst=Intrepid2::FunctionSpaceTools; fst::HGRADtransformGRAD(s_aux, s_jac_inv, s_ref); -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) } #endif PHX::Device().fence(); @@ -1611,7 +1611,7 @@ getCurl2DVectorBasis(const bool weighted, // while create_mirror_view creates views in UVMSpace or // HIPSpace. These are not "assignable" in kokkos. We do an // inefficient copy if UVM or UNIFIED_MEMORY is enabled. 
-#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) #ifdef KOKKOS_ENABLE_CUDA if constexpr (std::is_same::value) { #else @@ -1665,7 +1665,7 @@ getCurl2DVectorBasis(const bool weighted, // the divergence space in 2D! using fst=Intrepid2::FunctionSpaceTools; fst::HDIVtransformDIV(s_aux,s_jac_det,s_ref); -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) } #endif PHX::Device().fence(); @@ -1767,7 +1767,7 @@ getCurlVectorBasis(const bool weighted, // while create_mirror_view creates views in UVMSpace or // HIPSpace. These are not "assignable" in kokkos. We do an // inefficient copy if UVM or UNIFIED_MEMORY is enabled. -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) #ifdef KOKKOS_ENABLE_CUDA if constexpr (std::is_same::value) { #else @@ -1817,7 +1817,7 @@ getCurlVectorBasis(const bool weighted, using fst=Intrepid2::FunctionSpaceTools; fst::HCURLtransformCURL(s_aux, s_jac, s_jac_det, s_ref); -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) } #endif PHX::Device().fence(); @@ -1917,7 +1917,7 @@ getDivVectorBasis(const bool weighted, // while create_mirror_view creates views in UVMSpace or // HIPSpace. These are not "assignable" in kokkos. We do an // inefficient copy if UVM or UNIFIED_MEMORY is enabled. 
-#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) #ifdef KOKKOS_ENABLE_CUDA if constexpr (std::is_same::value) { #else @@ -1965,7 +1965,7 @@ getDivVectorBasis(const bool weighted, using fst=Intrepid2::FunctionSpaceTools; fst::HDIVtransformDIV(s_aux,s_jac_det,s_ref); -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) } #endif PHX::Device().fence(); From ec2a0d554e8a4e8671bc1bfcb9bb4a2ae6d6ccf7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 22:41:56 +0000 Subject: [PATCH 38/50] Bump github/codeql-action from 3.27.5 to 3.27.6 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.27.5 to 3.27.6. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/f09c1c0a94de965c15400f5634aa42fac8fb8f88...aa578102511db1f4524ed59b8cc2bae4f6e88195) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/codeql.yml | 4 ++-- .github/workflows/scorecards.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 19c03bf2714f..5a5e701def17 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -45,7 +45,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Initialize CodeQL - uses: github/codeql-action/init@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5 + uses: github/codeql-action/init@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} @@ -108,6 +108,6 @@ jobs: ninja -j 16 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5 + uses: github/codeql-action/analyze@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 1cbaf2b3c6e4..88c2a1fcf484 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -66,6 +66,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5 + uses: github/codeql-action/upload-sarif@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6 with: sarif_file: results.sarif From 9bb65b7b76260d6dd48145d45d0447490ac85f27 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Mon, 9 Dec 2024 14:23:06 -0700 Subject: [PATCH 39/50] Amesos2 : comments on the new features Signed-off-by: iyamazaki --- packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp | 3 +++ .../amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp | 2 ++ 2 files changed, 5 insertions(+) diff --git a/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp index 625498165410..812e6ac7321e 100644 --- a/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp @@ -216,6 +216,9 @@ namespace Amesos2 { } Teuchos::RCP get(const Teuchos::Ptr map, EDistribution distribution = ROOTED) const; + + /// Reindex the GIDs such that they are contiguous without gaps (0, .., n-1) + /// This is called in loadA for the matrix with (DISTRIBUTED_NO_OVERLAP && !is_contiguous_) Teuchos::RCP reindex(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap) const; /// Returns a short description of this Solver diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp index 670c54719851..9bf58d9d82df 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp @@ -113,6 +113,8 @@ namespace Amesos2 { local_ordinal_t nCols = colMap->getLocalNumElements(); RCP contiguous_t_mat; + // if-checks when to recompute contigRowMap & contigColMap + // TODO: this is currentlly based on the global matrix dimesions if (contigRowMap->getGlobalNumElements() != numDoFs || 
contigColMap->getGlobalNumElements() != numDoFs) { auto tmpMap = rcp (new contiguous_map_type (numDoFs, nRows, indexBase, rowComm)); global_ordinal_t frow = tmpMap->getMinGlobalIndex(); From e999586547da956f8d8e0efae9e41a44f56c37c3 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 10 Dec 2024 07:53:05 -0700 Subject: [PATCH 40/50] MueLu CoalesceDrop: Error out if dropTol > 1.0 Signed-off-by: Christian Glusa --- .../MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp index 11aa186788b3..d848a823743c 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp @@ -348,6 +348,10 @@ void CoalesceDropFactory::Build(Level } else GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + + if (((algo == "classical") && (classicalAlgoStr.find("scaled") != std::string::npos)) || ((algo == "distance laplacian") && (distanceLaplacianAlgoStr.find("scaled") != std::string::npos))) + TEUCHOS_TEST_FOR_EXCEPTION(realThreshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0"); + Set(currentLevel, "Filtering", (threshold != STS::zero())); const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); From 468ceda14effae33f4a58c33722a78f84a1c6aaf Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 10 Dec 2024 08:08:40 -0700 Subject: [PATCH 41/50] MueLu: Change gen_UseShortNames.sh to generate source files with headers Signed-off-by: Christian Glusa --- packages/muelu/src/Headers/LO-GO-NO.tmpl | 9 -------- 
packages/muelu/src/Headers/Non-Templated.tmpl | 9 -------- packages/muelu/src/Headers/SC-LO-GO-NO.tmpl | 9 -------- .../muelu/src/Headers/gen_UseShortNames.sh | 23 ++++++++++++++++--- 4 files changed, 20 insertions(+), 30 deletions(-) diff --git a/packages/muelu/src/Headers/LO-GO-NO.tmpl b/packages/muelu/src/Headers/LO-GO-NO.tmpl index a02cc88af6fe..279e44678a32 100644 --- a/packages/muelu/src/Headers/LO-GO-NO.tmpl +++ b/packages/muelu/src/Headers/LO-GO-NO.tmpl @@ -1,12 +1,3 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - #ifdef MUELU_$TMPL_UPPERCASECLASS_SHORT using $TMPL_CLASS [[maybe_unused]] = MueLu::$TMPL_CLASS; #endif diff --git a/packages/muelu/src/Headers/Non-Templated.tmpl b/packages/muelu/src/Headers/Non-Templated.tmpl index 6b7294ffa2df..c0993b86a4d5 100644 --- a/packages/muelu/src/Headers/Non-Templated.tmpl +++ b/packages/muelu/src/Headers/Non-Templated.tmpl @@ -1,12 +1,3 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. 
-// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - #ifdef MUELU_$TMPL_UPPERCASECLASS_SHORT using $TMPL_CLASS [[maybe_unused]] = MueLu::$TMPL_CLASS; #endif diff --git a/packages/muelu/src/Headers/SC-LO-GO-NO.tmpl b/packages/muelu/src/Headers/SC-LO-GO-NO.tmpl index 2004b62dfbfa..86161dee5add 100644 --- a/packages/muelu/src/Headers/SC-LO-GO-NO.tmpl +++ b/packages/muelu/src/Headers/SC-LO-GO-NO.tmpl @@ -1,12 +1,3 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - #ifdef MUELU_$TMPL_UPPERCASECLASS_SHORT using $TMPL_CLASS [[maybe_unused]] = MueLu::$TMPL_CLASS; #endif diff --git a/packages/muelu/src/Headers/gen_UseShortNames.sh b/packages/muelu/src/Headers/gen_UseShortNames.sh index 49a723c4a424..7bbe12dc5e1e 100755 --- a/packages/muelu/src/Headers/gen_UseShortNames.sh +++ b/packages/muelu/src/Headers/gen_UseShortNames.sh @@ -6,7 +6,16 @@ classListDir=../Utils/ClassList/ -echo "// Type definitions for templated classes (generally graph-related) that do not require a scalar." > MueLu_UseShortNamesOrdinal.hpp +echo "// @HEADER" > MueLu_UseShortNamesOrdinal.hpp +echo "// *****************************************************************************" >> MueLu_UseShortNamesOrdinal.hpp +echo "// MueLu: A package for multigrid based preconditioning" >> MueLu_UseShortNamesOrdinal.hpp +echo "//" >> MueLu_UseShortNamesOrdinal.hpp +echo "// Copyright 2012 NTESS and the MueLu contributors." 
>> MueLu_UseShortNamesOrdinal.hpp +echo "// SPDX-License-Identifier: BSD-3-Clause" >> MueLu_UseShortNamesOrdinal.hpp +echo "// *****************************************************************************" >> MueLu_UseShortNamesOrdinal.hpp +echo "// @HEADER" >> MueLu_UseShortNamesOrdinal.hpp +echo "" >> MueLu_UseShortNamesOrdinal.hpp +echo "// Type definitions for templated classes (generally graph-related) that do not require a scalar." >> MueLu_UseShortNamesOrdinal.hpp echo >> MueLu_UseShortNamesOrdinal.hpp echo "#include " >> MueLu_UseShortNamesOrdinal.hpp echo >> MueLu_UseShortNamesOrdinal.hpp @@ -27,8 +36,16 @@ done # # Scalar # - -echo "// New definition of types using the types Scalar, LocalOrdinal, GlobalOrdinal, Node of the current context." > MueLu_UseShortNamesScalar.hpp +echo "// @HEADER" > MueLu_UseShortNamesScalar.hpp +echo "// *****************************************************************************" >> MueLu_UseShortNamesScalar.hpp +echo "// MueLu: A package for multigrid based preconditioning" >> MueLu_UseShortNamesScalar.hpp +echo "//" >> MueLu_UseShortNamesScalar.hpp +echo "// Copyright 2012 NTESS and the MueLu contributors." >> MueLu_UseShortNamesScalar.hpp +echo "// SPDX-License-Identifier: BSD-3-Clause" >> MueLu_UseShortNamesScalar.hpp +echo "// *****************************************************************************" >> MueLu_UseShortNamesScalar.hpp +echo "// @HEADER" >> MueLu_UseShortNamesScalar.hpp +echo "" >> MueLu_UseShortNamesScalar.hpp +echo "// New definition of types using the types Scalar, LocalOrdinal, GlobalOrdinal, Node of the current context." 
>> MueLu_UseShortNamesScalar.hpp echo >> MueLu_UseShortNamesScalar.hpp echo "#include " >> MueLu_UseShortNamesScalar.hpp echo >> MueLu_UseShortNamesScalar.hpp From 9a371c36e13b0db6af10ee6b4f3430ca3e22e99c Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 10 Dec 2024 08:13:49 -0700 Subject: [PATCH 42/50] MueLu: Remove deprecated MLParameterListInterpreter Signed-off-by: Christian Glusa --- .../src/Headers/MueLu_UseShortNamesScalar.hpp | 3 - .../MueLu_MLParameterListInterpreter_decl.hpp | 202 ----- .../MueLu_MLParameterListInterpreter_def.hpp | 728 ------------------ .../src/Utils/ClassList/SC-LO-GO-NO.classList | 1 - .../ETI_SC_LO_GO_NO_classes.cmake | 1 - .../MueLu_MLParameterListIntepreter_fwd.hpp | 27 - .../MueLu_MLParameterListInterpreter_fwd.hpp | 27 - 7 files changed, 989 deletions(-) delete mode 100644 packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp delete mode 100644 packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp delete mode 100644 packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListIntepreter_fwd.hpp delete mode 100644 packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListInterpreter_fwd.hpp diff --git a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp index 6418213b359c..b094106158d5 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp +++ b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp @@ -209,9 +209,6 @@ using MergedBlockedMatrixFactory [[maybe_unused]] = MueLu::MergedBlockedMatrixFa #ifdef MUELU_MERGEDSMOOTHER_SHORT using MergedSmoother [[maybe_unused]] = MueLu::MergedSmoother; #endif -#ifdef MUELU_MLPARAMETERLISTINTERPRETER_SHORT -using MLParameterListInterpreter [[maybe_unused]] = MueLu::MLParameterListInterpreter; -#endif #ifdef MUELU_MULTIVECTORTRANSFERFACTORY_SHORT using MultiVectorTransferFactory [[maybe_unused]] = MueLu::MultiVectorTransferFactory; #endif diff --git 
a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp deleted file mode 100644 index 8b22c8fc7f90..000000000000 --- a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp +++ /dev/null @@ -1,202 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_MLPARAMETERLISTINTERPRETER_DECL_HPP -#define MUELU_MLPARAMETERLISTINTERPRETER_DECL_HPP - -#include - -#include -#include -#include - -#include "MueLu_ConfigDefs.hpp" -#include "MueLu_HierarchyManager.hpp" -#include "MueLu_MLParameterListInterpreter_fwd.hpp" - -#include "MueLu_Hierarchy_fwd.hpp" -#include "MueLu_SmootherFactory_fwd.hpp" - -#include "MueLu_TentativePFactory_fwd.hpp" -#include "MueLu_SaPFactory_fwd.hpp" -#include "MueLu_PgPFactory_fwd.hpp" -#include "MueLu_AmalgamationFactory_fwd.hpp" -#include "MueLu_TransPFactory_fwd.hpp" -#include "MueLu_GenericRFactory_fwd.hpp" -#include "MueLu_SmootherPrototype_fwd.hpp" -#include "MueLu_TrilinosSmoother_fwd.hpp" -#include "MueLu_IfpackSmoother_fwd.hpp" -#include "MueLu_DirectSolver_fwd.hpp" -#include "MueLu_RAPFactory_fwd.hpp" -#include "MueLu_CoalesceDropFactory_fwd.hpp" -#include "MueLu_UncoupledAggregationFactory_fwd.hpp" -#include "MueLu_NullspaceFactory_fwd.hpp" -#include "MueLu_FactoryBase_fwd.hpp" - -#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) -#include "MueLu_RepartitionHeuristicFactory_fwd.hpp" -#include "MueLu_RepartitionFactory_fwd.hpp" -#include "MueLu_RebalanceTransferFactory_fwd.hpp" -#include "MueLu_IsorropiaInterface_fwd.hpp" -#include "MueLu_RebalanceAcFactory_fwd.hpp" -#include "MueLu_RebalanceMapFactory_fwd.hpp" -#endif - 
-#ifdef HAVE_MUELU_DEPRECATED_CODE -#ifdef MueLu_SHOW_DEPRECATED_WARNINGS -#warning "The header file MueLu_MLParameterListInterpreter.hpp is deprecated" -#endif -#else -#error "The header file MueLu_MLParameterListInterpreter.hpp is deprecated" -#endif - -namespace MueLu { - -/* - Utility that from an existing Teuchos::ParameterList creates a new list, in - which level-specific parameters are replaced with sublists. - - Currently, level-specific parameters that begin with "smoother:" - or "aggregation:" are placed in sublists. Coarse options are also placed - in a coarse list. - - Example: - Input: - smoother: type (level 0) = symmetric Gauss-Seidel - smoother: sweeps (level 0) = 1 - Output: - smoother: list (level 0) -> - smoother: type = symmetric Gauss-Seidel - smoother: sweeps = 1 -*/ -// This function is a copy of ML_CreateSublists to avoid dependency on ML -// Throw exception on error instead of exit() -void CreateSublists(const ParameterList& List, ParameterList& newList); - -/*! - @class MLParameterListInterpreter class. - @brief Class that accepts ML-style parameters and builds a MueLu preconditioner. - This interpreter uses the same default values as ML. This allows to compare ML/MueLu results - - The parameter list is validated only if the package ML is available and parameter "ML validate parameter list" is true. - TODO: A warning is issued if ML is not available -*/ - -template -class MLParameterListInterpreter : public HierarchyManager { -#undef MUELU_MLPARAMETERLISTINTERPRETER_SHORT -#include "MueLu_UseShortNames.hpp" - - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - MLParameterListInterpreter() - : nullspace_(NULL) - , blksize_(1) {} - - //! Constructor. - //! @param paramList: parameter list with ML parameters - //! @param[in] comm (RCP >): Optional RCP of a Teuchos communicator (default: Teuchos::null) - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! 
The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - MLParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm = Teuchos::null, std::vector > factoryList = std::vector >(0)); - - //! Constructor. - //! @param xmlFileName: file name for XML file with ML parameters - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - MLParameterListInterpreter(const std::string& xmlFileName, std::vector > factoryList = std::vector >(0)); - - //! Destructor. - virtual ~MLParameterListInterpreter() = default; - - //@} - - //@{ - - void SetParameterList(const Teuchos::ParameterList& paramList); - - //@} - - //@{ - - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy& H) const; - - //@} - - //@{ - - //! @name static helper functions translating parameter list to factories - //! @brief static helper functions that also can be used from outside for translating ML parameters into MueLu objects - //@{ - - //! Read smoother options and build the corresponding smoother factory - // @param AFact: Factory used by smoother to find 'A' - static RCP GetSmootherFactory(const Teuchos::ParameterList& paramList, const RCP& AFact = Teuchos::null); - - //@} - - //! @name Handling of additional user-specific transfer factories - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories for RAPFactory. 
- - This allows the user to add user-specific factories to the MueLu Hierarchy. The idea is to be able - to add some factories that write out some debug information etc. which are not handled by the ML - Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - */ - void AddTransferFactory(const RCP& factory); - - //! Returns number of transfer factories. - size_t NumTransferFactories() const; - //@} - - private: - //! nullspace can be embedded in the ML parameter list - int nullspaceDim_; - double* nullspace_; // TODO: replace by Teuchos::ArrayRCP<> - - //! coordinates can be embedded in the ML parameter list - double* xcoord_; - double* ycoord_; - double* zcoord_; - - //! list of user-defined transfer Factories - //! We use this vector to add some special user-given factories to the Hierarchy (RAPFactory) - //! This way the user can extend the standard functionality of the MLParameterListInterpreter beyond the - //! capabibilities of ML. - std::vector > TransferFacts_; - - //@{ Matrix configuration - - //! Setup Operator object - virtual void SetupOperator(Operator& Op) const; - - //! Matrix configuration storage - int blksize_; - - //@} - -}; // class MLParameterListInterpreter - -} // namespace MueLu - -#define MUELU_MLPARAMETERLISTINTERPRETER_SHORT -#endif /* MUELU_MLPARAMETERLISTINTERPRETER_DECL_HPP */ diff --git a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp deleted file mode 100644 index 09eb4bc28e60..000000000000 --- a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp +++ /dev/null @@ -1,728 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. 
-// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_MLPARAMETERLISTINTERPRETER_DEF_HPP -#define MUELU_MLPARAMETERLISTINTERPRETER_DEF_HPP - -#include - -#include "MueLu_ConfigDefs.hpp" -#if defined(HAVE_MUELU_ML) -#include -#endif - -#include -#include -#include -#include -#include - -#include "MueLu_MLParameterListInterpreter_decl.hpp" - -#include "MueLu_Level.hpp" -#include "MueLu_Hierarchy.hpp" -#include "MueLu_FactoryManager.hpp" - -#include "MueLu_TentativePFactory.hpp" -#include "MueLu_SaPFactory.hpp" -#include "MueLu_PgPFactory.hpp" -#include "MueLu_AmalgamationFactory.hpp" -#include "MueLu_TransPFactory.hpp" -#include "MueLu_GenericRFactory.hpp" -#include "MueLu_SmootherPrototype.hpp" -#include "MueLu_SmootherFactory.hpp" -#include "MueLu_TrilinosSmoother.hpp" -#include "MueLu_IfpackSmoother.hpp" -#include "MueLu_DirectSolver.hpp" -#include "MueLu_HierarchyUtils.hpp" -#include "MueLu_RAPFactory.hpp" -#include "MueLu_CoalesceDropFactory.hpp" -#include "MueLu_UncoupledAggregationFactory.hpp" -#include "MueLu_NullspaceFactory.hpp" -#include "MueLu_ParameterListUtils.hpp" - -#include "MueLu_CoalesceDropFactory_kokkos.hpp" -// #include "MueLu_CoordinatesTransferFactory_kokkos.hpp" -#include "MueLu_TentativePFactory_kokkos.hpp" - -#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) -#include "MueLu_IsorropiaInterface.hpp" -#include "MueLu_RepartitionHeuristicFactory.hpp" -#include "MueLu_RepartitionFactory.hpp" -#include "MueLu_RebalanceTransferFactory.hpp" -#include "MueLu_RepartitionInterface.hpp" -#include "MueLu_RebalanceAcFactory.hpp" -//#include "MueLu_RebalanceMapFactory.hpp" -#endif - -// Note: do not add options that are only recognized by MueLu. 
- -// TODO: this parameter list interpreter should force MueLu to use default ML parameters -// - Ex: smoother sweep=2 by default for ML - -// Read a parameter value from a parameter list and store it into a variable named 'varName' -#define MUELU_READ_PARAM(paramList, paramStr, varType, defaultValue, varName) \ - varType varName = defaultValue; \ - if (paramList.isParameter(paramStr)) varName = paramList.get(paramStr); - -// Read a parameter value from a paraeter list and copy it into a new parameter list (with another parameter name) -#define MUELU_COPY_PARAM(paramList, paramStr, varType, defaultValue, outParamList, outParamStr) \ - if (paramList.isParameter(paramStr)) \ - outParamList.set(outParamStr, paramList.get(paramStr)); \ - else \ - outParamList.set(outParamStr, static_cast(defaultValue)); - -namespace MueLu { - -template -MLParameterListInterpreter::MLParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm, std::vector > factoryList) - : nullspace_(NULL) - , xcoord_(NULL) - , ycoord_(NULL) - , zcoord_(NULL) - , TransferFacts_(factoryList) - , blksize_(1) { - if (paramList.isParameter("xml parameter file")) { - std::string filename = paramList.get("xml parameter file", ""); - if (filename.length() != 0) { - TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, "xml parameter file requires a valid comm"); - Teuchos::ParameterList paramList2 = paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(¶mList2), *comm); - paramList2.remove("xml parameter file"); - SetParameterList(paramList2); - } else - SetParameterList(paramList); - } else - SetParameterList(paramList); -} - -template -MLParameterListInterpreter::MLParameterListInterpreter(const std::string& xmlFileName, std::vector > factoryList) - : nullspace_(NULL) - , TransferFacts_(factoryList) - , blksize_(1) { - Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); - SetParameterList(*paramList); -} - -template -void 
MLParameterListInterpreter::SetParameterList(const Teuchos::ParameterList& paramList_in) { - Teuchos::ParameterList paramList = paramList_in; - - // - // Read top-level of the parameter list - // - - // hard-coded default values == ML defaults according to the manual - MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); - MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); - MUELU_READ_PARAM(paramList, "PDE equations", int, 1, nDofsPerNode); - - MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); - - MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", agg_type); - // MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, agg_threshold); - MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, (double)4 / (double)3, agg_damping); - // MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, agg_smoothingsweeps); - MUELU_READ_PARAM(paramList, "aggregation: nodes per aggregate", int, 1, minPerAgg); - MUELU_READ_PARAM(paramList, "aggregation: keep Dirichlet bcs", bool, false, bKeepDirichletBcs); // This is a MueLu specific extension that does not exist in ML - MUELU_READ_PARAM(paramList, "aggregation: max neighbours already aggregated", int, 0, maxNbrAlreadySelected); // This is a MueLu specific extension that does not exist in M - MUELU_READ_PARAM(paramList, "aggregation: aux: enable", bool, false, agg_use_aux); - MUELU_READ_PARAM(paramList, "aggregation: aux: threshold", double, false, agg_aux_thresh); - - MUELU_READ_PARAM(paramList, "null space: type", std::string, "default vectors", nullspaceType); - MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, nullspaceDim); // TODO: ML default not in documentation - MUELU_READ_PARAM(paramList, "null space: vectors", double*, NULL, nullspaceVec); // TODO: ML default not in documentation - - MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, bEnergyMinimization); - - 
MUELU_READ_PARAM(paramList, "RAP: fix diagonal", bool, false, bFixDiagonal); // This is a MueLu specific extension that does not exist in ML - - MUELU_READ_PARAM(paramList, "x-coordinates", double*, NULL, xcoord); - MUELU_READ_PARAM(paramList, "y-coordinates", double*, NULL, ycoord); - MUELU_READ_PARAM(paramList, "z-coordinates", double*, NULL, zcoord); - - // - // Move smoothers/aggregation/coarse parameters to sublists - // - - // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: - // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists - ParameterList paramListWithSubList; - MueLu::CreateSublists(paramList, paramListWithSubList); - paramList = paramListWithSubList; // swap - - // pull out "use kokkos refactor" - bool setKokkosRefactor = false; - bool useKokkosRefactor = !Node::is_serial; - if (paramList.isType("use kokkos refactor")) { - useKokkosRefactor = paramList.get("use kokkos refactor"); - setKokkosRefactor = true; - paramList.remove("use kokkos refactor"); - } - - // - // Validate parameter list - // - - { - bool validate = paramList.get("ML validate parameter list", true); /* true = default in ML */ - if (validate) { -#if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) - // Validate parameter list using ML validator - int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ - TEUCHOS_TEST_FOR_EXCEPTION(!ML_Epetra::ValidateMLPParameters(paramList, depth), Exceptions::RuntimeError, - "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); -#else - // If no validator available: issue a warning and set parameter value to false in the output list - this->GetOStream(Warnings0) << "Warning: MueLu_ENABLE_ML=OFF. The parameter list cannot be validated." 
<< std::endl; - paramList.set("ML validate parameter list", false); - -#endif // HAVE_MUELU_ML - } // if(validate) - } // scope - - // Matrix option - blksize_ = nDofsPerNode; - - // Translate verbosity parameter - - // Translate verbosity parameter - MsgType eVerbLevel = None; - if (verbosityLevel == 0) eVerbLevel = None; - if (verbosityLevel >= 1) eVerbLevel = Low; - if (verbosityLevel >= 5) eVerbLevel = Medium; - if (verbosityLevel >= 10) eVerbLevel = High; - if (verbosityLevel >= 11) eVerbLevel = Extreme; - if (verbosityLevel >= 42) eVerbLevel = Test; - if (verbosityLevel >= 43) eVerbLevel = InterfaceTest; - this->verbosity_ = eVerbLevel; - - TEUCHOS_TEST_FOR_EXCEPTION(agg_type != "Uncoupled", Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter::SetParameterList(): parameter \"aggregation: type\": only 'Uncoupled' aggregation is supported."); - - // Create MueLu factories - RCP dropFact; - if (useKokkosRefactor) - dropFact = rcp(new CoalesceDropFactory_kokkos()); - else - dropFact = rcp(new CoalesceDropFactory()); - - if (agg_use_aux) { - dropFact->SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); - dropFact->SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(agg_aux_thresh)); - } - - // Uncoupled aggregation - RCP AggFact = Teuchos::null; - AggFact = rcp(new UncoupledAggregationFactory()); - - AggFact->SetFactory("Graph", dropFact); - AggFact->SetFactory("DofsPerNode", dropFact); - AggFact->SetParameter("aggregation: preserve Dirichlet points", Teuchos::ParameterEntry(bKeepDirichletBcs)); - AggFact->SetParameter("aggregation: ordering", Teuchos::ParameterEntry(std::string("natural"))); - AggFact->SetParameter("aggregation: max selected neighbors", Teuchos::ParameterEntry(maxNbrAlreadySelected)); - AggFact->SetParameter("aggregation: min agg size", Teuchos::ParameterEntry(minPerAgg)); - - if (verbosityLevel > 3) { - std::ostringstream oss; - oss << "========================= Aggregate 
option summary  =========================" << std::endl; - oss << "min Nodes per aggregate :              " << minPerAgg << std::endl; - oss << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl; - oss << "aggregate ordering :                    natural" << std::endl; - oss << "=============================================================================" << std::endl; - this->GetOStream(Runtime1) << oss.str(); - } - - RCP PFact; - RCP RFact; - RCP PtentFact; - if (useKokkosRefactor) - PtentFact = rcp(new TentativePFactory_kokkos()); - else - PtentFact = rcp(new TentativePFactory()); - if (agg_damping == 0.0 && bEnergyMinimization == false) { - // tentative prolongation operator (PA-AMG) - PFact = PtentFact; - RFact = rcp(new TransPFactory()); - } else if (agg_damping != 0.0 && bEnergyMinimization == false) { - // smoothed aggregation (SA-AMG) - RCP SaPFact = rcp(new SaPFactory()); - SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); - PFact = SaPFact; - RFact = rcp(new TransPFactory()); - } else if (bEnergyMinimization == true) { - // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) - PFact = rcp(new PgPFactory()); - RFact = rcp(new GenericRFactory()); - } - - RCP AcFact = rcp(new RAPFactory()); - AcFact->SetParameter("RepairMainDiagonal", Teuchos::ParameterEntry(bFixDiagonal)); - for (size_t i = 0; i < TransferFacts_.size(); i++) { - AcFact->AddTransferFactory(TransferFacts_[i]); - } - - // - // introduce rebalancing - // -#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) - Teuchos::RCP RebalancedPFact = Teuchos::null; - Teuchos::RCP RebalancedRFact = Teuchos::null; - Teuchos::RCP RepartitionFact = Teuchos::null; - Teuchos::RCP RebalancedAFact = Teuchos::null; - - MUELU_READ_PARAM(paramList, "repartition: enable", int, 0, bDoRepartition); - if (bDoRepartition == 1) { - // The Factory Manager will be configured to return the rebalanced versions of P, R, A by default. 
- // Everytime we want to use the non-rebalanced versions, we need to explicitly define the generating factory. - RFact->SetFactory("P", PFact); - // - AcFact->SetFactory("P", PFact); - AcFact->SetFactory("R", RFact); - - // define rebalancing factory for coarse matrix - Teuchos::RCP > rebAmalgFact = Teuchos::rcp(new MueLu::AmalgamationFactory()); - rebAmalgFact->SetFactory("A", AcFact); - - MUELU_READ_PARAM(paramList, "repartition: max min ratio", double, 1.3, maxminratio); - MUELU_READ_PARAM(paramList, "repartition: min per proc", int, 512, minperproc); - - // Repartitioning heuristic - RCP RepartitionHeuristicFact = Teuchos::rcp(new RepartitionHeuristicFactory()); - { - Teuchos::ParameterList paramListRepFact; - paramListRepFact.set("repartition: min rows per proc", minperproc); - paramListRepFact.set("repartition: max imbalance", maxminratio); - RepartitionHeuristicFact->SetParameterList(paramListRepFact); - } - RepartitionHeuristicFact->SetFactory("A", AcFact); - - // create "Partition" - Teuchos::RCP > isoInterface = Teuchos::rcp(new MueLu::IsorropiaInterface()); - isoInterface->SetFactory("A", AcFact); - isoInterface->SetFactory("number of partitions", RepartitionHeuristicFact); - isoInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); - - // create "Partition" by unamalgamtion - Teuchos::RCP > repInterface = Teuchos::rcp(new MueLu::RepartitionInterface()); - repInterface->SetFactory("A", AcFact); - repInterface->SetFactory("number of partitions", RepartitionHeuristicFact); - repInterface->SetFactory("AmalgamatedPartition", isoInterface); - // repInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); // not necessary? 
- - // Repartitioning (creates "Importer" from "Partition") - RepartitionFact = Teuchos::rcp(new RepartitionFactory()); - RepartitionFact->SetFactory("A", AcFact); - RepartitionFact->SetFactory("number of partitions", RepartitionHeuristicFact); - RepartitionFact->SetFactory("Partition", repInterface); - - // Reordering of the transfer operators - RebalancedPFact = Teuchos::rcp(new RebalanceTransferFactory()); - RebalancedPFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Interpolation"))); - RebalancedPFact->SetFactory("P", PFact); - RebalancedPFact->SetFactory("Nullspace", PtentFact); - RebalancedPFact->SetFactory("Importer", RepartitionFact); - - RebalancedRFact = Teuchos::rcp(new RebalanceTransferFactory()); - RebalancedRFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Restriction"))); - RebalancedRFact->SetFactory("R", RFact); - RebalancedRFact->SetFactory("Importer", RepartitionFact); - - // Compute Ac from rebalanced P and R - RebalancedAFact = Teuchos::rcp(new RebalanceAcFactory()); - RebalancedAFact->SetFactory("A", AcFact); - } -#else // #ifdef HAVE_MUELU_ISORROPIA - // Get rid of [-Wunused] warnings - //(void) - // - // ^^^ FIXME (mfh 17 Nov 2013) That definitely doesn't compile. -#endif - - // - // Nullspace factory - // - - // Set fine level nullspace - // extract pre-computed nullspace from ML parameter list - // store it in nullspace_ and nullspaceDim_ - if (nullspaceType != "default vectors") { - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceType != "pre-computed", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (no pre-computed null space). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace dim == -1). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceVec == NULL, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == NULL). 
You have to provide a valid fine-level nullspace in \'null space: vectors\'"); - - nullspaceDim_ = nullspaceDim; - nullspace_ = nullspaceVec; - } - - Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory("Nullspace")); - nspFact->SetFactory("Nullspace", PtentFact); - - // Stash coordinates - xcoord_ = xcoord; - ycoord_ = ycoord; - zcoord_ = zcoord; - - // - // Hierarchy + FactoryManager - // - - // Hierarchy options - this->numDesiredLevel_ = maxLevels; - this->maxCoarseSize_ = maxCoarseSize; - - // - // Coarse Smoother - // - ParameterList& coarseList = paramList.sublist("coarse: list"); - // check whether coarse solver is set properly. If not, set default coarse solver. - if (!coarseList.isParameter("smoother: type")) - coarseList.set("smoother: type", "Amesos-KLU"); // set default coarse solver according to ML 5.0 guide - RCP coarseFact = GetSmootherFactory(coarseList, Teuchos::null); - - // Smoothers Top Level Parameters - - RCP topLevelSmootherParam = ExtractSetOfParameters(paramList, "smoother"); - - // - - // Prepare factory managers - // TODO: smootherFact can be reuse accross level if same parameters/no specific parameterList - - for (int levelID = 0; levelID < maxLevels; levelID++) { - // - // Level FactoryManager - // - - RCP manager = rcp(new FactoryManager()); - if (setKokkosRefactor) - manager->SetKokkosRefactor(useKokkosRefactor); - - // - // Smoothers - // - - { - // Merge level-specific parameters with global parameters. level-specific parameters takes precedence. 
- // TODO: unit-test this part alone - - ParameterList levelSmootherParam = GetMLSubList(paramList, "smoother", levelID); // copy - MergeParameterList(*topLevelSmootherParam, levelSmootherParam, false); /* false = do no overwrite levelSmootherParam parameters by topLevelSmootherParam parameters */ - // std::cout << std::endl << "Merged List for level " << levelID << std::endl; - // std::cout << levelSmootherParam << std::endl; - - RCP smootherFact = GetSmootherFactory(levelSmootherParam, Teuchos::null); // TODO: missing AFact input arg. - - manager->SetFactory("Smoother", smootherFact); - } - - // - // Misc - // - - manager->SetFactory("CoarseSolver", coarseFact); // TODO: should not be done in the loop - manager->SetFactory("Graph", dropFact); - manager->SetFactory("Aggregates", AggFact); - manager->SetFactory("DofsPerNode", dropFact); - manager->SetFactory("Ptent", PtentFact); - -#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) - if (bDoRepartition == 1) { - manager->SetFactory("A", RebalancedAFact); - manager->SetFactory("P", RebalancedPFact); - manager->SetFactory("R", RebalancedRFact); - manager->SetFactory("Nullspace", RebalancedPFact); - manager->SetFactory("Importer", RepartitionFact); - } else { -#endif // #ifdef HAVE_MUELU_ISORROPIA - manager->SetFactory("Nullspace", nspFact); // use same nullspace factory throughout all multigrid levels - manager->SetFactory("A", AcFact); // same RAP factory for all levels - manager->SetFactory("P", PFact); // same prolongator and restrictor factories for all levels - manager->SetFactory("R", RFact); // same prolongator and restrictor factories for all levels -#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) - } -#endif - - this->AddFactoryManager(levelID, 1, manager); - } // for (level loop) -} - -template -void MLParameterListInterpreter::SetupHierarchy(Hierarchy& H) const { - // if nullspace_ has already been extracted from ML parameter list - // make nullspace available for MueLu - if (nullspace_ != NULL) 
{ - RCP fineLevel = H.GetLevel(0); - RCP Op = fineLevel->Get >("A"); - RCP A = rcp_dynamic_cast(Op); - if (!A.is_null()) { - const RCP rowMap = fineLevel->Get >("A")->getRowMap(); - RCP nullspace = MultiVectorFactory::Build(rowMap, nullspaceDim_, true); - - for (size_t i = 0; i < Teuchos::as(nullspaceDim_); i++) { - Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); - const size_t myLength = nullspace->getLocalLength(); - - for (size_t j = 0; j < myLength; j++) { - nullspacei[j] = nullspace_[i * myLength + j]; - } - } - - fineLevel->Set("Nullspace", nullspace); - } - } - - // Do the same for coordinates - size_t num_coords = 0; - double* coordPTR[3]; - if (xcoord_) { - coordPTR[0] = xcoord_; - num_coords++; - if (ycoord_) { - coordPTR[1] = ycoord_; - num_coords++; - if (zcoord_) { - coordPTR[2] = zcoord_; - num_coords++; - } - } - } - if (num_coords) { - Teuchos::RCP fineLevel = H.GetLevel(0); - Teuchos::RCP Op = fineLevel->Get >("A"); - Teuchos::RCP A = rcp_dynamic_cast(Op); - if (!A.is_null()) { - const Teuchos::RCP rowMap = fineLevel->Get >("A")->getRowMap(); - Teuchos::RCP coordinates = MultiVectorFactory::Build(rowMap, num_coords, true); - - for (size_t i = 0; i < num_coords; i++) { - Teuchos::ArrayRCP coordsi = coordinates->getDataNonConst(i); - const size_t myLength = coordinates->getLocalLength(); - for (size_t j = 0; j < myLength; j++) { - coordsi[j] = coordPTR[i][j]; - } - } - fineLevel->Set("Coordinates", coordinates); - } - } - - HierarchyManager::SetupHierarchy(H); -} - -// TODO: code factorization with MueLu_ParameterListInterpreter. -template -RCP > -MLParameterListInterpreter:: - GetSmootherFactory(const Teuchos::ParameterList& paramList, - const RCP& AFact) { - typedef Teuchos::ScalarTraits STS; - SC one = STS::one(); - - std::string type = "symmetric Gauss-Seidel"; // default - - // - // Get 'type' - // - - // //TODO: fix defaults!! 
- - // // Default coarse grid smoother - // std::string type; - // if ("smoother" == "coarse") { - // #if (defined(HAVE_MUELU_EPETRA) && defined( HAVE_MUELU_AMESOS)) || (defined(HAVE_MUELU_AMESOS2)) // FIXME: test is wrong (ex: compiled with Epetra&&Tpetra&&Amesos2 but without Amesos => error running Epetra problem) - // type = ""; // use default defined by AmesosSmoother or Amesos2Smoother - // #else - // type = "symmetric Gauss-Seidel"; // use a sym Gauss-Seidel (with no damping) as fallback "coarse solver" (TODO: needs Ifpack(2)) - // #endif - // } else { - // // TODO: default smoother? - // type = ""; - // } - - if (paramList.isParameter("smoother: type")) type = paramList.get("smoother: type"); - TEUCHOS_TEST_FOR_EXCEPTION(type.empty(), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no \"smoother: type\" in the smoother parameter list" << std::endl - << paramList); - - // - // Create the smoother prototype - // - - RCP smooProto; - std::string ifpackType; - Teuchos::ParameterList smootherParamList; - - if (type == "Jacobi" || type == "Gauss-Seidel" || type == "symmetric Gauss-Seidel") { - if (type == "symmetric Gauss-Seidel") type = "Symmetric Gauss-Seidel"; // FIXME - - ifpackType = "RELAXATION"; - smootherParamList.set("relaxation: type", type); - - MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "relaxation: sweeps"); - MUELU_COPY_PARAM(paramList, "smoother: damping factor", Scalar, one, smootherParamList, "relaxation: damping factor"); - - smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); - smooProto->SetFactory("A", AFact); - - } else if (type == "Chebyshev" || type == "MLS") { - ifpackType = "CHEBYSHEV"; - - MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "chebyshev: degree"); - if (paramList.isParameter("smoother: MLS alpha")) { - MUELU_COPY_PARAM(paramList, "smoother: MLS alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); - } else { - 
MUELU_COPY_PARAM(paramList, "smoother: Chebyshev alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); - } - - smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); - smooProto->SetFactory("A", AFact); - - } else if (type == "Hiptmair") { - ifpackType = "HIPTMAIR"; - std::string subSmootherType = "Chebyshev"; - if (paramList.isParameter("subsmoother: type")) - subSmootherType = paramList.get("subsmoother: type"); - std::string subSmootherIfpackType; - if (subSmootherType == "Chebyshev") - subSmootherIfpackType = "CHEBYSHEV"; - else if (subSmootherType == "Jacobi" || subSmootherType == "Gauss-Seidel" || subSmootherType == "symmetric Gauss-Seidel") { - if (subSmootherType == "symmetric Gauss-Seidel") subSmootherType = "Symmetric Gauss-Seidel"; // FIXME - subSmootherIfpackType = "RELAXATION"; - } else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << subSmootherType << "' not supported by MueLu."); - - smootherParamList.set("hiptmair: smoother type 1", subSmootherIfpackType); - smootherParamList.set("hiptmair: smoother type 2", subSmootherIfpackType); - - auto smoother1ParamList = smootherParamList.sublist("hiptmair: smoother list 1"); - auto smoother2ParamList = smootherParamList.sublist("hiptmair: smoother list 2"); - - if (subSmootherType == "Chebyshev") { - MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "chebyshev: degree"); - MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "chebyshev: degree"); - - MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother1ParamList, "chebyshev: ratio eigenvalue"); - MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother2ParamList, "chebyshev: ratio eigenvalue"); - } else { - MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "relaxation: sweeps"); - MUELU_COPY_PARAM(paramList, 
"subsmoother: node sweeps", int, 2, smoother2ParamList, "relaxation: sweeps"); - - MUELU_COPY_PARAM(paramList, "subsmoother: SGS damping factor", double, 0.8, smoother2ParamList, "relaxation: damping factor"); - } - - smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); - smooProto->SetFactory("A", AFact); - - } else if (type == "IFPACK") { // TODO: this option is not described in the ML Guide v5.0 - -#if defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_IFPACK) - ifpackType = paramList.get("smoother: ifpack type"); - - if (ifpackType == "ILU") { - // TODO fix this (type mismatch double vs. int) - // MUELU_COPY_PARAM(paramList, "smoother: ifpack level-of-fill", double /*int*/, 0.0 /*2*/, smootherParamList, "fact: level-of-fill"); - if (paramList.isParameter("smoother: ifpack level-of-fill")) - smootherParamList.set("fact: level-of-fill", Teuchos::as(paramList.get("smoother: ifpack level-of-fill"))); - else - smootherParamList.set("fact: level-of-fill", as(0)); - - MUELU_COPY_PARAM(paramList, "smoother: ifpack overlap", int, 2, smootherParamList, "partitioner: overlap"); - - // TODO change to TrilinosSmoother as soon as Ifpack2 supports all preconditioners from Ifpack - smooProto = - MueLu::GetIfpackSmoother(ifpackType, - smootherParamList, - paramList.get("smoother: ifpack overlap")); - smooProto->SetFactory("A", AFact); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown ML smoother type " + type + " (IFPACK) not supported by MueLu. 
Only ILU is supported."); - } -#else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: MueLu compiled without Ifpack support"); -#endif - - } else if (type.length() > strlen("Amesos") && type.substr(0, strlen("Amesos")) == "Amesos") { /* catch Amesos-* */ - std::string solverType = type.substr(strlen("Amesos") + 1); /* ("Amesos-KLU" -> "KLU") */ - - // Validator: following upper/lower case is what is allowed by ML - bool valid = false; - const int validatorSize = 5; - std::string validator[validatorSize] = {"Superlu", "Superludist", "KLU", "UMFPACK", "MUMPS"}; /* TODO: should "" be allowed? */ - for (int i = 0; i < validatorSize; i++) { - if (validator[i] == solverType) valid = true; - } - TEUCHOS_TEST_FOR_EXCEPTION(!valid, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << type << "' not supported."); - - // FIXME: MueLu should accept any Upper/Lower case. Not the case for the moment - std::transform(solverType.begin() + 1, solverType.end(), solverType.begin() + 1, ::tolower); - - smooProto = Teuchos::rcp(new DirectSolver(solverType, Teuchos::ParameterList())); - smooProto->SetFactory("A", AFact); - - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << type << "' not supported by MueLu."); - } - TEUCHOS_TEST_FOR_EXCEPTION(smooProto == Teuchos::null, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no smoother prototype. 
fatal error."); - - // - // Create the smoother factory - // - - RCP SmooFact = rcp(new SmootherFactory()); - - // Set parameters of the smoother factory - MUELU_READ_PARAM(paramList, "smoother: pre or post", std::string, "both", preOrPost); - if (preOrPost == "both") { - SmooFact->SetSmootherPrototypes(smooProto, smooProto); - } else if (preOrPost == "pre") { - SmooFact->SetSmootherPrototypes(smooProto, Teuchos::null); - } else if (preOrPost == "post") { - SmooFact->SetSmootherPrototypes(Teuchos::null, smooProto); - } - - return SmooFact; -} - -template -void MLParameterListInterpreter::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "Transfer factory is not derived from TwoLevelFactoryBase. Since transfer factories will be handled by the RAPFactory they have to be derived from TwoLevelFactoryBase!"); - TransferFacts_.push_back(factory); -} - -template -size_t MLParameterListInterpreter::NumTransferFactories() const { - return TransferFacts_.size(); -} - -template -void MLParameterListInterpreter::SetupOperator(Operator& Op) const { - try { - Matrix& A = dynamic_cast(Op); - if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) - this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ << " (value of the parameter in the list) " - << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." 
<< std::endl; - - A.SetFixedBlockSize(blksize_); - -#ifdef HAVE_MUELU_DEBUG - MatrixUtils::checkLocalRowMapMatchesColMap(A); -#endif // HAVE_MUELU_DEBUG - - } catch (std::bad_cast&) { - this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; - } -} - -} // namespace MueLu - -#define MUELU_MLPARAMETERLISTINTERPRETER_SHORT -#endif /* MUELU_MLPARAMETERLISTINTERPRETER_DEF_HPP */ - -// TODO: see if it can be factorized with ML interpreter (ex: generation of Ifpack param list) diff --git a/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList b/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList index fb5f2e6315cd..e00acf6f5b63 100644 --- a/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList +++ b/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList @@ -64,7 +64,6 @@ MapTransferFactory MatrixAnalysisFactory MergedBlockedMatrixFactory MergedSmoother -MLParameterListInterpreter - #if defined(HAVE_MUELU_DEPRECATED_CODE) MultiVectorTransferFactory NotayAggregationFactory NullspaceFactory diff --git a/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake b/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake index 6ce2a712f4fb..9b9aeff30d1e 100644 --- a/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake +++ b/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake @@ -63,7 +63,6 @@ APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MapTransferFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MatrixAnalysisFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MergedBlockedMatrixFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MergedSmoother ) -APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MLParameterListInterpreter-.?if.defined[HAVE_MUELU_DEPRECATED_CODE] ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MultiVectorTransferFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES 
MueLu::NotayAggregationFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::NullspaceFactory ) diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListIntepreter_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListIntepreter_fwd.hpp deleted file mode 100644 index 004cb8991843..000000000000 --- a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListIntepreter_fwd.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_MLPARAMETERLISTINTEPRETER_FWD_HPP -#define MUELU_MLPARAMETERLISTINTEPRETER_FWD_HPP - -#include "MueLu_ConfigDefs.hpp" -#if defined(HAVE_MUELU_DEPRECATED_CODE) - -namespace MueLu { -template -class MLParameterListIntepreter; -} - -#ifndef MUELU_MLPARAMETERLISTINTEPRETER_SHORT -#define MUELU_MLPARAMETERLISTINTEPRETER_SHORT -#endif - -#endif - -#endif // MUELU_MLPARAMETERLISTINTEPRETER_FWD_HPP diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListInterpreter_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListInterpreter_fwd.hpp deleted file mode 100644 index a30343f06ed3..000000000000 --- a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListInterpreter_fwd.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. 
-// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_MLPARAMETERLISTINTERPRETER_FWD_HPP -#define MUELU_MLPARAMETERLISTINTERPRETER_FWD_HPP - -#include "MueLu_ConfigDefs.hpp" -#if defined(HAVE_MUELU_DEPRECATED_CODE) - -namespace MueLu { -template -class MLParameterListInterpreter; -} - -#ifndef MUELU_MLPARAMETERLISTINTERPRETER_SHORT -#define MUELU_MLPARAMETERLISTINTERPRETER_SHORT -#endif - -#endif - -#endif // MUELU_MLPARAMETERLISTINTERPRETER_FWD_HPP From b96086ab23fa3f957e7c082beceaa3f1f94adc19 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 10 Dec 2024 08:15:52 -0700 Subject: [PATCH 43/50] MueLu: Remove deprecated NullspaceFactory_kokkos Signed-off-by: Christian Glusa --- .../src/Headers/MueLu_UseShortNamesScalar.hpp | 3 --- .../Interface/MueLu_FactoryFactory_decl.hpp | 1 - .../Interface/MueLu_FactoryFactory_def.hpp | 2 -- .../MueLu_NullspaceFactory_kokkos_decl.hpp | 23 ---------------- .../MueLu_NullspaceFactory_kokkos_def.hpp | 15 ----------- .../src/Utils/ClassList/SC-LO-GO-NO.classList | 1 - .../ETI_SC_LO_GO_NO_classes.cmake | 1 - .../MueLu_NullspaceFactory_kokkos_fwd.hpp | 27 ------------------- 8 files changed, 73 deletions(-) delete mode 100644 packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_decl.hpp delete mode 100644 packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_def.hpp delete mode 100644 packages/muelu/src/Utils/ForwardDeclaration/MueLu_NullspaceFactory_kokkos_fwd.hpp diff --git a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp index b094106158d5..a5bf3f68b544 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp +++ b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp @@ -218,9 +218,6 @@ using NotayAggregationFactory [[maybe_unused]] = MueLu::NotayAggregationFactory< #ifdef 
MUELU_NULLSPACEFACTORY_SHORT using NullspaceFactory [[maybe_unused]] = MueLu::NullspaceFactory; #endif -#ifdef MUELU_NULLSPACEFACTORY_KOKKOS_SHORT -using NullspaceFactory_kokkos [[maybe_unused]] = MueLu::NullspaceFactory_kokkos; -#endif #ifdef MUELU_NULLSPACEPRESMOOTHFACTORY_SHORT using NullspacePresmoothFactory [[maybe_unused]] = MueLu::NullspacePresmoothFactory; #endif diff --git a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp index a6c6add07da2..5176bc2fb677 100644 --- a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp @@ -126,7 +126,6 @@ #include "MueLu_CoalesceDropFactory_kokkos_fwd.hpp" #include "MueLu_GeometricInterpolationPFactory_kokkos_fwd.hpp" #ifdef HAVE_MUELU_DEPRECATED_CODE -#include "MueLu_NullspaceFactory_kokkos_fwd.hpp" #include "MueLu_SaPFactory_kokkos_fwd.hpp" #endif #include "MueLu_SemiCoarsenPFactory_kokkos_fwd.hpp" diff --git a/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp b/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp index cc144b4366fe..0694928ceceb 100644 --- a/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp +++ b/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp @@ -105,7 +105,6 @@ #include "MueLu_CoalesceDropFactory_kokkos.hpp" #include "MueLu_GeometricInterpolationPFactory_kokkos.hpp" #ifdef HAVE_MUELU_DEPRECATED_CODE -#include "MueLu_NullspaceFactory_kokkos.hpp" #include "MueLu_SaPFactory_kokkos.hpp" #endif #include "MueLu_SemiCoarsenPFactory_kokkos.hpp" @@ -216,7 +215,6 @@ RCP FactoryFactory if (factoryName == "CoalesceDropFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); if (factoryName == "GeometricInterpolationPFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); #ifdef HAVE_MUELU_DEPRECATED_CODE - if (factoryName == "NullspaceFactory_kokkos") return Build2(paramList, factoryMapIn, 
factoryManagersIn); if (factoryName == "SaPFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); #endif if (factoryName == "SemiCoarsenPFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); diff --git a/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_decl.hpp b/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_decl.hpp deleted file mode 100644 index 82fd49a91699..000000000000 --- a/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_decl.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_NULLSPACEFACTORY_KOKKOS_DECL_HPP -#define MUELU_NULLSPACEFACTORY_KOKKOS_DECL_HPP - -#include "MueLu_NullspaceFactory.hpp" - -namespace MueLu { - -template -class [[deprecated]] NullspaceFactory_kokkos : public NullspaceFactory {}; - -} // namespace MueLu - -#define MUELU_NULLSPACEFACTORY_KOKKOS_SHORT -#endif // MUELU_NULLSPACEFACTORY_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_def.hpp b/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_def.hpp deleted file mode 100644 index bad8376962c7..000000000000 --- a/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_def.hpp +++ /dev/null @@ -1,15 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. 
-// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_NULLSPACEFACTORY_KOKKOS_DEF_HPP -#define MUELU_NULLSPACEFACTORY_KOKKOS_DEF_HPP - -#include "MueLu_NullspaceFactory_kokkos_decl.hpp" - -#endif // MUELU_NULLSPACEFACTORY_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList b/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList index e00acf6f5b63..c606287c8edf 100644 --- a/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList +++ b/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList @@ -67,7 +67,6 @@ MergedSmoother MultiVectorTransferFactory NotayAggregationFactory NullspaceFactory -NullspaceFactory_kokkos - #if defined(HAVE_MUELU_DEPRECATED_CODE) NullspacePresmoothFactory ParameterListInterpreter PatternFactory diff --git a/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake b/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake index 9b9aeff30d1e..20dc4095f4f0 100644 --- a/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake +++ b/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake @@ -66,7 +66,6 @@ APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MergedSmoother ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MultiVectorTransferFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::NotayAggregationFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::NullspaceFactory ) -APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::NullspaceFactory_kokkos-.?if.defined[HAVE_MUELU_DEPRECATED_CODE] ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::NullspacePresmoothFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::ParameterListInterpreter ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::PatternFactory ) diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_NullspaceFactory_kokkos_fwd.hpp 
b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_NullspaceFactory_kokkos_fwd.hpp deleted file mode 100644 index 805c7ee1970f..000000000000 --- a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_NullspaceFactory_kokkos_fwd.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_NULLSPACEFACTORY_KOKKOS_FWD_HPP -#define MUELU_NULLSPACEFACTORY_KOKKOS_FWD_HPP - -#include "MueLu_ConfigDefs.hpp" -#if defined(HAVE_MUELU_DEPRECATED_CODE) - -namespace MueLu { -template -class NullspaceFactory_kokkos; -} - -#ifndef MUELU_NULLSPACEFACTORY_KOKKOS_SHORT -#define MUELU_NULLSPACEFACTORY_KOKKOS_SHORT -#endif - -#endif - -#endif // MUELU_NULLSPACEFACTORY_KOKKOS_FWD_HPP From d8399d55268e7565966634b592e31f355b84f963 Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Tue, 10 Dec 2024 08:45:40 -0700 Subject: [PATCH 44/50] Correct CDash name for non-UVM build Signed-off-by: Samuel E. 
Browne --- .github/workflows/AT2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index 1409523db6ba..d4575ba6457d 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -288,7 +288,7 @@ jobs: --workspace-dir /home/runner/_work/Trilinos \ --source-dir ${GITHUB_WORKSPACE} \ --build-dir /home/Trilinos/build \ - --dashboard-build-name=PR-${{ github.event.pull_request.number }}_${AT2_IMAGE}_release_static_uvm \ + --dashboard-build-name=PR-${{ github.event.pull_request.number }}_${AT2_IMAGE}_release_static \ --ctest-driver /home/runner/_work/Trilinos/Trilinos/cmake/SimpleTesting/cmake/ctest-driver.cmake \ --ctest-drop-site sems-cdash-son.sandia.gov/cdash \ --filename-subprojects ./package_subproject_list.cmake \ From 9de67f494648b1207aeccfa937e9e9f973ab71d3 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 10 Dec 2024 11:52:35 -0700 Subject: [PATCH 45/50] MueLu: Fix regression counts Signed-off-by: Christian Glusa --- packages/muelu/test/unit_tests_kokkos/Regression.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/muelu/test/unit_tests_kokkos/Regression.cpp b/packages/muelu/test/unit_tests_kokkos/Regression.cpp index ccfbce86b07e..c1f41f7cade2 100644 --- a/packages/muelu/test/unit_tests_kokkos/Regression.cpp +++ b/packages/muelu/test/unit_tests_kokkos/Regression.cpp @@ -98,7 +98,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Regression, H2D, Scalar, LocalOrdinal, GlobalO } #ifdef KOKKOS_HAS_SHARED_SPACE else { - size_t targetNumDeepCopies = kkNativeDeepCopies + (std::is_same_v ? 20 : 32); + size_t targetNumDeepCopies = kkNativeDeepCopies + (std::is_same_v ? 
17 : 32); TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), targetNumDeepCopies); } #else @@ -175,7 +175,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Regression, Aggregation, Scalar, LocalOrdinal, } #ifdef KOKKOS_HAS_SHARED_SPACE else { - size_t targetNumDeepCopies = std::is_same_v ? 18 : 17; + size_t targetNumDeepCopies = std::is_same_v ? 11 : 17; TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), targetNumDeepCopies); } #else From a7460f1310f08d0768162b562e692188fc2f445e Mon Sep 17 00:00:00 2001 From: mperego Date: Tue, 10 Dec 2024 14:59:33 -0700 Subject: [PATCH 46/50] Piro: remove Stokhos dependency and related tests and files (#13660) Stokhos capability in Piro is based on the Epetra stack and is no longer supported. Signed-off-by: Mauro Perego --- packages/piro/cmake/Dependencies.cmake | 2 +- packages/piro/cmake/Piro_config.hpp.in | 2 - packages/piro/doc/index.doc | 5 +- packages/piro/src/CMakeLists.txt | 19 - .../Piro_Epetra_NECoupledModelEvaluator.cpp | 1194 ----------------- .../Piro_Epetra_NECoupledModelEvaluator.hpp | 232 ---- .../piro/src/Piro_Epetra_StokhosMPSolver.cpp | 230 ---- .../piro/src/Piro_Epetra_StokhosMPSolver.hpp | 168 --- .../src/Piro_Epetra_StokhosNOXObserver.cpp | 66 - .../src/Piro_Epetra_StokhosNOXObserver.hpp | 57 - .../piro/src/Piro_Epetra_StokhosSolver.cpp | 155 --- .../piro/src/Piro_Epetra_StokhosSolver.hpp | 154 --- .../src/Piro_Epetra_StokhosSolverFactory.cpp | 552 -------- .../src/Piro_Epetra_StokhosSolverFactory.hpp | 148 -- packages/piro/test/MockModelEval_C.cpp | 104 -- packages/piro/test/MockModelEval_D.cpp | 116 -- packages/piro/test/Piro_UnitTests.cpp | 453 ------- 17 files changed, 3 insertions(+), 3654 deletions(-) delete mode 100644 packages/piro/src/Piro_Epetra_NECoupledModelEvaluator.cpp delete mode 100644 packages/piro/src/Piro_Epetra_NECoupledModelEvaluator.hpp delete mode 100644 packages/piro/src/Piro_Epetra_StokhosMPSolver.cpp delete mode 100644 
packages/piro/src/Piro_Epetra_StokhosMPSolver.hpp delete mode 100644 packages/piro/src/Piro_Epetra_StokhosNOXObserver.cpp delete mode 100644 packages/piro/src/Piro_Epetra_StokhosNOXObserver.hpp delete mode 100644 packages/piro/src/Piro_Epetra_StokhosSolver.cpp delete mode 100644 packages/piro/src/Piro_Epetra_StokhosSolver.hpp delete mode 100644 packages/piro/src/Piro_Epetra_StokhosSolverFactory.cpp delete mode 100644 packages/piro/src/Piro_Epetra_StokhosSolverFactory.hpp diff --git a/packages/piro/cmake/Dependencies.cmake b/packages/piro/cmake/Dependencies.cmake index ea637c4b8bec..7d5e7ef5584b 100644 --- a/packages/piro/cmake/Dependencies.cmake +++ b/packages/piro/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ SET(LIB_REQUIRED_DEP_PACKAGES Teuchos Stratimikos ThyraCore Tpetra Teko) -SET(LIB_OPTIONAL_DEP_PACKAGES NOX Tempus Stokhos +SET(LIB_OPTIONAL_DEP_PACKAGES NOX Tempus ROL Ifpack2 MueLu ThyraEpetraAdapters ThyraEpetraExtAdapters Epetra EpetraExt) SET(TEST_REQUIRED_DEP_PACKAGES ThyraTpetraAdapters MPI) SET(TEST_OPTIONAL_DEP_PACKAGES) diff --git a/packages/piro/cmake/Piro_config.hpp.in b/packages/piro/cmake/Piro_config.hpp.in index ac80335878e3..894f95980888 100644 --- a/packages/piro/cmake/Piro_config.hpp.in +++ b/packages/piro/cmake/Piro_config.hpp.in @@ -24,9 +24,7 @@ #cmakedefine HAVE_PIRO_TEMPUS /* DEPRECATED */ #cmakedefine Piro_ENABLE_Tempus -#cmakedefine HAVE_PIRO_STOKHOS /* DEPRECATED */ -#cmakedefine Piro_ENABLE_Stokhos #cmakedefine HAVE_PIRO_ROL /* DEPRECATED */ #cmakedefine Piro_ENABLE_ROL diff --git a/packages/piro/doc/index.doc b/packages/piro/doc/index.doc index 757f771f16ee..cbd522c5c185 100644 --- a/packages/piro/doc/index.doc +++ b/packages/piro/doc/index.doc @@ -59,8 +59,7 @@ by %Piro include:
  • NOX: Nonlinear Solver
  • LOCA: Continuation and Bifurcation Analysis Solver -
  • Stokhos: Embedded UQ solver for Stochastic-Galerkin over random variables -
  • LIME: Algorithms for multi-physics coupling (under development) +
  • ROL: Tools for Numerical Optimization
Each of these solvers not only takes a ModelEvaluator as @@ -121,7 +120,7 @@ and not just two. %Piro is developed by Andy Salinger, Roscoe Bartlett, Todd Coffey, Kim Liegeois, Roger Pawlowski, Mauro Perego, Eric Phipps and Irina Tezaur. Much of the code is adapted from -tests and examples for NOX, LOCA, Stokhos, and was developed and matured in the Albany +tests and examples for NOX, LOCA, and was developed and matured in the Albany application code before being library-ized into %Piro. */ diff --git a/packages/piro/src/CMakeLists.txt b/packages/piro/src/CMakeLists.txt index b515135120ff..b1b29af49e52 100644 --- a/packages/piro/src/CMakeLists.txt +++ b/packages/piro/src/CMakeLists.txt @@ -128,25 +128,6 @@ IF (Piro_ENABLE_Tempus) Piro_ObserverToTempusIntegrationObserverAdapter_Def.hpp) ENDIF() -# Optional StochasticGalerkin capability, depending upon Stokhos and NOX -IF (Piro_ENABLE_Stokhos AND Piro_ENABLE_NOX AND PIRO_HAVE_EPETRA_STACK) - APPEND_SET(HEADERS - Piro_Epetra_StokhosNOXObserver.hpp - Piro_Epetra_StokhosSolverFactory.hpp - Piro_Epetra_StokhosSolver.hpp - Piro_Epetra_StokhosMPSolver.hpp - Piro_Epetra_NECoupledModelEvaluator.hpp - ) - APPEND_SET(SOURCES - Piro_Epetra_StokhosNOXObserver.cpp - Piro_Epetra_StokhosSolverFactory.cpp - Piro_Epetra_StokhosSolver.cpp - Piro_Epetra_StokhosMPSolver.cpp - Piro_Epetra_NECoupledModelEvaluator.cpp - ) -ENDIF() - - IF (Piro_ENABLE_ROL) APPEND_SET(HEADERS Piro_ThyraProductME_Objective_SimOpt.hpp diff --git a/packages/piro/src/Piro_Epetra_NECoupledModelEvaluator.cpp b/packages/piro/src/Piro_Epetra_NECoupledModelEvaluator.cpp deleted file mode 100644 index 0422e22940cf..000000000000 --- a/packages/piro/src/Piro_Epetra_NECoupledModelEvaluator.cpp +++ /dev/null @@ -1,1194 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. 
-// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#include "Piro_Epetra_NECoupledModelEvaluator.hpp" -#include "Piro_Epetra_SolverFactory.hpp" -#include "Piro_Epetra_StokhosSolver.hpp" - -#include "Epetra_LocalMap.h" - -#include "Teuchos_Assert.hpp" -#include "Teuchos_TimeMonitor.hpp" -#include "Teuchos_VerboseObjectParameterListHelpers.hpp" - -#include "Stokhos_Epetra.hpp" -#include "Stokhos_ReducedBasisFactory.hpp" -#include "EpetraExt_MultiComm.h" - -Piro::Epetra::NECoupledModelEvaluator:: -NECoupledModelEvaluator( - const Teuchos::Array >& models_, - const Teuchos::Array >& piroParams_, - const Teuchos::RCP& network_model_, - const Teuchos::RCP& params_, - const Teuchos::RCP& comm_, - const Teuchos::Array< Teuchos::RCP >& observers_): - models(models_), - piroParams(piroParams_), - network_model(network_model_), - params(params_), - comm(comm_), - observers(observers_) -{ - // Setup VerboseObject - Teuchos::readVerboseObjectSublist(params.get(), this); - - n_models = models.size(); - solvers.resize(n_models); - - // Create solvers for models A and B - bool stochastic = params->get("Stochastic", false); - if (observers.size() < n_models) - observers.resize(n_models); - if (stochastic) { - sgSolvers.resize(n_models); - for (int i=0; isetup(models[i], observers[i]); - solvers[i] = sgSolvers[i]; - } - } - else { - Piro::Epetra::SolverFactory solverFactory; - for (int i=0; iget< Teuchos::Array >("Network Coupling Parameter Indices"); - g_indices = - params->get< Teuchos::Array >("Network Coupling Response Indices"); - TEUCHOS_ASSERT(p_indices.size() == n_models); - TEUCHOS_ASSERT(g_indices.size() == n_models); - - // Get number of parameter and response vectors - solver_inargs.resize(n_models); - solver_outargs.resize(n_models); - num_params.resize(n_models); - num_responses.resize(n_models); - num_params_total = 0; - num_responses_total = 0; - for (int i=0; icreateInArgs(); - 
solver_outargs[i] = solvers[i]->createOutArgs(); - num_params[i] = solver_inargs[i].Np(); - num_responses[i] = solver_outargs[i].Ng(); - num_params_total += num_params[i]; - num_responses_total += num_responses[i]; - } - num_params_total -= n_models; - num_responses_total -= n_models; - - // Building indexing maps between coupled system parameters/responses and - // individual components - // Parameter vector i of this model evaluator corresponds to parameter - // param_map[i].second for model param_map[i].first. Similarly for the - // responses - for (int i=0; iget_p_map(p_indices[i]); - n_p[i] = p_maps[i]->NumGlobalElements(); - nx += n_p[i]; - } - x_map = Teuchos::rcp(new Epetra_Map(nx, 0, *comm)); - x_overlap_map = Teuchos::rcp(new Epetra_LocalMap(nx, 0, *comm)); - x_importer = Teuchos::rcp(new Epetra_Import(*x_overlap_map, *x_map)); - x_overlap = Teuchos::rcp(new Epetra_Vector(*x_overlap_map)); - - // Build f map, which is the product of the g_indices response maps - // For the time being, we will assume local maps, in the future we need to - // build proper product maps - g_maps.resize(n_models); - n_g.resize(n_models); - int nf = 0; - for (int i=0; iget_g_map(g_indices[i]); - n_g[i] = g_maps[i]->NumGlobalElements(); - nf += n_g[i]; - } - f_map = Teuchos::rcp(new Epetra_Map(nf, 0, *comm)); - f_overlap_map = Teuchos::rcp(new Epetra_LocalMap(nf, 0, *comm)); - f_exporter = Teuchos::rcp(new Epetra_Export(*f_overlap_map, *f_map)); - f_overlap = Teuchos::rcp(new Epetra_Vector(*f_overlap_map)); - - // Determine what we support - supports_W = true; - supports_x_sg = true; - supports_f_sg = true; - supports_W_sg = true; - Teuchos::Array ds(n_models); - for (int i=0; iMyGlobalElements(); - for (int i=0; iNumMyElements(); i++) { - int row = f_map->GID(i); - W_graph->InsertGlobalIndices(row, nx, indices); - } - W_graph->FillComplete(); - - W_overlap_graph = - Teuchos::rcp(new Epetra_CrsGraph(Copy, *f_overlap_map, nx)); - for (int i=0; iNumMyElements(); i++) { - int row = 
f_overlap_map->GID(i); - W_overlap_graph->InsertGlobalIndices(row, nx, indices); - } - W_overlap_graph->FillComplete(); - W_overlap = Teuchos::rcp(new Epetra_CrsMatrix(Copy, *W_overlap_graph)); - } - - // Build initial guess - Epetra_Vector x_init_overlap(*x_overlap_map); - int offset = 0; - for (int i=0; i p_init = - solvers[i]->get_p_init(p_indices[i]); - for (int j=0; jExport(x_init_overlap, *x_importer, Insert); - - // Create storage for parameters, responses, and derivatives - p.resize(n_models); - g.resize(n_models); - dgdp_layout.resize(n_models); - dgdp.resize(n_models); - for (int i=0; isublist("Dimension Reduction"); - if (!dim_reduct_params.isParameter("Reduce Dimension")) - reduce_dimension.resize(n_models, 0); - else if (dim_reduct_params.isType("Reduce Dimension")) - reduce_dimension.resize(n_models, - dim_reduct_params.get("Reduce Dimension")); - else if (dim_reduct_params.isType< Teuchos::Array >("Reduce Dimension")) - reduce_dimension = - dim_reduct_params.get< Teuchos::Array >("Reduce Dimension"); - else - TEUCHOS_TEST_FOR_EXCEPTION( - true, std::logic_error, - "Invalid type for parameter \"Dimension Reduction\""); -} - -// Overridden from EpetraExt::ModelEvaluator - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -get_x_map() const -{ - return x_map; -} - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -get_f_map() const -{ - return f_map; -} - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -get_x_init() const -{ - return x_init; -} - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -get_p_map(int j) const -{ - TEUCHOS_TEST_FOR_EXCEPTION( - j >= num_params_total || j < 0, Teuchos::Exceptions::InvalidParameter, - std::endl << - "Error in Piro::Epetra::NECoupledModelEvaluator::get_p_map(): " << - "Invalid parameter index j = " << j << std::endl); - - return solvers[param_map[j].first]->get_p_map(param_map[j].second); -} - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -get_g_map(int j) const -{ - 
TEUCHOS_TEST_FOR_EXCEPTION( - j >= num_responses_total || j < 0, Teuchos::Exceptions::InvalidParameter, - std::endl << - "Error in Piro::Epetra::NECoupledModelEvaluator::get_g_map(): " << - "Invalid response index j = " << j << std::endl); - - return solvers[response_map[j].first]->get_g_map(response_map[j].second); -} - -Teuchos::RCP > -Piro::Epetra::NECoupledModelEvaluator:: -get_p_names(int j) const -{ - TEUCHOS_TEST_FOR_EXCEPTION( - j >= num_params_total || j < 0, Teuchos::Exceptions::InvalidParameter, - std::endl << - "Error in Piro::Epetra::NECoupledModelEvaluator::get_p_names(): " << - "Invalid parameter index j = " << j << std::endl); - - return solvers[param_map[j].first]->get_p_names(param_map[j].second); -} - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -get_p_init(int j) const -{ - TEUCHOS_TEST_FOR_EXCEPTION( - j >= num_params_total || j < 0, Teuchos::Exceptions::InvalidParameter, - std::endl << - "Error in Piro::Epetra::NECoupledModelEvaluator::get_p_init(): " << - "Invalid parameter index j = " << j << std::endl); - - return solvers[param_map[j].first]->get_p_init(param_map[j].second); -} - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -create_W() const -{ - Teuchos::RCP mat = - Teuchos::rcp(new Epetra_CrsMatrix(Copy, *W_graph)); - mat->FillComplete(); - return mat; -} - -EpetraExt::ModelEvaluator::InArgs -Piro::Epetra::NECoupledModelEvaluator:: -createInArgs() const -{ - InArgsSetup inArgs; - inArgs.setModelEvalDescription(this->description()); - - // Deterministic InArgs - inArgs.setSupports(IN_ARG_x, true); - inArgs.set_Np(num_params_total); - - // Stochastic InArgs - if (supports_x_sg) { - inArgs.setSupports(IN_ARG_x_sg, supports_x_sg); - inArgs.setSupports(IN_ARG_sg_basis,true); - inArgs.setSupports(IN_ARG_sg_quadrature,true); - inArgs.setSupports(IN_ARG_sg_expansion,true); - for (int i=0; idescription()); - - // Deterministic OutArgs - outArgs.setSupports(OUT_ARG_f, true); - outArgs.setSupports(OUT_ARG_W, supports_W); - 
outArgs.set_W_properties( - DerivativeProperties(DERIV_LINEARITY_NONCONST, DERIV_RANK_FULL, true)); - outArgs.set_Np_Ng(num_params_total, num_responses_total); - for (int i=0; i out = this->getOStream(); - Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel(); - - // Create fresh in/out args for sub-models - for (int i=0; icreateInArgs(); - solver_outargs[i] = solvers[i]->createOutArgs(); - } - - EpetraExt::ModelEvaluator::InArgs network_inargs = inArgs; - EpetraExt::ModelEvaluator::OutArgs network_outargs = outArgs; - - // - // Deterministic calculation - // - Teuchos::RCP x = inArgs.get_x(); - if (x != Teuchos::null) { - - // p - x_overlap->Import(*x, *x_importer, Insert); - int offset = 0; - for (int i=0; i f = outArgs.get_f(); - if (f != Teuchos::null) { - for (int i=0; i W = outArgs.get_W(); - if (W != Teuchos::null) { - for (int i=0; i > basis = - inArgs.get_sg_basis(); - Teuchos::RCP multiComm = - x_sg->productComm(); - if (sg_overlap_map == Teuchos::null) - sg_overlap_map = - Teuchos::rcp(new Epetra_LocalMap(basis->size(), 0, - multiComm->TimeDomainComm())); - - if (x_sg_overlap == Teuchos::null) - x_sg_overlap = - Teuchos::rcp(new Stokhos::EpetraVectorOrthogPoly( - basis, sg_overlap_map, x_overlap_map, multiComm)); - if (supports_f_sg && f_sg_overlap == Teuchos::null) - f_sg_overlap = - Teuchos::rcp(new Stokhos::EpetraVectorOrthogPoly( - basis, sg_overlap_map, f_overlap_map, multiComm)); - if (supports_W_sg && W_sg_overlap == Teuchos::null) { - Teuchos::RCP domain_base_map = - x_overlap_map; - Teuchos::RCP range_base_map = - f_overlap_map; - W_sg_overlap = - Teuchos::rcp(new Stokhos::EpetraOperatorOrthogPoly( - basis, sg_overlap_map, domain_base_map, range_base_map, - multiComm)); - for (int block=0; blocksize(); block++) { - Teuchos::RCP W = - Teuchos::rcp(new Epetra_CrsMatrix(Copy, *W_overlap_graph)); - W_sg_overlap->setCoeffPtr(block,W); - } - } - - for (int i=0; isize(); block++) { - (*x_sg_overlap)[block].Import((*x_sg)[block], *x_importer, 
Insert); - int offset = 0; - for (int i=0; i solver_inargs_red(n_models); - Teuchos::Array solver_outargs_red(n_models); - Teuchos::Array > solvers_red(n_models); - - Teuchos::Array > piroParams_red(n_models); - - for (int i=0; ievalModel(solver_inargs_red[0], solver_outargs_red[0]); - } - - { - TEUCHOS_FUNC_TIME_MONITOR( - "NECoupledModelEvaluator -- Model 2 nonlinear elimination"); - if (verbLevel != Teuchos::VERB_NONE) - *out << "Eliminating model " << 2 << " states..."; - solvers_red[1]->evalModel(solver_inargs_red[1], solver_outargs_red[1]); - } - } - else { - for (int i=0; ievalModel(solver_inargs_red[i], solver_outargs_red[i]); - } - } - - // Project back to original stochastic bases - for (int i=0; ievalModel(solver_inargs, solver_outargs, - network_inargs, network_outargs, - n_p, n_g, p, g, dgdp, dgdp_layout, - p_sg, g_sg, dgdp_sg, dgdp_sg_layout); - - // Export network residuals, Jacobians, etc... - - // f - Teuchos::RCP f = outArgs.get_f(); - if (f != Teuchos::null) - f->Export(*f_overlap, *f_exporter, Insert); - - // W - Teuchos::RCP W = outArgs.get_W(); - if (W != Teuchos::null) { - Teuchos::RCP W_crs = - Teuchos::rcp_dynamic_cast(W, true); - W_crs->Export(*W_overlap, *f_exporter, Insert); - } - - // f_sg - if (supports_f_sg) { - OutArgs::sg_vector_t f_sg = outArgs.get_f_sg(); - if (f_sg != Teuchos::null) { - for (int block=0; blocksize(); block++) - (*f_sg)[block].Export((*f_sg_overlap)[block], *f_exporter, Insert); - } - } - - // W_sg - if (supports_W_sg) { - OutArgs::sg_operator_t W_sg = outArgs.get_W_sg(); - if (W_sg != Teuchos::null) { - for (int block=0; blocksize(); block++) { - Teuchos::RCP W_crs = - Teuchos::rcp_dynamic_cast( - W_sg->getCoeffPtr(block), true); - Teuchos::RCP W_overlap_crs = - Teuchos::rcp_dynamic_cast( - W_sg_overlap->getCoeffPtr(block), true); - W_crs->Export(*W_overlap_crs, *f_exporter, Insert); - } - } - } - } - - -void -Piro::Epetra::NECoupledModelEvaluator:: -do_dimension_reduction( - int model_index, - const InArgs& 
inArgs, - const InArgs& solver_inargs, - const OutArgs& solver_outargs, - const Teuchos::RCP& model, - const Teuchos::RCP& solver, - const Teuchos::RCP& solver_params, - InArgs& reduced_inargs, - OutArgs& reduced_outargs, - Teuchos::RCP& reduced_solver, - Teuchos::RCP& reduced_params) const -{ - TEUCHOS_FUNC_TIME_MONITOR("NECoupledModelEvaluator -- dimension reduction"); - - // First copy the in/out args to set everything we don't modify - reduced_inargs = solver_inargs; - reduced_outargs = solver_outargs; - reduced_solver = solver; - reduced_params = params; - - // Make sure there is something to do - InArgs::sg_const_vector_t x_sg; - if (supports_x_sg) - x_sg = inArgs.get_x_sg(); - if (!reduce_dimension[model_index] || x_sg == Teuchos::null) - return; - - Teuchos::RCP > basis = - Teuchos::rcp_dynamic_cast >( - inArgs.get_sg_basis(), true); - Teuchos::RCP > quad = - inArgs.get_sg_quadrature(); - Teuchos::RCP > expansion - = inArgs.get_sg_expansion(); - - // Copy Epetra PCEs into Stokhos PCE objects - int total_num_p = 0; - for (int i=0; icoefficientMap()->NumMyElements(); - } - } - int sz = basis->size(); - Teuchos::Array< Stokhos::OrthogPolyApprox > p_opa(total_num_p); - int index = 0; - for (int i=0; icoefficientMap()->NumMyElements(); k++) - p_opa[index+k].reset(basis); - for (int j=0; jcoefficientMap()->NumMyElements(); - } - } - - // Build Stieltjes basis, quadrature, and new PCEs - Teuchos::RCP > red_basis; - Teuchos::RCP > red_quad; - Teuchos::Array > red_pces; - Teuchos::ParameterList& reduct_params = - params->sublist("Dimension Reduction"); - int order = basis->order(); - int new_order = reduct_params.get("Reduced Order", -1); - if (new_order == -1) - new_order = order; - if (st_quad == Teuchos::null) { - st_quad = quad; - // st_quad = - // Teuchos::rcp(new Stokhos::SparseGridQuadrature( - // basis, new_order+1)); - // st_quad = - // Teuchos::rcp(new Stokhos::TensorProductQuadrature( - // basis, 4*new_order+1)); - // std::cout << "st_quad->size() = " << 
st_quad->size() << std::endl; - } - Teuchos::RCP > Cijk = - expansion->getTripleProduct(); - Stokhos::ReducedBasisFactory factory(reduct_params); - Teuchos::RCP< Stokhos::ReducedPCEBasis > gs_basis = - factory.createReducedBasis(new_order, p_opa, st_quad, Cijk); - red_basis = gs_basis; - red_quad = gs_basis->getReducedQuadrature(); - red_pces.resize(p_opa.size()); - for (int i=0; itransformFromOriginalBasis(p_opa[i].coeff(), red_pces[i].coeff()); - } - - Teuchos::RCP multiComm = x_sg->productComm(); - - // Copy into Epetra objects - int red_sz = red_basis->size(); - Teuchos::RCP red_overlap_map = - Teuchos::rcp(new Epetra_LocalMap(red_sz, 0, - multiComm->TimeDomainComm())); - - // p_red - index = 0; - for (int i=0; iget_p_map(i), - multiComm)); - for (int j=0; jcoefficientMap()->NumMyElements(); - reduced_inargs.set_p_sg(i, p_red); - } - } - - for (int i=0; iget_g_map(i), - multiComm)); - reduced_outargs.set_g_sg(i, g_red); - } - - // dg/dx_red - if (!solver_outargs.supports(OUT_ARG_DgDx_sg, i).none()) { - Teuchos::RCP dgdx_sg = - solver_outargs.get_DgDx_sg(i).getMultiVector(); - if (dgdx_sg != Teuchos::null) { - Teuchos::RCP dgdx_red = - Teuchos::rcp(new Stokhos::EpetraMultiVectorOrthogPoly( - red_basis, red_overlap_map, - dgdx_sg->coefficientMap(), - multiComm, - dgdx_sg->numVectors())); - reduced_outargs.set_DgDx_sg( - i, SGDerivative(dgdx_red, - solver_outargs.get_DgDx_sg(i).getMultiVectorOrientation())); - } - } - - // dg/dp_red - for (int j=0; j dgdp_sg = - solver_outargs.get_DgDp_sg(i,j).getMultiVector(); - if (dgdp_sg != Teuchos::null) { - Teuchos::RCP dgdp_red = - Teuchos::rcp(new Stokhos::EpetraMultiVectorOrthogPoly( - red_basis, red_overlap_map, - dgdp_sg->coefficientMap(), - multiComm, - dgdp_sg->numVectors())); - reduced_outargs.set_DgDp_sg( - i, j, SGDerivative(dgdp_red, - solver_outargs.get_DgDp_sg(i,j).getMultiVectorOrientation())); - } - } - } - } - - - // Setup new solver - reduced_params = - Teuchos::rcp(new 
Teuchos::ParameterList(*solver_params)); - Teuchos::ParameterList& red_sg_params = - reduced_params->sublist("Stochastic Galerkin"); - red_sg_params.sublist("Basis").set("Stochastic Galerkin Basis", - red_basis); - red_sg_params.sublist("Quadrature").set("Stochastic Galerkin Quadrature", - red_quad); - if (red_sg_params.sublist("Expansion").isParameter("Stochastic Galerkin Expansion")) - red_sg_params.sublist("Expansion").remove("Stochastic Galerkin Expansion"); - if (red_sg_params.isParameter("Triple Product Tensor")) - red_sg_params.remove("Triple Product Tensor"); - Teuchos::RCP reduced_piro_solver = - Teuchos::rcp(new Piro::Epetra::StokhosSolver(reduced_params, comm)); - reduced_piro_solver->setup(model, observers[model_index]); - reduced_solver = reduced_piro_solver; - - if (reduced_inargs.supports(IN_ARG_sg_basis)) - reduced_inargs.set_sg_basis(red_basis); - if (reduced_inargs.supports(IN_ARG_sg_quadrature)) - reduced_inargs.set_sg_quadrature(red_quad); - if (reduced_inargs.supports(IN_ARG_sg_expansion)) - reduced_inargs.set_sg_expansion(red_sg_params.sublist("Expansion").get< Teuchos::RCP< Stokhos::OrthogPolyExpansion > >("Stochastic Galerkin Expansion")); -} - -void -Piro::Epetra::NECoupledModelEvaluator:: -do_dimension_projection( - int model_index, - const InArgs& inArgs, - const InArgs& reduced_inargs, - const OutArgs& reduced_outargs, - OutArgs& solver_outargs) const -{ - TEUCHOS_FUNC_TIME_MONITOR("NECoupledModelEvaluator -- dimension projection"); - - // Make sure there is something to do - InArgs::sg_const_vector_t x_sg; - if (supports_x_sg) - x_sg = inArgs.get_x_sg(); - if (!reduce_dimension[model_index] || x_sg == Teuchos::null) - return; - - Teuchos::RCP > basis = - inArgs.get_sg_basis(); - Teuchos::RCP > quad = - inArgs.get_sg_quadrature(); - Teuchos::RCP > red_basis = - Teuchos::rcp_dynamic_cast >(reduced_inargs.get_sg_basis()); - - for (int i=0; itransformToOriginalBasis( - (*g_red)[0].Values(), - (*g_sg)[0].Values(), - 
g_red->coefficientMap()->NumMyElements(), - true); - } - } - - // dg/dx_sg - if (!solver_outargs.supports(OUT_ARG_DgDx_sg, i).none()) { - Teuchos::RCP dgdx_sg = - solver_outargs.get_DgDx_sg(i).getMultiVector(); - if (dgdx_sg != Teuchos::null) { - Teuchos::RCP dgdx_red = - reduced_outargs.get_DgDx_sg(i).getMultiVector(); - - // transformToOriginalBasis() needs the entries for each pce - // coefficient stored contiguously. This isn't the case for the - // full multivector (each column along with all of its pce - // coefficients is stored in one contiguous chunk). Thus we need - // to transform each column individually - int ncol = dgdx_red->numVectors(); - for (int col=0; coltransformToOriginalBasis( - (*dgdx_red)[0](col)->Values(), - (*dgdx_sg)[0](col)->Values(), - dgdx_red->coefficientMap()->NumMyElements(), - true); - } - } - - // dg/dp_sg - for (int j=0; j dgdp_sg = - solver_outargs.get_DgDp_sg(i,j).getMultiVector(); - if (dgdp_sg != Teuchos::null) { - Teuchos::RCP dgdp_red = - reduced_outargs.get_DgDp_sg(i,j).getMultiVector(); - - // transformToOriginalBasis() needs the entries for each pce - // coefficient stored contiguously. This isn't the case for the - // full multivector (each column along with all of its pce - // coefficients is stored in one contiguous chunk). 
Thus we need - // to transform each column individually - int ncol = dgdp_red->numVectors(); - for (int col=0; coltransformToOriginalBasis( - (*dgdp_red)[0](col)->Values(), - (*dgdp_sg)[0](col)->Values(), - dgdp_red->coefficientMap()->NumMyElements(), - true); - } - } - } - } -} - - -void -Piro::Epetra::ParamToResponseNetworkModel:: -evalModel( - const Teuchos::Array& model_inargs, - const Teuchos::Array& model_outargs, - const EpetraExt::ModelEvaluator::InArgs& network_inargs, - const EpetraExt::ModelEvaluator::OutArgs& network_outargs, - const Teuchos::Array& n_p, - const Teuchos::Array& n_g, - const Teuchos::Array< Teuchos::RCP >& p, - const Teuchos::Array< Teuchos::RCP >& g, - const Teuchos::Array< Teuchos::RCP >& dgdp, - const Teuchos::Array& dgdp_layout, - const Teuchos::Array& p_sg, - const Teuchos::Array& g_sg, - const Teuchos::Array >& dgdp_sg, - const Teuchos::Array& dgdp_sg_layout) const -{ - - // f - Teuchos::RCP f = network_outargs.get_f(); - if (f != Teuchos::null) { - f->PutScalar(0.0); - for (int i=0; i W = network_outargs.get_W(); - if (W != Teuchos::null) { - Teuchos::RCP W_crs = - Teuchos::rcp_dynamic_cast(W, true); - W_crs->PutScalar(0.0); - int row, col; - double val; - for (int i=0; iReplaceGlobalValues(row, 1, &val, &col); - - // dg_2/dp_2 part - for (int j=0; jReplaceGlobalValues(row, 1, &val, &col); - } - } - for (int i=0; iReplaceGlobalValues(row, 1, &val, &col); - - // dg_1/dp_1 part - for (int j=0; jReplaceGlobalValues(row, 1, &val, &col); - } - } - } - - // f_sg - if (network_outargs.supports(EpetraExt::ModelEvaluator::OUT_ARG_f_sg)) { - EpetraExt::ModelEvaluator::OutArgs::sg_vector_t f_sg = - network_outargs.get_f_sg(); - if (f_sg != Teuchos::null) { - // std::cout << "g_sg[0] = " << *g_sg[0] << std::endl; - // std::cout << "g_sg[1] = " << *g_sg[1] << std::endl; - f_sg->init(0.0); - for (int block=0; blocksize(); block++) { - for (int i=0; i& model_inargs, - const Teuchos::Array& model_outargs, - const 
EpetraExt::ModelEvaluator::InArgs& network_inargs, - const EpetraExt::ModelEvaluator::OutArgs& network_outargs, - const Teuchos::Array& n_p, - const Teuchos::Array& n_g, - const Teuchos::Array< Teuchos::RCP >& p, - const Teuchos::Array< Teuchos::RCP >& g, - const Teuchos::Array< Teuchos::RCP >& dgdp, - const Teuchos::Array& dgdp_layout, - const Teuchos::Array& p_sg, - const Teuchos::Array& g_sg, - const Teuchos::Array >& dgdp_sg, - const Teuchos::Array& dgdp_sg_layout) const = 0; - - }; - - class ParamToResponseNetworkModel : - public AbstractNetworkModel { - - public: - - //! Constructor - ParamToResponseNetworkModel() {} - - //! Destructor - virtual ~ParamToResponseNetworkModel() {} - - //! evaluate model - virtual void evalModel( - const Teuchos::Array& model_inargs, - const Teuchos::Array& model_outargs, - const EpetraExt::ModelEvaluator::InArgs& network_inargs, - const EpetraExt::ModelEvaluator::OutArgs& network_outargs, - const Teuchos::Array& n_p, - const Teuchos::Array& n_g, - const Teuchos::Array< Teuchos::RCP >& p, - const Teuchos::Array< Teuchos::RCP >& g, - const Teuchos::Array< Teuchos::RCP >& dgdp, - const Teuchos::Array& dgdp_layout, - const Teuchos::Array& p_sg, - const Teuchos::Array& g_sg, - const Teuchos::Array >& dgdp_sg, - const Teuchos::Array& dgdp_sg_layout) const; - - }; - - class NECoupledModelEvaluator : - public EpetraExt::ModelEvaluator, - public Teuchos::VerboseObject { - public: - - /** \brief . */ - NECoupledModelEvaluator( - const Teuchos::Array >& models, - const Teuchos::Array >& piroParams, - const Teuchos::RCP& network_model, - const Teuchos::RCP& params, - const Teuchos::RCP& comm, - const Teuchos::Array< Teuchos::RCP >& observers = - Teuchos::Array >()); - - /** \name Overridden from EpetraExt::ModelEvaluator . */ - //@{ - - /** \brief . */ - Teuchos::RCP get_x_map() const; - /** \brief . */ - Teuchos::RCP get_f_map() const; - /** \brief . */ - Teuchos::RCP get_x_init() const; - /** \brief . 
*/ - Teuchos::RCP get_p_map(int l) const; - /** \brief . */ - Teuchos::RCP get_g_map(int j) const; - //! Return array of parameter names - Teuchos::RCP > get_p_names(int l) const; - /** \brief . */ - Teuchos::RCP get_p_init(int l) const; - /** \brief . */ - Teuchos::RCP create_W() const; - /** \brief . */ - InArgs createInArgs() const; - /** \brief . */ - OutArgs createOutArgs() const; - /** \brief . */ - void evalModel( const InArgs& inArgs, const OutArgs& outArgs ) const; - - //@} - - protected: - - void do_dimension_reduction( - int model_index, - const InArgs& inArgs, - const InArgs& solver_inargs, - const OutArgs& solver_outargs, - const Teuchos::RCP& model, - const Teuchos::RCP& solver, - const Teuchos::RCP& solver_params, - InArgs& reduced_inargs, - OutArgs& reduced_outargs, - Teuchos::RCP& reduced_solver, - Teuchos::RCP& reduced_params) const; - - void do_dimension_projection( - int model_index, - const InArgs& inArgs, - const InArgs& reduced_inargs, - const OutArgs& reduced_outargs, - OutArgs& solver_outargs) const; - - private: - - // ///////////////////////////////////// - // Private member data - - typedef Stokhos::StandardStorage StorageType; - - Teuchos::Array > models; - Teuchos::Array< Teuchos::RCP > piroParams; - Teuchos::RCP network_model; - Teuchos::RCP params; - Teuchos::RCP comm; - Teuchos::Array< Teuchos::RCP > observers; - - Teuchos::Array< Teuchos::RCP > solvers; - Teuchos::Array< Teuchos::RCP > sgSolvers; - int n_models; - Teuchos::Array p_indices; - Teuchos::Array g_indices; - Teuchos::Array n_p; - Teuchos::Array n_g; - Teuchos::Array num_params; - Teuchos::Array num_responses; - int num_params_total; - int num_responses_total; - bool supports_W; - Teuchos::Array< std::pair > param_map; - Teuchos::Array< std::pair > response_map; - - mutable Teuchos::Array solver_inargs; - mutable Teuchos::Array solver_outargs; - - Teuchos::Array< Teuchos::RCP > p_maps; - Teuchos::Array< Teuchos::RCP > g_maps; - - Teuchos::RCP x_map; - Teuchos::RCP f_map; 
- Teuchos::RCP x_overlap_map; - Teuchos::RCP f_overlap_map; - Teuchos::RCP x_importer; - Teuchos::RCP f_exporter; - Teuchos::RCP x_overlap; - Teuchos::RCP f_overlap; - Teuchos::RCP W_graph; - Teuchos::RCP W_overlap_graph; - Teuchos::RCP W_overlap; - Teuchos::RCP x_init; - - Teuchos::Array< Teuchos::RCP > p; - Teuchos::Array< Teuchos::RCP > g; - Teuchos::Array< EDerivativeMultiVectorOrientation > dgdp_layout; - Teuchos::Array< Teuchos::RCP > dgdp; - - // Stochastic Galerkin data - bool supports_x_sg; - bool supports_f_sg; - bool supports_W_sg; - mutable Teuchos::RCP sg_overlap_map; - mutable OutArgs::sg_vector_t x_sg_overlap; - mutable OutArgs::sg_vector_t f_sg_overlap; - mutable OutArgs::sg_operator_t W_sg_overlap; - mutable Teuchos::Array p_sg; - mutable Teuchos::Array g_sg; - mutable Teuchos::Array dgdp_sg_layout; - mutable Teuchos::Array > dgdp_sg; - - Teuchos::Array reduce_dimension; - mutable Teuchos::RCP > st_quad; - }; - - } - -} - -#endif diff --git a/packages/piro/src/Piro_Epetra_StokhosMPSolver.cpp b/packages/piro/src/Piro_Epetra_StokhosMPSolver.cpp deleted file mode 100644 index 930b9b41aac2..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosMPSolver.cpp +++ /dev/null @@ -1,230 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. 
-// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#include "Piro_Epetra_StokhosMPSolver.hpp" - -#include "Piro_Epetra_SolverFactory.hpp" -#include "Piro_Provider.hpp" - -#include "Stokhos_Epetra.hpp" -#include "NOX_Epetra_ModelEvaluatorInterface.H" -#include "NOX_Epetra_LinearSystem_Stratimikos.H" -#include "NOX_Epetra_LinearSystem_MPBD.hpp" - -Piro::Epetra::StokhosMPSolver:: -StokhosMPSolver(const Teuchos::RCP& piroParams_, - const Teuchos::RCP& mpParams_, - const Teuchos::RCP& globalComm, - int block_size, int num_spatial_procs) : - piroParams(piroParams_), - mpParams(mpParams_), - num_mp(block_size) -{ - product_comm = - Stokhos::buildMultiComm(*globalComm, block_size, num_spatial_procs); -} - -Piro::Epetra::StokhosMPSolver::~StokhosMPSolver() -{ -} - -void -Piro::Epetra::StokhosMPSolver:: -setup(const Teuchos::RCP& model, - const Teuchos::RCP& noxObserver) -{ - Teuchos::RCP mp_comm = - Stokhos::getStochasticComm(product_comm); - Teuchos::RCP mp_block_map = - Teuchos::rcp(new Epetra_Map(num_mp, 0, *mp_comm)); - mp_model = model; - - // Turn mp_model into an MP-nonlinear problem - mp_nonlin_model = - Teuchos::rcp(new Stokhos::MPModelEvaluator(mp_model, product_comm, - mp_block_map, mpParams)); - - Piro::Epetra::SolverFactory solverFactory; - if (piroParams->get("Solver Type") == "NOX") - { - bool use_mpbd_solver = mpParams->get("Use MPBD Solver", false); - Teuchos::RCP linsys; - Teuchos::RCP nox_interface; - if (use_mpbd_solver) { - nox_interface = - Teuchos::rcp(new NOX::Epetra::ModelEvaluatorInterface(mp_nonlin_model)); - Teuchos::RCP A = - mp_nonlin_model->create_W(); - Teuchos::RCP M = - mp_nonlin_model->create_WPrec()->PrecOp; - Teuchos::RCP iReq = - nox_interface; - Teuchos::RCP iJac = - nox_interface; - Teuchos::RCP iPrec = - nox_interface; - - Teuchos::ParameterList& noxParams = piroParams->sublist("NOX"); - Teuchos::ParameterList& printParams = 
noxParams.sublist("Printing"); - Teuchos::ParameterList& newtonParams = - noxParams.sublist("Direction").sublist("Newton"); - Teuchos::ParameterList& noxstratlsParams = - newtonParams.sublist("Stratimikos Linear Solver"); - Teuchos::ParameterList& mpbdParams = - mpParams->sublist("MPBD Linear Solver"); - mpbdParams.sublist("Deterministic Solver Parameters") = - noxstratlsParams; - Teuchos::RCP inner_A = model->create_W(); - Teuchos::RCP inner_nox_interface = - Teuchos::rcp(new NOX::Epetra::ModelEvaluatorInterface(model)); - Teuchos::RCP inner_iReq = - inner_nox_interface; - Teuchos::RCP inner_iJac = - inner_nox_interface; - Teuchos::RCP inner_u = model->get_x_init(); - Teuchos::RCP inner_linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemStratimikos( - printParams, - noxstratlsParams, - inner_iJac, inner_A, *inner_u)); - linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemMPBD(printParams, - mpbdParams, - inner_linsys, - iReq, iJac, A, - model->get_x_map())); - } - - solverFactory.setSource(nox_interface); - solverFactory.setSource(linsys); - } - // Create solver to map p -> g - mp_solver = solverFactory.createSolver(piroParams, mp_nonlin_model); - - // Create MP inverse model evaluator to map p_mp -> g_mp - Teuchos::Array mp_p_index_map = - mp_nonlin_model->get_p_mp_map_indices(); - Teuchos::Array mp_g_index_map = - mp_nonlin_model->get_g_mp_map_indices(); - Teuchos::Array< Teuchos::RCP > base_g_maps = - mp_nonlin_model->get_g_mp_base_maps(); - mp_g_index_map.push_back(base_g_maps.size()); - base_g_maps.push_back(model->get_x_map()); - mp_inverse_solver = - Teuchos::rcp(new Stokhos::MPInverseModelEvaluator(mp_solver, - mp_p_index_map, - mp_g_index_map, - base_g_maps)); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver:: -getSpatialComm() const -{ - return Stokhos::getSpatialComm(product_comm); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver:: -getStochasticComm() const -{ - return Stokhos::getStochasticComm(product_comm); -} - -Teuchos::RCP 
-Piro::Epetra::StokhosMPSolver:: -getGlobalMultiComm() const -{ - return product_comm; -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::get_x_map() const -{ - return mp_inverse_solver->get_x_map(); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::get_f_map() const -{ - return mp_inverse_solver->get_f_map(); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::get_p_map(int l) const -{ - return mp_inverse_solver->get_p_map(l); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::get_g_map(int j) const -{ - return mp_inverse_solver->get_g_map(j); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::get_x_init() const -{ - return mp_inverse_solver->get_x_init(); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::get_p_init(int l) const -{ - return mp_nonlin_model->get_p_init(l); -} - -EpetraExt::ModelEvaluator::InArgs -Piro::Epetra::StokhosMPSolver::createInArgs() const -{ - return mp_inverse_solver->createInArgs(); -} - -EpetraExt::ModelEvaluator::OutArgs -Piro::Epetra::StokhosMPSolver::createOutArgs() const -{ - return mp_inverse_solver->createOutArgs(); -} - -void -Piro::Epetra::StokhosMPSolver::evalModel(const InArgs& inArgs, - const OutArgs& outArgs ) const -{ - mp_inverse_solver->evalModel(inArgs, outArgs); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::create_g_mp(int l, Epetra_DataAccess CV, - const Epetra_Vector* v) const -{ - OutArgs outargs = mp_nonlin_model->createOutArgs(); - int ng = outargs.Ng(); - //if (piroParams->get("Solver Type") == "NOX" && l == ng) { - if (l == ng) { - return mp_nonlin_model->create_x_mp(CV, v); - } - else - return mp_nonlin_model->create_g_mp(l, CV, v); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::create_g_mv_mp(int l, int num_vecs, - Epetra_DataAccess CV, - const Epetra_MultiVector* v) const -{ - OutArgs outargs = mp_nonlin_model->createOutArgs(); - int ng = outargs.Ng(); - //if (piroParams->get("Solver Type") == "NOX" && l == ng) { - if (l == ng) { - return mp_nonlin_model->create_x_mv_mp(num_vecs, CV, v); - } - else - 
return mp_nonlin_model->create_g_mv_mp(l, num_vecs, CV, v); -} - diff --git a/packages/piro/src/Piro_Epetra_StokhosMPSolver.hpp b/packages/piro/src/Piro_Epetra_StokhosMPSolver.hpp deleted file mode 100644 index 8634b5f687b3..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosMPSolver.hpp +++ /dev/null @@ -1,168 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef PIRO_EPETRA_MP_STOKHOS_SOLVER_H -#define PIRO_EPETRA_MP_STOKHOS_SOLVER_H - -#include "EpetraExt_ModelEvaluator.h" -#include "EpetraExt_MultiComm.h" - -#include "Stokhos_MPModelEvaluator.hpp" -#include "Stokhos_MPInverseModelEvaluator.hpp" - -#include "NOX_Epetra_Observer.H" - -namespace Piro { -namespace Epetra { - - /*! - * \brief An epetra model evaluator adapter for setting up a multi-point - * solver. - */ - class StokhosMPSolver : public EpetraExt::ModelEvaluator { - public: - - /** \name Constructors/initializers */ - //@{ - - //! Constructor - StokhosMPSolver(const Teuchos::RCP& piroParams, - const Teuchos::RCP& mpParams, - const Teuchos::RCP& globalComm, - int block_size, int num_spatial_procs); - - //! Get spatial comm - Teuchos::RCP getSpatialComm() const; - - //! Get stochastic comm - Teuchos::RCP getStochasticComm() const; - - //! Get global multi-comm - Teuchos::RCP getGlobalMultiComm() const; - - //! Setup rest of model evaluator - void setup( - const Teuchos::RCP& model, - const Teuchos::RCP& noxObserver = Teuchos::null); - - - //@} - - ~StokhosMPSolver(); - - - /** \name Overridden from EpetraExt::ModelEvaluator . */ - //@{ - - /** \brief . */ - Teuchos::RCP get_p_map(int l) const; - - /** \brief . */ - Teuchos::RCP get_g_map(int j) const; - - /** \brief . 
*/ - Teuchos::RCP get_p_init(int l) const; - - /** \brief . */ - // Teuchos::RCP create_W() const; - /** \brief . */ - EpetraExt::ModelEvaluator::InArgs createInArgs() const; - /** \brief . */ - EpetraExt::ModelEvaluator::OutArgs createOutArgs() const; - /** \brief . */ - void evalModel( const InArgs& inArgs, const OutArgs& outArgs ) const; - - //@} - - /** \name Accessors */ - //@{ - - Teuchos::RCP - get_mp_model() const { return mp_nonlin_model; } - - //! Set initial solution polynomial - void set_x_mp_init(const Stokhos::ProductEpetraVector& x_mp_in) { - mp_nonlin_model->set_x_mp_init(x_mp_in); - } - - //! Return initial MP x - Teuchos::RCP - get_x_mp_init() const { - return mp_nonlin_model->get_x_mp_init(); - } - - //! Set initial parameter polynomial - void set_p_mp_init(int i, const Stokhos::ProductEpetraVector& p_mp_in) { - mp_nonlin_model->set_p_mp_init(i, p_mp_in); - } - - //! Get initial parameter polynomial - Teuchos::RCP - get_p_mp_init(int l) const { - return mp_nonlin_model->get_p_mp_init(l); - } - - //! Create vector orthog poly using x map and owned mp map - Teuchos::RCP - create_x_mp(Epetra_DataAccess CV = Copy, - const Epetra_Vector* v = NULL) const { - return mp_nonlin_model->create_x_mp(CV, v); - } - - //! Create vector orthog poly using p map - Teuchos::RCP - create_p_mp(int l, Epetra_DataAccess CV = Copy, - const Epetra_Vector* v = NULL) const { - return mp_nonlin_model->create_p_mp(l, CV, v); - } - - //! Create multi-point vector using p map - Teuchos::RCP - create_p_mv_mp(int l, int num_vecs, Epetra_DataAccess CV = Copy, - const Epetra_MultiVector* v = NULL) const { - return mp_nonlin_model->create_p_mv_mp(l, num_vecs, CV, v); - } - - //! Create vector orthog poly using g map - Teuchos::RCP - create_g_mp(int l, Epetra_DataAccess CV = Copy, - const Epetra_Vector* v = NULL) const; - - //! 
Create multi-vector orthog poly using g map - Teuchos::RCP - create_g_mv_mp(int l, int num_vecs, Epetra_DataAccess CV = Copy, - const Epetra_MultiVector* v = NULL) const; - - //@} - - private: - - /** \brief . */ - Teuchos::RCP get_x_map() const; - /** \brief . */ - Teuchos::RCP get_f_map() const; - /** \brief . */ - Teuchos::RCP get_x_init() const; - - private: - - Teuchos::RCP piroParams; - Teuchos::RCP mpParams; - Teuchos::RCP product_comm; - Teuchos::RCP mp_model; - Teuchos::RCP mp_nonlin_model; - Teuchos::RCP mp_solver; - Teuchos::RCP mp_inverse_solver; - int num_mp; - - }; - -} -} -#endif diff --git a/packages/piro/src/Piro_Epetra_StokhosNOXObserver.cpp b/packages/piro/src/Piro_Epetra_StokhosNOXObserver.cpp deleted file mode 100644 index d92fc5d92571..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosNOXObserver.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. 
-// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#include "Piro_Epetra_StokhosNOXObserver.hpp" -#include "Stokhos_EpetraVectorOrthogPoly.hpp" - -Piro::Epetra::StokhosNOXObserver::StokhosNOXObserver ( - const Teuchos::RCP& noxObserver_, - const Teuchos::RCP >& basis_, const Teuchos::RCP& stoch_map_, - const Teuchos::RCP& spatial_map_, - const Teuchos::RCP& product_map_, - const Teuchos::RCP& product_comm_, - const Teuchos::RCP& importer_, - int save_moments_) : - noxObserver(noxObserver_), - basis(basis_), - stoch_map(stoch_map_), - spatial_map(spatial_map_), - product_map(product_map_), - product_comm(product_comm_), - importer(importer_), - numSGBlocks(basis->size()), - save_moments(save_moments_) -{ - //if (noxObserver == Teuchos::null) cout << "XXX1" << endl; - if (save_moments > 0) - moment = Teuchos::rcp(new Epetra_Vector(*spatial_map)); - if (product_map != Teuchos::null) - overlap_vec = Teuchos::rcp(new Epetra_Vector(*product_map)); -} - -void Piro::Epetra::StokhosNOXObserver::observeSolution( - const Epetra_Vector& solution) -{ - - if (noxObserver == Teuchos::null) - return; - - // Copy into block vector, so Block access is available - overlap_vec->Import(solution, *importer, Insert); - Stokhos::EpetraVectorOrthogPoly vec_poly( - basis, stoch_map, spatial_map, product_map, product_comm, View, - *overlap_vec); - if (save_moments <= 0) { - for (int i=0; i< numSGBlocks; i++) { - noxObserver->observeSolution(vec_poly[i], i); - } - } - else { - // Always write out first moment - vec_poly.computeMean(*moment); - noxObserver->observeSolution(*moment, 1); - if (save_moments >= 2) { - vec_poly.computeStandardDeviation(*moment); - noxObserver->observeSolution(*moment, 2); - } - } - - -} diff --git a/packages/piro/src/Piro_Epetra_StokhosNOXObserver.hpp b/packages/piro/src/Piro_Epetra_StokhosNOXObserver.hpp deleted file mode 100644 index 440197ed77d5..000000000000 --- 
a/packages/piro/src/Piro_Epetra_StokhosNOXObserver.hpp +++ /dev/null @@ -1,57 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef PIRO_EPETRA_STOKHOSNOXOBSERVER -#define PIRO_EPETRA_STOKHOSNOXOBSERVER - -#include "NOX_Epetra_Observer.H" -#include "EpetraExt_BlockVector.h" -#include "Epetra_Map.h" -#include "Teuchos_RCP.hpp" -#include "Stokhos_OrthogPolyBasis.hpp" -#include "EpetraExt_MultiComm.h" -#include "Epetra_Import.h" - -namespace Piro { -namespace Epetra { - -class StokhosNOXObserver : public NOX::Epetra::Observer -{ -public: - StokhosNOXObserver ( - const Teuchos::RCP& noxObserver_, - const Teuchos::RCP >& basis_, - const Teuchos::RCP& stoch_map_, - const Teuchos::RCP& spatial_map_, - const Teuchos::RCP& product_map_, - const Teuchos::RCP& product_comm_, - const Teuchos::RCP& importer_, - int save_moments_ = -1); - - void observeSolution(const Epetra_Vector& soln); - -private: - - Teuchos::RCP noxObserver; - Teuchos::RCP > basis; - Teuchos::RCP stoch_map; - Teuchos::RCP spatial_map; - Teuchos::RCP product_map; - Teuchos::RCP product_comm; - Teuchos::RCP importer; - const int numSGBlocks; - int save_moments; - Teuchos::RCP moment; - Teuchos::RCP overlap_vec; -}; - -} -} - -#endif //PIRO_EPETRA_STOKHOSNOXOBSERVER diff --git a/packages/piro/src/Piro_Epetra_StokhosSolver.cpp b/packages/piro/src/Piro_Epetra_StokhosSolver.cpp deleted file mode 100644 index 172bb9d46b3b..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosSolver.cpp +++ /dev/null @@ -1,155 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and 
the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#include "Piro_Epetra_StokhosSolver.hpp" -#include "Stokhos_Epetra.hpp" - -Piro::Epetra::StokhosSolver:: -StokhosSolver(const Teuchos::RCP& piroParams_, - const Teuchos::RCP& globalComm) : - piroParams(piroParams_), - sg_solver_factory(piroParams_, globalComm) -{ -} - -Piro::Epetra::StokhosSolver::~StokhosSolver() -{ -} - -void -Piro::Epetra::StokhosSolver:: -setup(const Teuchos::RCP& model, - const Teuchos::RCP& noxObserver) -{ - sg_nonlin_model = sg_solver_factory.createSGModel(model); - const Teuchos::RCP sg_observer = - sg_solver_factory.createSGObserver(noxObserver); - const Teuchos::RCP sg_block_solver = - sg_solver_factory.createSGSolver(sg_nonlin_model, sg_observer); - sg_solver = sg_solver_factory.createSGSolverAdapter(sg_block_solver); -} - -void -Piro::Epetra::StokhosSolver:: -resetSolverParameters(const Teuchos::ParameterList& new_solver_params) -{ - sg_solver_factory.resetSolverParameters(new_solver_params); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver:: -getSpatialComm() const -{ - return sg_solver_factory.getSpatialComm(); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver:: -getStochasticComm() const -{ - return sg_solver_factory.getStochasticComm(); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver:: -getGlobalMultiComm() const -{ - return sg_solver_factory.getGlobalMultiComm(); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::get_x_map() const -{ - return sg_solver->get_x_map(); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::get_f_map() const -{ - return sg_solver->get_f_map(); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::get_p_map(int l) const -{ - return sg_solver->get_p_map(l); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::get_g_map(int j) const -{ - return sg_solver->get_g_map(j); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::get_x_init() const -{ - return 
sg_solver->get_x_init(); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::get_p_init(int l) const -{ - return sg_solver->get_p_init(l); -} - -EpetraExt::ModelEvaluator::InArgs -Piro::Epetra::StokhosSolver::createInArgs() const -{ - return sg_solver->createInArgs(); -} - -EpetraExt::ModelEvaluator::OutArgs -Piro::Epetra::StokhosSolver::createOutArgs() const -{ - return sg_solver->createOutArgs(); -} - -void -Piro::Epetra::StokhosSolver::evalModel(const InArgs& inArgs, - const OutArgs& outArgs ) const -{ - sg_solver->evalModel(inArgs, outArgs); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::create_g_sg(int l, Epetra_DataAccess CV, - const Epetra_Vector* v) const -{ - OutArgs outargs = sg_nonlin_model->createOutArgs(); - int ng = outargs.Ng(); - Piro::Epetra::StokhosSolverFactory::SG_METHOD sg_method = - sg_solver_factory.getSGMethod(); - if (sg_method != Piro::Epetra::StokhosSolverFactory::SG_NI && - sg_method != Piro::Epetra::StokhosSolverFactory::SG_MPNI && - piroParams->get("Solver Type") == "NOX" && l == ng) { - return sg_nonlin_model->create_x_sg(CV, v); - } - else - return sg_nonlin_model->create_g_sg(l, CV, v); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::create_g_mv_sg(int l, int num_vecs, - Epetra_DataAccess CV, - const Epetra_MultiVector* v) const -{ - OutArgs outargs = sg_nonlin_model->createOutArgs(); - int ng = outargs.Ng(); - Piro::Epetra::StokhosSolverFactory::SG_METHOD sg_method = - sg_solver_factory.getSGMethod(); - if (sg_method != Piro::Epetra::StokhosSolverFactory::SG_NI && - sg_method != Piro::Epetra::StokhosSolverFactory::SG_MPNI && - piroParams->get("Solver Type") == "NOX" && l == ng) { - return sg_nonlin_model->create_x_mv_sg(num_vecs, CV, v); - } - else - return sg_nonlin_model->create_g_mv_sg(l, num_vecs, CV, v); -} - diff --git a/packages/piro/src/Piro_Epetra_StokhosSolver.hpp b/packages/piro/src/Piro_Epetra_StokhosSolver.hpp deleted file mode 100644 index 25c943338b3a..000000000000 --- 
a/packages/piro/src/Piro_Epetra_StokhosSolver.hpp +++ /dev/null @@ -1,154 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef PIRO_EPETRA_STOKHOS_SOLVER_H -#define PIRO_EPETRA_STOKHOS_SOLVER_H - -#include "EpetraExt_ModelEvaluator.h" -#include "Piro_Epetra_StokhosSolverFactory.hpp" - -namespace Piro { -namespace Epetra { - - class StokhosSolver : public EpetraExt::ModelEvaluator { - public: - - /** \name Constructors/initializers */ - //@{ - - //! Constructor - StokhosSolver(const Teuchos::RCP& piroParams, - const Teuchos::RCP& globalComm); - - //! Get spatial comm - Teuchos::RCP getSpatialComm() const; - - //! Get stochastic comm - Teuchos::RCP getStochasticComm() const; - - //! Get global multi-comm - Teuchos::RCP getGlobalMultiComm() const; - - //! Setup rest of model evaluator - void setup(const Teuchos::RCP& model, - const Teuchos::RCP& noxObserver = Teuchos::null); - - //! Reset Stokhos solver parameters - void resetSolverParameters(const Teuchos::ParameterList& new_solver_params); - - - //@} - - ~StokhosSolver(); - - - /** \name Overridden from EpetraExt::ModelEvaluator . */ - //@{ - - /** \brief . */ - Teuchos::RCP get_p_map(int l) const; - - /** \brief . */ - Teuchos::RCP get_g_map(int j) const; - - /** \brief . */ - Teuchos::RCP get_p_init(int l) const; - - /** \brief . */ - // Teuchos::RCP create_W() const; - /** \brief . */ - EpetraExt::ModelEvaluator::InArgs createInArgs() const; - /** \brief . */ - EpetraExt::ModelEvaluator::OutArgs createOutArgs() const; - /** \brief . 
*/ - void evalModel( const InArgs& inArgs, const OutArgs& outArgs ) const; - - //@} - - /** \name Accessors */ - //@{ - - Teuchos::RCP > - getBasis() const { return sg_solver_factory.getBasis(); } - - Teuchos::RCP > - getQuad() const { return sg_solver_factory.getQuad(); } - - Teuchos::RCP - get_sg_model() const { return sg_nonlin_model; } - - //! Set initial solution polynomial - void set_x_sg_init(const Stokhos::EpetraVectorOrthogPoly& x_sg_in) { - sg_nonlin_model->set_x_sg_init(x_sg_in); - } - - //! Return initial SG x - Teuchos::RCP - get_x_sg_init() const { - return sg_nonlin_model->get_x_sg_init(); - } - - //! Set initial parameter polynomial - void set_p_sg_init(int i, const Stokhos::EpetraVectorOrthogPoly& p_sg_in) { - sg_nonlin_model->set_p_sg_init(i, p_sg_in); - } - - //! Get initial parameter polynomial - Teuchos::RCP - get_p_sg_init(int l) const { - return sg_nonlin_model->get_p_sg_init(l); - } - - //! Create vector orthog poly using x map and owned sg map - Teuchos::RCP - create_x_sg(Epetra_DataAccess CV = Copy, - const Epetra_Vector* v = NULL) const { - return sg_nonlin_model->create_x_sg(CV, v); - } - - //! Create vector orthog poly using p map - Teuchos::RCP - create_p_sg(int l, Epetra_DataAccess CV = Copy, - const Epetra_Vector* v = NULL) const { - return sg_nonlin_model->create_p_sg(l, CV, v); - } - - //! Create vector orthog poly using g map - Teuchos::RCP - create_g_sg(int l, Epetra_DataAccess CV = Copy, - const Epetra_Vector* v = NULL) const; - - //! Create multi-vector orthog poly using g map - Teuchos::RCP - create_g_mv_sg(int l, int num_vecs, Epetra_DataAccess CV = Copy, - const Epetra_MultiVector* v = NULL) const; - - //@} - - private: - - /** \brief . */ - Teuchos::RCP get_x_map() const; - /** \brief . */ - Teuchos::RCP get_f_map() const; - /** \brief . 
*/ - Teuchos::RCP get_x_init() const; - - private: - - Teuchos::RCP piroParams; - Piro::Epetra::StokhosSolverFactory sg_solver_factory; - Teuchos::RCP sg_nonlin_model; - Teuchos::RCP sg_solver; - - }; - -} -} -#endif diff --git a/packages/piro/src/Piro_Epetra_StokhosSolverFactory.cpp b/packages/piro/src/Piro_Epetra_StokhosSolverFactory.cpp deleted file mode 100644 index c54f76ccec49..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosSolverFactory.cpp +++ /dev/null @@ -1,552 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#include "Piro_Epetra_StokhosSolver.hpp" - -#include "Piro_Epetra_SolverFactory.hpp" -#include "Piro_Provider.hpp" - -#include "Stokhos.hpp" -#include "Stokhos_Epetra.hpp" -#include "Stokhos_SGModelEvaluator.hpp" -#include "Stokhos_SGModelEvaluator_Interlaced.hpp" -#include "Stokhos_SGModelEvaluator_Adaptive.hpp" - -#include "Teuchos_VerboseObjectParameterListHelpers.hpp" - -#include "NOX_Epetra_ModelEvaluatorInterface.H" -#include "NOX_Epetra_LinearSystem_Stratimikos.H" -#include "NOX_Epetra_LinearSystem_MPBD.hpp" -#include "NOX_Epetra_LinearSystem_SGGS.hpp" -#include "NOX_Epetra_LinearSystem_SGJacobi.hpp" - -Piro::Epetra::StokhosSolverFactory:: -StokhosSolverFactory(const Teuchos::RCP& piroParams_, - const Teuchos::RCP& globalComm) : - piroParams(piroParams_) -{ - // Setup VerboseObject - Teuchos::readVerboseObjectSublist(piroParams.get(), this); - Teuchos::RCP out = this->getOStream(); - Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel(); - - // Validate parameters - Teuchos::ParameterList& sgParams = - piroParams->sublist("Stochastic Galerkin"); - sgParams.validateParameters(*getValidSGParameters(),0); - - sgSolverParams = - 
//Teuchos::rcp(&(sgParams.sublist("SG Solver Parameters")),false); - Teuchos::rcp(new Teuchos::ParameterList(sgParams.sublist("SG Solver Parameters"))); - - // Get SG expansion type - std::string sg_type = sgParams.get("SG Method", "Direct"); - if (sg_type == "Direct" || sg_type == "AD") - sg_method = SG_AD; - else if (sg_type == "Global") - sg_method = SG_GLOBAL; - else if (sg_type == "Non-intrusive") - sg_method = SG_NI; - else if (sg_type == "Multi-point Non-intrusive") - sg_method = SG_MPNI; - else - TEUCHOS_TEST_FOR_EXCEPTION( - true, Teuchos::Exceptions::InvalidParameter, - std::endl << "Error! Piro::Epetra::StokhosSolverFactor(): " << - "Invalid SG Method " << sg_type << std::endl); - - // Get SG ME type - std::string sg_me_type = sgParams.get("SG ModelEvaluator Method", "Default"); - if (sg_me_type == "Default") - sg_me_method = SG_ME_DEFAULT; - else if (sg_me_type == "Interlaced") - sg_me_method = SG_ME_INTERLACED; - else if (sg_me_type == "Adaptive") - sg_me_method = SG_ME_ADAPTIVE; - else - TEUCHOS_TEST_FOR_EXCEPTION( - true, Teuchos::Exceptions::InvalidParameter, - std::endl << "Error! 
Piro::Epetra::StokhosSolverFactor(): " << - "Invalid SG ModelEvaluator Method " << sg_me_type << std::endl); - - // Create SG basis - basis = Stokhos::BasisFactory::create(sgParams); - if (verbLevel != Teuchos::VERB_NONE) - *out << "Basis size = " << basis->size() << std::endl; - - // Create SG Quadrature - Teuchos::ParameterList& expParams = sgParams.sublist("Expansion"); - std::string exp_type = expParams.get("Type", "Quadrature"); - if (exp_type == "Quadrature" || - sg_method == SG_GLOBAL || - sg_method == SG_NI || - sg_method == SG_MPNI) { - quad = Stokhos::QuadratureFactory::create(sgParams); - if (verbLevel != Teuchos::VERB_NONE) - *out << "Quadrature size = " << quad->size() << std::endl; - } - - // Create SG expansion & triple-product - if (sg_method != SG_NI && sg_method != SG_MPNI) { - expansion = - Stokhos::ExpansionFactory::create(sgParams); - Cijk = - sgParams.get< Teuchos::RCP > >("Triple Product Tensor"); - } - - // Create stochastic parallel distribution - int num_spatial_procs = - sgParams.get("Number of Spatial Processors", -1); - int num_stoch_blocks; - if (sg_method == SG_MPNI) - num_stoch_blocks = quad->size(); - else - num_stoch_blocks = basis->size(); - sg_comm = - Stokhos::buildMultiComm(*globalComm, num_stoch_blocks, num_spatial_procs); - sg_parallel_data = - Teuchos::rcp(new Stokhos::ParallelData(basis, Cijk, sg_comm, sgParams)); - -} - -void -Piro::Epetra::StokhosSolverFactory:: -resetSolverParameters(const Teuchos::ParameterList& new_solver_params) -{ - *sgSolverParams = new_solver_params; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -createSGModel(const Teuchos::RCP& model_) -{ - Teuchos::ParameterList& sgParams = - piroParams->sublist("Stochastic Galerkin"); - sgParams.sublist("Basis"); - - model = model_; - - // Set up stochastic Galerkin model - Teuchos::RCP sg_model; - if (sg_method == SG_AD) { - sg_model = model; - } - else if (sg_method == SG_MPNI) { - int num_mp = quad->size(); - Teuchos::RCP mp_comm = - 
Stokhos::getStochasticComm(sg_comm); - Teuchos::RCP mp_block_map = - Teuchos::rcp(new Epetra_Map(num_mp, 0, *mp_comm)); - Teuchos::RCP mp_model = model; - - // Turn mp_model into an MP-nonlinear problem - Teuchos::RCP mpParams = - Teuchos::rcp(&(sgParams.sublist("MP Solver Parameters")),false); - Teuchos::RCP mp_nonlinear_model = - Teuchos::rcp(new Stokhos::MPModelEvaluator(mp_model, sg_comm, - mp_block_map, mpParams)); - - bool use_mpbd_solver = mpParams->get("Use MPBD Solver", false); - Teuchos::RCP linsys; - Teuchos::RCP nox_interface; - if (use_mpbd_solver) { - nox_interface = - Teuchos::rcp(new NOX::Epetra::ModelEvaluatorInterface(mp_nonlinear_model)); - Teuchos::RCP A = - mp_nonlinear_model->create_W(); - Teuchos::RCP M = - mp_nonlinear_model->create_WPrec()->PrecOp; - Teuchos::RCP iReq = - nox_interface; - Teuchos::RCP iJac = - nox_interface; - Teuchos::RCP iPrec = - nox_interface; - - Teuchos::ParameterList& noxParams = piroParams->sublist("NOX"); - Teuchos::ParameterList& printParams = noxParams.sublist("Printing"); - Teuchos::ParameterList& newtonParams = - noxParams.sublist("Direction").sublist("Newton"); - Teuchos::ParameterList& noxstratlsParams = - newtonParams.sublist("Stratimikos Linear Solver"); - Teuchos::ParameterList& mpbdParams = - mpParams->sublist("MPBD Linear Solver"); - mpbdParams.sublist("Deterministic Solver Parameters") = - noxstratlsParams; - Teuchos::RCP inner_A = model->create_W(); - Teuchos::RCP inner_nox_interface = - Teuchos::rcp(new NOX::Epetra::ModelEvaluatorInterface(model)); - Teuchos::RCP inner_iReq = - inner_nox_interface; - Teuchos::RCP inner_iJac = - inner_nox_interface; - Teuchos::RCP inner_u = model->get_x_init(); - Teuchos::RCP inner_linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemStratimikos( - printParams, - noxstratlsParams, - inner_iJac, inner_A, *inner_u)); - linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemMPBD(printParams, - mpbdParams, - inner_linsys, - iReq, iJac, A, - model->get_x_map())); - } - - 
Piro::Epetra::SolverFactory solverFactory; - solverFactory.setSource(nox_interface); - solverFactory.setSource(linsys); - - // Create solver to map p -> g - const Teuchos::RCP mp_solver - = solverFactory.createSolver(piroParams, mp_nonlinear_model); - - // Create MP inverse model evaluator to map p_mp -> g_mp - Teuchos::Array mp_p_index_map = - mp_nonlinear_model->get_p_mp_map_indices(); - Teuchos::Array mp_g_index_map = - mp_nonlinear_model->get_g_mp_map_indices(); - Teuchos::Array< Teuchos::RCP > base_g_maps = - mp_nonlinear_model->get_g_mp_base_maps(); - mp_g_index_map.push_back(base_g_maps.size()); - base_g_maps.push_back(model->get_x_map()); - Teuchos::RCP mp_inverse_solver = - Teuchos::rcp(new Stokhos::MPInverseModelEvaluator(mp_solver, - mp_p_index_map, - mp_g_index_map, - base_g_maps)); - - // Create MP-based SG Quadrature model evaluator to calculate g_sg - sg_model = - Teuchos::rcp(new Stokhos::SGQuadMPModelEvaluator(mp_inverse_solver, - sg_comm, - mp_block_map)); - } - else { - Teuchos::RCP underlying_model; - if (sg_method == SG_GLOBAL) { - underlying_model = model; - } else { - Piro::Epetra::SolverFactory solverFactory; - underlying_model = solverFactory.createSolver(piroParams, model); - } - sg_model = - Teuchos::rcp(new Stokhos::SGQuadModelEvaluator(underlying_model)); - } - - // Set up SG nonlinear model - if (sg_me_method == SG_ME_DEFAULT) - sg_nonlin_model = - Teuchos::rcp(new Stokhos::SGModelEvaluator(sg_model, basis, - quad, expansion, - sg_parallel_data, - sgSolverParams)); - else if (sg_me_method == SG_ME_INTERLACED) - sg_nonlin_model = - Teuchos::rcp(new Stokhos::SGModelEvaluator_Interlaced(sg_model, basis, - quad, expansion, - sg_parallel_data, - sgSolverParams)); - else { - - // Get row basis vector - Teuchos::ParameterList& sgAdaptParams = - piroParams->sublist("Stochastic Galerkin").sublist("Adaptivity"); - typedef Teuchos::RCP< std::vector > > > row_basis_vec_type; - row_basis_vec_type row_basis_vec ; - if 
(sgAdaptParams.isParameter("Adaptive Basis Vector")) - row_basis_vec = - sgAdaptParams.get("Adaptive Basis Vector"); - - // If it isn't supplied, fill it with a uniform basis - if (row_basis_vec == Teuchos::null) { - row_basis_vec = - Teuchos::rcp(new std::vector > >(sg_model->get_x_map()->NumMyElements(), - Teuchos::rcp_dynamic_cast >(basis))); - } - - sg_nonlin_model = - Teuchos::rcp(new Stokhos::SGModelEvaluator_Adaptive(sg_model, basis, - *row_basis_vec, - quad, expansion, - sg_parallel_data, - false,-1, - sgSolverParams)); - } - - // Set up stochastic parameters - // One sublist for each stochastic parameter *vector*, and each parameter - // vector can provide an initial set of expansion coefficients in the basis. - // This decouples the stochastic parameters from the SG basis allowing e.g., - // more stochastic parameters than fundamental r.v.'s in the basis - // (for correlation) or fewer. - Teuchos::ParameterList& sgParameters = sgParams.sublist("SG Parameters"); - bool set_initial_params = sgParameters.get("Set Initial SG Parameters", true); - if (set_initial_params) { - int num_param_vectors = - sgParameters.get("Number of SG Parameter Vectors", 1); - Teuchos::Array point(basis->dimension(), 1.0); - Teuchos::Array basis_vals(basis->size()); - basis->evaluateBases(point, basis_vals); - int idx=0; - for (int i=0; i sg_p = - sg_nonlin_model->create_p_sg(p_vec); - - // Initalize sg parameter vector - int num_params = sg_p->coefficientMap()->NumMyElements(); - for (int j=0; j initial_p_vals; - initial_p_vals = pList.get(ss2.str(),initial_p_vals); - if (initial_p_vals.size() == 0) { - // Default to mean-zero linear expansion, ie, p_j = \xi_j, - // by setting term j+1 to 1 (unnormalized) - (*sg_p)[idx+1][j] = 1.0 / basis_vals[idx+1]; - } - else - for (Teuchos::Array::size_type l=0; lset_p_sg_init(p_vec, *sg_p); - } - } - - // Setup stochastic initial guess - if (sg_method != SG_NI && sg_method != SG_MPNI) { - Teuchos::RCP sg_x = - sg_nonlin_model->create_x_sg(); 
- sg_x->init(0.0); - if (sg_x->myGID(0)) - (*sg_x)[0] = *(model->get_x_init()); - sg_nonlin_model->set_x_sg_init(*sg_x); - } - - return sg_nonlin_model; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -createSGObserver(const Teuchos::RCP& noxObserver) -{ - // Set up Observer to call noxObserver for each vector block - Teuchos::RCP sgnoxObserver; - - Teuchos::ParameterList& sgParams = piroParams->sublist("Stochastic Galerkin"); - if (noxObserver != Teuchos::null && sg_method != SG_NI && sg_method != SG_MPNI) { - int save_moments = sgParams.get("Save Moments",-1); - sgnoxObserver = - Teuchos::rcp(new Piro::Epetra::StokhosNOXObserver( - noxObserver, basis, - sg_nonlin_model->get_overlap_stochastic_map(), - model->get_x_map(), - sg_nonlin_model->get_x_sg_overlap_map(), - sg_comm, sg_nonlin_model->get_x_sg_importer(), save_moments)); - } - - return sgnoxObserver; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -createSGSolver(const Teuchos::RCP& sg_model, - const Teuchos::RCP& sg_observer) -{ - // Get SG solver type - std::string solve_type = sgSolverParams->get("SG Solver Algorithm", "Krylov"); - SG_SOLVER solve_method; - if (solve_type == "Krylov") - solve_method = SG_KRYLOV; - else if (solve_type == "Gauss-Seidel") - solve_method = SG_GS; - else if (solve_type == "Jacobi") - solve_method = SG_JACOBI; - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Teuchos::Exceptions::InvalidParameter, - std::endl << "Error! 
ENAT_SGNOXSolver(): " << - "Invalid Solver Algorithm " << solve_type << std::endl); - - Teuchos::RCP sg_block_solver; - if (sg_method != SG_NI && sg_method != SG_MPNI) { - Piro::Epetra::SolverFactory solverFactory; - - Teuchos::RCP sg_linsys = Teuchos::null; - if (solve_method==SG_GS || solve_method==SG_JACOBI) { - // Create NOX interface - Teuchos::RCP det_nox_interface = - Teuchos::rcp(new NOX::Epetra::ModelEvaluatorInterface(model)); - - // Create NOX linear system object - Teuchos::RCP det_u = model->get_x_init(); - Teuchos::RCP det_A = model->create_W(); - Teuchos::RCP det_iReq = det_nox_interface; - Teuchos::RCP det_iJac = det_nox_interface; - //Teuchos::ParameterList det_printParams; - Teuchos::ParameterList& noxParams = piroParams->sublist("NOX"); - Teuchos::ParameterList& det_printParams = noxParams.sublist("Printing"); - Teuchos::ParameterList& printParams = noxParams.sublist("Printing"); - Teuchos::ParameterList& newtonParams = - noxParams.sublist("Direction").sublist("Newton"); - Teuchos::ParameterList& det_lsParams = - newtonParams.sublist("Stratimikos Linear Solver"); - - Teuchos::RCP det_linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemStratimikos( - det_printParams, det_lsParams, det_iJac, - det_A, *det_u)); - - // Sublist for linear solver for the Newton method - //Teuchos::ParameterList& lsParams = newtonParams.sublist("Linear Solver"); - Teuchos::ParameterList& sgjacobiParams = - newtonParams.sublist("Linear Solver"); - // Create NOX interface - Teuchos::RCP nox_interface = - Teuchos::rcp(new NOX::Epetra::ModelEvaluatorInterface(sg_model)); - Teuchos::RCP base_map = model->get_x_map(); - Teuchos::RCP sg_map = sg_model->get_x_map(); - Teuchos::RCP A = sg_model->create_W(); - Teuchos::RCP iReq = nox_interface; - Teuchos::RCP iJac = nox_interface; - - if (solve_method==SG_GS) { - sgjacobiParams.sublist("Deterministic Solver Parameters") = det_lsParams; - - sg_linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemSGGS( - printParams, 
sgjacobiParams, det_linsys, iReq, iJac, - basis, sg_parallel_data, A, base_map, sg_map)); - } - - else if (solve_method==SG_JACOBI) { - sgjacobiParams.sublist("Deterministic Solver Parameters") = det_lsParams; - Teuchos::ParameterList& jacobiOpParams = - sgjacobiParams.sublist("Jacobi SG Operator"); - jacobiOpParams.set("Only Use Linear Terms", true); - sg_linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemSGJacobi( - printParams, sgjacobiParams, det_linsys, iReq, iJac, - basis, sg_parallel_data, A, base_map, sg_map)); - } - - solverFactory.setSource(sg_linsys); - } - - solverFactory.setSource(sg_observer); - - // Will find preconditioner for Matrix-Free method - sg_block_solver = solverFactory.createSolver(piroParams, sg_model); - } - else - sg_block_solver = sg_model; - - return sg_block_solver; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -createSGSolverAdapter(const Teuchos::RCP& sg_solver) -{ - // Create SG Inverse model evaluator - Teuchos::Array sg_p_index_map = sg_nonlin_model->get_p_sg_map_indices(); - Teuchos::Array sg_g_index_map = sg_nonlin_model->get_g_sg_map_indices(); - Teuchos::Array< Teuchos::RCP > base_g_maps = - sg_nonlin_model->get_g_sg_base_maps(); - // Add sg_u response function supplied by Piro::Epetra::NOXSolver - if (sg_method != SG_NI && sg_method != SG_MPNI && - piroParams->get("Solver Type") == "NOX") { - sg_g_index_map.push_back(base_g_maps.size()); - base_g_maps.push_back(model->get_x_map()); - } - Teuchos::RCP sg_adapter = - Teuchos::rcp(new Stokhos::SGInverseModelEvaluator(sg_solver, - sg_p_index_map, - sg_g_index_map, - base_g_maps)); - - return sg_adapter; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -createRSModel(const Teuchos::RCP& sg_model) -{ - // Create ResponseStatistic model evaluator - Teuchos::Array< Teuchos::RCP > base_g_maps = - sg_nonlin_model->get_g_sg_base_maps(); - // Add sg_u response function supplied by Piro::Epetra::NOXSolver - if (sg_method != SG_NI && sg_method != SG_MPNI && - 
piroParams->get("Solver Type", "NOX") == "NOX") { - base_g_maps.push_back(model->get_x_map()); - } - Teuchos::RCP block_map = - sg_nonlin_model->get_overlap_stochastic_map(); - Teuchos::RCP rs_model = - Teuchos::rcp(new Stokhos::ResponseStatisticModelEvaluator( - sg_model, base_g_maps, basis, sg_comm, block_map)); - - return rs_model; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -getSpatialComm() const -{ - return Stokhos::getSpatialComm(sg_comm); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -getStochasticComm() const -{ - return Stokhos::getStochasticComm(sg_comm); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -getGlobalMultiComm() const -{ - return sg_comm; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory::getValidSGParameters() const -{ - Teuchos::RCP validPL = - Teuchos::rcp(new Teuchos::ParameterList("ValidSGParams"));; - validPL->sublist("SG Parameters", false, ""); - validPL->sublist("SG Solver Parameters", false, ""); - validPL->sublist("MP Solver Parameters", false, ""); - validPL->sublist("Basis", false, ""); - validPL->sublist("Pseudospectral Operator", false, ""); - validPL->sublist("Expansion", false, ""); - validPL->sublist("Quadrature", false, ""); - validPL->set("SG Method", "",""); - validPL->set("SG ModelEvaluator Method", "",""); - validPL->set("Triple Product Size", "",""); - validPL->set("Rebalance Stochastic Graph", false, ""); - validPL->set("Save Moments", -1, "Set to 2 for Mean and Variance. 
Default writes Coeffs"); - validPL->set("Number of Spatial Processors", -1, ""); - validPL->sublist("Isorropia", false, ""); - validPL->sublist("Response KL", false, ""); - validPL->sublist("Adaptivity", false, ""); - - return validPL; -} diff --git a/packages/piro/src/Piro_Epetra_StokhosSolverFactory.hpp b/packages/piro/src/Piro_Epetra_StokhosSolverFactory.hpp deleted file mode 100644 index 7a9c6de6c171..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosSolverFactory.hpp +++ /dev/null @@ -1,148 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef PIRO_EPETRA_STOKHOS_SOLVER_FACTORY_H -#define PIRO_EPETRA_STOKHOS_SOLVER_FACTORY_H - -#include "EpetraExt_ModelEvaluator.h" -#include "Teuchos_VerboseObject.hpp" -#include "Piro_Epetra_StokhosNOXObserver.hpp" - -#include "Stokhos_SGModelEvaluatorBase.hpp" -#include "Stokhos_SGInverseModelEvaluator.hpp" -#include "Stokhos_OrthogPolyBasis.hpp" -#include "Stokhos_Quadrature.hpp" -#include "Stokhos_OrthogPolyExpansion.hpp" -#include "Stokhos_Sparse3Tensor.hpp" -#include "Stokhos_ParallelData.hpp" -#include "EpetraExt_MultiComm.h" - -namespace Piro { -namespace Epetra { - - class StokhosSolverFactory : - public Teuchos::VerboseObject { - public: - - //! SG method - enum SG_METHOD { - SG_AD, - SG_GLOBAL, - SG_NI, - SG_MPNI - }; - - //! SG ModelEvaluator method - enum SG_ME_METHOD { - SG_ME_DEFAULT, - SG_ME_INTERLACED, - SG_ME_ADAPTIVE - }; - - //! Constructor - StokhosSolverFactory(const Teuchos::RCP& piroParams, - const Teuchos::RCP& globalComm); - - //! Reset Stokhos solver parameters - void resetSolverParameters(const Teuchos::ParameterList& new_solver_params); - - /** \name Factory methods */ - //@{ - - //! 
Create stochastic model evaluator - Teuchos::RCP createSGModel( - const Teuchos::RCP& model); - - //! Create stochastic observer - Teuchos::RCP createSGObserver( - const Teuchos::RCP& noxObserver); - - //! Create stochastic solver - Teuchos::RCP createSGSolver( - const Teuchos::RCP& sg_model, - const Teuchos::RCP& sg_observer = Teuchos::null); - - //! Create stochastic solver adapter - Teuchos::RCP createSGSolverAdapter( - const Teuchos::RCP& sg_solver); - - //! Create response statistic model evaluator - Teuchos::RCP createRSModel( - const Teuchos::RCP& sg_model); - - //@} - - /** \name Accessors */ - //@{ - - //! Get spatial comm - Teuchos::RCP getSpatialComm() const; - - //! Get stochastic comm - Teuchos::RCP getStochasticComm() const; - - //! Get global multi-comm - Teuchos::RCP getGlobalMultiComm() const; - - //! Get stochastic basis - Teuchos::RCP > - getBasis() const { return basis; } - - //! Get quadrature rule - Teuchos::RCP > - getQuad() const { return quad; } - - //! Get SG method - SG_METHOD getSGMethod() const { return sg_method; } - - //! Get SG ME method - SG_ME_METHOD getSGMEMethod() const { return sg_me_method; } - - Teuchos::RCP > - getExpansion() const { return expansion; } - - Teuchos::RCP getParallelData() const - { return sg_parallel_data; } - - //@} - - private: - - //! 
Get valid parameters - Teuchos::RCP - getValidSGParameters() const; - - private: - - enum SG_SOLVER { - SG_KRYLOV, - SG_GS, - SG_JACOBI - }; - - Teuchos::RCP piroParams; - Teuchos::RCP sgSolverParams; - - SG_METHOD sg_method; - SG_ME_METHOD sg_me_method; - Teuchos::RCP > basis; - Teuchos::RCP > quad; - Teuchos::RCP > expansion; - Teuchos::RCP > Cijk; - Teuchos::RCP sg_comm; - Teuchos::RCP sg_parallel_data; - - Teuchos::RCP model; - Teuchos::RCP sg_nonlin_model; - - }; - -} -} -#endif diff --git a/packages/piro/test/MockModelEval_C.cpp b/packages/piro/test/MockModelEval_C.cpp index f6acef19d2e7..20cb651ffc32 100644 --- a/packages/piro/test/MockModelEval_C.cpp +++ b/packages/piro/test/MockModelEval_C.cpp @@ -13,10 +13,6 @@ #include "Epetra_LocalMap.h" #include "Epetra_CrsMatrix.h" -#ifdef HAVE_PIRO_STOKHOS -#include "Stokhos_Epetra.hpp" -#endif - using Teuchos::RCP; using Teuchos::rcp; @@ -146,15 +142,6 @@ MockModelEval_C::createInArgs() const inArgs.set_Np(1); inArgs.setSupports(IN_ARG_x, true); -#ifdef HAVE_PIRO_STOKHOS - inArgs.setSupports(IN_ARG_x_sg, true); - inArgs.setSupports(IN_ARG_x_dot_sg, true); - inArgs.setSupports(IN_ARG_p_sg, 0, true); // 1 SG parameter vector - inArgs.setSupports(IN_ARG_sg_basis, true); - inArgs.setSupports(IN_ARG_sg_quadrature, true); - inArgs.setSupports(IN_ARG_sg_expansion, true); -#endif - return inArgs; } @@ -173,15 +160,6 @@ MockModelEval_C::createOutArgs() const outArgs.setSupports(OUT_ARG_DgDx, 0, DERIV_TRANS_MV_BY_ROW); outArgs.setSupports(OUT_ARG_DgDp, 0, 0, DERIV_MV_BY_COL); -#ifdef HAVE_PIRO_STOKHOS - outArgs.setSupports(OUT_ARG_f_sg, true); - outArgs.setSupports(OUT_ARG_W_sg, true); - outArgs.setSupports(OUT_ARG_g_sg, 0, true); - outArgs.setSupports(OUT_ARG_DfDp_sg, 0, DERIV_MV_BY_COL); - outArgs.setSupports(OUT_ARG_DgDx_sg, 0, DERIV_TRANS_MV_BY_ROW); - outArgs.setSupports(OUT_ARG_DgDp_sg, 0, 0, DERIV_MV_BY_COL); -#endif - return outArgs; } @@ -253,86 +231,4 @@ MockModelEval_C::evalModel(const InArgs& inArgs, const 
OutArgs& outArgs) const (*dgdp)[0][0] = p; } } - - // - // Stochastic calculation - // - -#ifdef HAVE_PIRO_STOKHOS - // Parse InArgs - RCP > basis = - inArgs.get_sg_basis(); - RCP > expn = - inArgs.get_sg_expansion(); - InArgs::sg_const_vector_t x_sg = inArgs.get_x_sg(); - InArgs::sg_const_vector_t p_sg = inArgs.get_p_sg(0); - - Stokhos::OrthogPolyApprox x(basis), x2(basis); - if (x_sg != Teuchos::null && proc == 0) { - for (int i=0; isize(); i++) { - x[i] = (*x_sg)[i][0]; - } - expn->times(x2, x, x); - } - - Stokhos::OrthogPolyApprox p(basis), p2(basis); - if (p_sg != Teuchos::null) { - for (int i=0; isize(); i++) { - p[i] = (*p_sg)[i][0]; - } - expn->times(p2, p, p); - } - - // Parse OutArgs - OutArgs::sg_vector_t f_sg = outArgs.get_f_sg(); - if (f_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - (*f_sg)[block][0] = 0.5*(x2[block] - p2[block]); - } - } - - OutArgs::sg_operator_t W_sg = outArgs.get_W_sg(); - if (W_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - Teuchos::RCP W = - Teuchos::rcp_dynamic_cast(W_sg->getCoeffPtr(block), - true); - int i = 0; - int ret = W->ReplaceMyValues(i, 1, &x[block], &i); - if (ret != 0) - std::cout << "ReplaceMyValues returned " << ret << "!" 
<< std::endl; - } - } - - RCP dfdp_sg = - outArgs.get_DfDp_sg(0).getMultiVector(); - if (dfdp_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - (*dfdp_sg)[block][0][0] = -p[block]; - } - } - - OutArgs::sg_vector_t g_sg = outArgs.get_g_sg(0); - if (g_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - (*g_sg)[block][0] = 0.5*(x2[block] + p2[block]); - } - } - - RCP dgdx_sg = - outArgs.get_DgDx_sg(0).getMultiVector(); - if (dgdx_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - (*dgdx_sg)[block][0][0] = x[block]; - } - } - - RCP dgdp_sg = - outArgs.get_DgDp_sg(0,0).getMultiVector(); - if (dgdp_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - (*dgdp_sg)[block][0][0] = p[block]; - } - } -#endif } diff --git a/packages/piro/test/MockModelEval_D.cpp b/packages/piro/test/MockModelEval_D.cpp index 14a3ed668f8b..3cdcfdc29b7f 100644 --- a/packages/piro/test/MockModelEval_D.cpp +++ b/packages/piro/test/MockModelEval_D.cpp @@ -13,10 +13,6 @@ #include "Epetra_LocalMap.h" #include "Epetra_CrsMatrix.h" -#ifdef HAVE_PIRO_STOKHOS -#include "Stokhos_Epetra.hpp" -#endif - using Teuchos::RCP; using Teuchos::rcp; @@ -157,15 +153,6 @@ createInArgs() const inArgs.set_Np(2); inArgs.setSupports(IN_ARG_x, true); -#ifdef HAVE_PIRO_STOKHOS - inArgs.setSupports(IN_ARG_x_sg, true); - inArgs.setSupports(IN_ARG_p_sg, 0, true); - inArgs.setSupports(IN_ARG_p_sg, 1, true); - inArgs.setSupports(IN_ARG_sg_basis, true); - inArgs.setSupports(IN_ARG_sg_quadrature, true); - inArgs.setSupports(IN_ARG_sg_expansion, true); -#endif - return inArgs; } @@ -187,17 +174,6 @@ createOutArgs() const outArgs.setSupports(OUT_ARG_DgDp, 0, 0, DERIV_MV_BY_COL); outArgs.setSupports(OUT_ARG_DgDp, 0, 1, DERIV_MV_BY_COL); -#ifdef HAVE_PIRO_STOKHOS - outArgs.setSupports(OUT_ARG_f_sg, true); - outArgs.setSupports(OUT_ARG_W_sg, true); - outArgs.setSupports(OUT_ARG_g_sg, 0, true); - 
outArgs.setSupports(OUT_ARG_DfDp_sg, 0, DERIV_MV_BY_COL); - outArgs.setSupports(OUT_ARG_DfDp_sg, 1, DERIV_MV_BY_COL); - outArgs.setSupports(OUT_ARG_DgDx_sg, 0, DERIV_TRANS_MV_BY_ROW); - outArgs.setSupports(OUT_ARG_DgDp_sg, 0, 0, DERIV_MV_BY_COL); - outArgs.setSupports(OUT_ARG_DgDp_sg, 0, 1, DERIV_MV_BY_COL); -#endif - return outArgs; } @@ -282,96 +258,4 @@ evalModel(const InArgs& inArgs, const OutArgs& outArgs) const (*dgdp2)[0][0] = 0.0; } } - - // - // Stochastic calculation - // - -#ifdef HAVE_PIRO_STOKHOS - // Parse InArgs - RCP > basis = - inArgs.get_sg_basis(); - RCP > expn = - inArgs.get_sg_expansion(); - InArgs::sg_const_vector_t x_sg = inArgs.get_x_sg(); - InArgs::sg_const_vector_t p1_sg = inArgs.get_p_sg(0); - InArgs::sg_const_vector_t p2_sg = inArgs.get_p_sg(1); - - // Parse OutArgs - OutArgs::sg_vector_t f_sg = outArgs.get_f_sg(); - if (f_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - (*f_sg)[block][0] = - (*x_sg)[block][0] - (*p1_sg)[block][0] + (*p2_sg)[block][0]; - } - } - - OutArgs::sg_operator_t W_sg = outArgs.get_W_sg(); - if (W_sg != Teuchos::null) { - W_sg->init(0.0); - Teuchos::RCP W = - Teuchos::rcp_dynamic_cast(W_sg->getCoeffPtr(0), - true); - if (proc == 0) { - int i = 0; - double val = 1.0; - W->ReplaceMyValues(i, 1, &val, &i); - } - } - - RCP dfdp1_sg = - outArgs.get_DfDp_sg(0).getMultiVector(); - if (dfdp1_sg != Teuchos::null) { - dfdp1_sg->init(0.0); - if (proc == 0) { - (*dfdp1_sg)[0][0][0] = -1.0; - } - } - RCP dfdp2_sg = - outArgs.get_DfDp_sg(1).getMultiVector(); - if (dfdp2_sg != Teuchos::null) { - dfdp2_sg->init(0.0); - if (proc == 0) { - (*dfdp2_sg)[0][0][0] = 1.0; - } - } - - Stokhos::OrthogPolyApprox x(basis); - if (x_sg != Teuchos::null && proc == 0) { - for (int i=0; isize(); i++) { - x[i] = (*x_sg)[i][0]; - } - } - - OutArgs::sg_vector_t g_sg = outArgs.get_g_sg(0); - if (g_sg != Teuchos::null && proc == 0) { - Stokhos::OrthogPolyApprox xinv(basis); - expn->divide(xinv, 1.0, x); - for (int 
block=0; blocksize(); block++) { - (*g_sg)[block][0] = xinv[block]; - } - } - - RCP dgdx_sg = - outArgs.get_DgDx_sg(0).getMultiVector(); - if (dgdx_sg != Teuchos::null && proc == 0) { - Stokhos::OrthogPolyApprox x2(basis), x2inv(basis); - expn->times(x2, x, x); - expn->divide(x2inv, -1.0, x2); - for (int block=0; blocksize(); block++) { - (*dgdx_sg)[block][0][0] = x2inv[block]; - } - } - - RCP dgdp1_sg = - outArgs.get_DgDp_sg(0,0).getMultiVector(); - if (dgdp1_sg != Teuchos::null) { - dgdp1_sg->init(0.0); - } - RCP dgdp2_sg = - outArgs.get_DgDp_sg(0,1).getMultiVector(); - if (dgdp2_sg != Teuchos::null) { - dgdp2_sg->init(0.0); - } -#endif } diff --git a/packages/piro/test/Piro_UnitTests.cpp b/packages/piro/test/Piro_UnitTests.cpp index fb58dd0be75a..c3a323a31090 100644 --- a/packages/piro/test/Piro_UnitTests.cpp +++ b/packages/piro/test/Piro_UnitTests.cpp @@ -15,20 +15,6 @@ #include "Piro_ConfigDefs.hpp" #ifdef HAVE_PIRO_NOX #include "Piro_Epetra_NOXSolver.hpp" -#ifdef HAVE_PIRO_STOKHOS -#include "Stokhos_Epetra.hpp" -#include "Piro_Epetra_StokhosSolverFactory.hpp" -#include "MockModelEval_C.hpp" - -#include "Piro_Epetra_StokhosSolver.hpp" -#include "Piro_Epetra_NECoupledModelEvaluator.hpp" -#include "MockModelEval_D.hpp" - -#include "Thyra_EpetraModelEvaluator.hpp" -#include "Piro_PerformAnalysis.hpp" -#include "Thyra_VectorBase.hpp" -#include "Thyra_DetachedVectorView.hpp" -#endif #endif #include "Piro_Epetra_SolverFactory.hpp" @@ -117,38 +103,6 @@ void testSensitivities(const std::string& inputFile, } } -#ifdef HAVE_PIRO_STOKHOS -int testResponses(const Epetra_Vector& g, - const Teuchos::Array testValues, - double absTol, double relTol, - const std::string& tag, - Teuchos::FancyOStream& out) -{ - int failures = 0; - TEUCHOS_TEST_FOR_EXCEPTION(g.MyLength() != testValues.size(), - std::logic_error, - tag << " Test Values array has size " << - testValues.size() << "but expected size " << - g.MyLength()); - for (int i=0; i default_out = - 
Teuchos::VerboseObjectBase::getDefaultOStream(); - Teuchos::VerboseObjectBase::setDefaultOStream(rcp(&out,false)); - - // Create a communicator for Epetra objects - RCP globalComm; -#ifdef HAVE_MPI - globalComm = rcp(new Epetra_MpiComm(MPI_COMM_WORLD)); -#else - globalComm = rcp(new Epetra_SerialComm); -#endif - - std::string xml_filename = "input_SGSolve.xml"; - - // Set up application parameters - RCP appParams = - Teuchos::getParametersFromXmlFile(xml_filename); - - // Create stochastic Galerkin solver factory - RCP piroParams = - rcp(&(appParams->sublist("Piro")),false); - setOStream(rcp(&out,false), *piroParams); - Piro::Epetra::StokhosSolverFactory sg_solver_factory(piroParams, - globalComm); - - // Get comm for spatial problem - RCP app_comm = sg_solver_factory.getSpatialComm(); - - // Create application model evaluator - RCP model = rcp(new MockModelEval_C(app_comm)); - - // Setup rest of solver - RCP sg_model = - sg_solver_factory.createSGModel(model); - RCP sg_solver = - sg_solver_factory.createSGSolver(sg_model); - RCP rs_model = - sg_solver_factory.createRSModel(sg_solver); - - // Evaluate SG responses at SG parameters - EpetraExt::ModelEvaluator::InArgs sg_inArgs = rs_model->createInArgs(); - EpetraExt::ModelEvaluator::OutArgs sg_outArgs = - rs_model->createOutArgs(); - int p_index = 1; // PC expansion coefficients of params - int g_index = 0; - int num_g = 2; - int x_index = num_g-1; - int g_mean_index = g_index + num_g; - int g_var_index = g_index + 2*num_g; - RCP p_init = rs_model->get_p_init(p_index); - RCP g = - rcp(new Epetra_Vector(*(rs_model->get_g_map(g_index)))); - RCP x = - rcp(new Epetra_Vector(*(rs_model->get_g_map(x_index)))); - RCP g_mean = - rcp(new Epetra_Vector(*(rs_model->get_g_map(g_mean_index)))); - RCP g_var = - rcp(new Epetra_Vector(*(rs_model->get_g_map(g_var_index)))); - RCP dgdp_mean = - rcp(new Epetra_MultiVector( - *(rs_model->get_p_map(p_index)), - rs_model->get_g_map(g_mean_index)->NumMyElements())); - RCP dgdp_var = - 
rcp(new Epetra_MultiVector( - *(rs_model->get_p_map(p_index)), - rs_model->get_g_map(g_var_index)->NumMyElements())); - - sg_outArgs.set_g(g_index, g); - sg_outArgs.set_g(x_index, x); - sg_outArgs.set_g(g_mean_index, g_mean); - sg_outArgs.set_g(g_var_index, g_var); - sg_outArgs.set_DgDp( - g_mean_index, p_index, - EpetraExt::ModelEvaluator::Derivative( - dgdp_mean, - EpetraExt::ModelEvaluator::DERIV_TRANS_MV_BY_ROW - ) - ); - sg_outArgs.set_DgDp( - g_var_index, p_index, - EpetraExt::ModelEvaluator::Derivative( - dgdp_var, - EpetraExt::ModelEvaluator::DERIV_TRANS_MV_BY_ROW - ) - ); - - rs_model->evalModel(sg_inArgs, sg_outArgs); - - // Test derivatives with finite differences - double delta = 1.0e-6; - int num_p = rs_model->get_p_map(p_index)->NumMyElements(); - int num_resp = model->get_g_map(g_index)->NumMyElements(); - Teuchos::RCP p_pert = - Teuchos::rcp(new Epetra_Vector((*rs_model->get_p_map(p_index)))); - Teuchos::RCP g_mean_pert = - Teuchos::rcp(new Epetra_Vector(*(rs_model->get_g_map(g_mean_index)))); - Teuchos::RCP g_var_pert = - Teuchos::rcp(new Epetra_Vector(*(rs_model->get_g_map(g_var_index)))); - Teuchos::RCP dgdp_mean_fd = - Teuchos::rcp(new Epetra_MultiVector(*(rs_model->get_p_map(p_index)), - num_resp)); - Teuchos::RCP dgdp_var_fd = - Teuchos::rcp(new Epetra_MultiVector(*(rs_model->get_p_map(p_index)), - num_resp)); - EpetraExt::ModelEvaluator::InArgs sg_inArgs_pert = - rs_model->createInArgs(); - EpetraExt::ModelEvaluator::OutArgs sg_outArgs_pert = - rs_model->createOutArgs(); - sg_inArgs_pert.set_p(p_index, p_pert); - sg_outArgs_pert.set_g(g_mean_index, g_mean_pert); - sg_outArgs_pert.set_g(g_var_index, g_var_pert); - for (int i=0; iPutScalar(0.0); - g_var_pert->PutScalar(0.0); - - // Compute perturbed g - rs_model->evalModel(sg_inArgs_pert, sg_outArgs_pert); - - // Compute FD derivatives - for (int j=0; j piroParams1 = - Teuchos::getParametersFromXmlFile(problem1_filename); - setOStream(rcp(&out,false), *piroParams1); - RCP model1 = rcp(new 
MockModelEval_D(globalComm)); - - // Setup problem 2 - RCP piroParams2 = - Teuchos::getParametersFromXmlFile(problem2_filename); - setOStream(rcp(&out,false), *piroParams2); - RCP model2 = rcp(new MockModelEval_D(globalComm)); - - // Setup coupled model - RCP coupledParams = - Teuchos::getParametersFromXmlFile(coupled_filename); - setOStream(rcp(&out,false), *coupledParams); - Teuchos::Array< RCP > models(2); - models[0] = model1; models[1] = model2; - Teuchos::Array< RCP > piroParams(2); - piroParams[0] = piroParams1; piroParams[1] = piroParams2; - RCP network_model = - rcp(new Piro::Epetra::ParamToResponseNetworkModel); - RCP coupledModel = - rcp(new Piro::Epetra::NECoupledModelEvaluator(models, piroParams, - network_model, - coupledParams, globalComm)); - coupledModel->setOStream(rcp(&out,false)); - - // Setup solver - Piro::Epetra::SolverFactory solverFactory; - RCP coupledSolver = - solverFactory.createSolver(coupledParams, coupledModel); - - // Solve coupled system - EpetraExt::ModelEvaluator::InArgs inArgs = coupledSolver->createInArgs(); - EpetraExt::ModelEvaluator::OutArgs outArgs = coupledSolver->createOutArgs(); - for (int i=0; iget_p_init(i)); - for (int i=0; i g = - rcp(new Epetra_Vector(*(coupledSolver->get_g_map(i)))); - outArgs.set_g(i, g); - } - coupledSolver->evalModel(inArgs, outArgs); - - // Regression tests - int failures = 0; - Teuchos::ParameterList& testParams = - coupledParams->sublist("Regression Tests"); - double relTol = testParams.get("Relative Tolerance", 1.0e-3); - double absTol = testParams.get("Absolute Tolerance", 1.0e-8); - - // Print results - for (int i=0; i g = outArgs.get_g(i); - if (g != Teuchos::null) { - out << "Response vector " << i << ":" << std::endl; - g->Print(out); - - // Test response - std::stringstream ss1; - ss1 << "Response " << i << " Test Values"; - bool testResponse = - testParams.isType< Teuchos::Array >(ss1.str()); - if (testResponse) { - Teuchos::Array testValues = - testParams.get >(ss1.str()); - failures 
+= testResponses(*g, testValues, absTol, relTol, "Response", - out); - } - - } - } - - success = failures == 0; - Teuchos::VerboseObjectBase::setDefaultOStream(default_out); -} - -TEUCHOS_UNIT_TEST( Piro, SGCoupled ) -{ - using Teuchos::RCP; - using Teuchos::rcp; - using Teuchos::ParameterList; - - RCP default_out = - Teuchos::VerboseObjectBase::getDefaultOStream(); - Teuchos::VerboseObjectBase::setDefaultOStream(rcp(&out,false)); - - // Create a communicator for Epetra objects - RCP globalComm; -#ifdef HAVE_MPI - globalComm = rcp(new Epetra_MpiComm(MPI_COMM_WORLD)); -#else - globalComm = rcp(new Epetra_SerialComm); -#endif - - std::string problem1_filename = "input_problem1_sg.xml"; - std::string problem2_filename = "input_problem2_sg.xml"; - std::string coupled_filename = "input_coupled_sg.xml"; - - // Setup stochastic coupled problem to get spatial comm's - RCP coupledParams = - Teuchos::getParametersFromXmlFile(coupled_filename); - setOStream(rcp(&out,false), *coupledParams); - RCP coupledSolver = - rcp(new Piro::Epetra::StokhosSolver(coupledParams, globalComm)); - RCP app_comm = coupledSolver->getSpatialComm(); - - // Setup problem 1 - RCP piroParams1 = - Teuchos::getParametersFromXmlFile(problem1_filename); - setOStream(rcp(&out,false), *piroParams1); - RCP model1 = rcp(new MockModelEval_D(app_comm)); - - // Setup problem 2 - RCP piroParams2 = - Teuchos::getParametersFromXmlFile(problem2_filename); - setOStream(rcp(&out,false), *piroParams2); - RCP model2 = rcp(new MockModelEval_D(app_comm)); - - // Setup coupled model - Teuchos::Array< RCP > models(2); - models[0] = model1; models[1] = model2; - Teuchos::Array< RCP > piroParams(2); - piroParams[0] = piroParams1; piroParams[1] = piroParams2; - RCP network_model = - rcp(new Piro::Epetra::ParamToResponseNetworkModel); - RCP coupledModel = - rcp(new Piro::Epetra::NECoupledModelEvaluator(models, piroParams, - network_model, - coupledParams, globalComm)); - coupledModel->setOStream(rcp(&out,false)); - - // Setup 
solver - coupledSolver->setup(coupledModel); - - Teuchos::RCP x_sg_init = - coupledSolver->get_x_sg_init(); - Teuchos::RCP x_sg_init_new = - Teuchos::rcp(new Stokhos::EpetraVectorOrthogPoly(*x_sg_init)); - Teuchos::RCP > basis = - coupledSolver->getBasis(); - for (int i=0; idimension(); i++) - (*x_sg_init_new)[i+1].PutScalar(1.0); - coupledSolver->set_x_sg_init(*x_sg_init_new); - - // Solve coupled system - EpetraExt::ModelEvaluator::InArgs inArgs = coupledSolver->createInArgs(); - EpetraExt::ModelEvaluator::OutArgs outArgs = coupledSolver->createOutArgs(); - for (int i=0; iget_p_sg_init(i)); - for (int i=0; i g_sg = - coupledSolver->create_g_sg(i); - outArgs.set_g_sg(i, g_sg); - } - coupledSolver->evalModel(inArgs, outArgs); - - // Regression tests - int failures = 0; - Teuchos::ParameterList& testParams = - coupledParams->sublist("Regression Tests"); - double relTol = testParams.get("Relative Tolerance", 1.0e-3); - double absTol = testParams.get("Absolute Tolerance", 1.0e-8); - - - // Print results - for (int i=0; i g_sg = - outArgs.get_g_sg(i); - if (g_sg != Teuchos::null) { - Epetra_Vector g_mean(*(coupledSolver->get_g_map(i))); - Epetra_Vector g_std_dev(*(coupledSolver->get_g_map(i))); - g_sg->computeMean(g_mean); - g_sg->computeStandardDeviation(g_std_dev); - out.precision(12); - out << "Response " << i << " Mean = " << std::endl - << g_mean << std::endl; - out << "Response " << i << " Std. Dev. = " << std::endl - << g_std_dev << std::endl; - out << "Response vector " << i << ":" << std::endl - << *(outArgs.get_g_sg(i)) << std::endl; - - // Test mean - std::stringstream ss1; - ss1 << "Response " << i << " Mean Test Values"; - bool testMean = - testParams.isType< Teuchos::Array >(ss1.str()); - if (testMean) { - Teuchos::Array testValues = - testParams.get >(ss1.str()); - failures += testResponses(g_mean, testValues, absTol, relTol, "Mean", - out); - } - - // Test std. dev. 
- std::stringstream ss2; - ss2 << "Response " << i << " Standard Deviation Test Values"; - bool testSD = - testParams.isType< Teuchos::Array >(ss2.str()); - if (testSD) { - Teuchos::Array testValues = - testParams.get >(ss2.str()); - failures += testResponses(g_std_dev, testValues, absTol, relTol, - "Standard Deviation", out); - } - - } - } - } - - success = failures == 0; - Teuchos::VerboseObjectBase::setDefaultOStream(default_out); -} -#endif #endif TEUCHOS_UNIT_TEST( Piro, Basic ) From f590a0aac5cc5c29e177fcecdecd0856fa0f92a2 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Wed, 11 Dec 2024 01:11:27 -0700 Subject: [PATCH 47/50] Tacho : function to return nnz of factors Signed-off-by: iyamazaki --- .../tacho/example/Tacho_ExampleDriver.hpp | 1 + .../shylu_node/tacho/src/Tacho_Driver.hpp | 2 ++ .../tacho/src/impl/Tacho_Driver_Impl.hpp | 20 +++++++++++-------- .../src/impl/Tacho_NumericTools_LevelSet.hpp | 14 +++++++++++-- .../tacho/src/impl/Tacho_SymbolicTools.cpp | 1 + .../tacho/src/impl/Tacho_SymbolicTools.hpp | 1 + 6 files changed, 29 insertions(+), 10 deletions(-) diff --git a/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp b/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp index dce1a645b801..613d7f25bd19 100644 --- a/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp +++ b/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp @@ -257,6 +257,7 @@ template int driver(int argc, char *argv[]) { std::cout << std::endl; std::cout << " Initi Time " << initi_time << std::endl; + std::cout << " > nnz = " << solver.getNumNonZerosU() << std::endl; std::cout << " Facto Time " << facto_time / (double)nfacts << std::endl; std::cout << " Solve Time " << solve_time / (double)nsolves << std::endl; std::cout << std::endl; diff --git a/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp b/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp index 17f871051458..29cd41feeb38 100644 --- 
a/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp +++ b/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp @@ -113,6 +113,7 @@ template struct Driver { ordinal_type_array_host _h_peri_graph; // ** symbolic factorization output + ordinal_type _nnz_u; // supernodes output ordinal_type _nsupernodes; ordinal_type_array _supernodes; @@ -217,6 +218,7 @@ template struct Driver { /// /// get interface /// + ordinal_type getNumNonZerosU() const; ordinal_type getNumSupernodes() const; ordinal_type_array getSupernodes() const; ordinal_type_array getPermutationVector() const; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp index 605130e14fa9..0fb38ab2f4cf 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp @@ -24,7 +24,7 @@ template Driver::Driver() : _method(1), _order_connected_graph_separately(1), _m(0), _nnz(0), _ap(), _h_ap(), _aj(), _h_aj(), _perm(), _h_perm(), _peri(), _h_peri(), _m_graph(0), _nnz_graph(0), _h_ap_graph(), _h_aj_graph(), _h_perm_graph(), - _h_peri_graph(), _nsupernodes(0), _N(nullptr), _verbose(0), _small_problem_thres(1024), _serial_thres_size(-1), + _h_peri_graph(), _nnz_u(0), _nsupernodes(0), _N(nullptr), _verbose(0), _small_problem_thres(1024), _serial_thres_size(-1), _mb(-1), _nb(-1), _front_update_mode(-1), _levelset(0), _device_level_cut(0), _device_factor_thres(128), _device_solve_thres(128), _variant(2), _nstreams(16), _pivot_tol(0.0), _max_num_superblocks(-1) {} @@ -173,6 +173,7 @@ template void Driver::useDefaultPivotToleranc /// /// get interface /// +template ordinal_type Driver::getNumNonZerosU() const { return _nnz_u; } template ordinal_type Driver::getNumSupernodes() const { return _nsupernodes; } template typename Driver::ordinal_type_array Driver::getSupernodes() const { @@ -192,11 +193,11 @@ typename Driver::ordinal_type_array 
Driver::getInversePermutatio // internal only template int Driver::analyze() { int r_val(0); - if (_m < _small_problem_thres) { + if (_m <= _small_problem_thres) { /// do nothing if (_verbose) { - printf("TachoSolver: Analyze\n"); - printf("====================\n"); + printf("TachoSolver: Analyze (Small Problem)\n"); + printf("====================================\n"); printf(" Linear system A\n"); printf(" number of equations: %10d\n", _m); printf("\n"); @@ -255,6 +256,7 @@ template int Driver::analyze_linear_system() symbolic_tools_type S(_m, _h_ap, _h_aj, _h_perm, _h_peri); S.symbolicFactorize(_verbose); + _nnz_u = S.NumNonzerosU(); _nsupernodes = S.NumSupernodes(); _stree_level = S.SupernodesTreeLevel(); _stree_roots = S.SupernodesTreeRoots(); @@ -300,6 +302,7 @@ template int Driver::analyze_condensed_graph( S.symbolicFactorize(_verbose); S.evaporateSymbolicFactors(_h_aw_graph, _verbose); + _nnz_u = S.NumNonzerosU(); _nsupernodes = S.NumSupernodes(); _stree_level = S.SupernodesTreeLevel(); _stree_roots = S.SupernodesTreeRoots(); @@ -343,7 +346,7 @@ template int Driver::initialize() { /// /// initialize numeric tools /// - if (_m < _small_problem_thres) { + if (_m <= _small_problem_thres) { /// do nothing } else { /// @@ -383,7 +386,7 @@ template int Driver::factorize(const value_ty } } - if (_m < _small_problem_thres) { + if (_m <= _small_problem_thres) { factorize_small_host(ax); } else { _N->factorize(ax, _pivot_tol, _verbose); @@ -476,7 +479,7 @@ int Driver::solve(const value_type_matrix &x, const value_type_matrix &b } } - if (_m < _small_problem_thres) { + if (_m <= _small_problem_thres) { solve_small_host(x, b, t); } else { TACHO_TEST_FOR_EXCEPTION(t.extent(0) < x.extent(0) || t.extent(1) < x.extent(1), std::logic_error, @@ -566,7 +569,7 @@ void Driver::computeSpMV(const value_type_array &ax, const value_type_ma } template int Driver::exportFactorsToCrsMatrix(crs_matrix_type &A) { - if (_m < _small_problem_thres) { + if (_m <= _small_problem_thres) { 
typedef ArithTraits ats; const typename ats::mag_type zero(0); @@ -644,6 +647,7 @@ template int Driver::release() { _h_perm_graph = ordinal_type_array_host(); _h_peri_graph = ordinal_type_array_host(); + _nnz_u = 0; _nsupernodes = 0; _supernodes = ordinal_type_array(); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp index 4da7fc19dca7..3210bbba80db 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp @@ -2286,7 +2286,12 @@ class NumericToolsLevelSet : public NumericToolsBase { if (verbose) { printf("Summary: LevelSetTools-Variant-%d (CholeskyFactorize)\n", variant); printf("=====================================================\n"); - printf( "\n ** Team = %f s, Device = %f s, Update = %f s **\n\n",time_parallel,time_device,time_update ); + printf( "\n ** Team = %f s, Device = %f s, Update = %f s **\n",time_parallel,time_device,time_update ); + if (variant == 3) { + printf( " extractCRS with total nnzL = %d and nnzU = %d\n\n",colindL.extent(0),colindU.extent(0) ); + } else { + printf( "\n" ); + } print_stat_factor(); fflush(stdout); } @@ -4368,7 +4373,12 @@ class NumericToolsLevelSet : public NumericToolsBase { if (verbose) { printf("Summary: LevelSetTools-Variant-%d (LU Factorize)\n", variant); printf("================================================\n"); - printf( "\n ** Team = %f s, Device = %f s, Update = %f s (%d streams) **\n\n",time_parallel,time_device,time_update,_nstreams ); + printf( "\n ** Team = %f s, Device = %f s, Update = %f s (%d streams) **\n",time_parallel,time_device,time_update,_nstreams ); + if (variant == 3) { + printf( " extractCRS with total nnzL = %d and nnzU = %d\n\n",colindL.extent(0),colindU.extent(0) ); + } else { + printf( "\n" ); + } print_stat_factor(); fflush(stdout); } diff --git 
a/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.cpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.cpp index f8e30826a1dc..6f687b8ba1a7 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.cpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.cpp @@ -665,6 +665,7 @@ SymbolicTools::SymbolicTools(const ordinal_type m, const size_type_array &ap, co const ordinal_type_array &perm, const ordinal_type_array &peri) : _m(m), _ap(ap), _aj(aj), _perm(perm), _peri(peri) {} +ordinal_type SymbolicTools::NumNonzerosU() const { return stat.nnz_u; } ordinal_type SymbolicTools::NumSupernodes() const { return _supernodes.extent(0) - 1; } ordinal_type_array SymbolicTools::Supernodes() const { return _supernodes; } size_type_array SymbolicTools::gidSuperPanelPtr() const { return _gid_super_panel_ptr; } diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.hpp index b9a7d8b5236c..2c08e42a95d2 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.hpp @@ -169,6 +169,7 @@ class SymbolicTools { Kokkos::deep_copy(_peri, G.InvPermVector()); } + ordinal_type NumNonzerosU() const; ordinal_type NumSupernodes() const; ordinal_type_array Supernodes() const; size_type_array gidSuperPanelPtr() const; From 324ba20a99906a63253563c13fdfc202b2176322 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Wed, 11 Dec 2024 01:13:26 -0700 Subject: [PATCH 48/50] Tacho : buid with CUDA + OpenMP Signed-off-by: iyamazaki --- packages/shylu/shylu_node/tacho/src/impl/Tacho_Chol.hpp | 6 +++--- packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemm.hpp | 6 +++--- packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemv.hpp | 6 +++--- packages/shylu/shylu_node/tacho/src/impl/Tacho_Herk.hpp | 6 +++--- packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL.hpp | 6 +++--- 
packages/shylu/shylu_node/tacho/src/impl/Tacho_LU.hpp | 6 +++--- packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsm.hpp | 6 +++--- packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsv.hpp | 6 +++--- 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Chol.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Chol.hpp index 70910b85d8b5..7245ae403d71 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Chol.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Chol.hpp @@ -32,10 +32,10 @@ struct CholAlgorithm { }; struct CholAlgorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; } // namespace Tacho diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemm.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemm.hpp index 2886782fba4f..103906bde63d 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemm.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemm.hpp @@ -31,10 +31,10 @@ struct GemmAlgorithm { }; struct GemmAlgorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemv.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemv.hpp index 8193de22d610..0ccace7b4ffd 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemv.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemv.hpp @@ -31,10 +31,10 @@ struct GemvAlgorithm { }; struct GemvAlgorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) 
using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; } // namespace Tacho diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Herk.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Herk.hpp index 686eb93909fb..4498fb789921 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Herk.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Herk.hpp @@ -31,10 +31,10 @@ struct HerkAlgorithm { }; struct HerkAlgorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; } // namespace Tacho diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL.hpp index 87eb07915c69..19c9702a83ba 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL.hpp @@ -32,10 +32,10 @@ struct LDL_Algorithm { }; struct LDL_Algorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; } // namespace Tacho diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU.hpp index b1f4f33e1122..88cb90fe864c 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU.hpp @@ -32,10 +32,10 @@ struct LU_Algorithm { }; struct LU_Algorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; diff --git 
a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsm.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsm.hpp index 7475de0db6ab..2865c2ebd55c 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsm.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsm.hpp @@ -30,10 +30,10 @@ struct TrsmAlgorithm { }; struct TrsmAlgorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsv.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsv.hpp index dd3eee93269c..20631b350e12 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsv.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsv.hpp @@ -30,10 +30,10 @@ struct TrsvAlgorithm { }; struct TrsvAlgorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; From 808d11115b9131c0835c63f9d711d3a132aeeb01 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Wed, 11 Dec 2024 15:23:20 -0500 Subject: [PATCH 49/50] Tacho : compile warnings Signed-off-by: iyamazaki --- .../shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp index 3210bbba80db..25068de037d7 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp @@ -2288,7 +2288,7 @@ class NumericToolsLevelSet : public NumericToolsBase { 
printf("=====================================================\n"); printf( "\n ** Team = %f s, Device = %f s, Update = %f s **\n",time_parallel,time_device,time_update ); if (variant == 3) { - printf( " extractCRS with total nnzL = %d and nnzU = %d\n\n",colindL.extent(0),colindU.extent(0) ); + printf( " extractCRS with total nnzL = %ld and nnzU = %ld\n\n",colindL.extent(0),colindU.extent(0) ); } else { printf( "\n" ); } @@ -4375,7 +4375,7 @@ class NumericToolsLevelSet : public NumericToolsBase { printf("================================================\n"); printf( "\n ** Team = %f s, Device = %f s, Update = %f s (%d streams) **\n",time_parallel,time_device,time_update,_nstreams ); if (variant == 3) { - printf( " extractCRS with total nnzL = %d and nnzU = %d\n\n",colindL.extent(0),colindU.extent(0) ); + printf( " extractCRS with total nnzL = %ld and nnzU = %ld\n\n",colindL.extent(0),colindU.extent(0) ); } else { printf( "\n" ); } From 2bdbe49e463db0e26899f9e928ac63e1083bad4a Mon Sep 17 00:00:00 2001 From: Alan Williams Date: Fri, 13 Dec 2024 13:08:42 -0700 Subject: [PATCH 50/50] STK: Snapshot 12-13-24 13:08 from Sierra 5.23.2-429-g07a311ce Signed-off-by: Alan Williams --- packages/stk/CHANGELOG.md | 7 + .../stk/stk_doc_tests/stk_mesh/howToNgp.cpp | 8 +- .../stk_topology/shell_sides.cpp | 8 +- .../stk/stk_expreval/stk_expreval/Node.cpp | 6 +- .../stk_search/SearchMeshTest.cpp | 73 ++ packages/stk/stk_io/stk_io/InputFile.cpp | 491 ++------------ packages/stk/stk_io/stk_io/InputFile.hpp | 34 +- packages/stk/stk_io/stk_io/InputQuery.cpp | 514 ++++++++++++++ packages/stk/stk_io/stk_io/InputQuery.hpp | 115 ++++ packages/stk/stk_io/stk_io/IossBridge.cpp | 17 +- packages/stk/stk_io/stk_io/IossBridge.hpp | 9 +- packages/stk/stk_io/stk_io/MeshField.cpp | 5 + packages/stk/stk_io/stk_io/MeshField.hpp | 4 + .../stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp | 12 +- .../stk/stk_io/stk_io/StkMeshIoBroker.cpp | 8 +- .../stk/stk_io/stk_io/StkMeshIoBroker.hpp | 4 +- 
.../stk/stk_mesh/stk_mesh/base/Bucket.cpp | 44 +- .../stk/stk_mesh/stk_mesh/base/Bucket.hpp | 24 +- .../stk/stk_mesh/stk_mesh/base/BulkData.cpp | 30 +- .../stk/stk_mesh/stk_mesh/base/BulkData.hpp | 2 +- .../stk_mesh/stk_mesh/base/DeviceField.hpp | 79 ++- .../stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp | 421 +++++------- .../stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp | 63 +- .../stk/stk_mesh/stk_mesh/base/FEMHelpers.hpp | 4 + .../stk/stk_mesh/stk_mesh/base/FieldBase.cpp | 24 +- .../stk/stk_mesh/stk_mesh/base/FieldBase.hpp | 6 +- .../stk_mesh/stk_mesh/base/GetNgpField.hpp | 32 +- .../stk/stk_mesh/stk_mesh/base/HostField.hpp | 15 +- .../stk/stk_mesh/stk_mesh/base/HostMesh.hpp | 8 + .../stk/stk_mesh/stk_mesh/base/MetaData.cpp | 153 +++-- .../stk/stk_mesh/stk_mesh/base/MetaData.hpp | 30 +- packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp | 35 +- .../stk_mesh/base/NgpFieldSyncDebugger.hpp | 18 +- .../stk/stk_mesh/stk_mesh/base/NgpTypes.hpp | 12 +- .../stk_mesh/stk_mesh/base/SideSetHelper.cpp | 6 +- .../stk_mesh/base/StkFieldSyncDebugger.cpp | 82 ++- .../stk_mesh/base/StkFieldSyncDebugger.hpp | 10 + packages/stk/stk_mesh/stk_mesh/base/Types.hpp | 1 + .../stk_mesh/baseImpl/BucketConnDynamic.hpp | 11 +- .../stk_mesh/baseImpl/BucketRepository.cpp | 81 +-- .../stk_mesh/baseImpl/BucketRepository.hpp | 17 +- .../stk_mesh/baseImpl/MeshImplUtils.cpp | 9 +- .../stk_mesh/baseImpl/MeshModification.cpp | 9 +- .../stk_mesh/baseImpl/NgpMeshHostData.hpp | 5 - .../stk_mesh/stk_mesh/baseImpl/Partition.cpp | 6 +- .../stk_mesh/stk_mesh/baseImpl/Partition.hpp | 37 +- .../abstract_cdt_interface.hpp | 4 + .../stk_middle_mesh_util/create_stk_mesh.cpp | 10 +- .../stk_middle_mesh_util/stk_field_copier.cpp | 10 +- .../stk_ngp_test/GlobalReporter.hpp | 6 + .../stk_ngp_test/NgpTestDeviceMacros.hpp | 6 - .../stk_ngp_test/stk_ngp_test/ngp_test.hpp | 14 - .../stk_mesh/NgpMeshUpdate.cpp | 10 + .../stk_mesh/perfNgpFieldStateRotation.cpp | 136 ++++ .../stk_search/VolumeToOne.cpp | 32 +- 
.../MortonLBVH_TreeManipulationUtils.hpp | 67 +- .../stk_topology/topology_defn.hpp | 48 +- .../stk_transfer/copy_by_id/SearchById.hpp | 1 + .../copy_by_id/TransferCopyTranslator.hpp | 1 + .../TextMeshStkTopologyMapping.hpp | 13 +- .../stk_io/UnitTestGmeshFixture.cpp | 7 +- .../stk_io/UnitTestReadFieldData.cpp | 37 + .../stk_mesh/UnitTestBucket.cpp | 2 +- .../stk_mesh/UnitTestFieldDataManager.cpp | 74 +- .../stk_mesh/UnitTestFieldImpl.cpp | 13 +- .../stk_mesh/UnitTestMetaData.cpp | 6 +- .../stk_mesh/UnitTestSideSet.cpp | 373 +++++++++- .../ngp/NgpDebugFieldSync_Fixtures.hpp | 25 +- .../stk_mesh/ngp/NgpMeshTest.cpp | 146 +++- .../stk_mesh/ngp/UnitTestNgp.cpp | 14 +- .../ngp/UnitTestNgpDebugFieldSync.cpp | 21 +- ...FieldSync_AccessDuringMeshModification.cpp | 26 +- ...TestNgpDebugFieldSync_MeshModification.cpp | 30 +- ...estNgpDebugFieldSync_PartialAllocation.cpp | 15 +- .../ngp/UnitTestNgpMeshModification.cpp | 29 + .../ngp/UnitTestTransposePinnedMapped.cpp | 4 +- .../stk_mesh/ngp/ngpFieldTest.cpp | 84 ++- .../test_stk_field_copier.cpp | 11 +- .../stk_ngp_test/utest_VirtualFunction.cpp | 1 + .../stk_search/UnitTestCoarseSearchTwoBox.cpp | 40 ++ .../stk_topology/topology_test_utils.hpp | 5 +- .../utest_c/unit_test_shell_quad.cpp | 54 +- .../utest_c/unit_test_shell_tri.cpp | 37 +- .../unit_test_shell_tri_all_face_sides.cpp | 8 +- .../diag/UnitTestParallelTimerImpl.cpp | 255 +++++++ .../parallel/UnitTestParallelComm.cpp | 30 +- .../stk_util/util/UnitTestStridedArray.cpp | 3 +- packages/stk/stk_util/stk_util/Version.hpp | 2 +- .../command_line/CommandLineParser.hpp | 17 +- .../stk_util/diag/ParallelTimerImpl.cpp | 254 +++++++ .../stk_util/diag/ParallelTimerImpl.hpp | 210 ++++++ .../stk/stk_util/stk_util/diag/PrintTimer.cpp | 447 +----------- .../stk/stk_util/stk_util/diag/PrintTimer.hpp | 8 +- packages/stk/stk_util/stk_util/diag/Timer.cpp | 635 +----------------- packages/stk/stk_util/stk_util/diag/Timer.hpp | 93 ++- .../stk/stk_util/stk_util/diag/TimerImpl.cpp | 333 
+++++++++ .../stk/stk_util/stk_util/diag/TimerImpl.hpp | 370 ++++++++++ .../stk_util/stk_util/environment/EnvData.cpp | 1 - .../stk_util/stk_util/environment/EnvData.hpp | 1 - .../stk_util/environment/Scheduler.cpp | 2 +- .../stk/stk_util/stk_util/ngp/NgpSpaces.hpp | 2 + .../stk_util/registry/ProductRegistry.cpp | 2 +- .../stk_util/stk_util/util/FPExceptions.hpp | 12 +- .../stk_util/stk_util/util/StkNgpVector.hpp | 19 +- .../stk_util/stk_util/util/StridedArray.hpp | 2 +- 105 files changed, 4215 insertions(+), 2492 deletions(-) create mode 100644 packages/stk/stk_integration_tests/stk_search/SearchMeshTest.cpp create mode 100644 packages/stk/stk_io/stk_io/InputQuery.cpp create mode 100644 packages/stk/stk_io/stk_io/InputQuery.hpp create mode 100644 packages/stk/stk_performance_tests/stk_mesh/perfNgpFieldStateRotation.cpp create mode 100644 packages/stk/stk_unit_tests/stk_util/diag/UnitTestParallelTimerImpl.cpp create mode 100644 packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.cpp create mode 100644 packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.hpp create mode 100644 packages/stk/stk_util/stk_util/diag/TimerImpl.cpp create mode 100644 packages/stk/stk_util/stk_util/diag/TimerImpl.hpp diff --git a/packages/stk/CHANGELOG.md b/packages/stk/CHANGELOG.md index 514f7e831a1a..f74c63a19424 100644 --- a/packages/stk/CHANGELOG.md +++ b/packages/stk/CHANGELOG.md @@ -1,5 +1,12 @@ # CHANGELOG +5.23.2 (STK_VERSION 5230200) 12/11/2024 + misc fixes for AMD/ROCm (ATS-4) + stk_mesh: speedup for device-field multi-state rotation + reduce stacksize (sizeof(DeviceMesh)) from ~2900 to ~470 + stk_search: misc fixes + stk_io: add query for existence of fields on database + 5.21.6-1 (STK_VERSION 5210601) 10/31/2024 stk_mesh, stk_search: more fixes for HIP unified and Cuda no-uvm builds diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp index 7bcbd60764f4..d45a371d6576 100644 --- 
a/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp @@ -372,13 +372,12 @@ void run_connected_face_test(const stk::mesh::BulkData& bulk) typedef stk::ngp::TeamPolicy::member_type TeamHandleType; const auto& teamPolicy = stk::ngp::TeamPolicy(ngpMesh.num_buckets(stk::topology::ELEM_RANK), Kokkos::AUTO); - Kokkos::parallel_for(teamPolicy, KOKKOS_LAMBDA(const TeamHandleType& team) { const stk::mesh::NgpMesh::BucketType& bucket = ngpMesh.get_bucket(stk::topology::ELEM_RANK, team.league_rank()); - unsigned numElems = bucket.size(); + const unsigned numElems = bucket.size(); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0u, numElems), [&] (const int& i) { @@ -409,9 +408,8 @@ void run_connected_face_test(const stk::mesh::BulkData& bulk) TEST_F(NgpHowTo, loopOverElemFaces) { - if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { - GTEST_SKIP(); - } + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); auto &field = get_meta().declare_field(stk::topology::NODE_RANK, "myField"); stk::mesh::put_field_on_mesh(field, get_meta().universal_part(), nullptr); diff --git a/packages/stk/stk_doc_tests/stk_topology/shell_sides.cpp b/packages/stk/stk_doc_tests/stk_topology/shell_sides.cpp index 3482ca2468c2..ba891a31e780 100644 --- a/packages/stk/stk_doc_tests/stk_topology/shell_sides.cpp +++ b/packages/stk/stk_doc_tests/stk_topology/shell_sides.cpp @@ -71,10 +71,10 @@ TEST(stk_topology, shell_side_topology) { EXPECT_EQ(shell.num_sides(),6u); EXPECT_EQ(shell.side_topology(0), stk::topology::QUAD_4); EXPECT_EQ(shell.side_topology(1), stk::topology::QUAD_4); - EXPECT_EQ(shell.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(shell.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(shell.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(shell.side_topology(5), stk::topology::SHELL_SIDE_BEAM_2); + 
EXPECT_EQ(shell.side_topology(2), stk::topology::LINE_2); + EXPECT_EQ(shell.side_topology(3), stk::topology::LINE_2); + EXPECT_EQ(shell.side_topology(4), stk::topology::LINE_2); + EXPECT_EQ(shell.side_topology(5), stk::topology::LINE_2); } //end_shell_side_topo diff --git a/packages/stk/stk_expreval/stk_expreval/Node.cpp b/packages/stk/stk_expreval/stk_expreval/Node.cpp index 83916c8c7c9c..8a5ac920c8f1 100644 --- a/packages/stk/stk_expreval/stk_expreval/Node.cpp +++ b/packages/stk/stk_expreval/stk_expreval/Node.cpp @@ -82,7 +82,11 @@ double& Node::setResult() { void Node::eval() { - stk::util::clear_fp_errors(); + if (m_owner->get_fp_error_behavior() != Eval::FPErrorBehavior::Ignore) + { + stk::util::clear_fp_errors(); + } + switch (m_opcode) { case OPCODE_STATEMENT: { setResult() = m_left->getResult(); diff --git a/packages/stk/stk_integration_tests/stk_search/SearchMeshTest.cpp b/packages/stk/stk_integration_tests/stk_search/SearchMeshTest.cpp new file mode 100644 index 000000000000..d0d9c408ea6f --- /dev/null +++ b/packages/stk/stk_integration_tests/stk_search/SearchMeshTest.cpp @@ -0,0 +1,73 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. 
+// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + +TEST(StkSearch, NGP_coarse_search_mesh_elem_boxes_MORTON) +{ + using ExecSpace = Kokkos::DefaultExecutionSpace; + MPI_Comm comm = MPI_COMM_WORLD; + if (stk::parallel_machine_size(comm) != 1) { GTEST_SKIP(); } + + stk::mesh::MeshBuilder builder(comm); + std::shared_ptr bulkPtr = builder.create(); + + stk::io::fill_mesh("generated:1x9x19|sideset:xXyYzZ", *bulkPtr); + + Kokkos::View elemBoxes = + createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + Kokkos::View faceBoxes = + createBoundingBoxesForEntities(*bulkPtr, stk::topology::FACE_RANK); + + std::cout<<"Num elem-boxes: "< searchResults; + stk::search::local_coarse_search(elemBoxes, faceBoxes, searchMethod, searchResults, ExecSpace{}); + + const size_t expectedSize = 2910; + EXPECT_EQ(expectedSize, searchResults.size())<<"expected results size: "< +#include #include // for exception #include // for copy, sort, max, find 
#include // for fmod @@ -80,34 +81,6 @@ namespace { bool meshFieldSort(const stk::io::MeshField& f1, const stk::io::MeshField &f2) { return f1.field()->mesh_meta_data_ordinal() < f2.field()->mesh_meta_data_ordinal(); } - - void add_missing_fields(std::vector *missingFields, - std::map missing_fields_collector) { - if (missingFields) - { - std::vector discoveredMissingFields; - for (auto missingStatedFieldIter : missing_fields_collector) - { - discoveredMissingFields.push_back(stk::io::MeshField(missingStatedFieldIter.first, - missingStatedFieldIter.second->db_name())); - } - std::sort(discoveredMissingFields.begin(), discoveredMissingFields.end(), - [](const stk::io::MeshField &a, const stk::io::MeshField &b) { - return (a.db_name() < b.db_name()) - || ((a.db_name() == b.db_name()) && (a.field()->name() < b.field()->name())); }); - - for(stk::io::MeshField &missingField : *missingFields) - { - std::vector::iterator iter = std::find(discoveredMissingFields.begin(), discoveredMissingFields.end(), missingField); - if(iter != discoveredMissingFields.end()) - { - discoveredMissingFields.erase(iter); - } - } - - missingFields->insert(missingFields->end(), discoveredMissingFields.begin(), discoveredMissingFields.end()); - } - } } namespace stk { @@ -156,11 +129,11 @@ namespace io { m_stopTime(std::numeric_limits::max()), m_periodType(CYCLIC), m_fieldsInitialized(false), - m_haveCachedEntityList(false), + m_haveCachedEntityList(false), m_multiStateSuffixes(nullptr) { STK_ThrowErrorMsgIf(m_database == nullptr || !m_database->ok(true), - "ERROR: Invalid Ioss region detected in add_mesh_database"); + "ERROR: Invalid Ioss region detected in add_mesh_database"); Ioss::DatabaseUsage db_usage = m_database->usage(); if (db_usage == Ioss::READ_RESTART) { @@ -178,8 +151,8 @@ namespace io { } STK_ThrowErrorMsgIf(m_region->mesh_type() != Ioss::MeshType::UNSTRUCTURED, - "Mesh type is '" << m_region->mesh_type_string() << "' which is not supported. 
" - "Only 'Unstructured' mesh is currently supported."); + "Mesh type is '" << m_region->mesh_type_string() << "' which is not supported. " + "Only 'Unstructured' mesh is currently supported."); } @@ -202,8 +175,8 @@ namespace io { m_region = std::shared_ptr(region); STK_ThrowErrorMsgIf(m_region->mesh_type() != Ioss::MeshType::UNSTRUCTURED, - "Mesh type is '" << m_region->mesh_type_string() << "' which is not supported. " - "Only 'Unstructured' mesh is currently supported."); + "Mesh type is '" << m_region->mesh_type_string() << "' which is not supported. " + "Only 'Unstructured' mesh is currently supported."); } } @@ -220,7 +193,7 @@ namespace io { if (!fieldAlreadyExists) { m_fields.push_back(mesh_field); stk::io::set_field_role(*mesh_field.field(), Ioss::Field::TRANSIENT); - m_fieldsInitialized = false; + m_fieldsInitialized = false; } } @@ -278,58 +251,9 @@ namespace io { for (size_t i=0; i < fields.size(); i++) { const Ioss::Field::RoleType* role = stk::io::get_field_role(*fields[i]); if ( role && *role == Ioss::Field::TRANSIENT ) { - add_input_field(MeshField(fields[i], fields[i]->name(), tmo)); - } - } - } - - void InputFile::build_field_part_associations_for_part(Ioss::Region *region, - const stk::mesh::FieldBase *f, - const stk::mesh::Part * part, - stk::io::MeshField &mf) - { - stk::mesh::EntityRank rank = part_primary_entity_rank(*part); - // Get Ioss::GroupingEntity corresponding to this part... 
- Ioss::GroupingEntity *entity = region->get_entity(part->name()); - - if (entity != nullptr) { - if (f->entity_rank() == rank) { - build_field_part_associations(mf, *part, rank, entity); - process_fields_for_grouping_entity(mf, *part, entity); - - if(entity->type() == Ioss::SIDESET) { - auto io_side_set = dynamic_cast(entity); - STK_ThrowRequire(io_side_set != nullptr); - auto fbs = io_side_set->get_side_blocks(); - - for(auto& io_fblock : fbs) { - build_field_part_associations(mf, *part, rank, io_fblock); - process_fields_for_grouping_entity(mf, *part, io_fblock); - } - } - } - - // If rank is != NODE_RANK, then see if field is defined on the nodes of this part - if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { - Ioss::GroupingEntity *node_entity = nullptr; - std::string nodes_name = part->name() + "_nodes"; - - node_entity = region->get_entity(nodes_name); - - if (node_entity == nullptr) { - nodes_name = part->name() + "_n"; - node_entity = region->get_entity(nodes_name); - } - - if (node_entity == nullptr) { - node_entity = region->get_entity("nodeblock_1"); - } - if (node_entity != nullptr) { - build_field_part_associations(mf, *part, stk::topology::NODE_RANK, node_entity); - process_fields_for_grouping_entity(mf, *part, node_entity); - } - } + add_input_field(MeshField(fields[i], fields[i]->name(), tmo)); } + } } bool InputFile::read_input_field(stk::io::MeshField &mf, stk::mesh::BulkData &bulk) @@ -356,14 +280,15 @@ namespace io { "ERROR: Input database '" << region->get_database()->get_filename() << "' has no transient data."); + InputQuery iq(*region, bulk.mesh_meta_data(), m_db_purpose, m_multiStateSuffixes); + const stk::mesh::FieldBase *f = mf.field(); - std::vector::iterator P = mf.m_subsetParts.begin(); - while (P != mf.m_subsetParts.end()) { + + for (const stk::mesh::Part* part : mf.m_subsetParts) { // Find the Ioss::GroupingEntity corresponding to this part... 
mf.set_inactive(); - const stk::mesh::Part *part = *P; ++P; - build_field_part_associations_for_part(region, f, part, mf); + iq.build_field_part_associations_for_part(mf, part); if (mf.is_active()) { mf.restore_field_data(bulk, sti, false, m_multiStateSuffixes); @@ -371,35 +296,30 @@ namespace io { } if(mf.m_subsetParts.empty()) { - mf.set_inactive(); - // Now handle the non-subsetted fields... - - // Check universal_part() NODE_RANK first... - const stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - { - if (f->entity_rank() == stk::topology::NODE_RANK) { - build_field_part_associations(mf, meta.universal_part(), stk::topology::NODE_RANK, - region->get_node_blocks()[0]); - process_fields_for_grouping_entity(mf, meta.universal_part(), region->get_node_blocks()[0]); - } - } - - // Now handle all non-nodeblock parts... - const stk::mesh::PartVector &all_parts = meta.get_parts(); - for ( stk::mesh::PartVector::const_iterator - ip = all_parts.begin(); ip != all_parts.end(); ++ip ) { - - const stk::mesh::Part * part = *ip; + mf.set_inactive(); + // Now handle the non-subsetted fields... - // Check whether this part is an input part... - if (stk::io::is_part_io_part(*part)) { - build_field_part_associations_for_part(region, f, part, mf); - } + // Check universal_part() NODE_RANK first... + const stk::mesh::MetaData &meta = bulk.mesh_meta_data(); + { + if (f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::NodeBlock* nb = region->get_node_blocks()[0]; + iq.build_field_part_associations(mf, meta.universal_part(), stk::topology::NODE_RANK, nb); + iq.process_fields_for_grouping_entity(mf, meta.universal_part(), nb); } + } - if (mf.is_active()) { - mf.restore_field_data(bulk, sti, false, m_multiStateSuffixes); + // Now handle all non-nodeblock parts... + for ( const stk::mesh::Part * part : meta.get_parts() ) { + // Check whether this part is an input part... 
+ if (stk::io::is_part_io_part(*part)) { + iq.build_field_part_associations_for_part(mf, part); } + } + + if (mf.is_active()) { + mf.restore_field_data(bulk, sti, false, m_multiStateSuffixes); + } } return mf.is_active(); @@ -432,318 +352,6 @@ namespace io { return read_defined_input_fields(state_time, missingFields, bulk); } - bool InputFile::build_field_part_associations(stk::io::MeshField &mesh_field, - const stk::mesh::Part &part, - const stk::mesh::EntityRank rank, - Ioss::GroupingEntity *io_entity, - std::map *missing_fields_collector) - { - bool field_is_missing = false; - stk::mesh::FieldBase *f = mesh_field.field(); - // Only add TRANSIENT Fields -- check role; if not present assume transient... - const Ioss::Field::RoleType *role = stk::io::get_field_role(*f); - if (role == nullptr || *role == Ioss::Field::TRANSIENT) { - if (stk::io::is_field_on_part(f, rank, part)) { - const stk::mesh::FieldBase::Restriction &res = stk::mesh::find_restriction(*f, rank, part); - FieldType field_type; - stk::io::get_io_field_type(f, res, &field_type); - if (field_type.type != Ioss::Field::INVALID) { - - const std::string &db_name = mesh_field.db_name(); - unsigned num_states = f->number_of_states(); - std::vector missing_states; - if (num_states > 1) { - bool has_all_states = all_field_states_exist_on_io_entity(db_name, f, io_entity, missing_states, m_multiStateSuffixes); - if(has_all_states == false) { - field_is_missing = true; - if (missing_fields_collector) { - for (stk::mesh::FieldState missing_state : missing_states) - (*missing_fields_collector)[f->field_state(missing_state)] = &mesh_field; - } - } - } - - bool field_exists = io_entity->field_exists(db_name); - if (!field_exists) { - field_is_missing = true; - if (missing_fields_collector) { - (*missing_fields_collector)[f] = &mesh_field; - } - } - - // See if field with that name exists on io_entity... 
- if (field_exists) { - mesh_field.add_part(rank, part, io_entity); - mesh_field.set_single_state((m_db_purpose == stk::io::READ_RESTART) ? false : true); - mesh_field.set_active(); - } - } - } - } - return field_is_missing; - } - - bool InputFile::process_fields_for_grouping_entity(stk::io::MeshField &mesh_field, - const stk::mesh::Part &part, - Ioss::GroupingEntity *io_entity, - std::map *missing_fields_collector_ptr) - { - STK_ThrowRequireMsg(io_entity != nullptr, "Null IO entity"); - - bool doesFieldExist = false; - - stk::mesh::FieldBase *f = mesh_field.field(); - - stk::mesh::EntityRank rank = part_primary_entity_rank(part); - if(f->entity_rank() == rank) { - const std::string &db_name = mesh_field.db_name(); - unsigned num_states = f->number_of_states(); - std::vector missing_states; - if (num_states > 1) { - bool has_all_states = all_field_states_exist_on_io_entity(db_name, f, io_entity, missing_states, m_multiStateSuffixes); - if(has_all_states == false) { - if (missing_fields_collector_ptr) { - for (stk::mesh::FieldState missing_state : missing_states) - (*missing_fields_collector_ptr)[f->field_state(missing_state)] = &mesh_field; - } - } else { - doesFieldExist = true; - } - } - - if(doesFieldExist == false) { - doesFieldExist = io_entity->field_exists(db_name); - if (!doesFieldExist) { - if (missing_fields_collector_ptr) { - (*missing_fields_collector_ptr)[f] = &mesh_field; - } - } - } - - // See if field with that name exists on io_entity... - if (doesFieldExist) { - mesh_field.add_part(f->entity_rank(), part, io_entity); - mesh_field.set_single_state((m_db_purpose == stk::io::READ_RESTART) ? 
false : true); - mesh_field.set_active(); - } - } - - return doesFieldExist; - } - - void InputFile::build_field_part_associations_from_grouping_entity(stk::mesh::BulkData &bulk, std::vector *missingFields) - { - Ioss::Region *region = m_region.get(); - size_t num_missing_fields = 0; - const stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - - for (auto &mesh_field : m_fields) - { - if(mesh_field.is_active()) { - continue; - } - - std::map missingFieldCollector; - bool doesFieldExist = false; - stk::mesh::Part &universalPart = meta.universal_part(); - Ioss::GroupingEntity * universalNodeEntity = region->get_entity("nodeblock_1"); - doesFieldExist |= process_fields_for_grouping_entity(mesh_field, universalPart, universalNodeEntity, &missingFieldCollector); - - const stk::mesh::PartVector &all_parts = meta.get_parts(); - for ( stk::mesh::PartVector::const_iterator - ip = all_parts.begin(); ip != all_parts.end(); ++ip ) { - - stk::mesh::Part * const part = *ip; - - // Check whether this part is an input part... - if (stk::io::is_part_io_part(*part)) { - // Get Ioss::GroupingEntity corresponding to this part... 
- Ioss::GroupingEntity *io_entity = region->get_entity(part->name()); - - if(io_entity == nullptr) { - continue; - } - - doesFieldExist |= process_fields_for_grouping_entity(mesh_field, *part, io_entity, &missingFieldCollector); - - if(io_entity->type() == Ioss::SIDEBLOCK || io_entity->type() == Ioss::SIDESET) - { - static const std::string s_nodeset_suffix("_n"); - - std::string ns_name = part->name(); - ns_name += s_nodeset_suffix; - Ioss::NodeSet *io_node_set = region->get_nodeset(ns_name); - if(io_node_set != nullptr) { - // Process hidden nodesets - doesFieldExist |= process_fields_for_grouping_entity(mesh_field, *part, io_node_set, &missingFieldCollector); - } - } - - if(io_entity->type() == Ioss::SIDESET) - { - Ioss::SideSet* sideSet = dynamic_cast(io_entity); - auto faceBlocks = sideSet->get_side_blocks(); - for (auto faceBlock : faceBlocks) - { - doesFieldExist |= process_fields_for_grouping_entity(mesh_field, *part, faceBlock, &missingFieldCollector); - } - - } - } - } - - if (!doesFieldExist) - { - num_missing_fields += missingFieldCollector.size(); - if (nullptr != missingFields) - { - add_missing_fields(missingFields, missingFieldCollector); - } - else { - for (auto missingField : missingFieldCollector) { - std::cout << "Missing field: " << missingField.second->db_name() << std::endl; - } - } - } - } - - if (num_missing_fields > 0 && missingFields==nullptr) { - std::ostringstream msg; - msg << "ERROR: Input field processing could not find " << num_missing_fields << " fields.\n"; - throw std::runtime_error( msg.str() ); - } - } - - void InputFile::build_field_part_associations(stk::mesh::BulkData &bulk, std::vector *missingFields) - { - std::map missing_fields_collector; - std::map *missing_fields_collector_ptr = - (missingFields ? &missing_fields_collector : 0); - - // Each input field will have a list of the Parts that the field exists on... - // Create this list. 
- Ioss::Region *region = m_region.get(); - size_t num_missing_fields = 0; - // First handle any fields that are subsetted (restricted to a specified list of parts) - { - std::vector::iterator I = m_fields.begin(); - while (I != m_fields.end()) { - const stk::mesh::FieldBase *f = (*I).field(); - std::vector::iterator P = (*I).m_subsetParts.begin(); - while (P != (*I).m_subsetParts.end()) { - // Find the Ioss::GroupingEntity corresponding to this part... - const stk::mesh::Part *part = *P; ++P; - stk::mesh::EntityRank rank = part_primary_entity_rank(*part); - bool field_is_missing = false; - if (f->entity_rank() == rank) { - Ioss::GroupingEntity *io_entity = region->get_entity(part->name()); - STK_ThrowErrorMsgIf( io_entity == nullptr, - "ERROR: For field '" << (*I).field()->name() - << "' Could not find database entity corresponding to the part named '" - << part->name() << "'."); - field_is_missing = build_field_part_associations(*I, *part, rank, io_entity, missing_fields_collector_ptr); - } - - // If rank is != NODE_RANK, then see if field is defined on the nodes of this part - if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { - Ioss::GroupingEntity *node_entity = nullptr; - std::string nodes_name = part->name() + "_nodes"; - node_entity = region->get_entity(nodes_name); - if (node_entity == nullptr) { - node_entity = region->get_entity("nodeblock_1"); - } - if (node_entity != nullptr) { - field_is_missing = build_field_part_associations(*I, *part, stk::topology::NODE_RANK, node_entity, - missing_fields_collector_ptr); - } - } - - if (field_is_missing) { - ++num_missing_fields; - } - } - ++I; - } - } - - // Now handle the non-subsetted fields... - - // Check universal_part() NODE_RANK first... 
- const stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - { - std::vector::iterator I = m_fields.begin(); - while (I != m_fields.end()) { - if ((*I).m_subsetParts.empty()) { - const stk::mesh::FieldBase *f = (*I).field(); - if (f->entity_rank() == stk::topology::NODE_RANK) { - bool field_is_missing = build_field_part_associations(*I, meta.universal_part(), stk::topology::NODE_RANK, - region->get_node_blocks()[0], missing_fields_collector_ptr); - if (field_is_missing) { - ++num_missing_fields; - } - } - } - ++I; - } - } - - // Now handle all non-nodeblock parts... - const stk::mesh::PartVector &all_parts = meta.get_parts(); - for ( stk::mesh::PartVector::const_iterator - ip = all_parts.begin(); ip != all_parts.end(); ++ip ) { - - stk::mesh::Part * const part = *ip; - - // Check whether this part is an input part... - if (stk::io::is_part_io_part(*part)) { - stk::mesh::EntityRank rank = part_primary_entity_rank(*part); - // Get Ioss::GroupingEntity corresponding to this part... - Ioss::GroupingEntity *entity = region->get_entity(part->name()); - if (entity != nullptr && !m_fields.empty() && entity->type() != Ioss::SIDESET) { - std::vector::iterator I = m_fields.begin(); - while (I != m_fields.end()) { - if ((*I).m_subsetParts.empty()) { - const stk::mesh::FieldBase *f = (*I).field(); - bool field_is_missing = false; - if (f->entity_rank() == rank) { - field_is_missing = build_field_part_associations(*I, *part, rank, entity, missing_fields_collector_ptr); - } - - // If rank is != NODE_RANK, then see if field is defined on the nodes of this part - if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { - Ioss::GroupingEntity *node_entity = nullptr; - std::string nodes_name = part->name() + "_nodes"; - node_entity = region->get_entity(nodes_name); - if (node_entity == nullptr) { - node_entity = region->get_entity("nodeblock_1"); - } - if (node_entity != nullptr) { - field_is_missing = build_field_part_associations(*I, *part, 
stk::topology::NODE_RANK, node_entity, - missing_fields_collector_ptr); - } - } - - if (field_is_missing) { - ++num_missing_fields; - } - } - ++I; - } - } - } - } - - if (num_missing_fields > 0 && missingFields==nullptr) { - std::ostringstream msg; - msg << "ERROR: Input field processing could not find " << num_missing_fields << " fields.\n"; - throw std::runtime_error( msg.str() ); - } - - add_missing_fields(missingFields, missing_fields_collector); - } - double InputFile::map_analysis_to_db_time(double time) const { double db_time = time; @@ -773,15 +381,16 @@ namespace io { std::sort(m_fields.begin(), m_fields.end(), meshFieldSort); bool ignore_missing_fields = (missingFields != nullptr); + Ioss::Region *region = m_region.get(); if (!m_fieldsInitialized) { - std::vector::iterator I = m_fields.begin(); - while (I != m_fields.end()) { - (*I).set_inactive(); ++I; - } + InputQuery iq(*region, bulk.mesh_meta_data(), m_db_purpose, m_multiStateSuffixes); - build_field_part_associations(bulk, missingFields); - build_field_part_associations_from_grouping_entity(bulk, missingFields); + for (stk::io::MeshField& mf : m_fields) { + mf.set_inactive(); + iq.build_field_part_associations(mf, missingFields); + iq.build_field_part_associations_from_grouping_entity(mf, missingFields); + } m_fieldsInitialized = true; } @@ -796,7 +405,6 @@ namespace io { STK_ThrowErrorMsgIf (m_region.get() == nullptr, "ERROR: There is no Input mesh/restart region associated with this Mesh Data."); - Ioss::Region *region = m_region.get(); // Get struct containing interval of database time(s) containing 'time' DBStepTimeInterval sti(region, db_time); @@ -859,13 +467,15 @@ namespace io { bool ignore_missing_fields = (missingFields != nullptr); + if (!m_fieldsInitialized) { + InputQuery iq(*region, bulk.mesh_meta_data(), m_db_purpose, m_multiStateSuffixes); + for (auto & meshField : m_fields) { meshField.set_inactive(); + iq.build_field_part_associations(meshField, missingFields); } - 
build_field_part_associations(bulk, missingFields); - m_fieldsInitialized = true; } @@ -903,5 +513,14 @@ namespace io { return time_read; } + void InputFile::initialize_input_fields() + { + for (auto & meshField : m_fields) { + meshField.set_inactive(); + meshField.clear_field_parts(); + } + + m_fieldsInitialized = false; + } } } diff --git a/packages/stk/stk_io/stk_io/InputFile.hpp b/packages/stk/stk_io/stk_io/InputFile.hpp index fe0e5c3ee239..a8c7d37e3e0f 100644 --- a/packages/stk/stk_io/stk_io/InputFile.hpp +++ b/packages/stk/stk_io/stk_io/InputFile.hpp @@ -87,10 +87,6 @@ class Part; stk::mesh::BulkData &bulk, bool useEntityListCache = false); void get_global_variable_names(std::vector &names); - void build_field_part_associations(stk::mesh::BulkData &bulk, std::vector *missing); - - void build_field_part_associations_from_grouping_entity(stk::mesh::BulkData &bulk, std::vector *missingFields); - std::shared_ptr get_input_ioss_region() { if (m_region.get() == nullptr && m_database.get() != nullptr) { @@ -138,22 +134,22 @@ class Part; return true; } + const std::vector& get_multistate_suffixes() const + { + static std::vector emptyVector; + + if(nullptr != m_multiStateSuffixes) { + return *m_multiStateSuffixes; + } + + return emptyVector; + } + + DatabasePurpose get_database_purpose() const { return m_db_purpose; } + + void initialize_input_fields(); + private: - bool process_fields_for_grouping_entity(stk::io::MeshField &mesh_field, - const stk::mesh::Part &part, - Ioss::GroupingEntity *io_entity, - std::map *missing_fields_collector_ptr = nullptr); - - bool build_field_part_associations(stk::io::MeshField &mesh_field, - const stk::mesh::Part &part, - const stk::mesh::EntityRank rank, - Ioss::GroupingEntity *io_entity, - std::map *missing_fields = nullptr); - - void build_field_part_associations_for_part(Ioss::Region *region, - const stk::mesh::FieldBase *f, - const stk::mesh::Part * part, - stk::io::MeshField &mf); DatabasePurpose m_db_purpose; std::shared_ptr 
m_database; diff --git a/packages/stk/stk_io/stk_io/InputQuery.cpp b/packages/stk/stk_io/stk_io/InputQuery.cpp new file mode 100644 index 000000000000..6d865a2024bf --- /dev/null +++ b/packages/stk/stk_io/stk_io/InputQuery.cpp @@ -0,0 +1,514 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + +// ####################### Start Clang Header Tool Managed Headers ######################## +// clang-format off +#include +#include // for exception +#include // for copy, sort, max, find +#include // for fmod +#include // for size_t +#include // for operator<<, basic_ostream +#include // for numeric_limits +#include // for runtime_error +#include // for READ_RESTART, Database... +#include // for DBStepTimeInterval +#include +#include // for is_part_io_part, all_f... +#include // for MeshField, MeshField::... +#include +#include // for FieldBase, FieldBase::... +#include // for find_restriction +#include // for MetaData +#include // for filename_substitution +#include "stk_util/environment/RuntimeWarning.hpp" // for RuntimeWarningAdHoc +#include // for ThrowErrorMsgIf, Throw... +#include // for move, pair +#include "Ioss_DBUsage.h" // for DatabaseUsage, READ_MODEL +#include "Ioss_DatabaseIO.h" // for DatabaseIO +#include "Ioss_EntityType.h" // for SIDESET, EntityType +#include "Ioss_Field.h" // for Field, Field::TRANSIENT +#include "Ioss_GroupingEntity.h" // for GroupingEntity +#include "Ioss_IOFactory.h" // for IOFactory +#include "Ioss_MeshType.h" // for MeshType, MeshType::UN... +#include "Ioss_NodeBlock.h" // for NodeBlock +#include "Ioss_NodeSet.h" // for NodeSet +#include "Ioss_Property.h" // for Property +#include "Ioss_Region.h" // for Region, NodeBlockConta... +#include "Ioss_SideBlock.h" // for SideBlock +#include "Ioss_SideSet.h" // for SideSet +#include "StkIoUtils.hpp" // for part_primary_entity_rank +#include "stk_mesh/base/BulkData.hpp" // for BulkData +#include "stk_mesh/base/FieldState.hpp" // for FieldState +#include "stk_mesh/base/Part.hpp" // for Part +#include "stk_mesh/base/Types.hpp" // for PartVector, EntityRank +#include "stk_topology/topology.hpp" // for topology, topology::NO... 
+// clang-format on +// ####################### End Clang Header Tool Managed Headers ######################## + +namespace { +void add_missing_fields(std::vector *missingFields, + stk::io::MissingFieldMap& missingFieldsCollector) +{ + if (nullptr != missingFields) { + std::vector discoveredMissingFields; + for (auto missingStatedFieldIter : missingFieldsCollector) + { + discoveredMissingFields.push_back(stk::io::MeshField(missingStatedFieldIter.first, + missingStatedFieldIter.second->db_name())); + } + std::sort(discoveredMissingFields.begin(), discoveredMissingFields.end(), + [](const stk::io::MeshField &a, const stk::io::MeshField &b) { + return (a.db_name() < b.db_name()) + || ((a.db_name() == b.db_name()) && (a.field()->name() < b.field()->name())); }); + + for(stk::io::MeshField &missingField : *missingFields) { + std::vector::iterator iter = std::find(discoveredMissingFields.begin(), discoveredMissingFields.end(), missingField); + if(iter != discoveredMissingFields.end()) { + discoveredMissingFields.erase(iter); + } + } + + missingFields->insert(missingFields->end(), discoveredMissingFields.begin(), discoveredMissingFields.end()); + } +} +} + +namespace stk { +namespace io { + + InputQuery::InputQuery(const Ioss::Region& region, + const stk::mesh::MetaData& meta, + const DatabasePurpose dbPurpose, + const std::vector* multiStateSuffixes) + : m_region(region), + m_meta(meta), + m_dbPurpose(dbPurpose), + m_multiStateSuffixes(multiStateSuffixes) + { + } + + bool InputQuery::build_field_part_associations(stk::io::MeshField &meshField, + const stk::mesh::Part &part, + const stk::mesh::EntityRank rank, + Ioss::GroupingEntity *ioEntity, + MissingFieldMap *missingFieldsCollector) + { + bool fieldIsMissing = false; + stk::mesh::FieldBase *f = meshField.field(); + // Only add TRANSIENT Fields -- check role; if not present assume transient... 
+ const Ioss::Field::RoleType *role = stk::io::get_field_role(*f); + if (role == nullptr || *role == Ioss::Field::TRANSIENT) { + if (stk::io::is_field_on_part(f, rank, part)) { + const stk::mesh::FieldBase::Restriction &res = stk::mesh::find_restriction(*f, rank, part); + FieldType fieldType; + stk::io::get_io_field_type(f, res, &fieldType); + + if (fieldType.type != Ioss::Field::INVALID) { + const std::string &dbName = meshField.db_name(); + unsigned numStates = f->number_of_states(); + std::vector missingStates; + if (numStates > 1) { + bool hasAllStates = all_field_states_exist_on_io_entity(dbName, f, ioEntity, missingStates, m_multiStateSuffixes); + if(hasAllStates == false) { + fieldIsMissing = true; + if (missingFieldsCollector) { + for (stk::mesh::FieldState missingState : missingStates) + (*missingFieldsCollector)[f->field_state(missingState)] = &meshField; + } + } + } + + bool fieldExists = ioEntity->field_exists(dbName); + if (!fieldExists) { + fieldIsMissing = true; + if (missingFieldsCollector) { + (*missingFieldsCollector)[f] = &meshField; + } + } + + // See if field with that name exists on ioEntity... + if (fieldExists) { + meshField.add_part(rank, part, ioEntity); + meshField.set_single_state((m_dbPurpose == stk::io::READ_RESTART) ? false : true); + meshField.set_active(); + } + } + } + } + return fieldIsMissing; + } + + int InputQuery::build_field_part_associations(stk::io::MeshField& mf, + std::vector *missingFields, + const bool throwOnErrorMessage) + { + MissingFieldMap missingFieldsCollector; + MissingFieldMap *missingFieldsCollectorPtr = (missingFields ? &missingFieldsCollector : nullptr); + + // Each input field will have a list of the Parts that the field exists on... + // Create this list. 
+ int numMissingFields = 0; + // First handle any fields that are sub-setted (restricted to a specified list of parts) + + const stk::mesh::FieldBase *f = mf.field(); + + for (const stk::mesh::Part *part : mf.m_subsetParts) { + stk::mesh::EntityRank rank = part_primary_entity_rank(*part); + bool fieldIsMissing = false; + + if (f->entity_rank() == rank) { + Ioss::GroupingEntity *ioEntity = m_region.get_entity(part->name()); + STK_ThrowErrorMsgIf( ioEntity == nullptr, + "ERROR: For field '" << + mf.field()->name() << + "' Could not find database entity corresponding to the part named '" << + part->name() << "'."); + fieldIsMissing |= build_field_part_associations(mf, *part, rank, ioEntity, missingFieldsCollectorPtr); + } + + // If rank is != NODE_RANK, then see if field is defined on the nodes of this part + if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::GroupingEntity *nodeEntity = nullptr; + std::string nodesName = part->name() + "_nodes"; + nodeEntity = m_region.get_entity(nodesName); + if (nodeEntity == nullptr) { + nodeEntity = m_region.get_entity("nodeblock_1"); + } + if (nodeEntity != nullptr) { + fieldIsMissing |= build_field_part_associations(mf, *part, stk::topology::NODE_RANK, nodeEntity, + missingFieldsCollectorPtr); + } + } + + if (fieldIsMissing) { + ++numMissingFields; + } + } + + + // Now handle the non-subsetted fields... + + // Check universal_part() NODE_RANK first... + if (mf.m_subsetParts.empty()) { + if (f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::GroupingEntity *nodeEntity = m_region.get_node_blocks()[0]; + bool fieldIsMissing = build_field_part_associations(mf, m_meta.universal_part(), stk::topology::NODE_RANK, + nodeEntity, missingFieldsCollectorPtr); + if (fieldIsMissing) { + ++numMissingFields; + } + } + } + + // Now handle all non-nodeblock parts... + for ( stk::mesh::Part * const part : m_meta.get_parts()) { + // Check whether this part is an input part... 
+ if (stk::io::is_part_io_part(*part)) { + stk::mesh::EntityRank rank = part_primary_entity_rank(*part); + // Get Ioss::GroupingEntity corresponding to this part... + Ioss::GroupingEntity *entity = m_region.get_entity(part->name()); + if (entity != nullptr && entity->type() != Ioss::SIDESET) { + + if (mf.m_subsetParts.empty()) { + f = mf.field(); + bool fieldIsMissing = false; + if (f->entity_rank() == rank) { + fieldIsMissing |= build_field_part_associations(mf, *part, rank, entity, missingFieldsCollectorPtr); + } + + // If rank is != NODE_RANK, then see if field is defined on the nodes of this part + if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::GroupingEntity *nodeEntity = nullptr; + std::string nodesName = part->name() + "_nodes"; + nodeEntity = m_region.get_entity(nodesName); + if (nodeEntity == nullptr) { + nodeEntity = m_region.get_entity("nodeblock_1"); + } + if (nodeEntity != nullptr) { + fieldIsMissing |= build_field_part_associations(mf, *part, stk::topology::NODE_RANK, nodeEntity, + missingFieldsCollectorPtr); + } + } + + if (fieldIsMissing) { + ++numMissingFields; + } + } + } + } + } + + if (numMissingFields > 0 && missingFields==nullptr && throwOnErrorMessage) { + std::ostringstream msg; + msg << "ERROR: Input field processing could not find " << numMissingFields << " fields.\n"; + throw std::runtime_error( msg.str() ); + } + + add_missing_fields(missingFields, missingFieldsCollector); + + return numMissingFields; + } + + bool InputQuery::process_fields_for_grouping_entity(stk::io::MeshField &mf, + const stk::mesh::Part &part, + Ioss::GroupingEntity *ioEntity, + MissingFieldMap *missingFieldsCollectorPtr) + { + STK_ThrowRequireMsg(ioEntity != nullptr, "Null IO entity"); + + bool doesFieldExist = false; + + stk::mesh::FieldBase *f = mf.field(); + + stk::mesh::EntityRank rank = part_primary_entity_rank(part); + if(f->entity_rank() == rank) { + const std::string &dbName = mf.db_name(); + unsigned 
numStates = f->number_of_states(); + std::vector missingStates; + if (numStates > 1) { + bool hasAllStates = all_field_states_exist_on_io_entity(dbName, f, ioEntity, missingStates, m_multiStateSuffixes); + if(hasAllStates == false) { + if (missingFieldsCollectorPtr) { + for (stk::mesh::FieldState missingState : missingStates) { + (*missingFieldsCollectorPtr)[f->field_state(missingState)] = &mf; + } + } + } else { + doesFieldExist = true; + } + } + + if(doesFieldExist == false) { + doesFieldExist = ioEntity->field_exists(dbName); + if (!doesFieldExist) { + if (missingFieldsCollectorPtr) { + (*missingFieldsCollectorPtr)[f] = &mf; + } + } + } + + // See if field with that name exists on ioEntity... + if (doesFieldExist) { + mf.add_part(f->entity_rank(), part, ioEntity); + mf.set_single_state((m_dbPurpose == stk::io::READ_RESTART) ? false : true); + mf.set_active(); + } + } + + return doesFieldExist; + } + + int InputQuery::build_field_part_associations_from_grouping_entity(stk::io::MeshField& mf, + std::vector *missingFields, + const bool throwOnErrorMessage) + { + int numMissingFields = 0; + + if(mf.is_active()) { + return numMissingFields; + } + + MissingFieldMap missingFieldCollector; + bool doesFieldExist = false; + stk::mesh::Part &universalPart = m_meta.universal_part(); + Ioss::GroupingEntity * universalNodeEntity = m_region.get_entity("nodeblock_1"); + + doesFieldExist |= process_fields_for_grouping_entity(mf, universalPart, universalNodeEntity, &missingFieldCollector); + + for ( stk::mesh::Part * const part : m_meta.get_parts() ) { + // Check whether this part is an input part... + if (stk::io::is_part_io_part(*part)) { + // Get Ioss::GroupingEntity corresponding to this part... 
+ Ioss::GroupingEntity *ioEntity = m_region.get_entity(part->name()); + + if(ioEntity == nullptr) { + continue; + } + + doesFieldExist |= process_fields_for_grouping_entity(mf, *part, ioEntity, &missingFieldCollector); + + if(ioEntity->type() == Ioss::SIDEBLOCK || ioEntity->type() == Ioss::SIDESET) { + static const std::string s_nodeset_suffix("_n"); + + std::string nsName = part->name(); + nsName += s_nodeset_suffix; + Ioss::NodeSet *ioNodeSet = m_region.get_nodeset(nsName); + if(ioNodeSet != nullptr) { + // Process hidden nodesets + doesFieldExist |= process_fields_for_grouping_entity(mf, *part, ioNodeSet, &missingFieldCollector); + } + } + + if(ioEntity->type() == Ioss::SIDESET) { + Ioss::SideSet* sideSet = dynamic_cast(ioEntity); + auto faceBlocks = sideSet->get_side_blocks(); + for (auto faceBlock : faceBlocks) { + doesFieldExist |= process_fields_for_grouping_entity(mf, *part, faceBlock, &missingFieldCollector); + } + } + } + } + + if (!doesFieldExist) { + numMissingFields += missingFieldCollector.size(); + if (nullptr != missingFields) { + add_missing_fields(missingFields, missingFieldCollector); + } + else { + for (auto missingField : missingFieldCollector) { + std::cout << "Missing field: " << missingField.second->db_name() << std::endl; + } + } + } + + if (numMissingFields > 0 && missingFields==nullptr && throwOnErrorMessage) { + std::ostringstream msg; + msg << "ERROR: Input field processing could not find " << numMissingFields << " fields.\n"; + throw std::runtime_error( msg.str() ); + } + + return numMissingFields; + } + + void InputQuery::build_field_part_associations_for_part(stk::io::MeshField &mf, const stk::mesh::Part * part) + { + stk::mesh::FieldBase *f = mf.field(); + stk::mesh::EntityRank rank = part_primary_entity_rank(*part); + // Get Ioss::GroupingEntity corresponding to this part... 
+ Ioss::GroupingEntity *entity = m_region.get_entity(part->name()); + + if (entity != nullptr) { + if (f->entity_rank() == rank) { + build_field_part_associations(mf, *part, rank, entity); + process_fields_for_grouping_entity(mf, *part, entity); + + if(entity->type() == Ioss::SIDESET) { + auto io_side_set = dynamic_cast(entity); + STK_ThrowRequire(io_side_set != nullptr); + auto fbs = io_side_set->get_side_blocks(); + + for(auto& io_fblock : fbs) { + build_field_part_associations(mf, *part, rank, io_fblock); + process_fields_for_grouping_entity(mf, *part, io_fblock); + } + } + } + + // If rank is != NODE_RANK, then see if field is defined on the nodes of this part + if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::GroupingEntity *nodeEntity = nullptr; + std::string nodes_name = part->name() + "_nodes"; + + nodeEntity = m_region.get_entity(nodes_name); + + if (nodeEntity == nullptr) { + nodes_name = part->name() + "_n"; + nodeEntity = m_region.get_entity(nodes_name); + } + + if (nodeEntity == nullptr) { + nodeEntity = m_region.get_entity("nodeblock_1"); + } + if (nodeEntity != nullptr) { + build_field_part_associations(mf, *part, stk::topology::NODE_RANK, nodeEntity); + process_fields_for_grouping_entity(mf, *part, nodeEntity); + } + } + } + } + + bool verify_field_request(const Ioss::Region& region, const stk::mesh::MetaData& meta, + const DatabasePurpose dbPurpose, const std::vector& multiStateSuffixes, + const stk::io::MeshField &meshField, bool printWarning) + { + stk::io::InputQuery iq(region, meta, dbPurpose, (multiStateSuffixes.empty() ? 
nullptr : &multiStateSuffixes)); + + stk::io::MeshField mf(meshField.field(), meshField.db_name()); + std::vector missingFields; + + iq.build_field_part_associations(mf, &missingFields, false); + iq.build_field_part_associations_from_grouping_entity(mf, &missingFields, false); + + if(missingFields.size() > 0) { + std::ostringstream oss; + oss << "For input IO field: " + << meshField.db_name() + << " the following associated fields for the requested STK field: " + << meshField.field()->name() + << " of rank: " + << meshField.field()->entity_rank() + << ", are missing in database: " + << region.get_database()->get_filename() + << std::endl; + + for(auto & missingField : missingFields) { + oss << "\t" << missingField.field()->name() << std::endl; + } + + stk::RuntimeWarning() << oss.str(); + } + + return mf.is_active(); + } + + bool verify_field_request(const StkMeshIoBroker &broker, const MeshField &meshField, bool printWarning) + { + auto region = broker.get_input_ioss_region(); + if(!region) { + if(printWarning) { + stk::RuntimeWarning() << "Broker has no input Ioss::Region" << std::endl; + } + + return false; + } + + if(broker.is_meta_data_null()) { + if(printWarning) { + stk::RuntimeWarning() << "Broker has no stk::mesh::MetaData defined" << std::endl; + } + + return false; + } + + const stk::mesh::MetaData &meta = broker.meta_data(); + InputFile& inputFile = broker.get_mesh_database(broker.get_active_mesh()); + + return verify_field_request(*region, meta, inputFile.get_database_purpose(), + inputFile.get_multistate_suffixes(), + meshField, printWarning); + } +} +} + diff --git a/packages/stk/stk_io/stk_io/InputQuery.hpp b/packages/stk/stk_io/stk_io/InputQuery.hpp new file mode 100644 index 000000000000..dfc9128068e1 --- /dev/null +++ b/packages/stk/stk_io/stk_io/InputQuery.hpp @@ -0,0 +1,115 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). 
Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + +#ifndef STK_STK_IO_STK_IO_INPUTQUERY_HPP_ +#define STK_STK_IO_STK_IO_INPUTQUERY_HPP_ + +#include +#include // for DatabasePurpose +#include +#include +#include "Ioss_EntityType.h" + +namespace Ioss { +class PropertyManager; +class GroupingEntity; +class Region; +class DatabaseIO; +} + +namespace stk { +namespace mesh { +class MetaData; +class BulkData; +class Part; +} + +namespace io { +class StkMeshIoBroker; + +using MissingFieldMap = std::map; + +class InputQuery +{ +public: + InputQuery(const Ioss::Region& region, + const stk::mesh::MetaData& meta, + const DatabasePurpose dbPurpose, + const std::vector* multiStateSuffixes = nullptr); + + ~InputQuery() { } + + int build_field_part_associations(stk::io::MeshField& mf, + std::vector *missingFields = nullptr, + const bool throwOnErrorMessage = true); + + int build_field_part_associations_from_grouping_entity(stk::io::MeshField& mf, + std::vector *missingFields = nullptr, + const bool throwOnErrorMessage = true); + + void build_field_part_associations_for_part(stk::io::MeshField &mf, const stk::mesh::Part * part); + + bool process_fields_for_grouping_entity(stk::io::MeshField &mf, + const stk::mesh::Part &part, + Ioss::GroupingEntity *ioEntity, + MissingFieldMap *missingFieldsCollectorPtr = nullptr); + + bool build_field_part_associations(stk::io::MeshField &mesh_field, + const stk::mesh::Part &part, + const stk::mesh::EntityRank rank, + Ioss::GroupingEntity *ioEntity, + MissingFieldMap *missingFields = nullptr); + +private: + const Ioss::Region& m_region; + const stk::mesh::MetaData& m_meta; + DatabasePurpose m_dbPurpose; + const std::vector* m_multiStateSuffixes = nullptr; +}; + +bool verify_field_request(const StkMeshIoBroker &broker, + const stk::io::MeshField &meshField, + bool printWarning = true); + +bool verify_field_request(const Ioss::Region& region, + const stk::mesh::MetaData& meta, + const DatabasePurpose dbPurpose, + const std::vector& multiStateSuffixes, + const stk::io::MeshField &meshField, + 
bool printWarning = true); + +} +} + +#endif /* STK_STK_IO_STK_IO_INPUTQUERY_HPP_ */ diff --git a/packages/stk/stk_io/stk_io/IossBridge.cpp b/packages/stk/stk_io/stk_io/IossBridge.cpp index a69dc2a801f3..0f72a3976e68 100644 --- a/packages/stk/stk_io/stk_io/IossBridge.cpp +++ b/packages/stk/stk_io/stk_io/IossBridge.cpp @@ -1280,7 +1280,7 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta return stk::topology::SHELL_TRI_6_ALL_FACE_SIDES; } else if (name == "shell4") { return stk::topology::SHELL_QUAD_4_ALL_FACE_SIDES; - } else if (name == "shel8") { + } else if (name == "shell8") { return stk::topology::SHELL_QUAD_8_ALL_FACE_SIDES; } else if (name == "shell9") { return stk::topology::SHELL_QUAD_9_ALL_FACE_SIDES; @@ -1837,7 +1837,8 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta } } - const std::string get_suffix_for_field_at_state(enum stk::mesh::FieldState fieldState, std::vector* multiStateSuffixes) + const std::string get_suffix_for_field_at_state(enum stk::mesh::FieldState fieldState, + const std::vector* multiStateSuffixes) { if(nullptr != multiStateSuffixes) { STK_ThrowRequireMsg((multiStateSuffixes->size() >= fieldState), @@ -1872,26 +1873,27 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta } std::string get_stated_field_name(const std::string &fieldBaseName, stk::mesh::FieldState stateIdentifier, - std::vector* multiStateSuffixes) + const std::vector* multiStateSuffixes) { std::string field_name_with_suffix = fieldBaseName + get_suffix_for_field_at_state(stateIdentifier, multiStateSuffixes); return field_name_with_suffix; } bool field_state_exists_on_io_entity(const std::string& dbName, const stk::mesh::FieldBase* field, stk::mesh::FieldState stateIdentifier, - Ioss::GroupingEntity *ioEntity, std::vector* multiStateSuffixes) + Ioss::GroupingEntity *ioEntity, const std::vector* multiStateSuffixes) { std::string fieldNameWithSuffix = 
get_stated_field_name(dbName, stateIdentifier, multiStateSuffixes); return ioEntity->field_exists(fieldNameWithSuffix); } - bool all_field_states_exist_on_io_entity(const std::string& dbName, const stk::mesh::FieldBase* field, Ioss::GroupingEntity *ioEntity, - std::vector &missingStates, std::vector* inputMultiStateSuffixes) + bool all_field_states_exist_on_io_entity(const std::string& dbName, const stk::mesh::FieldBase* field, + Ioss::GroupingEntity *ioEntity, std::vector &missingStates, + const std::vector* inputMultiStateSuffixes) { bool allStatesExist = true; size_t stateCount = field->number_of_states(); - std::vector* multiStateSuffixes = stateCount > 2 ? inputMultiStateSuffixes : nullptr; + const std::vector* multiStateSuffixes = stateCount > 2 ? inputMultiStateSuffixes : nullptr; if(nullptr != multiStateSuffixes) { STK_ThrowRequire(multiStateSuffixes->size() >= stateCount); @@ -4415,6 +4417,5 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta filter_nodes_by_local_connectivity(bulk, params.get_subset_selector(), nodes); } - }//namespace io }//namespace stk diff --git a/packages/stk/stk_io/stk_io/IossBridge.hpp b/packages/stk/stk_io/stk_io/IossBridge.hpp index 0198587b9f04..9a4fa424c936 100644 --- a/packages/stk/stk_io/stk_io/IossBridge.hpp +++ b/packages/stk/stk_io/stk_io/IossBridge.hpp @@ -372,13 +372,14 @@ void delete_selector_property(Ioss::Region &io_region); void delete_selector_property(Ioss::GroupingEntity *io_entity); std::string get_stated_field_name(const std::string &field_base_name, stk::mesh::FieldState state_identifier, - std::vector* multiStateSuffixes=nullptr); + const std::vector* multiStateSuffixes=nullptr); bool field_state_exists_on_io_entity(const std::string& db_name, const stk::mesh::FieldBase* field, stk::mesh::FieldState state_identifier, - Ioss::GroupingEntity *io_entity, std::vector* multiStateSuffixes=nullptr); + Ioss::GroupingEntity *io_entity, const std::vector* multiStateSuffixes=nullptr); 
-bool all_field_states_exist_on_io_entity(const std::string& db_name, const stk::mesh::FieldBase* field, Ioss::GroupingEntity *io_entity, - std::vector &missing_states, std::vector* multiStateSuffixes=nullptr); +bool all_field_states_exist_on_io_entity(const std::string& db_name, const stk::mesh::FieldBase* field, + Ioss::GroupingEntity *io_entity, std::vector &missing_states, + const std::vector* multiStateSuffixes=nullptr); void multistate_field_data_from_ioss(const stk::mesh::BulkData& mesh, const stk::mesh::FieldBase *field, diff --git a/packages/stk/stk_io/stk_io/MeshField.cpp b/packages/stk/stk_io/stk_io/MeshField.cpp index 77d92f5d39cc..bb4e1dd75a76 100644 --- a/packages/stk/stk_io/stk_io/MeshField.cpp +++ b/packages/stk/stk_io/stk_io/MeshField.cpp @@ -302,6 +302,11 @@ double MeshField::restore_field_data(stk::mesh::BulkData &bulk, return time_read; } +void MeshField::clear_field_parts() +{ + m_fieldParts.clear(); +} + void MeshFieldPart::release_field_data() { m_preStep = 0; diff --git a/packages/stk/stk_io/stk_io/MeshField.hpp b/packages/stk/stk_io/stk_io/MeshField.hpp index 10f320e17f30..5f38142c4c75 100644 --- a/packages/stk/stk_io/stk_io/MeshField.hpp +++ b/packages/stk/stk_io/stk_io/MeshField.hpp @@ -51,6 +51,7 @@ namespace stk { namespace mesh { class Part; } } // clang-format on // ####################### End Clang Header Tool Managed Headers ######################## namespace stk { namespace io { class InputFile; } } +namespace stk { namespace io { class InputQuery; } } namespace stk { namespace io { @@ -91,6 +92,7 @@ class MeshField public: friend class InputFile; + friend class InputQuery; // Options: // * Frequency: @@ -165,6 +167,8 @@ class MeshField bool field_restored() const {return m_fieldRestored;} double time_restored() const {return m_timeRestored;} + void clear_field_parts(); + private: MeshField(); diff --git a/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp b/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp index 
f50450bca6e2..6577523e3730 100644 --- a/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp +++ b/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp @@ -112,7 +112,6 @@ void process_surface_entity(Ioss::SideSet *sset, stk::mesh::MetaData &meta) STKIORequire(ss_part != nullptr); stk::mesh::FieldBase *distribution_factors_field = nullptr; - bool surface_df_defined = false; // Has the surface df field been defined yet? size_t block_count = sset->block_count(); for (size_t i=0; i < block_count; i++) { @@ -125,13 +124,14 @@ void process_surface_entity(Ioss::SideSet *sset, stk::mesh::MetaData &meta) } if (sb->field_exists("distribution_factors")) { - if (!surface_df_defined) { - stk::topology::rank_t side_rank = static_cast(stk::io::part_primary_entity_rank(*sb_part)); - std::string field_name = sset->name() + "_df"; - distribution_factors_field = &meta.declare_field(side_rank, field_name); + stk::topology::rank_t side_block_rank = static_cast(stk::io::part_primary_entity_rank(*sb_part)); + std::string field_name = sset->name() + "_df"; + distribution_factors_field = meta.get_field(side_block_rank, field_name); + + if (distribution_factors_field == nullptr) { + distribution_factors_field = &meta.declare_field(side_block_rank, field_name); stk::io::set_field_role(*distribution_factors_field, Ioss::Field::MESH); stk::io::set_distribution_factor_field(*ss_part, *distribution_factors_field); - surface_df_defined = true; } stk::io::set_distribution_factor_field(*sb_part, *distribution_factors_field); int side_node_count = sb->topology()->number_nodes(); diff --git a/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp b/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp index fb9f5e4438eb..76e427b1a2b6 100644 --- a/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp +++ b/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp @@ -372,7 +372,7 @@ std::shared_ptr StkMeshIoBroker::get_input_ioss_region() const } } -InputFile &StkMeshIoBroker::get_mesh_database(size_t input_file_index) +InputFile 
&StkMeshIoBroker::get_mesh_database(size_t input_file_index) const { validate_input_file_index(input_file_index); return *m_inputFiles[input_file_index]; @@ -1275,6 +1275,12 @@ bool StkMeshIoBroker::get_throw_on_missing_input_fields() const void StkMeshIoBroker::set_enable_all_face_sides_shell_topo(bool flag) { m_enableAllFaceSidesShellTopo = flag; + if (m_inputFiles.size() > m_activeMeshIndex) { + Ioss::Region *region = m_inputFiles[m_activeMeshIndex]->get_input_ioss_region().get(); + if (nullptr != region) { + region->property_add(Ioss::Property("ENABLE_ALL_FACE_SIDES_SHELL", "YES")); + } + } } bool StkMeshIoBroker::get_enable_all_face_sides_shell_topo() const diff --git a/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp b/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp index f58f08df3a40..f7164f11ad61 100644 --- a/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp +++ b/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp @@ -287,8 +287,8 @@ namespace stk { // Get a reference to an existing mesh database so it can be modified // Typical modifications deal with - // times: tart/stop/offset/scale/cycle/periodlength. - InputFile &get_mesh_database(size_t input_file_index); + // times: start/stop/offset/scale/cycle/periodlength. + InputFile &get_mesh_database(size_t input_file_index) const; // Remove the specified mesh database from the list of mesh databases. 
// All files associated with the mesh database will be closed and destructors diff --git a/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp b/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp index 4eb2e69024ce..a10eb53b61b7 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp @@ -245,7 +245,8 @@ unsigned get_default_bucket_capacity() { return impl::default_maximum_bucket_cap unsigned get_default_initial_bucket_capacity() { return impl::default_initial_bucket_capacity; } unsigned get_default_maximum_bucket_capacity() { return impl::default_maximum_bucket_capacity; } -bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ) +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Jan 1, 2025 +STK_DEPRECATED bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ) { bool result = true ; { @@ -259,21 +260,19 @@ bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ) } inline -bool bucket_key_less( const unsigned * lhs , const unsigned * rhs ) +bool bucket_key_less( const OrdinalVector& lhs , const OrdinalVector& rhs ) { - const unsigned * const last_lhs = lhs + ( *lhs < *rhs ? 
*lhs : *rhs ); - while ( last_lhs != lhs && *lhs == *rhs ) { ++lhs ; ++rhs ; } - return *lhs < *rhs ; + return lhs < rhs; } -// The part count and part ordinals are less bool BucketLess::operator()( const Bucket * lhs_bucket , - const unsigned * rhs ) const -{ return bucket_key_less( lhs_bucket->key() , rhs ); } + const OrdinalVector& rhs ) const +{ return bucket_key_less( lhs_bucket->key_vector() , rhs ); } -bool BucketLess::operator()( const unsigned * lhs , +bool BucketLess::operator()( const OrdinalVector& lhs , const Bucket * rhs_bucket ) const -{ return bucket_key_less( lhs , rhs_bucket->key() ); } +{ return bucket_key_less( lhs , rhs_bucket->key_vector() ); } +#endif //---------------------------------------------------------------------- @@ -287,7 +286,7 @@ Bucket::Bucket(BulkData & mesh, m_entity_rank(entityRank), m_topology(), m_key(key), - m_partOrdsBeginEnd(m_key.data()+1,m_key.data()+1+m_key[0]), + m_partOrdsBeginEnd(m_key.data(),m_key.data()+m_key.size()), m_capacity(initialCapacity), m_maxCapacity(maximumCapacity), m_size(0), @@ -323,7 +322,7 @@ Bucket::Bucket(BulkData & mesh, setup_connectivity(m_topology, entityRank, stk::topology::FACE_RANK, m_face_kind, m_fixed_face_connectivity); setup_connectivity(m_topology, entityRank, stk::topology::ELEMENT_RANK, m_element_kind, m_fixed_element_connectivity); - m_parts.reserve(m_key.size()-1); + m_parts.reserve(m_key.size()); supersets(m_parts); m_mesh.new_bucket_callback(m_entity_rank, m_parts, m_capacity, this); @@ -654,20 +653,13 @@ unsigned Bucket::get_ngp_field_bucket_is_modified(unsigned fieldOrdinal) const void Bucket::reset_part_ord_begin_end() { - m_partOrdsBeginEnd.first = m_key.data()+1; - m_partOrdsBeginEnd.second = m_key.data()+1+m_key[0]; + m_partOrdsBeginEnd.first = m_key.data(); + m_partOrdsBeginEnd.second = m_key.data()+m_key.size(); } void Bucket::reset_bucket_key(const OrdinalVector& newPartOrdinals) { - unsigned newPartCount = newPartOrdinals.size(); - - m_key.resize(newPartCount + 1); - 
m_key[0] = newPartCount; - - for(unsigned i = 0; i < newPartCount; i++) { - m_key[i+1] = newPartOrdinals[i]; - } + m_key = newPartOrdinals; } void Bucket::reset_bucket_parts(const OrdinalVector& newPartOrdinals) @@ -748,6 +740,10 @@ bool Bucket::destroy_relation(Entity e_from, Entity e_to, const RelationIdentifi DestroyRelationFunctor functor(from_bucket_ordinal, e_to, static_cast(local_id)); modify_connectivity(functor, m_mesh.entity_rank(e_to)); + if (functor.m_modified) { + mark_for_modification(); + } + return functor.m_modified; } @@ -756,6 +752,10 @@ bool Bucket::declare_relation(unsigned bucket_ordinal, Entity e_to, const Connec DeclareRelationFunctor functor(bucket_ordinal, e_to, ordinal, permutation); modify_connectivity(functor, m_mesh.entity_rank(e_to)); + if (functor.m_modified) { + mark_for_modification(); + } + return functor.m_modified; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp b/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp index 423d3c07eefc..7dfa4941326a 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp @@ -91,8 +91,10 @@ std::ostream & operator << ( std::ostream & , const Bucket & ); std::ostream & print( std::ostream & , const std::string & indent , const Bucket & ); +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Jan 1, 2025 // The part count and parts are equal -bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ); +STK_DEPRECATED bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ); +#endif #define CONNECTIVITY_TYPE_SWITCH(entity_kind, fixed_func_sig, dynamic_func_sig, check_invalid) \ switch(entity_kind) { \ @@ -220,9 +222,9 @@ class Bucket std::pair superset_part_ordinals() const { return m_partOrdsBeginEnd; } -#ifndef DOXYGEN_COMPILE + const std::vector & key_vector() const { return m_key; } + const unsigned * key() const { return m_key.data() ; } -#endif /* DOXYGEN_COMPILE */ /** \brief The allocation size, in bytes, of this 
bucket */ unsigned allocation_size() const { return 0 ; } @@ -460,8 +462,6 @@ class Bucket unsigned maximumCapacity, unsigned bucketId); - const std::vector & key_vector() const { return m_key; } - // Add a new entity to end of bucket void add_entity(Entity entity = Entity()); @@ -598,16 +598,16 @@ bool has_superset( const Bucket & bucket , const Part & p ) */ bool has_superset( const Bucket & bucket , const PartVector & parts ); - -struct BucketLess { - bool operator()( const Bucket * lhs_bucket , const unsigned * rhs ) const ; - bool operator()( const unsigned * lhs , const Bucket * rhs_bucket ) const ; +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Jan 1 2025 +struct STK_DEPRECATED BucketLess { + bool operator()( const Bucket * lhs_bucket , const OrdinalVector& rhs ) const ; + bool operator()( const OrdinalVector& lhs , const Bucket * rhs_bucket ) const ; }; -inline -BucketVector::iterator -lower_bound( BucketVector & v , const unsigned * key ) +STK_DEPRECATED inline BucketVector::iterator +lower_bound( BucketVector & v , const OrdinalVector& key ) { return std::lower_bound( v.begin() , v.end() , key , BucketLess() ); } +#endif struct BucketIdComparator { diff --git a/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp b/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp index 5a1655330001..1b81c06f73b8 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp @@ -859,12 +859,13 @@ Entity BulkData::declare_element_side_with_id(const stk::mesh::EntityId globalSi } } else { - EntityKey sideKey(mesh_meta_data().side_rank(), globalSideId); + stk::topology sideTop = bucket(elem).topology().side_topology(sideOrd); + EntityKey sideKey(sideTop.rank(), globalSideId); + std::pair result = internal_get_or_create_entity_with_notification(sideKey); side = result.first; const bool newlyCreated = result.second; - stk::topology sideTop = bucket(elem).topology().side_topology(sideOrd); if (newlyCreated) { PARTVECTOR 
allParts = add_root_topology_part(parts, mesh_meta_data().get_topology_root_part(sideTop)); allParts.push_back(&mesh_meta_data().locally_owned_part()); @@ -4118,12 +4119,6 @@ void BulkData::internal_finish_modification_end(ModEndOptimizationFlag opt) m_meshModification.get_deleted_entity_cache().update_deleted_entities_container(); - for (FieldBase * stkField : mesh_meta_data().get_fields()) { - if (stkField->has_ngp_field()) { - impl::get_ngp_field(*stkField)->debug_modification_end(synchronized_count()); - } - } - for(SelectorBucketMap& selectorBucketMap : m_selector_to_buckets_maps) { for (SelectorBucketMap::iterator itr = selectorBucketMap.begin(), end = selectorBucketMap.end(); itr != end; ++itr) { if (itr->second.empty()) { @@ -4136,6 +4131,14 @@ void BulkData::internal_finish_modification_end(ModEndOptimizationFlag opt) } notify_finished_mod_end(); + + if (mesh_meta_data().is_field_sync_debugger_enabled()) { + for (FieldBase * stkField : mesh_meta_data().get_fields()) { + if (stkField->has_ngp_field()) { + impl::get_ngp_field(*stkField)->debug_modification_end(synchronized_count()); + } + } + } } bool BulkData::internal_modification_end_for_skin_mesh( EntityRank entity_rank, ModEndOptimizationFlag opt, const stk::mesh::Selector& selectedToSkin, @@ -4807,8 +4810,8 @@ void BulkData::internal_change_bucket_parts_without_propagating_to_downward_conn bucket->reset_bucket_parts(newBucketPartList); originalPartition->reset_partition_key(bucket->key_vector()); } else { - if(impl::partition_key_less(originalPartition->key(), partition->key()) || - impl::partition_key_less(partition->key(), originalPartition->key()) ) { + if(originalPartition->get_legacy_partition_id() < partition->get_legacy_partition_id() || + partition->get_legacy_partition_id() < originalPartition->get_legacy_partition_id()) { originalPartition->remove_bucket(bucket); bucket->reset_bucket_parts(newBucketPartList); @@ -5573,7 +5576,14 @@ void BulkData::de_induce_parts_from_nodes(const 
stk::mesh::EntityVector & deacti unsigned BulkData::num_sides(Entity entity) const { + if (bucket(entity).topology().has_mixed_rank_sides()) { + auto num_connected_edges = num_connectivity(entity, stk::topology::EDGE_RANK); + auto num_connected_faces = num_connectivity(entity, stk::topology::FACE_RANK); + + return num_connected_edges + num_connected_faces; + } else { return num_connectivity(entity, mesh_meta_data().side_rank()); + } } void BulkData::sort_entities(const stk::mesh::EntitySorterBase& sorter) diff --git a/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp b/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp index d5f491b8cff8..c752f7255b32 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp @@ -1464,7 +1464,7 @@ class BulkData { friend class ::stk::io::StkMeshIoBroker; template friend class stk::mesh::DeviceMeshT; friend class stk::mesh::StkFieldSyncDebugger; - template class NgpDebugger> friend class stk::mesh::DeviceField; + template class NgpDebugger> friend class stk::mesh::DeviceField; // friends until it is decided what we're doing with Fields and Parallel and BulkData friend void communicate_field_data(const Ghosting & ghosts, const std::vector & fields); diff --git a/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp b/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp index 0756cb18a676..f02b07a3c54e 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp @@ -59,7 +59,7 @@ constexpr unsigned NUM_COMPONENTS_INDEX = 0; constexpr unsigned FIRST_DIMENSION_INDEX = 1; constexpr unsigned INVALID_ORDINAL = 9999999; -template class NgpDebugger> class DeviceField; +template class NgpDebugger> class DeviceField; namespace impl { constexpr double OVERALLOCATION_FACTOR = 1.1; @@ -69,19 +69,18 @@ namespace impl { return std::lround(size_requested*OVERALLOCATION_FACTOR); } - template const FieldDataDeviceViewType 
get_device_data(const DeviceField& deviceField); - template FieldDataDeviceViewType get_device_data(DeviceField&); + template const FieldDataDeviceViewType get_device_data(const DeviceField& deviceField); + template FieldDataDeviceViewType get_device_data(DeviceField&); } -template class NgpDebugger> +template class NgpDebugger> class DeviceField : public NgpFieldBase { -private: - using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; - public: using ExecSpace = stk::ngp::ExecSpace; + using MemSpace = NgpMemSpace; using value_type = T; + using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; KOKKOS_FUNCTION DeviceField() @@ -114,19 +113,19 @@ class DeviceField : public NgpFieldBase initialize(); } - KOKKOS_DEFAULTED_FUNCTION DeviceField(const DeviceField&) = default; - KOKKOS_DEFAULTED_FUNCTION DeviceField(DeviceField&&) = default; + KOKKOS_DEFAULTED_FUNCTION DeviceField(const DeviceField&) = default; + KOKKOS_DEFAULTED_FUNCTION DeviceField(DeviceField&&) = default; KOKKOS_FUNCTION ~DeviceField() {} - KOKKOS_DEFAULTED_FUNCTION DeviceField& operator=(const DeviceField&) = default; - KOKKOS_DEFAULTED_FUNCTION DeviceField& operator=(DeviceField&&) = default; + KOKKOS_DEFAULTED_FUNCTION DeviceField& operator=(const DeviceField&) = default; + KOKKOS_DEFAULTED_FUNCTION DeviceField& operator=(DeviceField&&) = default; void initialize() { hostField->template make_field_sync_debugger(); - fieldSyncDebugger = NgpDebugger(&hostField->get_field_sync_debugger()); + fieldSyncDebugger = NgpDebugger(&hostField->get_field_sync_debugger()); } - void set_field_states(DeviceField* /*fields*/[]) + void set_field_states(DeviceField* /*fields*/[]) { } @@ -359,13 +358,15 @@ class DeviceField : public NgpFieldBase void swap_field_views(NgpFieldBase *other) override { - DeviceField* deviceFieldT = dynamic_cast*>(other); + DeviceField* deviceFieldT = dynamic_cast*>(other); STK_ThrowRequireMsg(deviceFieldT != nullptr, "DeviceField::swap_field_views called with class 
that can't dynamic_cast to DeviceField"); swap_views(deviceData, deviceFieldT->deviceData); + swap_views(hostBucketPtrData, deviceFieldT->hostBucketPtrData); + swap_views(deviceBucketPtrData, deviceFieldT->deviceBucketPtrData); } KOKKOS_FUNCTION - void swap(DeviceField &other) + void swap(DeviceField &other) { swap_views(deviceData, other.deviceData); } @@ -405,10 +406,15 @@ class DeviceField : public NgpFieldBase private: ExecSpace& get_execution_space() const { return hostField->get_execution_space(); } - void set_execution_space(const ExecSpace& executionSpace) { hostField->set_execution_space(executionSpace); } + void set_execution_space(const ExecSpace& executionSpace) + { + static_assert(Kokkos::SpaceAccessibility::accessible); + hostField->set_execution_space(executionSpace); + } void set_execution_space(ExecSpace&& executionSpace) { + static_assert(Kokkos::SpaceAccessibility::accessible); hostField->set_execution_space(std::forward(executionSpace)); } @@ -468,8 +474,9 @@ class DeviceField : public NgpFieldBase void construct_view(const BucketVector& buckets, const std::string& name, unsigned numPerEntity) { unsigned numBuckets = buckets.size(); - FieldDataDeviceViewType tempDataDeviceView = FieldDataDeviceViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, name), numBuckets, - ORDER_INDICES(bucketCapacity, numPerEntity)); + FieldDataDeviceViewType tempDataDeviceView = + FieldDataDeviceViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, name), + numBuckets, ORDER_INDICES(bucketCapacity, numPerEntity)); fieldSyncDebugger.initialize_view(tempDataDeviceView); copy_unmodified_buckets(buckets, tempDataDeviceView, numPerEntity); @@ -609,7 +616,7 @@ class DeviceField : public NgpFieldBase Kokkos::deep_copy(get_execution_space(), deviceBucketPtrData, hostBucketPtrData); } - void copy_unmodified_buckets(const BucketVector& buckets, FieldDataDeviceViewType destDevView, unsigned numPerEntity) + void copy_unmodified_buckets(const BucketVector& buckets, 
FieldDataDeviceViewType destDevView, unsigned numPerEntity) { for(unsigned i = 0; i < buckets.size(); i++) { unsigned oldBucketId = buckets[i]->get_ngp_field_bucket_id(get_ordinal()); @@ -617,7 +624,7 @@ class DeviceField : public NgpFieldBase if(!buckets[i]->get_ngp_field_bucket_is_modified(get_ordinal())) { STK_ThrowRequire(deviceData.extent(0) != 0 && deviceSelectedBucketOffset.extent(0) != 0); - copy_moved_device_bucket_data, UnmanagedDevInnerView>(destDevView, deviceData, oldBucketId, newBucketId, numPerEntity); + copy_moved_device_bucket_data, UnmanagedDevInnerView>(destDevView, deviceData, oldBucketId, newBucketId, numPerEntity); } } } @@ -682,19 +689,19 @@ class DeviceField : public NgpFieldBase void shift_bucket_forward(unsigned oldBucketId, unsigned newBucketId, unsigned numPerEntity) { - copy_moved_device_bucket_data, UnmanagedDevInnerView>(deviceData, deviceData, - oldBucketId, newBucketId, - numPerEntity); + copy_moved_device_bucket_data, UnmanagedDevInnerView>(deviceData, deviceData, + oldBucketId, newBucketId, + numPerEntity); } void shift_buckets_backward(const std::vector & backwardShiftList, unsigned numPerEntity) { for (auto it = backwardShiftList.rbegin(); it != backwardShiftList.rend(); ++it) { const BackwardShiftIndices& shiftIndices = *it; - copy_moved_device_bucket_data, UnmanagedDevInnerView>(deviceData, deviceData, - shiftIndices.oldIndex, - shiftIndices.newIndex, - numPerEntity); + copy_moved_device_bucket_data, UnmanagedDevInnerView>(deviceData, deviceData, + shiftIndices.oldIndex, + shiftIndices.newIndex, + numPerEntity); } } @@ -784,11 +791,11 @@ class DeviceField : public NgpFieldBase host = Kokkos::create_mirror_view(view); } - friend NgpDebugger; - friend const FieldDataDeviceViewType impl::get_device_data(const DeviceField&); - friend FieldDataDeviceViewType impl::get_device_data(DeviceField&); + friend NgpDebugger; + friend const FieldDataDeviceViewType impl::get_device_data(const DeviceField&); + friend FieldDataDeviceViewType 
impl::get_device_data(DeviceField&); - FieldDataDeviceViewType deviceData; + FieldDataDeviceViewType deviceData; FieldDataPointerHostViewType hostBucketPtrData; FieldDataPointerDeviceViewType deviceBucketPtrData; @@ -814,20 +821,20 @@ class DeviceField : public NgpFieldBase UnsignedViewType deviceFieldBucketsNumComponentsPerEntity; UnsignedViewType deviceFieldBucketsMarkedModified; - NgpDebugger fieldSyncDebugger; + NgpDebugger fieldSyncDebugger; }; namespace impl { //not for public consumption. calling this will void your warranty. -template -const FieldDataDeviceViewType get_device_data(const DeviceField& deviceField) +template +const FieldDataDeviceViewType get_device_data(const DeviceField& deviceField) { return deviceField.deviceData; } -template -FieldDataDeviceViewType get_device_data(DeviceField& deviceField) +template +FieldDataDeviceViewType get_device_data(DeviceField& deviceField) { return deviceField.deviceData; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp b/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp index 27faedc82158..ece362a2e4f9 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp @@ -97,6 +97,9 @@ struct DeviceBucketT { KOKKOS_INLINE_FUNCTION ConnectedOrdinals get_connected_ordinals(unsigned offsetIntoBucket, stk::mesh::EntityRank connectedRank) const; + KOKKOS_INLINE_FUNCTION + Permutations get_connected_permutations(unsigned offsetIntoBucket, stk::mesh::EntityRank connectedRank) const; + KOKKOS_FUNCTION ConnectedNodes get_nodes(unsigned offsetIntoBucket) const { return get_connected_entities(offsetIntoBucket, stk::topology::NODE_RANK); @@ -136,6 +139,7 @@ struct DeviceBucketT { void initialize_bucket_attributes(const stk::mesh::Bucket &bucket); void initialize_fixed_data_from_host(const stk::mesh::Bucket &bucket); void update_entity_data_from_host(const stk::mesh::Bucket &bucket); + void update_sparse_connectivity_from_host(const stk::mesh::Bucket 
&bucket); void resize_device_views(const stk::mesh::Bucket &bucket); std::pair scan_entities_for_nodal_connectivity(const stk::mesh::Bucket & bucket); @@ -146,6 +150,11 @@ struct DeviceBucketT { OrdinalViewType m_nodeOrdinals; + Unsigned2dViewType m_sparseConnectivityOffsets; + BucketConnectivityType m_sparseConnectivity; + OrdinalViewType m_sparseConnectivityOrdinals; + PermutationViewType m_sparseConnectivityPermutations; + PartOrdinalViewType m_partOrdinals; const stk::mesh::DeviceMeshT* m_owningMesh; @@ -244,69 +253,39 @@ class DeviceMeshT : public NgpMeshBase } KOKKOS_FUNCTION - ConnectedEntities get_connected_entities(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity, stk::mesh::EntityRank connectedRank) const + ConnectedEntities get_connected_entities(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex, stk::mesh::EntityRank connectedRank) const { - if (connectedRank == stk::topology::NODE_RANK) - { - return buckets[rank](entity.bucket_id).get_connected_entities(entity.bucket_ord, connectedRank); - } - - int entityOffset = bucketEntityOffsets[rank](entity.bucket_id) + entity.bucket_ord; - int connectivityOffset = entityConnectivityOffset[rank][connectedRank](entityOffset); - size_t numConnected = entityConnectivityOffset[rank][connectedRank](entityOffset+1) - - connectivityOffset; - ConnectedEntities connectedEntities(nullptr, 0); - if (numConnected > 0) { - int stride = 1; - connectedEntities = - ConnectedEntities(&(sparseConnectivity[rank][connectedRank](connectivityOffset)), numConnected, stride); - } - return connectedEntities; + return buckets[rank](entityIndex.bucket_id).get_connected_entities(entityIndex.bucket_ord, connectedRank); } KOKKOS_FUNCTION - ConnectedNodes get_nodes(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity) const + ConnectedNodes get_nodes(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex) const { - return 
buckets[rank](entity.bucket_id).get_nodes(entity.bucket_ord); + return buckets[rank](entityIndex.bucket_id).get_nodes(entityIndex.bucket_ord); } KOKKOS_FUNCTION - ConnectedEntities get_edges(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity) const + ConnectedEntities get_edges(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex) const { - return get_connected_entities(rank, entity, stk::topology::EDGE_RANK); + return get_connected_entities(rank, entityIndex, stk::topology::EDGE_RANK); } KOKKOS_FUNCTION - ConnectedEntities get_faces(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity) const + ConnectedEntities get_faces(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex) const { - return get_connected_entities(rank, entity, stk::topology::FACE_RANK); + return get_connected_entities(rank, entityIndex, stk::topology::FACE_RANK); } KOKKOS_FUNCTION - ConnectedEntities get_elements(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity) const + ConnectedEntities get_elements(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex) const { - return get_connected_entities(rank, entity, stk::topology::ELEM_RANK); + return get_connected_entities(rank, entityIndex, stk::topology::ELEM_RANK); } KOKKOS_FUNCTION - ConnectedOrdinals get_connected_ordinals(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity, stk::mesh::EntityRank connectedRank) const + ConnectedOrdinals get_connected_ordinals(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex, stk::mesh::EntityRank connectedRank) const { - if (connectedRank == stk::topology::NODE_RANK) { - return buckets[rank](entity.bucket_id).get_connected_ordinals(entity.bucket_ord, connectedRank); - } - - int entityOffset = bucketEntityOffsets[rank](entity.bucket_id) + entity.bucket_ord; - int connectivityOffset = entityConnectivityOffset[rank][connectedRank](entityOffset); - size_t numConnected = 
entityConnectivityOffset[rank][connectedRank](entityOffset+1) - - connectivityOffset; - ConnectedOrdinals connectedOrdinals(nullptr, 0); - if (numConnected > 0) - { - int stride = 1; - connectedOrdinals = ConnectedOrdinals( - &(sparseConnectivityOrdinals[rank][connectedRank](connectivityOffset)), numConnected, stride); - } - return connectedOrdinals; + return buckets[rank](entityIndex.bucket_id).get_connected_ordinals(entityIndex.bucket_ord, connectedRank); } KOKKOS_FUNCTION @@ -334,24 +313,9 @@ class DeviceMeshT : public NgpMeshBase } KOKKOS_FUNCTION - Permutations get_permutations(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity, stk::mesh::EntityRank connectedRank) const + Permutations get_permutations(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex, stk::mesh::EntityRank connectedRank) const { - Permutations permutations(nullptr, 0); - if (connectedRank == stk::topology::NODE_RANK) - { - return permutations; - } - - int entityOffset = bucketEntityOffsets[rank](entity.bucket_id) + entity.bucket_ord; - int connectivityOffset = entityConnectivityOffset[rank][connectedRank](entityOffset); - size_t numConnected = entityConnectivityOffset[rank][connectedRank](entityOffset+1) - - connectivityOffset; - if (numConnected > 0) - { - int stride = 1; - permutations = Permutations(&(sparsePermutations[rank][connectedRank](connectivityOffset)), numConnected, stride); - } - return permutations; + return buckets[rank](entityIndex.bucket_id).get_connected_permutations(entityIndex.bucket_ord, connectedRank); } KOKKOS_FUNCTION @@ -522,12 +486,29 @@ class DeviceMeshT : public NgpMeshBase return m_needSyncToHost; } -private: - void set_entity_keys(const stk::mesh::BulkData& bulk_in); + template + void impl_batch_change_entity_parts(const Kokkos::View& entities, + const Kokkos::View& addPartOrdinals, + const Kokkos::View& removePartOrdinals) + { + using EntitiesMemorySpace = typename std::remove_reference::type::memory_space; + using 
AddPartOrdinalsMemorySpace = typename std::remove_reference::type::memory_space; + using RemovePartOrdinalsMemorySpace = typename std::remove_reference::type::memory_space; + + static_assert(Kokkos::SpaceAccessibility::accessible, + "The memory space of the 'entities' View is inaccessible from the DeviceMesh execution space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "The memory space of the 'addPartOrdinals' View is inaccessible from the DeviceMesh execution space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "The memory space of the 'removePartOrdinals' View is inaccessible from the DeviceMesh execution space"); - void set_bucket_entity_offsets(const stk::mesh::BulkData& bulk_in); + using HostEntitiesType = typename std::remove_reference::type::HostMirror; + using HostAddPartOrdinalsType = typename std::remove_reference::type::HostMirror; + using HostRemovePartOrdinalsType = typename std::remove_reference::type::HostMirror; + } - void fill_sparse_connectivities(const stk::mesh::BulkData& bulk_in); +private: + void set_entity_keys(const stk::mesh::BulkData& bulk_in); KOKKOS_FUNCTION bool is_last_bucket_reference(unsigned rank = stk::topology::NODE_RANK) const @@ -557,8 +538,6 @@ class DeviceMeshT : public NgpMeshBase void copy_mesh_indices_to_device(); - void copy_bucket_entity_offsets_to_device(); - void copy_sparse_connectivities_to_device(); void copy_volatile_fast_shared_comm_map_to_device(); @@ -578,11 +557,6 @@ class DeviceMeshT : public NgpMeshBase HostMeshIndexType hostMeshIndices; MeshIndexType deviceMeshIndices; - BucketEntityOffsetsViewType bucketEntityOffsets[stk::topology::NUM_RANKS]; - UnsignedViewType entityConnectivityOffset[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - EntityViewType sparseConnectivity[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - OrdinalViewType sparseConnectivityOrdinals[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - PermutationViewType 
sparsePermutations[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; UnsignedViewType volatileFastSharedCommMapOffset[stk::topology::NUM_RANKS]; FastSharedCommMapViewType volatileFastSharedCommMap[stk::topology::NUM_RANKS]; }; @@ -599,9 +573,10 @@ DeviceBucketT::get_connected_entities(unsigned offsetIntoBuck const size_t nodeOffset = m_nodeConnectivityOffsets(offsetIntoBucket); return ConnectedEntities(&m_nodeConnectivity(nodeOffset), numNodes, 1); } - STK_NGP_ThrowAssert(m_owningMesh != nullptr); - stk::mesh::FastMeshIndex meshIndex{bucket_id(), offsetIntoBucket}; - return m_owningMesh->get_connected_entities(entity_rank(), meshIndex, connectedRank); + + const unsigned offset = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket); + const unsigned length = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket+1) - offset; + return ConnectedEntities(&m_sparseConnectivity(offset), length, 1); } template @@ -613,10 +588,26 @@ DeviceBucketT::get_connected_ordinals(unsigned offsetIntoBuck const unsigned numNodes = m_nodeConnectivityOffsets(offsetIntoBucket+1)-m_nodeConnectivityOffsets(offsetIntoBucket); return ConnectedOrdinals(m_nodeOrdinals.data(), numNodes, 1); } - STK_NGP_ThrowAssert(m_owningMesh != nullptr); - stk::mesh::FastMeshIndex meshIndex{bucket_id(), offsetIntoBucket}; - return m_owningMesh->get_connected_ordinals(entity_rank(), meshIndex, connectedRank); + + const unsigned offset = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket); + const unsigned length = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket+1) - offset; + return ConnectedOrdinals(&m_sparseConnectivityOrdinals(offset), length, 1); +} + +template +KOKKOS_INLINE_FUNCTION +typename DeviceBucketT::Permutations +DeviceBucketT::get_connected_permutations(unsigned offsetIntoBucket, stk::mesh::EntityRank connectedRank) const { + STK_NGP_ThrowAssert(connectedRank < stk::topology::NUM_RANKS); + const unsigned offset = 
m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket); + const unsigned length = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket+1) - offset; + if (m_sparseConnectivityPermutations.size() <= offset) { + return Permutations(nullptr, 0); + } + + return Permutations(&m_sparseConnectivityPermutations(offset), length, 1); } + template void DeviceBucketT::initialize_bucket_attributes(const stk::mesh::Bucket &bucket) { @@ -660,33 +651,37 @@ void DeviceBucketT::resize_device_views(const stk::mesh::Buck { Kokkos::Profiling::pushRegion("resize_device_views()"); + Kokkos::Profiling::pushRegion("set node ordinals"); + const auto [maxNodesPerEntity, totalNumConnectedNodes] = scan_entities_for_nodal_connectivity(bucket); if (m_nodeOrdinals.size() != maxNodesPerEntity) { - m_nodeOrdinals = OrdinalViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "NodeOrdinals"), - static_cast(maxNodesPerEntity)); + Kokkos::resize(Kokkos::WithoutInitializing, m_nodeOrdinals, static_cast(maxNodesPerEntity)); OrdinalViewType& nodeOrds = m_nodeOrdinals; //local var to avoid implicit this capture Kokkos::parallel_for(Kokkos::RangePolicy(0, maxNodesPerEntity), KOKKOS_LAMBDA(const int i) { nodeOrds(i) = static_cast(i); }); } + Kokkos::Profiling::popRegion(); + Kokkos::Profiling::pushRegion("bucket entities"); if (m_entities.size() != m_bucketCapacity) { - m_entities = EntityViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "BucketEntities"), m_bucketCapacity); + Kokkos::resize(Kokkos::WithoutInitializing, m_entities, m_bucketCapacity); STK_ThrowRequireMsg(m_bucketCapacity > 0, "bucket capacity must be greater than 0"); } + Kokkos::Profiling::popRegion(); + Kokkos::Profiling::pushRegion("nodal connectivity"); if (m_nodeConnectivity.size() != totalNumConnectedNodes) { - m_nodeConnectivity = BucketConnectivityType(Kokkos::view_alloc(Kokkos::WithoutInitializing, - "NodeConnectivity"), totalNumConnectedNodes); + Kokkos::resize(Kokkos::WithoutInitializing, m_nodeConnectivity, 
totalNumConnectedNodes); } if (m_nodeConnectivityOffsets.size() != m_bucketCapacity+1) { - m_nodeConnectivityOffsets = OrdinalViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, - "NodeConnectivityOffsets"), m_bucketCapacity+1); + Kokkos::resize(Kokkos::WithoutInitializing, m_nodeConnectivityOffsets, m_bucketCapacity+1); } Kokkos::Profiling::popRegion(); + Kokkos::Profiling::popRegion(); } template @@ -725,6 +720,87 @@ void DeviceBucketT::update_entity_data_from_host(const stk::m Kokkos::Profiling::popRegion(); } +constexpr double RESIZE_FACTOR = 0.05; + +template +inline void reallocate_views(DEVICE_VIEW & deviceView, HOST_VIEW & hostView, size_t requiredSize, double resizeFactor = 0.0) +{ + const size_t currentSize = deviceView.extent(0); + const size_t shrinkThreshold = currentSize - static_cast(2*resizeFactor*currentSize); + const bool needGrowth = (requiredSize > currentSize); + const bool needShrink = (requiredSize < shrinkThreshold); + + if (needGrowth || needShrink) { + const size_t newSize = requiredSize + static_cast(resizeFactor*requiredSize); + deviceView = DEVICE_VIEW(Kokkos::view_alloc(Kokkos::WithoutInitializing, deviceView.label()), newSize); + hostView = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceView); + } +} + +template +void DeviceBucketT::update_sparse_connectivity_from_host(const stk::mesh::Bucket &bucket) +{ + Kokkos::Profiling::pushRegion("update_sparse_connectivity_from_host()"); + + Unsigned2dViewType::HostMirror hostConnectivityOffsets("hostConnectivityOffsets", 0,0); + Kokkos::resize(Kokkos::WithoutInitializing, hostConnectivityOffsets, stk::topology::NUM_RANKS, bucket.size()+1); + Kokkos::resize(Kokkos::WithoutInitializing, m_sparseConnectivityOffsets, stk::topology::NUM_RANKS, bucket.size()+1); + BucketConnectivityType::HostMirror hostConnectivity("hostConnectivity", 0); + OrdinalViewType::HostMirror hostConnectivityOrdinals("hostConnectivityOrdinals", 0); + PermutationViewType::HostMirror 
hostConnectivityPermutations("hostConnectivityPermutations", 0); + + const stk::mesh::EntityRank endRank = static_cast(bucket.mesh().mesh_meta_data().entity_rank_count()); + + unsigned offset = 0; + for(stk::mesh::EntityRank connectedRank=stk::topology::EDGE_RANK; connectedRank void DeviceMeshT::update_mesh() { @@ -738,15 +814,23 @@ void DeviceMeshT::update_mesh() const bool anyChanges = fill_buckets(*bulk); if (anyChanges) { + Kokkos::Profiling::pushRegion("anyChanges stuff"); + + Kokkos::Profiling::pushRegion("entity-keys"); set_entity_keys(*bulk); copy_entity_keys_to_device(); - set_bucket_entity_offsets(*bulk); - copy_bucket_entity_offsets_to_device(); - fill_sparse_connectivities(*bulk); - copy_sparse_connectivities_to_device(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("volatile-fast-shared-comm-map"); copy_volatile_fast_shared_comm_map_to_device(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("mesh-indices"); fill_mesh_indices(*bulk); copy_mesh_indices_to_device(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::popRegion(); } synchronizedCount = bulk->synchronized_count(); @@ -780,6 +864,7 @@ bool DeviceMeshT::fill_buckets(const stk::mesh::BulkData& bulk_in) bucketBuffer[iBucket].initialize_bucket_attributes(stkBucket); bucketBuffer[iBucket].initialize_fixed_data_from_host(stkBucket); bucketBuffer[iBucket].update_entity_data_from_host(stkBucket); + bucketBuffer[iBucket].update_sparse_connectivity_from_host(stkBucket); anyBucketChanges = true; Kokkos::Profiling::popRegion(); } @@ -789,6 +874,7 @@ bool DeviceMeshT::fill_buckets(const stk::mesh::BulkData& bulk_in) new (&bucketBuffer[iBucket]) DeviceBucketT(buckets[rank][ngpBucketId]); if (stkBucket.is_modified()) { bucketBuffer[iBucket].update_entity_data_from_host(stkBucket); + bucketBuffer[iBucket].update_sparse_connectivity_from_host(stkBucket); anyBucketChanges = true; } bucketBuffer[iBucket].m_bucketId = stkBucket.bucket_id(); @@ -811,23 +897,6 @@ 
bool DeviceMeshT::fill_buckets(const stk::mesh::BulkData& bulk_in) return anyBucketChanges; } -constexpr double RESIZE_FACTOR = 0.05; - -template -inline void reallocate_views(DEVICE_VIEW & deviceView, HOST_VIEW & hostView, size_t requiredSize, double resizeFactor = 0.0) -{ - const size_t currentSize = deviceView.extent(0); - const size_t shrinkThreshold = currentSize - static_cast(2*resizeFactor*currentSize); - const bool needGrowth = (requiredSize > currentSize); - const bool needShrink = (requiredSize < shrinkThreshold); - - if (needGrowth || needShrink) { - const size_t newSize = requiredSize + static_cast(resizeFactor*requiredSize); - deviceView = DEVICE_VIEW(Kokkos::view_alloc(Kokkos::WithoutInitializing, deviceView.label()), newSize); - hostView = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceView); - } -} - template void DeviceMeshT::set_entity_keys(const stk::mesh::BulkData& bulk_in) { @@ -848,119 +917,6 @@ void DeviceMeshT::set_entity_keys(const stk::mesh::BulkData& bulk_i } } -template -void DeviceMeshT::set_bucket_entity_offsets(const stk::mesh::BulkData& bulk_in) -{ - auto& hostBucketEntityOffsets = deviceMeshHostData->hostBucketEntityOffsets; - - for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; ranksize(); - } - for (unsigned i = stkBuckets.size(); i < hostBucketEntityOffsets[rank].extent(0); ++i) { - hostBucketEntityOffsets[rank](i) = bucketOffsetIntoEntities; - } - } -} - -template -void DeviceMeshT::fill_sparse_connectivities(const stk::mesh::BulkData& bulk_in) -{ - auto& hostEntityConnectivityOffset = deviceMeshHostData->hostEntityConnectivityOffset; - auto& hostBucketEntityOffsets = deviceMeshHostData->hostBucketEntityOffsets; - auto& hostSparseConnectivity = deviceMeshHostData->hostSparseConnectivity; - auto& hostSparseConnectivityOrdinals = deviceMeshHostData->hostSparseConnectivityOrdinals; - auto& hostSparsePermutations = deviceMeshHostData->hostSparsePermutations; - - unsigned 
totalNumConnectedEntities[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS] = {{0}, {0}, {0}, {0}, {0}}; - unsigned totalNumPermutations[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS] = {{0}, {0}, {0}, {0}, {0}}; - - for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; rank 0) { - - const stk::mesh::Entity* connectedEntities = stkBucket.begin(iEntity, connectedRank); - const stk::mesh::ConnectivityOrdinal* connectedOrdinals = stkBucket.begin_ordinals(iEntity, connectedRank); - const stk::mesh::Permutation* permutations = hasPermutation ? stkBucket.begin_permutations(iEntity, connectedRank) : nullptr; - for(unsigned i=0; i void DeviceMeshT::fill_mesh_indices(const stk::mesh::BulkData& bulk_in) { @@ -999,37 +955,6 @@ void DeviceMeshT::copy_mesh_indices_to_device() deviceMeshIndices = nonconst_device_mesh_indices; } -template -void DeviceMeshT::copy_bucket_entity_offsets_to_device() -{ - auto& hostBucketEntityOffsets = deviceMeshHostData->hostBucketEntityOffsets; - - for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; rank -void DeviceMeshT::copy_sparse_connectivities_to_device() -{ - auto& hostEntityConnectivityOffset = deviceMeshHostData->hostEntityConnectivityOffset; - auto& hostSparseConnectivity = deviceMeshHostData->hostSparseConnectivity; - auto& hostSparseConnectivityOrdinals = deviceMeshHostData->hostSparseConnectivityOrdinals; - auto& hostSparsePermutations = deviceMeshHostData->hostSparsePermutations; - - for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; rank void DeviceMeshT::copy_volatile_fast_shared_comm_map_to_device() { diff --git a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp index ca5a1b61ba11..16dafae9617b 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp @@ -71,7 +71,7 @@ void verify_declare_element_edge( ? 
elem_top.edge_topology(local_edge_id) : invalid; STK_ThrowErrorMsgIf( elem_top!=stk::topology::INVALID_TOPOLOGY && local_edge_id >= elem_top.num_edges(), - "For elem " << mesh.identifier(elem) << ", local_edge_id " << local_edge_id << ", " << + "For elem " << mesh.identifier(elem) << " ("<(s)); m_field_meta_data.swap(sField->m_field_meta_data); @@ -517,15 +518,21 @@ void FieldBase::rotate_multistate_data(bool rotateNgpFieldViews) std::swap(m_modifiedOnHost, sField->m_modifiedOnHost); std::swap(m_modifiedOnDevice, sField->m_modifiedOnDevice); } - - for(int s = 0; s < numStates; ++s) { - NgpFieldBase* ngpField = field_state(static_cast(s))->get_ngp_field(); - if (ngpField != nullptr) { - ngpField->update_bucket_pointer_view(); - ngpField->fence(); + Kokkos::Profiling::popRegion(); + + if (!(rotateNgpFieldViews && allStatesHaveNgpFields)) { + Kokkos::Profiling::pushRegion("ngpField update_bucket_pointer_view"); + for(int s = 0; s < numStates; ++s) { + NgpFieldBase* ngpField = field_state(static_cast(s))->get_ngp_field(); + if (ngpField != nullptr) { + ngpField->update_bucket_pointer_view(); + ngpField->fence(); + } } + Kokkos::Profiling::popRegion(); } + Kokkos::Profiling::pushRegion("ngpField swap_field_views"); if (rotateNgpFieldViews && allStatesHaveNgpFields) { for (int s = 1; s < numStates; ++s) { NgpFieldBase* ngpField_sminus1 = field_state(static_cast(s-1))->get_ngp_field(); @@ -533,12 +540,13 @@ void FieldBase::rotate_multistate_data(bool rotateNgpFieldViews) ngpField_s->swap_field_views(ngpField_sminus1); } } + Kokkos::Profiling::popRegion(); } } void FieldBase::modify_on_host() const -{ +{ STK_ThrowRequireMsg(m_modifiedOnDevice == false, "Modify on host called for Field: " << name() << " but it has an uncleared modified_on_device"); @@ -556,7 +564,7 @@ FieldBase::modify_on_device() const void FieldBase::modify_on_host(const Selector& s) const -{ +{ modify_on_host(); } diff --git a/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp 
b/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp index 99c56712f671..59b907ca36c1 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp @@ -62,7 +62,7 @@ class BulkData; class MetaData; class UnitTestFieldImpl; class FieldBase; -template class NgpDebugger> class DeviceField; +template class NgpDebugger> class DeviceField; namespace impl { class FieldRepository; @@ -340,8 +340,8 @@ class FieldBase friend NgpFieldBase* impl::get_ngp_field(const FieldBase & stkField); friend void impl::set_ngp_field(const FieldBase & stkField, NgpFieldBase * ngpField); - template class NgpDebugger> friend class HostField; - template class NgpDebugger> friend class DeviceField; + template class NgpDebugger> friend class HostField; + template class NgpDebugger> friend class DeviceField; template friend class Field; protected: diff --git a/packages/stk/stk_mesh/stk_mesh/base/GetNgpField.hpp b/packages/stk/stk_mesh/stk_mesh/base/GetNgpField.hpp index 920e47fde7d0..fb219d103710 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/GetNgpField.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/GetNgpField.hpp @@ -41,13 +41,16 @@ namespace stk { namespace mesh { -template class NgpDebugger = DefaultNgpFieldSyncDebugger> -NgpField & get_updated_ngp_field_async(const FieldBase & stkField, const stk::ngp::ExecSpace& execSpace) +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +NgpField & get_updated_ngp_field_async(const FieldBase & stkField, const stk::ngp::ExecSpace& execSpace) { + static_assert(Kokkos::SpaceAccessibility::accessible); + NgpFieldBase * ngpField = impl::get_ngp_field(stkField); if (ngpField == nullptr) { - ngpField = new NgpField(stkField.get_mesh(), stkField, true); + ngpField = new NgpField(stkField.get_mesh(), stkField, true); ngpField->update_field(execSpace); ngpField->debug_initialize_debug_views(); impl::set_ngp_field(stkField, ngpField); @@ -59,16 +62,19 @@ NgpField & get_updated_ngp_field_async(const 
FieldBase & stkFiel } } - return dynamic_cast< NgpField& >(*ngpField); + return dynamic_cast< NgpField& >(*ngpField); } -template class NgpDebugger = DefaultNgpFieldSyncDebugger> -NgpField & get_updated_ngp_field_async(const FieldBase & stkField, stk::ngp::ExecSpace&& execSpace) +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +NgpField & get_updated_ngp_field_async(const FieldBase & stkField, stk::ngp::ExecSpace&& execSpace) { + static_assert(Kokkos::SpaceAccessibility::accessible); + NgpFieldBase * ngpField = impl::get_ngp_field(stkField); if (ngpField == nullptr) { - ngpField = new NgpField(stkField.get_mesh(), stkField, true); + ngpField = new NgpField(stkField.get_mesh(), stkField, true); ngpField->update_field(std::forward(execSpace)); ngpField->debug_initialize_debug_views(); impl::set_ngp_field(stkField, ngpField); @@ -80,13 +86,17 @@ NgpField & get_updated_ngp_field_async(const FieldBase & stkFiel } } - return dynamic_cast< NgpField& >(*ngpField); + return dynamic_cast< NgpField& >(*ngpField); } -template class NgpDebugger = DefaultNgpFieldSyncDebugger> -NgpField & get_updated_ngp_field(const FieldBase & stkField) +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +NgpField & get_updated_ngp_field(const FieldBase & stkField) { - auto& ngpFieldRef = get_updated_ngp_field_async(stkField, Kokkos::DefaultExecutionSpace()); + using ExecSpace = Kokkos::DefaultExecutionSpace; + static_assert(Kokkos::SpaceAccessibility::accessible); + + auto& ngpFieldRef = get_updated_ngp_field_async(stkField, ExecSpace()); ngpFieldRef.fence(); return ngpFieldRef; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp b/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp index fe70d29c9009..279b38e9f6dc 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp @@ -54,13 +54,14 @@ namespace stk { namespace mesh { -template class NgpDebugger> +template class NgpDebugger> class HostField : public 
NgpFieldBase { public: using ExecSpace = stk::ngp::ExecSpace; + using MemSpace = NgpMemSpace; using value_type = T; - using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; + using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; HostField() : NgpFieldBase(), @@ -79,10 +80,10 @@ class HostField : public NgpFieldBase field->template make_field_sync_debugger(); } - HostField(const HostField&) = default; - HostField(HostField&&) = default; - HostField& operator=(const HostField&) = default; - HostField& operator=(HostField&&) = default; + HostField(const HostField&) = default; + HostField(HostField&&) = default; + HostField& operator=(const HostField&) = default; + HostField& operator=(HostField&&) = default; void update_field(const ExecSpace& newExecSpace) override { @@ -96,7 +97,7 @@ class HostField : public NgpFieldBase update_field(); } - void set_field_states(HostField* fields[]) {} + void set_field_states(HostField* fields[]) {} size_t num_syncs_to_host() const override { return field->num_syncs_to_host(); } size_t num_syncs_to_device() const override { return field->num_syncs_to_device(); } diff --git a/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp b/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp index 36823b3ede6f..04eba0b2be61 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp @@ -302,6 +302,14 @@ class HostMeshT : public NgpMeshBase return false; } + template + void impl_batch_change_entity_parts(const Kokkos::View& entities, + const Kokkos::View& addPartOrdinals, + const Kokkos::View& removePartOrdinals) + { + batch_change_entity_parts(entities, addPartOrdinals, removePartOrdinals); + } + private: stk::mesh::BulkData *bulk; size_t m_syncCountWhenUpdated; diff --git a/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp b/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp index 67ac6017ab89..d1a0173ee887 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp +++ 
b/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp @@ -48,6 +48,7 @@ #include "stk_mesh/base/Part.hpp" // for Part, etc #include "stk_mesh/base/Selector.hpp" // for Selector #include "stk_mesh/base/Types.hpp" // for PartVector, EntityRank, etc +#include "stk_mesh/base/StkFieldSyncDebugger.hpp" #include "stk_mesh/baseImpl/PartRepository.hpp" // for PartRepository #include "stk_topology/topology.hpp" // for topology, etc #include "stk_util/parallel/Parallel.hpp" // for parallel_machine_rank, etc @@ -145,20 +146,21 @@ void MetaData::require_valid_entity_rank( EntityRank rank ) const //---------------------------------------------------------------------- MetaData::MetaData(size_t spatial_dimension, const std::vector& entity_rank_names) - : m_bulk_data(NULL), - m_commit( false ), - m_are_late_fields_enabled( false ), - m_part_repo( this ), + : m_bulk_data(nullptr), + m_part_repo(this), m_attributes(), - m_universal_part( NULL ), - m_owns_part( NULL ), - m_shares_part( NULL ), - m_aura_part(NULL), + m_universal_part(nullptr), + m_owns_part(nullptr), + m_shares_part(nullptr), + m_aura_part(nullptr), m_field_repo(*this), - m_coord_field(NULL), - m_entity_rank_names( ), - m_spatial_dimension( 0 /*invalid spatial dimension*/), - m_surfaceToBlock() + m_coord_field(nullptr), + m_entity_rank_names(), + m_spatial_dimension(0 /*invalid spatial dimension*/), + m_surfaceToBlock(), + m_commit(false), + m_are_late_fields_enabled(false), + m_isFieldSyncDebuggerEnabled(false) { const size_t numRanks = stk::topology::NUM_RANKS; STK_ThrowRequireMsg(entity_rank_names.size() <= numRanks, "MetaData: number of entity-ranks (" << entity_rank_names.size() << ") exceeds limit of stk::topology::NUM_RANKS (" << numRanks <<")"); @@ -172,20 +174,21 @@ MetaData::MetaData(size_t spatial_dimension, const std::vector& ent } MetaData::MetaData() - : m_bulk_data(NULL), - m_commit( false ), - m_are_late_fields_enabled( false ), - m_part_repo( this ), + : m_bulk_data(nullptr), + m_part_repo(this), 
m_attributes(), - m_universal_part( NULL ), - m_owns_part( NULL ), - m_shares_part( NULL ), - m_aura_part(NULL), + m_universal_part(nullptr), + m_owns_part(nullptr), + m_shares_part(nullptr), + m_aura_part(nullptr), m_field_repo(*this), - m_coord_field(NULL), - m_entity_rank_names( ), - m_spatial_dimension( 0 /*invalid spatial dimension*/), - m_surfaceToBlock() + m_coord_field(nullptr), + m_entity_rank_names(), + m_spatial_dimension(0 /*invalid spatial dimension*/), + m_surfaceToBlock(), + m_commit(false), + m_are_late_fields_enabled(false), + m_isFieldSyncDebuggerEnabled(false) { // Declare the predefined parts @@ -444,59 +447,51 @@ void MetaData::internal_declare_part_subset( Part & superset , Part & subset, bo //---------------------------------------------------------------------- -void MetaData::declare_field_restriction( - FieldBase & arg_field , - const Part & arg_part , - const unsigned arg_num_scalars_per_entity , - const unsigned arg_first_dimension , - const void * arg_init_value ) +void MetaData::declare_field_restriction(FieldBase& field, + const Part& part, + const unsigned numScalarsPerEntity, + const unsigned firstDimension, + const void* initValue) { - static const char method[] = - "std::mesh::MetaData::declare_field_restriction" ; - - require_same_mesh_meta_data( MetaData::get(arg_field) ); - require_same_mesh_meta_data( MetaData::get(arg_part) ); - - m_field_repo.declare_field_restriction( - method, - arg_field, - arg_part, - m_part_repo.get_all_parts(), - arg_num_scalars_per_entity, - arg_first_dimension, - arg_init_value - ); + require_same_mesh_meta_data(MetaData::get(field)); + require_same_mesh_meta_data(MetaData::get(part)); + + m_field_repo.declare_field_restriction("std::mesh::MetaData::declare_field_restriction", + field, + part, + m_part_repo.get_all_parts(), + numScalarsPerEntity, + firstDimension, + initValue); if (is_commit()) { - m_bulk_data->reallocate_field_data(arg_field); + m_bulk_data->reallocate_field_data(field); } + + 
FieldSyncDebugger::declare_field_restriction(field, part, numScalarsPerEntity, firstDimension); } -void MetaData::declare_field_restriction( - FieldBase & arg_field , - const Selector & arg_selector , - const unsigned arg_num_scalars_per_entity , - const unsigned arg_first_dimension , - const void * arg_init_value ) +void MetaData::declare_field_restriction(FieldBase& field, + const Selector& selector, + const unsigned numScalarsPerEntity, + const unsigned firstDimension, + const void* initValue) { - static const char method[] = - "std::mesh::MetaData::declare_field_restriction" ; - - require_same_mesh_meta_data( MetaData::get(arg_field) ); - - m_field_repo.declare_field_restriction( - method, - arg_field, - arg_selector, - m_part_repo.get_all_parts(), - arg_num_scalars_per_entity, - arg_first_dimension, - arg_init_value - ); + require_same_mesh_meta_data(MetaData::get(field)); + + m_field_repo.declare_field_restriction("std::mesh::MetaData::declare_field_restriction", + field, + selector, + m_part_repo.get_all_parts(), + numScalarsPerEntity, + firstDimension, + initValue); if (is_commit()) { - m_bulk_data->reallocate_field_data(arg_field); + m_bulk_data->reallocate_field_data(field); } + + FieldSyncDebugger::declare_field_restriction(field, selector, numScalarsPerEntity, firstDimension); } //---------------------------------------------------------------------- @@ -754,6 +749,13 @@ std::vector MetaData::get_part_aliases(const Part& part) const return std::vector(); } + +void MetaData::declare_field_sync_debugger_field(stk::mesh::FieldBase& field) +{ + FieldSyncDebugger::declare_field(field); +} + + //---------------------------------------------------------------------- //---------------------------------------------------------------------- // Verify parallel consistency of fields and parts @@ -1216,7 +1218,7 @@ get_topology(const MetaData& meta_data, EntityRank entity_rank, const std::pair< } -stk::topology get_topology( shards::CellTopology shards_topology, 
unsigned spatial_dimension) +stk::topology get_topology( shards::CellTopology shards_topology, unsigned spatial_dimension, bool useAllFaceSideShell) { stk::topology t; @@ -1269,8 +1271,7 @@ stk::topology get_topology( shards::CellTopology shards_topology, unsigned spati // t = stk::topology::SPRING_3; else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellTriangle<3> >()) ) { - t = stk::topology::SHELL_TRI_3; - // t = stk::topology::SHELL_TRI_3_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_TRI_3_ALL_FACE_SIDES : stk::topology::SHELL_TRI_3; } //NOTE: shards does not define a shell triangle 4 @@ -1278,21 +1279,17 @@ stk::topology get_topology( shards::CellTopology shards_topology, unsigned spati // t = stk::topology::SHELL_TRI_4; else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellTriangle<6> >()) ) { - t = stk::topology::SHELL_TRI_6; - // t = stk::topology::SHELL_TRI_6_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_TRI_6_ALL_FACE_SIDES : stk::topology::SHELL_TRI_6; } else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellQuadrilateral<4> >()) ) { - t = stk::topology::SHELL_QUAD_4; - // t = stk::topology::SHELL_QUAD_4_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_QUAD_4_ALL_FACE_SIDES : stk::topology::SHELL_QUAD_4; } else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellQuadrilateral<8> >()) ) { - t = stk::topology::SHELL_QUAD_8; - // t = stk::topology::SHELL_QUAD_8_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_QUAD_8_ALL_FACE_SIDES : stk::topology::SHELL_QUAD_8; } else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellQuadrilateral<9> >()) ) { - t = stk::topology::SHELL_QUAD_9; - // t = stk::topology::SHELL_QUAD_9_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? 
stk::topology::SHELL_QUAD_9_ALL_FACE_SIDES : stk::topology::SHELL_QUAD_9; } else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::Tetrahedron<4> >()) ) diff --git a/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp b/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp index 0585b10fbd52..8b1e13486f5f 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp @@ -108,7 +108,7 @@ inline void set_topology(Part & part) stk::topology get_topology(const MetaData& meta_data, EntityRank entity_rank, const std::pair& supersets); /** get the stk::topology given a Shards Cell Topology */ -stk::topology get_topology(shards::CellTopology shards_topology, unsigned spatial_dimension = 3); +stk::topology get_topology(shards::CellTopology shards_topology, unsigned spatial_dimension = 3, bool useAllFaceSideShell = false); /** Get the Shards Cell Topology given a stk::topology */ shards::CellTopology get_cell_topology(stk::topology topo); @@ -590,14 +590,28 @@ class MetaData { bool delete_part_alias_case_insensitive(Part& part, const std::string& alias); std::vector get_part_aliases(const Part& part) const; + // To enable the Field Sync Debugger in a production run, add the STK_DEBUG_FIELD_SYNC + // define to your build. This function is solely used to flip external parts of the + // debugger on for unit testing when it is not enabled globally. 
+ // + void enable_field_sync_debugger() { + m_isFieldSyncDebuggerEnabled = true; + } + + bool is_field_sync_debugger_enabled() { +#ifdef STK_DEBUG_FIELD_SYNC + return true; +#else + return m_isFieldSyncDebuggerEnabled; +#endif + } + protected: Part & declare_internal_part( const std::string & p_name); /** \} */ private: - // Functions - MetaData( const MetaData & ); ///< \brief Not allowed MetaData & operator = ( const MetaData & ); ///< \brief Not allowed @@ -611,11 +625,9 @@ class MetaData { void assign_topology(Part& part, stk::topology stkTopo); - // Members + void declare_field_sync_debugger_field(stk::mesh::FieldBase& field); BulkData* m_bulk_data; - bool m_commit ; - bool m_are_late_fields_enabled; impl::PartRepository m_part_repo ; CSet m_attributes ; @@ -641,6 +653,10 @@ class MetaData { std::map > m_partAlias; std::map> m_partReverseAlias; + bool m_commit; + bool m_are_late_fields_enabled; + bool m_isFieldSyncDebuggerEnabled; + /** \name Invariants/preconditions for MetaData. 
* \{ */ @@ -878,6 +894,8 @@ MetaData::declare_field(stk::topology::rank_t arg_entity_rank, f[0]->set_mesh(m_bulk_data); + declare_field_sync_debugger_field(*f[0]); + return *f[0]; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp b/packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp index 063f2a2616cd..3821c5932c40 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp @@ -51,34 +51,45 @@ template class DeviceMeshT; class StkFieldSyncDebugger; class EmptyStkFieldSyncDebugger; -template class NgpFieldSyncDebugger; -template class EmptyNgpFieldSyncDebugger; +template class NgpFieldSyncDebugger; +template class EmptyNgpFieldSyncDebugger; #ifdef STK_DEBUG_FIELD_SYNC using DefaultStkFieldSyncDebugger = StkFieldSyncDebugger; - template using DefaultNgpFieldSyncDebugger = NgpFieldSyncDebugger; + template + using DefaultNgpFieldSyncDebugger = NgpFieldSyncDebugger; #else using DefaultStkFieldSyncDebugger = EmptyStkFieldSyncDebugger; - template using DefaultNgpFieldSyncDebugger = EmptyNgpFieldSyncDebugger; + template + using DefaultNgpFieldSyncDebugger = EmptyNgpFieldSyncDebugger; #endif -template class NgpDebugger = DefaultNgpFieldSyncDebugger> class HostField; -template class NgpDebugger = DefaultNgpFieldSyncDebugger> class DeviceField; - #ifdef STK_USE_DEVICE_MESH using NgpMeshDefaultMemSpace = stk::ngp::MemSpace; +#else + using NgpMeshDefaultMemSpace = stk::ngp::HostMemSpace; +#endif + +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +class HostField; +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +class DeviceField; + +#ifdef STK_USE_DEVICE_MESH template using NgpMeshT = stk::mesh::DeviceMeshT; using NgpMesh = NgpMeshT; - template class NgpDebugger = DefaultNgpFieldSyncDebugger> - using NgpField = stk::mesh::DeviceField; + + template class NgpDebugger = DefaultNgpFieldSyncDebugger> + using NgpField = stk::mesh::DeviceField; + #else - using NgpMeshDefaultMemSpace = typename 
stk::ngp::HostExecSpace::memory_space; template using NgpMeshT = stk::mesh::HostMeshT; using NgpMesh = NgpMeshT; - template class NgpDebugger = DefaultNgpFieldSyncDebugger> - using NgpField = stk::mesh::HostField; + + template class NgpDebugger = DefaultNgpFieldSyncDebugger> + using NgpField = stk::mesh::HostField; #endif } diff --git a/packages/stk/stk_mesh/stk_mesh/base/NgpFieldSyncDebugger.hpp b/packages/stk/stk_mesh/stk_mesh/base/NgpFieldSyncDebugger.hpp index 3f42f77c62b9..5a5ce4e6de72 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/NgpFieldSyncDebugger.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/NgpFieldSyncDebugger.hpp @@ -52,7 +52,7 @@ namespace stk { namespace mesh { //============================================================================== -template +template class EmptyNgpFieldSyncDebugger { public: @@ -111,7 +111,7 @@ class EmptyNgpFieldSyncDebugger }; //============================================================================== -template +template class NgpFieldSyncDebugger { public: @@ -293,8 +293,8 @@ class NgpFieldSyncDebugger stk::mesh::Selector fieldSelector(*(ngpField->hostField)); UnsignedViewType & localDeviceNumComponentsPerEntity = ngpField->deviceFieldBucketsNumComponentsPerEntity; - FieldDataDeviceViewType & localDeviceData = ngpField->deviceData; - FieldDataDeviceViewType & localLastFieldValue = lastFieldValue; + FieldDataDeviceViewType & localDeviceData = ngpField->deviceData; + FieldDataDeviceViewType & localLastFieldValue = lastFieldValue; LastFieldModLocationType & localLastFieldModLocation = lastFieldModLocation; ScalarUvmType & localLostDeviceFieldData = lostDeviceFieldData; UnsignedViewType & localDebugDeviceSelectedBucketOffset = debugDeviceSelectedBucketOffset; @@ -349,8 +349,8 @@ class NgpFieldSyncDebugger const stk::mesh::BulkData & bulk = *ngpField->hostBulk; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(bulk); UnsignedViewType & localDeviceNumComponentsPerEntity = 
ngpField->deviceFieldBucketsNumComponentsPerEntity; - FieldDataDeviceViewType & localDeviceData = ngpField->deviceData; - FieldDataDeviceViewType & localLastFieldValue = lastFieldValue; + FieldDataDeviceViewType & localDeviceData = ngpField->deviceData; + FieldDataDeviceViewType & localLastFieldValue = lastFieldValue; UnsignedViewType & localDebugDeviceSelectedBucketOffset = debugDeviceSelectedBucketOffset; stk::mesh::for_each_entity_run(ngpMesh, ngpField->rank, modifiedSelector, @@ -371,8 +371,8 @@ class NgpFieldSyncDebugger const stk::mesh::FieldBase & stkField = *ngpField->hostField; if (buckets.size() != 0) { - lastFieldValue = FieldDataDeviceViewType(stkField.name()+"_lastValue", buckets.size(), - ORDER_INDICES(ngpField->bucketCapacity, numPerEntity)); + lastFieldValue = FieldDataDeviceViewType(stkField.name()+"_lastValue", buckets.size(), + ORDER_INDICES(ngpField->bucketCapacity, numPerEntity)); lastFieldModLocation = LastFieldModLocationType(stkField.name()+"_lastModLocation", buckets.size(), ORDER_INDICES(ngpField->bucketCapacity, numPerEntity)); } @@ -490,7 +490,7 @@ class NgpFieldSyncDebugger ScalarUvmType lostDeviceFieldData; ScalarUvmType anyPotentialDeviceFieldModification; LastFieldModLocationType lastFieldModLocation; - FieldDataDeviceViewType lastFieldValue; + FieldDataDeviceViewType lastFieldValue; typename UnsignedViewType::HostMirror debugHostSelectedBucketOffset; UnsignedViewType debugDeviceSelectedBucketOffset; }; diff --git a/packages/stk/stk_mesh/stk_mesh/base/NgpTypes.hpp b/packages/stk/stk_mesh/stk_mesh/base/NgpTypes.hpp index 85099a167005..b0465dd85342 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/NgpTypes.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/NgpTypes.hpp @@ -75,14 +75,18 @@ using HostMeshIndexType = MeshIndexType::HostMirror; using BucketEntityOffsetsViewType = Kokkos::View; template using BucketEntityOffsetsViewTypeT = Kokkos::View; -template using FieldDataDeviceViewType = Kokkos::View; -template using FieldDataHostViewType 
= Kokkos::View; +template +using FieldDataDeviceViewType = Kokkos::View; +template +using FieldDataHostViewType = Kokkos::View; using FieldDataPointerHostViewType = Kokkos::View; using FieldDataPointerDeviceViewType = Kokkos::View; -template using UnmanagedHostInnerView = Kokkos::View>; -template using UnmanagedDevInnerView = Kokkos::View>; +template +using UnmanagedDevInnerView = Kokkos::View>; +template +using UnmanagedHostInnerView = Kokkos::View>; #ifdef STK_USE_DEVICE_MESH #define ORDER_INDICES(i,j) j,i diff --git a/packages/stk/stk_mesh/stk_mesh/base/SideSetHelper.cpp b/packages/stk/stk_mesh/stk_mesh/base/SideSetHelper.cpp index 3930bae52dec..341264f6ca51 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/SideSetHelper.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/SideSetHelper.cpp @@ -473,11 +473,11 @@ void SideSetHelper::add_sideset_entry_for_element_selected_by_sidesets(Entity en { if(mesh.bucket_ptr(entity) == nullptr) { return; } - const unsigned numSides = mesh.num_sides(entity); + const unsigned numSides = stk::mesh::num_sides(mesh, entity); if(sidesetsAndSelectors.size() > 0 && mesh.entity_rank(entity) == stk::topology::ELEM_RANK && numSides > 0) { - const stk::mesh::ConnectivityOrdinal* ordinals = mesh.begin_ordinals(entity, mesh.mesh_meta_data().side_rank()); - const stk::mesh::Entity* sides = mesh.begin(entity, mesh.mesh_meta_data().side_rank()); + const std::vector ordinals = stk::mesh::get_side_ordinals(mesh, entity); + const stk::mesh::EntityVector sides = stk::mesh::get_sides(mesh, entity); stk::mesh::SideSetEntry entry(entity); diff --git a/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.cpp b/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.cpp index 12e5f4e0d4a4..b4a3875e5073 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.cpp @@ -38,10 +38,61 @@ #include "MetaData.hpp" #include "FieldRestriction.hpp" #include 
"stk_mesh/baseImpl/BucketRepository.hpp" +#include "stk_util/util/string_utils.hpp" namespace stk { namespace mesh { +const static std::string s_lastFieldModLocationPrefix = "DEBUG_lastFieldModLocation_"; + +namespace FieldSyncDebugger { + +void declare_field(stk::mesh::FieldBase& field) +{ + MetaData& meta = field.mesh_meta_data(); + if (meta.is_field_sync_debugger_enabled()) { + if (not stk::string_starts_with(field.name(), s_lastFieldModLocationPrefix)) { + meta.declare_field(field.entity_rank(), + s_lastFieldModLocationPrefix + field.name(), + field.number_of_states()); + } + } +} + +void declare_field_restriction(stk::mesh::FieldBase& field, const Part& part, + const unsigned numScalarsPerEntity, const unsigned firstDimension) +{ + MetaData& meta = field.mesh_meta_data(); + if (meta.is_field_sync_debugger_enabled()) { + if (not stk::string_starts_with(field.name(), s_lastFieldModLocationPrefix)) { + stk::mesh::FieldBase* lastModLocationField = meta.get_field(field.entity_rank(), + s_lastFieldModLocationPrefix + field.name()); + STK_ThrowRequire(lastModLocationField != nullptr); + std::vector initValue(numScalarsPerEntity, LastModLocation::HOST_OR_DEVICE); + meta.declare_field_restriction(*lastModLocationField, part, numScalarsPerEntity, firstDimension, + initValue.data()); + } + } +} + +void declare_field_restriction(stk::mesh::FieldBase& field, const Selector& selector, + const unsigned numScalarsPerEntity, const unsigned firstDimension) +{ + MetaData& meta = field.mesh_meta_data(); + if (meta.is_field_sync_debugger_enabled()) { + if (not stk::string_starts_with(field.name(), s_lastFieldModLocationPrefix)) { + stk::mesh::FieldBase* lastModLocationField = meta.get_field(field.entity_rank(), + s_lastFieldModLocationPrefix + field.name()); + STK_ThrowRequire(lastModLocationField != nullptr); + std::vector initValue(numScalarsPerEntity, LastModLocation::HOST_OR_DEVICE); + meta.declare_field_restriction(*lastModLocationField, selector, numScalarsPerEntity, 
firstDimension, + initValue.data()); + } + } +} + +} + StkFieldSyncDebugger::StkFieldSyncDebugger(const FieldBase* stkField) : m_stkField(stkField), m_isDataInitialized(false) @@ -133,7 +184,8 @@ StkFieldSyncDebugger::fill_last_mod_location_field_from_device() for (unsigned ordinal = 0; ordinal < bucket->size(); ++ordinal) { const Entity & entity = (*bucket)[ordinal]; const unsigned numComponents = field_scalars_per_entity(lastModLocationField, entity); - uint8_t * lastModLocation = reinterpret_cast(field_data(lastModLocationField, entity)); + uint8_t * lastModLocation = reinterpret_cast(field_data(lastModLocationField, + entity)); for (unsigned component = 0; component < numComponents; ++component) { const unsigned bucketOffset = ngpField.debug_get_bucket_offset(bucket->bucket_id()); lastModLocation[component] = m_debugFieldLastModification(bucketOffset, ORDER_INDICES(ordinal, component)); @@ -147,30 +199,10 @@ StkFieldSyncDebugger::get_last_mod_location_field() const { if (m_lastModLocationField == nullptr) { STK_ThrowRequire(impl::get_ngp_field(*m_stkField) != nullptr); - BulkData & bulk = m_stkField->get_mesh(); - MetaData & meta = bulk.mesh_meta_data(); - meta.enable_late_fields(); - FieldState state = m_stkField->state(); - FieldBase* fieldWithStateNew = m_stkField->field_state(stk::mesh::StateNew); - Field & lastModLocationField = - meta.declare_field(m_stkField->entity_rank(), - "DEBUG_lastFieldModLocation_"+fieldWithStateNew->name(), - m_stkField->number_of_states()); - - meta.set_mesh_on_fields(&bulk); - const FieldBase::RestrictionVector & fieldRestrictions = m_stkField->restrictions(); - if (not fieldRestrictions.empty()) { - for (const FieldBase::Restriction & restriction : fieldRestrictions) { - const unsigned numComponents = restriction.num_scalars_per_entity(); - std::vector initLastModLocation(numComponents, LastModLocation::HOST_OR_DEVICE); - put_field_on_mesh(lastModLocationField, restriction.selector(), numComponents, 
initLastModLocation.data()); - } - } - else { - bulk.reallocate_field_data(lastModLocationField); - } - - m_lastModLocationField = lastModLocationField.field_state(state); + MetaData& meta = m_stkField->mesh_meta_data(); + m_lastModLocationField = meta.get_field(m_stkField->entity_rank(), + s_lastFieldModLocationPrefix + m_stkField->name()); + STK_ThrowRequire(m_lastModLocationField != nullptr); } return *m_lastModLocationField; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.hpp b/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.hpp index b9c2be4e2189..184d77d1be88 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.hpp @@ -48,6 +48,16 @@ namespace mesh { class Bucket; class FieldBase; +namespace FieldSyncDebugger { + +void declare_field(stk::mesh::FieldBase& field); +void declare_field_restriction(stk::mesh::FieldBase& field, const Part& part, + const unsigned numScalarsPerEntity, const unsigned firstDimension); +void declare_field_restriction(stk::mesh::FieldBase& field, const Selector& selector, + const unsigned numScalarsPerEntity, const unsigned firstDimension); + +} + class EmptyStkFieldSyncDebugger { public: diff --git a/packages/stk/stk_mesh/stk_mesh/base/Types.hpp b/packages/stk/stk_mesh/stk_mesh/base/Types.hpp index 4457df3f1796..ebf41a176298 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Types.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Types.hpp @@ -116,6 +116,7 @@ struct MeshIndex Bucket* bucket; unsigned bucket_ordinal; + STK_FUNCTION MeshIndex(Bucket *bucketIn, size_t ordinal) : bucket(bucketIn), bucket_ordinal(ordinal) {} }; diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp index f5fa92071b4a..a703517b4c6f 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp +++ 
b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp @@ -65,7 +65,7 @@ class BucketConnDynamic m_ordinals(), m_permutations(), m_numUnusedEntries(0), - m_compressionThreshold(0.5) + m_compressionThreshold(2) { STK_ThrowRequireMsg(bucketCapacity > 0, "BucketConnDynamic must have bucketCapacity strictly greater than 0"); } @@ -258,7 +258,7 @@ class BucketConnDynamic size_t total_num_connectivity() const { return m_connectivity.size() - m_numUnusedEntries; } size_t num_unused_entries() const { return m_numUnusedEntries; } - void compress_connectivity(unsigned suggestedCapacity = 0) + void compress_connectivity() { if (m_numUnusedEntries == 0) { return; @@ -356,9 +356,10 @@ class BucketConnDynamic Permutation perm = INVALID_PERMUTATION) { static constexpr unsigned minSizeHeuristic = 256; - if (total_num_connectivity() > minSizeHeuristic && (static_cast(m_numUnusedEntries)/total_num_connectivity()) > m_compressionThreshold) + if ((total_num_connectivity() > minSizeHeuristic) && + (total_num_connectivity() < m_numUnusedEntries*m_compressionThreshold)) { - compress_connectivity(total_num_connectivity()+m_numUnusedEntries/2); + compress_connectivity(); } grow_if_necessary(bktOrdinal); @@ -501,7 +502,7 @@ class BucketConnDynamic std::vector m_ordinals; std::vector m_permutations; unsigned m_numUnusedEntries; - double m_compressionThreshold; + int m_compressionThreshold; }; } // namespace impl diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp index 9f58a66dd2fc..afde826b7583 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp @@ -37,10 +37,10 @@ #include // for operator new #include // for operator<<, etc #include // for runtime_error -#include // for Bucket, raw_part_equal +#include // for Bucket #include // for BulkData, etc #include -#include // for Partition, lower_bound +#include // for 
Partition, upper_bound #include #include #include "stk_mesh/base/BucketConnectivity.hpp" // for BucketConnectivity @@ -162,85 +162,31 @@ void BucketRepository::ensure_data_structures_sized() } } -//// -//// Note that we need to construct a key vector that the particular -//// format so we can use the lower_bound(..) function to lookup the -//// partition. Because we are using partitions now instead of -//// buckets, it should be possible to do without that vector and -//// instead do the lookup directly from the OrdinalVector. -//// - Partition *BucketRepository::get_or_create_partition( const EntityRank arg_entity_rank , const OrdinalVector &parts) { - const unsigned maxKeyTmpBufferSize = 64; - PartOrdinal keyTmpBuffer[maxKeyTmpBufferSize]; - OrdinalVector keyTmpVec; - - PartOrdinal* keyPtr = nullptr; - PartOrdinal* keyEnd = nullptr; - - fill_key_ptr(parts, &keyPtr, &keyEnd, maxKeyTmpBufferSize, keyTmpBuffer, keyTmpVec); - std::vector::iterator ik; - Partition* partition = get_partition(arg_entity_rank, parts, ik, keyPtr, keyEnd); + Partition* partition = get_partition(arg_entity_rank, parts, ik); if(partition == nullptr) { - partition = create_partition(arg_entity_rank, parts, ik, keyPtr, keyEnd); + partition = create_partition(arg_entity_rank, parts, ik); } return partition; } -void BucketRepository::fill_key_ptr(const OrdinalVector& parts, PartOrdinal** keyPtr, PartOrdinal** keyEnd, - const unsigned maxKeyTmpBufferSize, PartOrdinal* keyTmpBuffer, OrdinalVector& keyTmpVec) -{ - const size_t part_count = parts.size(); - - const size_t keyLen = 1 + part_count; - - *keyPtr = keyTmpBuffer; - *keyEnd = *keyPtr+keyLen; - - if (keyLen >= maxKeyTmpBufferSize) { - keyTmpVec.resize(keyLen); - *keyPtr = keyTmpVec.data(); - *keyEnd = *keyPtr+keyLen; - } - - //---------------------------------- - // Key layout: - // { part_count , { part_ordinals } } - // - (*keyPtr)[0] = part_count; - - for ( unsigned i = 0 ; i < part_count ; ++i ) { - (*keyPtr)[i+1] = parts[i]; - } -} - 
Partition *BucketRepository::get_partition(const EntityRank arg_entity_rank, const OrdinalVector &parts) { - PartOrdinal* keyPtr = nullptr; - PartOrdinal* keyEnd = nullptr; std::vector::iterator ik; - const unsigned maxKeyTmpBufferSize = 64; - PartOrdinal keyTmpBuffer[maxKeyTmpBufferSize]; - OrdinalVector keyTmpVec; - - fill_key_ptr(parts, &keyPtr, &keyEnd, maxKeyTmpBufferSize, keyTmpBuffer, keyTmpVec); - - return get_partition(arg_entity_rank, parts, ik, keyPtr, keyEnd); + return get_partition(arg_entity_rank, parts, ik); } Partition *BucketRepository::get_partition( const EntityRank arg_entity_rank , const OrdinalVector &parts, - std::vector::iterator& ik, - PartOrdinal* keyPtr, - PartOrdinal* keyEnd) + std::vector::iterator& ik) { STK_ThrowAssertMsg(m_mesh.mesh_meta_data().check_rank(arg_entity_rank), "Entity rank " << arg_entity_rank << " is invalid"); @@ -249,12 +195,12 @@ Partition *BucketRepository::get_partition( std::vector & partitions = m_partitions[ arg_entity_rank ]; - ik = lower_bound( partitions , keyPtr ); - const bool partition_exists = (ik != partitions.end()) && raw_part_equal( (*ik)->key() , keyPtr ); + ik = upper_bound( partitions , parts ); + const bool partition_exists = (ik != partitions.begin() && (ik[-1])->get_legacy_partition_id() == parts ); if (partition_exists) { - return *ik; + return ik[-1]; } return nullptr; @@ -263,11 +209,9 @@ Partition *BucketRepository::get_partition( Partition* BucketRepository::create_partition( const EntityRank arg_entity_rank, const OrdinalVector& parts, - std::vector::iterator& ik, - PartOrdinal* keyPtr, - PartOrdinal* keyEnd) + std::vector::iterator& ik) { - Partition *partition = new Partition(m_mesh, this, arg_entity_rank, keyPtr, keyEnd); + Partition *partition = new Partition(m_mesh, this, arg_entity_rank, parts.data(), parts.data()+parts.size()); STK_ThrowRequire(partition != nullptr); m_need_sync_from_partitions[arg_entity_rank] = true; @@ -411,8 +355,7 @@ Bucket 
*BucketRepository::allocate_bucket(EntityRank entityRank, unsigned initialCapacity, unsigned maximumCapacity) { - std::vector tmp(key.begin()+1,key.end()); - STK_ThrowAssertMsg(stk::util::is_sorted_and_unique(tmp,std::less()), + STK_ThrowAssertMsg(stk::util::is_sorted_and_unique(key,std::less()), "bucket created with 'key' vector that's not sorted and unique"); BucketVector &bucket_vec = m_buckets[entityRank]; const unsigned bucket_id = bucket_vec.size(); diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.hpp index 27366e248101..801eb0076cf8 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.hpp @@ -120,15 +120,11 @@ class BucketRepository Partition *get_partition(const EntityRank arg_entity_rank , const OrdinalVector &parts, - std::vector::iterator& ik, - PartOrdinal* keyPtr, - PartOrdinal* keyEnd); + std::vector::iterator& ik); Partition *create_partition(const EntityRank arg_entity_rank , const OrdinalVector &parts, - std::vector::iterator& ik, - PartOrdinal* keyPtr, - PartOrdinal* keyEnd); + std::vector::iterator& ik); // For use by BulkData::internal_modification_end(). void internal_modification_end(); @@ -168,14 +164,9 @@ class BucketRepository void ensure_data_structures_sized(); - void fill_key_ptr(const OrdinalVector& parts, PartOrdinal** keyPtr, PartOrdinal** keyEnd, - const unsigned maxKeyTmpBufferSize, PartOrdinal* keyTmpBuffer, OrdinalVector& keyTmpVec); + BulkData & m_mesh ; - - BulkData & m_mesh ; // Associated Bulk Data Aggregate - - // Vector of bucket pointers by rank. This is now a cache and no longer the primary - // location of Buckets when USE_STK_MESH_IMPL_PARTITION is #defined. + // Vector of bucket pointers for each rank. 
std::vector< BucketVector > m_buckets ; std::vector > m_partitions; diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp index a1a0cbfd8afe..51feaff89523 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp @@ -640,11 +640,11 @@ Entity connect_element_to_entity(BulkData & mesh, Entity elem, Entity entity, OrdinalVector entity_node_ordinals(entity_top.num_nodes()); elem_top.sub_topology_node_ordinals(mesh.entity_rank(entity), relationOrdinal, entity_node_ordinals.data()); - const stk::mesh::Entity *elem_nodes = mesh.begin_nodes(elem); + stk::mesh::EntityVector elem_nodes(mesh.begin_nodes(elem),mesh.end_nodes(elem)); EntityVector entity_top_nodes(entity_top.num_nodes()); - elem_top.sub_topology_nodes(elem_nodes, mesh.entity_rank(entity), relationOrdinal, entity_top_nodes.data()); + elem_top.sub_topology_nodes(elem_nodes.data(), mesh.entity_rank(entity), relationOrdinal, entity_top_nodes.data()); - Permutation perm = stk::mesh::find_permutation(mesh, elem_top, elem_nodes, entity_top, entity_top_nodes.data(), relationOrdinal); + Permutation perm = stk::mesh::find_permutation(mesh, elem_top, elem_nodes.data(), entity_top, entity_top_nodes.data(), relationOrdinal); OrdinalVector scratch1, scratch2, scratch3; @@ -676,10 +676,9 @@ Entity connect_element_to_entity(BulkData & mesh, Entity elem, Entity entity, if(0 == num_side_nodes) { Permutation node_perm = stk::mesh::Permutation::INVALID_PERMUTATION; - Entity const *elem_nodes_local = mesh.begin_nodes(elem); for(unsigned i = 0; i < entity_top.num_nodes(); ++i) { - Entity node = elem_nodes_local[entity_node_ordinals[i]]; + Entity node = elem_nodes[entity_node_ordinals[i]]; mesh.declare_relation(entity, node, i, node_perm, scratch1, scratch2, scratch3); } } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp 
b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp index 2566e59bb9ee..a1fa3b4eecd8 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp @@ -55,8 +55,13 @@ bool MeshModification::modification_begin(const std::string description) const stk::mesh::FieldVector allFields = m_bulkData.mesh_meta_data().get_fields(); for (FieldBase * stkField : allFields) { stkField->sync_to_host(); - if (stkField->has_ngp_field()) { - impl::get_ngp_field(*stkField)->debug_modification_begin(); + } + + if (m_bulkData.mesh_meta_data().is_field_sync_debugger_enabled()) { + for (FieldBase * stkField : allFields) { + if (stkField->has_ngp_field()) { + impl::get_ngp_field(*stkField)->debug_modification_begin(); + } } } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpMeshHostData.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpMeshHostData.hpp index 7af403e25567..8f3f1d0f49ea 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpMeshHostData.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpMeshHostData.hpp @@ -53,11 +53,6 @@ template struct NgpMeshHostData : NgpMeshHostDataBase { typename EntityKeyViewTypeT::HostMirror hostEntityKeys; - typename BucketEntityOffsetsViewTypeT::HostMirror hostBucketEntityOffsets[stk::topology::NUM_RANKS]; - typename UnsignedViewTypeT::HostMirror hostEntityConnectivityOffset[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - typename EntityViewTypeT::HostMirror hostSparseConnectivity[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - typename OrdinalViewTypeT::HostMirror hostSparseConnectivityOrdinals[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - typename PermutationViewTypeT::HostMirror hostSparsePermutations[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; typename UnsignedViewTypeT::HostMirror hostVolatileFastSharedCommMapOffset[stk::topology::NUM_RANKS]; typename NgpCommMapIndicesT::HostMirror 
hostVolatileFastSharedCommMap[stk::topology::NUM_RANKS]; unsigned volatileFastSharedCommMapSyncCount = 0; diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp index cc95d6223656..8d9a0387d908 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp @@ -326,7 +326,7 @@ stk::mesh::FieldVector get_fields_for_bucket(const stk::mesh::BulkData& mesh, void Partition::sort(const EntitySorterBase& sorter) { - std::vector partition_key = get_legacy_partition_id(); + const std::vector& partition_key = get_legacy_partition_id(); std::vector entities(m_size); @@ -540,7 +540,7 @@ stk::mesh::Bucket *Partition::get_bucket_for_adds() clear_pending_removes_by_filling_from_end(); if (no_buckets()) { - std::vector partition_key = get_legacy_partition_id(); + const std::vector& partition_key = get_legacy_partition_id(); Bucket *bucket = m_repository->allocate_bucket(m_rank, partition_key, m_repository->get_initial_bucket_capacity(), m_repository->get_maximum_bucket_capacity()); @@ -554,7 +554,7 @@ stk::mesh::Bucket *Partition::get_bucket_for_adds() if (bucket->size() == bucket->capacity()) { if (bucket->size() == m_repository->get_maximum_bucket_capacity()) { - std::vector partition_key = get_legacy_partition_id(); + const std::vector& partition_key = get_legacy_partition_id(); bucket = m_repository->allocate_bucket(m_rank, partition_key, m_repository->get_initial_bucket_capacity(), m_repository->get_maximum_bucket_capacity()); diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.hpp index a10a754fb2a2..97916dc9525e 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.hpp @@ -209,35 +209,26 @@ class Partition std::ostream &operator<<(std::ostream &, const stk::mesh::impl::Partition &); -inline -bool 
partition_key_less( const unsigned * lhs , const unsigned * rhs ) -{ -// The following (very old) code is clever... So I'm adding some comments. -// -// A partition key is an array of unsigned, laid out like this: -// key[num-part-ordinals, first-part-ordinal, ..., last-part-ordinal] - - if (*lhs == *rhs) { //num-part-ordinals is equal for lhs and rhs... - const unsigned * const last_lhs = lhs + *lhs; - do { - ++lhs ; ++rhs ; - } while ( last_lhs != lhs && *lhs == *rhs ); - } - return *lhs < *rhs; -} - struct PartitionLess { - bool operator()( const Partition * lhs_Partition , const unsigned * rhs ) const - { return partition_key_less( lhs_Partition->key() , rhs ); } + bool operator()( const Partition * lhs_Partition , const OrdinalVector& rhs ) const + { + return lhs_Partition->get_legacy_partition_id().size() != rhs.size() ? + lhs_Partition->get_legacy_partition_id().size() < rhs.size() : + lhs_Partition->get_legacy_partition_id() < rhs; + } - bool operator()( const unsigned * lhs , const Partition * rhs_Partition ) const - { return partition_key_less( lhs , rhs_Partition->key() ); } + bool operator()( const OrdinalVector& lhs , const Partition * rhs_Partition ) const + { + return lhs.size() != rhs_Partition->get_legacy_partition_id().size() ? 
+ lhs.size() < rhs_Partition->get_legacy_partition_id().size() : + lhs < rhs_Partition->get_legacy_partition_id(); + } }; inline std::vector::iterator -lower_bound( std::vector & v , const unsigned * key ) -{ return std::lower_bound( v.begin() , v.end() , key , PartitionLess() ); } +upper_bound( std::vector & v , const OrdinalVector& key ) +{ return std::upper_bound( v.begin() , v.end() , key , PartitionLess() ); } } // impl } // mesh diff --git a/packages/stk/stk_middle_mesh/stk_middle_mesh/abstract_cdt_interface.hpp b/packages/stk/stk_middle_mesh/stk_middle_mesh/abstract_cdt_interface.hpp index 4f19954f4e60..6953533e6049 100644 --- a/packages/stk/stk_middle_mesh/stk_middle_mesh/abstract_cdt_interface.hpp +++ b/packages/stk/stk_middle_mesh/stk_middle_mesh/abstract_cdt_interface.hpp @@ -10,6 +10,10 @@ namespace impl { class AbstractCDTInterface { + public: + virtual ~AbstractCDTInterface() = default; + + private: virtual void triangulate(const utils::impl::Projection& proj) = 0; }; diff --git a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.cpp b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.cpp index bddbe88eb692..de5dea0daa9a 100644 --- a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.cpp +++ b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.cpp @@ -288,20 +288,20 @@ void StkMeshCreator::setup_edge_sharing(std::shared_ptr mesh, MeshFi constexpr unsigned maxNumEdgeNodes = 3; std::vector edgeNodes(maxNumEdgeNodes); std::vector edgeVerts(maxNumEdgeNodes); - + const std::vector& surfaceElems = mesh->get_elements(); for(const mesh::MeshEntityPtr& elem : surfaceElems) { if (elem) { const stk::mesh::SideSetEntry& ssetEntry = (*stkElsField)(elem, 0, 0); stk::mesh::Entity stkEl = ssetEntry.element; - + const bool stkElemIsFace = ssetEntry.side != stk::mesh::INVALID_CONNECTIVITY_ORDINAL; if (stkElemIsFace) { stkEl = stk::mesh::get_side_entity_for_elem_side_pair(bulk, stkEl, 
ssetEntry.side); } - + stk::topology stkTopo = bulk.bucket(stkEl).topology(); - + const stk::mesh::Entity* nodes = bulk.begin_nodes(stkEl); for(int dn=0; dncount_down(); ++dn) { @@ -309,7 +309,7 @@ void StkMeshCreator::setup_edge_sharing(std::shared_ptr mesh, MeshFi STK_ThrowRequire((edgeEnt && edgeEnt->get_type() == mesh::MeshEntityType::Edge)); edgeNodes.resize(edgeEnt->count_down()); stkTopo.edge_nodes(nodes, dn, edgeNodes.data()); - + edgeVerts.resize(edgeEnt->count_down()); for(int n=0; ncount_down(); ++n) { diff --git a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_field_copier.cpp b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_field_copier.cpp index f66daa9cacae..075445307309 100644 --- a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_field_copier.cpp +++ b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_field_copier.cpp @@ -28,7 +28,7 @@ stk::mesh::Field* StkFieldCopier::create_stk_field(mesh::FieldPtrmesh_meta_data_ptr(); stk::mesh::Field* stkField = &(metaData->declare_field(stk::topology::NODE_RANK, name)); - stk::mesh::put_field_on_mesh(*stkField, *m_part, middleMeshField->get_num_comp(), + stk::mesh::put_field_on_mesh(*stkField, *m_part, middleMeshField->get_num_comp(), middleMeshField->get_field_shape().get_num_nodes(0), 0); return stkField; @@ -38,7 +38,8 @@ void StkFieldCopier::copy(const stk::mesh::Field& stkField, mesh::FieldP { check_field_shapes(stkField, middleMeshFieldPtr); - stk::mesh::Selector selector(stkField); + auto meshMetaDataPtr = m_bulkDataPtr->mesh_meta_data_ptr(); + stk::mesh::Selector selector(stkField & (meshMetaDataPtr->locally_owned_part() | meshMetaDataPtr->globally_shared_part())); const stk::mesh::BucketVector& buckets = m_bulkDataPtr->get_buckets(stk::topology::NODE_RANK, selector); int numNodesPerEntity = middleMeshFieldPtr->get_field_shape().get_num_nodes(0); @@ -62,7 +63,8 @@ void StkFieldCopier::copy(const mesh::FieldPtr middleMeshFieldPtr, stk:: { 
check_field_shapes(stkField, middleMeshFieldPtr); - stk::mesh::Selector selector(stkField); + auto meshMetaDataPtr = m_bulkDataPtr->mesh_meta_data_ptr(); + stk::mesh::Selector selector(stkField & (meshMetaDataPtr->locally_owned_part() | meshMetaDataPtr->globally_shared_part())); const stk::mesh::BucketVector& buckets = m_bulkDataPtr->get_buckets(stk::topology::NODE_RANK, selector); int numNodesPerEntity = middleMeshFieldPtr->get_field_shape().get_num_nodes(0); @@ -109,7 +111,7 @@ void StkFieldCopier::check_field_shapes(const stk::mesh::Field& stkField std::string("Field shapes not compatible: stk field has ") + std::to_string(stk_field_dims.second) + " components per node, while the middle mesh field has " + std::to_string(meshField->get_num_comp()) ); - } + } } std::pair StkFieldCopier::get_field_shape_and_num_components(const stk::mesh::Field& stkField) diff --git a/packages/stk/stk_ngp_test/stk_ngp_test/GlobalReporter.hpp b/packages/stk/stk_ngp_test/stk_ngp_test/GlobalReporter.hpp index 617d1d095b46..f318776a5239 100644 --- a/packages/stk/stk_ngp_test/stk_ngp_test/GlobalReporter.hpp +++ b/packages/stk/stk_ngp_test/stk_ngp_test/GlobalReporter.hpp @@ -5,6 +5,12 @@ #include "stk_util/ngp/NgpSpaces.hpp" #include "NgpTestDeviceMacros.hpp" +// RDC is required for HIP build since registering a static global variable +// on an inline variable is not functional as of rocm 6.2.7 +#if defined(KOKKOS_ENABLE_HIP) && !defined(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE) +#error "Kokkos_ENABLE_HIP_RELOCATABLE_DEVICE_CODE is required for HIP build" +#endif + namespace ngp_testing { template diff --git a/packages/stk/stk_ngp_test/stk_ngp_test/NgpTestDeviceMacros.hpp b/packages/stk/stk_ngp_test/stk_ngp_test/NgpTestDeviceMacros.hpp index 0aac6f0aa609..605d54f699dc 100644 --- a/packages/stk/stk_ngp_test/stk_ngp_test/NgpTestDeviceMacros.hpp +++ b/packages/stk/stk_ngp_test/stk_ngp_test/NgpTestDeviceMacros.hpp @@ -6,10 +6,4 @@ #define NGP_TEST_FUNCTION KOKKOS_FUNCTION #define 
NGP_TEST_INLINE KOKKOS_INLINE_FUNCTION -#ifdef STK_ENABLE_GPU -#define NGP_TEST_DEVICE_ONLY __device__ -#else -#define NGP_TEST_DEVICE_ONLY -#endif - #endif diff --git a/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp b/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp index 0f25e3439677..744c2666c36e 100644 --- a/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp +++ b/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp @@ -63,12 +63,6 @@ bool expect_near(const T a, const T b, const T tolerance) { #define NUM_TO_STR(x) NGP_TEST_STRINGIZE(x) #define LOCATION __FILE__ ":" NUM_TO_STR(__LINE__) -#ifdef __HIP_DEVICE_COMPILE__ -//FIXME: unsupported indirect call to function on HIP-Clang -#define NGP_EXPECT_TRUE(cond) -#define NGP_ASSERT_TRUE(cond) - -#else #define NGP_EXPECT_TRUE(cond) \ do { \ if (!(cond)) { \ @@ -83,7 +77,6 @@ bool expect_near(const T a, const T b, const T tolerance) { return; \ } \ } while (false) -#endif #define NGP_EXPECT_FALSE(cond) NGP_EXPECT_TRUE(!(cond)) #define NGP_ASSERT_FALSE(cond) NGP_ASSERT_TRUE(!(cond)) @@ -106,12 +99,6 @@ bool expect_near(const T a, const T b, const T tolerance) { #define NGP_EXPECT_GE(a, b) NGP_EXPECT_TRUE((a) >= (b)) #define NGP_ASSERT_GE(a, b) NGP_ASSERT_TRUE((a) >= (b)) -#ifdef __HIP_DEVICE_COMPILE__ -//FIXME: unsupported indirect call to function on HIP-Clang -#define NGP_EXPECT_NEAR(a, b, tolerance) -#define NGP_ASSERT_NEAR(a, b, tolerance) - -#else #define NGP_EXPECT_NEAR(a, b, tolerance) \ do { \ if (!::ngp_testing::internal::expect_near(a, b, tolerance)) { \ @@ -126,7 +113,6 @@ bool expect_near(const T a, const T b, const T tolerance) { return; \ } \ } while (false) -#endif namespace ngp_testing { diff --git a/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp b/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp index 42dbab9fc055..05a429c2a069 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp +++ 
b/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp @@ -74,9 +74,14 @@ class NgpMeshChangeElementPartMembership : public stk::unit_test_util::MeshFixtu void batch_change_element_part_membership(int cycle) { + Kokkos::Profiling::pushRegion("BulkData::batch_change_entity_parts"); get_bulk().batch_change_entity_parts(stk::mesh::EntityVector{get_element(cycle)}, stk::mesh::PartVector{get_part()}, {}); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("get_updated_ngp_mesh"); stk::mesh::get_updated_ngp_mesh(get_bulk()); + Kokkos::Profiling::popRegion(); } private: @@ -232,9 +237,14 @@ TEST_F( NgpMeshChangeElementPartMembership, TimingBatch ) batchTimer.start_batch_timer(); setup_host_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + Kokkos::Profiling::pushRegion("batch_change_element_part_membership"); + for (int i = 0; i < NUM_ITERS; i++) { batch_change_element_part_membership(i); } + + Kokkos::Profiling::popRegion(); + batchTimer.stop_batch_timer(); reset_mesh(); } diff --git a/packages/stk/stk_performance_tests/stk_mesh/perfNgpFieldStateRotation.cpp b/packages/stk/stk_performance_tests/stk_mesh/perfNgpFieldStateRotation.cpp new file mode 100644 index 000000000000..44a7a65b42ce --- /dev/null +++ b/packages/stk/stk_performance_tests/stk_mesh/perfNgpFieldStateRotation.cpp @@ -0,0 +1,136 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + +TEST(StkNgpField, multiStateRotation) +{ + stk::ParallelMachine comm = stk::parallel_machine_world(); + if (stk::parallel_machine_size(comm) > 1) { GTEST_SKIP(); } + + const unsigned NUM_RUNS = 5; + const unsigned NUM_ITERS = 3000; + std::string meshSpec = "generated:80x80x80"; + + std::cout << "Using mesh-spec: " << meshSpec << std::endl; + + stk::unit_test_util::BatchTimer batchTimer(comm); + + batchTimer.initialize_batch_timer(); + + std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(comm) + .set_aura_option(stk::mesh::BulkData::NO_AUTO_AURA) + .set_spatial_dimension(3) + .create(); + + stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); + const int numFieldStates = 3; + stk::mesh::Field& tensorField1 = meta.declare_field(stk::topology::ELEM_RANK, "tensorField1", numFieldStates); + stk::mesh::Field& tensorField2 = meta.declare_field(stk::topology::ELEM_RANK, "tensorField2", numFieldStates); + stk::mesh::Field& vectorField1 = meta.declare_field(stk::topology::ELEM_RANK, "vectorField1", numFieldStates); + stk::mesh::Field& vectorField2 = meta.declare_field(stk::topology::ELEM_RANK, "vectorField2", numFieldStates); + stk::mesh::put_field_on_mesh(tensorField1, meta.universal_part(), 9, nullptr); + stk::mesh::put_field_on_mesh(tensorField2, meta.universal_part(), 9, nullptr); + stk::mesh::put_field_on_mesh(vectorField1, meta.universal_part(), 3, nullptr); + stk::mesh::put_field_on_mesh(vectorField2, meta.universal_part(), 3, nullptr); + + stk::io::fill_mesh(meshSpec, *bulkPtr); + + Kokkos::Profiling::pushRegion("get_updated_ngp_mesh"); + stk::mesh::NgpMesh& ngpMesh = stk::mesh::get_updated_ngp_mesh(*bulkPtr); + EXPECT_FALSE(ngpMesh.need_sync_to_host()); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("initialize fields"); + stk::ngp::ExecSpace execSpace; + constexpr double initValue1 = 1.14; + constexpr double 
initValue2 = 3.14; + for(int s=0; s(s); + stk::mesh::Field& tensorField1_state = tensorField1.field_of_state(state); + stk::mesh::Field& tensorField2_state = tensorField2.field_of_state(state); + stk::mesh::Field& vectorField1_state = vectorField1.field_of_state(state); + stk::mesh::Field& vectorField2_state = vectorField2.field_of_state(state); + stk::mesh::field_fill(initValue1, tensorField1_state, execSpace); + stk::mesh::field_fill(initValue2, tensorField2_state, execSpace); + stk::mesh::field_fill(initValue1, vectorField1_state, execSpace); + stk::mesh::field_fill(initValue2, vectorField2_state, execSpace); + } + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("multiStateRotation test"); + + for (unsigned j = 0; j < NUM_RUNS; j++) { + + batchTimer.start_batch_timer(); + + for(unsigned i=0; iupdate_field_data_states(rotateNgpFieldViews); + Kokkos::Profiling::popRegion(); + } + + batchTimer.stop_batch_timer(); + } + + Kokkos::Profiling::popRegion(); + batchTimer.print_batch_timing(NUM_ITERS); +} + +} diff --git a/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp b/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp index 3cc7847388fd..a4b6c074c663 100644 --- a/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp +++ b/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp @@ -114,14 +114,18 @@ void run_volume_to_one_test_with_views(const std::string& meshFileName, stk::io::fill_mesh_with_auto_decomp(meshFileName, *bulkPtr); Kokkos::View elemBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + auto elemBoxesHost = Kokkos::create_mirror_view(elemBoxes); + Kokkos::deep_copy(elemBoxesHost, elemBoxes); Kokkos::View supersetBoxes("Range Boxes", 1); - supersetBoxes(0) = {elemBoxes[0].box, IdentProc(pRank, pRank)}; + auto supersetBoxesHost = Kokkos::create_mirror_view(supersetBoxes); + supersetBoxesHost(0) = {elemBoxesHost[0].box, IdentProc(pRank, pRank)}; - for (unsigned i = 0; i != 
elemBoxes.extent(0); ++i) { - stk::search::add_to_box(supersetBoxes(0).box, elemBoxes(i).box); + for (unsigned i = 0; i != elemBoxesHost.extent(0); ++i) { + stk::search::add_to_box(supersetBoxesHost(0).box, elemBoxesHost(i).box); } + Kokkos::deep_copy(supersetBoxes, supersetBoxesHost); batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { @@ -219,14 +223,19 @@ void run_volume_to_one_test_local_with_views(const std::string& meshFileName, stk::io::fill_mesh_with_auto_decomp(meshFileName, *bulkPtr); Kokkos::View elemBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + auto elemBoxesHost = Kokkos::create_mirror_view(elemBoxes); + Kokkos::deep_copy(elemBoxesHost, elemBoxes); Kokkos::View supersetBoxes("Range Boxes", 1); - supersetBoxes(0) = {elemBoxes[0].box, stk::parallel_machine_rank(comm)}; - + auto supersetBoxesHost = Kokkos::create_mirror_view(supersetBoxes); + supersetBoxesHost(0) = {elemBoxesHost[0].box, stk::parallel_machine_rank(comm)}; + for (unsigned i = 0; i != elemBoxes.extent(0); ++i) { - stk::search::add_to_box(supersetBoxes(0).box, elemBoxes(i).box); + stk::search::add_to_box(supersetBoxesHost(0).box, elemBoxesHost(i).box); } + Kokkos::deep_copy(supersetBoxes, supersetBoxesHost); + batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { Kokkos::View searchResults; @@ -259,14 +268,19 @@ void run_one_to_volume_test_local_with_views(const std::string& meshFileName, stk::io::fill_mesh_with_auto_decomp(meshFileName, *bulkPtr); Kokkos::View elemBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + auto elemBoxesHost = Kokkos::create_mirror_view(elemBoxes); + Kokkos::deep_copy(elemBoxesHost, elemBoxes); Kokkos::View supersetBoxes("Range Boxes", 1); - supersetBoxes(0) = {elemBoxes[0].box, stk::parallel_machine_rank(comm)}; + auto supersetBoxesHost = Kokkos::create_mirror_view(supersetBoxes); + supersetBoxesHost(0) = {elemBoxesHost[0].box, stk::parallel_machine_rank(comm)}; - 
for (unsigned i = 0; i != elemBoxes.extent(0); ++i) { - stk::search::add_to_box(supersetBoxes(0).box, elemBoxes(i).box); + for (unsigned i = 0; i != elemBoxesHost.extent(0); ++i) { + stk::search::add_to_box(supersetBoxesHost(0).box, elemBoxesHost(i).box); } + Kokkos::deep_copy(supersetBoxes, supersetBoxesHost); + batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { Kokkos::View searchResults; diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp index ff76f6ad9c5d..0b6adaae9421 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp @@ -628,6 +628,8 @@ struct UpdateInteriorNodeBVs KOKKOS_INLINE_FUNCTION void operator()(unsigned argIdx) const; + KOKKOS_INLINE_FUNCTION + void check_tree(unsigned argIdx) const; KOKKOS_FORCEINLINE_FUNCTION void get_box(RealType bvMinMax[6], LocalOrdinal idx, const bboxes_3d_view_amt &boxesMinMax) const; @@ -661,10 +663,71 @@ template void UpdateInteriorNodeBVs::apply(const MortonAabbTree &tree, ExecutionSpace const& execSpace) { const UpdateInteriorNodeBVs op(tree); - const size_t numLeaves = tree.hm_numLeaves(); + const size_t numLeaves = tree.hm_numLeaves(); auto policy = Kokkos::RangePolicy(execSpace, 0, numLeaves); - Kokkos::parallel_for(policy, op); + Kokkos::parallel_for("UpdateInteriorNodeBVs", policy, op); + Kokkos::parallel_for("check_tree", policy, KOKKOS_LAMBDA(const unsigned& argIdx){op.check_tree(argIdx);}); +} + +template +KOKKOS_INLINE_FUNCTION +void UpdateInteriorNodeBVs::check_tree(unsigned argIdx) const +{ + if (m_numLeaves > 1) { + LocalOrdinal idx = static_cast(argIdx); + + RealType bvMinMax[6]; + + LocalOrdinal parent = tm_nodeParents(idx); + RealType sibMinMax[6]; + + constexpr RealType tol = std::numeric_limits::epsilon(); + bool 
fixedBox = false; + + while (idx != parent) { + const LocalOrdinal parentIdx = parent - m_numLeaves; + + const bool boxIsAllZeros = ((m_nodeMinMaxs(parentIdx, 0) < tol) + &&(m_nodeMinMaxs(parentIdx, 1) < tol) + &&(m_nodeMinMaxs(parentIdx, 2) < tol) + &&(m_nodeMinMaxs(parentIdx, 3) < tol) + &&(m_nodeMinMaxs(parentIdx, 4) < tol) + &&(m_nodeMinMaxs(parentIdx, 5) < tol)); + if (boxIsAllZeros || fixedBox) { + const LocalOrdinal sib0 = tm_nodeChildren(parent, 0); + const LocalOrdinal sib1 = tm_nodeChildren(parent, 1); + + if (sib0 < m_numLeaves) { + get_stk_box(bvMinMax, sib0, m_leafMinMaxs); + } + else { + get_box(bvMinMax, sib0-m_numLeaves, m_nodeMinMaxs); + } + + if (sib1 < m_numLeaves) { + get_stk_box(sibMinMax, sib1, m_leafMinMaxs); + } + else { + get_box(sibMinMax, sib1-m_numLeaves, m_nodeMinMaxs); + } + + m_nodeMinMaxs(parentIdx, 0) = AABB_MIN(bvMinMax[0], sibMinMax[0]); + m_nodeMinMaxs(parentIdx, 1) = AABB_MIN(bvMinMax[1], sibMinMax[1]); + m_nodeMinMaxs(parentIdx, 2) = AABB_MIN(bvMinMax[2], sibMinMax[2]); + m_nodeMinMaxs(parentIdx, 3) = AABB_MAX(bvMinMax[3], sibMinMax[3]); + m_nodeMinMaxs(parentIdx, 4) = AABB_MAX(bvMinMax[4], sibMinMax[4]); + m_nodeMinMaxs(parentIdx, 5) = AABB_MAX(bvMinMax[5], sibMinMax[5]); + fixedBox = true; + } + + idx = parent; + parent = tm_nodeParents(parent); + if (idx == parent) { + return; + } + } + } } template diff --git a/packages/stk/stk_topology/stk_topology/topology_defn.hpp b/packages/stk/stk_topology/stk_topology/topology_defn.hpp index 319635eb70bf..e72e05cb7db2 100644 --- a/packages/stk/stk_topology/stk_topology/topology_defn.hpp +++ b/packages/stk/stk_topology/stk_topology/topology_defn.hpp @@ -76,13 +76,7 @@ void topology::sub_topology_node_ordinals(unsigned sub_rank, unsigned sub_ordina { case NODE_RANK: *output_ordinals = sub_ordinal; break; case EDGE_RANK: edge_node_ordinals(sub_ordinal, output_ordinals); break; - case FACE_RANK: - if (has_mixed_rank_sides() && sub_ordinal >= num_faces()) { - edge_node_ordinals(sub_ordinal 
- num_faces(), output_ordinals); - } else { - face_node_ordinals(sub_ordinal, output_ordinals); - } - break; + case FACE_RANK: face_node_ordinals(sub_ordinal, output_ordinals); break; default: break; } } @@ -95,13 +89,7 @@ void topology::sub_topology_nodes(const NodeArray & nodes, unsigned sub_rank, un { case NODE_RANK: *output_nodes = nodes[sub_ordinal]; break; case EDGE_RANK: edge_nodes(nodes, sub_ordinal, output_nodes); break; - case FACE_RANK: - if (has_mixed_rank_sides() && sub_ordinal >= num_faces()) { - edge_nodes(nodes, sub_ordinal - num_faces(), output_nodes); - } else { - face_nodes(nodes, sub_ordinal, output_nodes); - } - break; + case FACE_RANK: face_nodes(nodes, sub_ordinal, output_nodes); break; default: break; } } @@ -126,11 +114,7 @@ topology topology::sub_topology(unsigned sub_rank, unsigned sub_ordinal) const { case NODE_RANK: return NODE; case EDGE_RANK: return edge_topology(sub_ordinal); - case FACE_RANK: - if (has_mixed_rank_sides() && sub_ordinal >= num_faces()) { - return edge_topology(sub_ordinal - num_faces()); - } - return face_topology(sub_ordinal); + case FACE_RANK: return face_topology(sub_ordinal); default: break; } return INVALID_TOPOLOGY; @@ -140,22 +124,20 @@ template STK_INLINE_FUNCTION void topology::side_node_ordinals(unsigned side_ordinal, OrdinalOutputIterator output_ordinals) const { - if (has_mixed_rank_sides() && is_shell_side_ordinal(side_ordinal)) { - sub_topology_node_ordinals(EDGE_RANK, side_ordinal-num_faces(), output_ordinals); - } else { - sub_topology_node_ordinals( side_rank(), side_ordinal, output_ordinals); - } + auto fix_ordinal = has_mixed_rank_sides() && side_ordinal >= num_sub_topology(side_rank()); + auto adjusted_ordinal = (fix_ordinal) ? 
side_ordinal - num_sub_topology(side_rank()) : side_ordinal; + + sub_topology_node_ordinals(side_rank(side_ordinal), adjusted_ordinal, output_ordinals); } template STK_INLINE_FUNCTION void topology::side_nodes(const NodeArray & nodes, unsigned side_ordinal, NodeOutputIterator output_nodes) const { - if (has_mixed_rank_sides() && is_shell_side_ordinal(side_ordinal)) { - sub_topology_nodes( nodes, EDGE_RANK, side_ordinal-num_faces(), output_nodes); - } else { - sub_topology_nodes( nodes, side_rank(), side_ordinal, output_nodes); - } + auto fix_ordinal = has_mixed_rank_sides() && side_ordinal >= num_sub_topology(side_rank()); + auto adjusted_ordinal = (fix_ordinal) ? side_ordinal - num_sub_topology(side_rank()) : side_ordinal; + + sub_topology_nodes(nodes, side_rank(side_ordinal), adjusted_ordinal, output_nodes); } STK_INLINE_FUNCTION @@ -165,7 +147,7 @@ unsigned topology::num_sides() const if (side_rank() != INVALID_RANK) { num_sides_out = side_rank() > NODE_RANK ? num_sub_topology(side_rank()) : num_vertices(); - if (has_mixed_rank_sides()) { + if (has_mixed_rank_sides() && side_rank() > EDGE_RANK) { num_sides_out += num_sub_topology(EDGE_RANK); } } @@ -175,10 +157,10 @@ unsigned topology::num_sides() const STK_INLINE_FUNCTION topology topology::side_topology(unsigned side_ordinal) const { - if (has_mixed_rank_sides() && is_shell_side_ordinal(side_ordinal)) - return shell_side_topology(side_ordinal-num_faces()); + auto fix_ordinal = has_mixed_rank_sides() && side_ordinal >= num_sub_topology(side_rank()); + auto adjusted_ordinal = (fix_ordinal) ? 
side_ordinal - num_sub_topology(side_rank()) : side_ordinal; - return sub_topology(side_rank(), side_ordinal); + return sub_topology(side_rank(side_ordinal), adjusted_ordinal); } STK_INLINE_FUNCTION diff --git a/packages/stk/stk_transfer/stk_transfer/copy_by_id/SearchById.hpp b/packages/stk/stk_transfer/stk_transfer/copy_by_id/SearchById.hpp index 49ec7291e66f..383d4132448b 100644 --- a/packages/stk/stk_transfer/stk_transfer/copy_by_id/SearchById.hpp +++ b/packages/stk/stk_transfer/stk_transfer/copy_by_id/SearchById.hpp @@ -50,6 +50,7 @@ class SearchById { using KeyToTargetProcessor = std::vector>; using MeshIDSet = std::set; + virtual ~SearchById() = default; virtual void intialize(const TransferCopyByIdMeshAdapter & mesha, const TransferCopyByIdMeshAdapter & meshb) =0; virtual void do_search(const TransferCopyByIdMeshAdapter & mesha, const TransferCopyByIdMeshAdapter & meshb, diff --git a/packages/stk/stk_transfer/stk_transfer/copy_by_id/TransferCopyTranslator.hpp b/packages/stk/stk_transfer/stk_transfer/copy_by_id/TransferCopyTranslator.hpp index 02edd92df1c3..1648da606e66 100644 --- a/packages/stk/stk_transfer/stk_transfer/copy_by_id/TransferCopyTranslator.hpp +++ b/packages/stk/stk_transfer/stk_transfer/copy_by_id/TransferCopyTranslator.hpp @@ -111,6 +111,7 @@ class TranslatorBase TranslatorBase() {} virtual void translate(const void* srcAddr, unsigned srcDataByteSize, DataTypeKey::data_t destType, void* destAddr, unsigned destDataByteSize) const = 0; + virtual ~TranslatorBase() = default; }; struct TranslatorInfo diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp index 1f52825509ad..977e40aa4fca 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp @@ -45,7 +45,18 @@ struct StkTopologyMapEntry { 
bool operator!=(const StkTopologyMapEntry &rhs) const { return !(*this == rhs); } - int num_sides() const { return topology.num_sides(); } + int num_face_sides() const { + return 2; // FIXME: Number of stackable faces for a 3D shell is always 2 in STK + } + + int num_sides() const { + if (topology.is_shell()) { + if (topology.dimension() == 3) { + return num_face_sides(); // FIXME: Number of stackable faces for a 3D shell is always 2 in STK + } + } + return topology.num_sides(); + } bool valid_side(unsigned side) const { diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp index cd873a952e1c..a53ab4ac9fdb 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp @@ -33,14 +33,12 @@ // #include // for to_string -#include // for Utils #include // for size_t -#include // for ostream +#include #include #include // for Field #include // for MetaData #include -#include // for allocator, operator+, etc #include // for vector #include "gtest/gtest.h" // for AssertHelper #include "stk_mesh/base/Types.hpp" // for PartVector @@ -49,9 +47,10 @@ enum { SpaceDim = 3 }; TEST(UnitTestGmeshFixture, testUnit) { + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 4) { GTEST_SKIP(); } const size_t num_x = 1; const size_t num_y = 2; - const size_t num_z = 3; + const size_t num_z = 4; const size_t num_surf = 6; std::string config_mesh = std::to_string(num_x) + "x" + std::to_string(num_y) + "x" + diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp index c2957d78e73e..0a07a1462b7b 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp @@ -33,6 +33,7 @@ // #include "gtest/gtest.h" // for AssertHelper, ASSERT_TRUE +#include #include // for is_part_io_part #include 
// for StkMeshIoBroker #include // for BulkData @@ -130,4 +131,40 @@ TEST(StkMeshIoBroker, missingInputField) { unlink(fieldDataFile.c_str()); } +TEST(StkMeshIoBroker, testMissingInputField) { + const std::string fieldDataFile = "testMeshWithMissingFieldData.e"; + std::vector transientTimeSteps = {0.0, 1.0, 2.0}; + std::string transientFieldName = "transient_field"; + + write_mesh_with_transient_field_data(fieldDataFile, transientTimeSteps, transientFieldName); + + std::unique_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + stk::mesh::MetaData& meta = bulk->mesh_meta_data(); + + const stk::mesh::EntityRank rank = stk::topology::NODE_RANK; + + const std::string fieldNameBad = transientFieldName+"_scalar_bad_field"; + stk::mesh::Field &scalarFieldBad = meta.declare_field(rank, fieldNameBad, 1); + stk::mesh::put_field_on_mesh(scalarFieldBad, meta.universal_part(), nullptr); + stk::io::MeshField meshFieldBad(&scalarFieldBad, fieldNameBad); + + const std::string fieldNameGood = transientFieldName+"_scalar"; + stk::mesh::Field &scalarFieldGood = meta.declare_field(rank, fieldNameGood, 1); + stk::mesh::put_field_on_mesh(scalarFieldGood, meta.universal_part(), nullptr); + stk::io::MeshField meshFieldGood(&scalarFieldGood, fieldNameGood); + + stk::io::StkMeshIoBroker broker(MPI_COMM_WORLD); + + broker.set_bulk_data(*bulk); + broker.add_mesh_database(fieldDataFile, stk::io::READ_MESH); + broker.create_input_mesh(); + + EXPECT_FALSE(stk::io::verify_field_request(broker, meshFieldBad)); + EXPECT_TRUE(stk::io::verify_field_request(broker, meshFieldGood)); + + broker.populate_bulk_data(); + + unlink(fieldDataFile.c_str()); +} + } diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp index dd064bd2b7a7..6aa92e3d9685 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp @@ -125,7 +125,7 @@ 
TEST(UnitTestingOfBucket, testBucket) std::stringstream out1_str; out1_str << (*b1); bool equal = (gold1 == out1_str.str()); - ASSERT_TRUE(equal); + ASSERT_TRUE(equal)<<"expected str="< bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); +/* shell-tri-3 mesh: */ +/* 3 */ +/* * */ +/* /|\ */ +/* / | \ */ +/* 1* | *4 */ +/* \ | / */ +/* \|/ */ +/* * */ +/* 2 */ +/* */ + const std::string meshDesc = + "0,1,SHELL_TRI_3_ALL_FACE_SIDES, 1,2,3, block_1\n\ + 0,2,SHELL_TRI_3_ALL_FACE_SIDES, 2,4,3, block_1"; + + std::vector coords = {0,1,0, 1,0,0, 1,2,0, 2,1,0}; + +//FIXME! text-mesh doesn't recognize the all-face-sides topologies. + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); + + EXPECT_EQ(0u, stk::mesh::count_selected_entities(bulk->mesh_meta_data().universal_part(), bulk->buckets(stk::topology::FACE_RANK))); + + bulk->modification_begin(); + + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + const unsigned sideOrdinal = 3; + stk::mesh::PartVector emptySideParts; + stk::mesh::Entity side = bulk->declare_element_side(elem1, sideOrdinal, emptySideParts); + bulk->modification_end(); + + EXPECT_EQ(stk::topology::SHELL_SIDE_BEAM_2, bulk->bucket(side).topology()); +} + +void check_ordinal_and_permutation(const stk::mesh::BulkData& bulk, + stk::mesh::Entity elem, + stk::mesh::EntityRank rank, + const stk::mesh::EntityVector& sideNodes, + stk::mesh::ConnectivityOrdinal expectedSideOrdinal, + stk::mesh::Permutation expectedPerm) +{ + stk::mesh::OrdinalAndPermutation ordPerm = + stk::mesh::get_ordinal_and_permutation(bulk, elem, rank, sideNodes); + EXPECT_EQ(expectedSideOrdinal, ordPerm.first); + EXPECT_EQ(expectedPerm, ordPerm.second); +} + +TEST(DeclareElementSide, shell_tri_3_all_face_sides_no_elem_graph) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk = 
stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); +/* shell-tri-3 mesh: */ +/* 3 */ +/* * */ +/* /|\ */ +/* / | \ */ +/* 1* | *4 */ +/* \ | / */ +/* \|/ */ +/* * */ +/* 2 */ +/* */ + bulk->modification_begin(); + + stk::mesh::Part& shellPart = bulk->mesh_meta_data().declare_part_with_topology("shell_part", stk::topology::SHELL_TRI_3_ALL_FACE_SIDES); + + stk::mesh::EntityId elemId = 1; + stk::mesh::EntityIdVector nodeIds = {1, 2, 3}; + stk::mesh::declare_element(*bulk, shellPart, elemId, nodeIds); + + elemId = 2; + nodeIds = {2, 4, 3}; + stk::mesh::declare_element(*bulk, shellPart, elemId, nodeIds); + + bulk->modification_end(); + + EXPECT_EQ(0u, stk::mesh::count_selected_entities(bulk->mesh_meta_data().universal_part(), bulk->buckets(stk::topology::FACE_RANK))); + + bulk->modification_begin(); + + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + const unsigned sideOrdinal = 3; + stk::mesh::PartVector emptySideParts; + + stk::mesh::EntityVector sideNodes = { + bulk->get_entity(stk::topology::NODE_RANK, 2), + bulk->get_entity(stk::topology::NODE_RANK, 3) + }; + stk::mesh::ConnectivityOrdinal expectedSideOrdinal = sideOrdinal; + stk::mesh::Permutation expectedPerm = static_cast(0); + std::cout<<"checking elem1/sideNodes"<get_entity(stk::topology::ELEM_RANK, 2); + expectedSideOrdinal = 4; + stk::mesh::EntityVector reversedSideNodes = { + bulk->get_entity(stk::topology::NODE_RANK, 3), + bulk->get_entity(stk::topology::NODE_RANK, 2) + }; + expectedPerm = static_cast(0); + std::cout<<"checking elem2/reversedSideNodes"<(1); + std::cout<<"checking elem2/sideNodes"<declare_element_side(elem1, sideOrdinal, emptySideParts); + bulk->modification_end(); + + EXPECT_EQ(stk::topology::SHELL_SIDE_BEAM_2, bulk->bucket(side).topology()); + +//FIXME! +//The following expect should be '2u' but that fails because the side is only +//onnected to 1 element. 
(Note that this is the 'no-graph' version of this +//test, so the issue is not related to the face-adjacent-elem-graph.) + EXPECT_EQ(1u, bulk->num_connectivity(side, stk::topology::ELEM_RANK)); +} + +TEST(DeclareElementSide, shell_tri_3_all_face_sides_with_elem_graph) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); +/* shell-tri-3 mesh: */ +/* 3 */ +/* * */ +/* /|\ */ +/* / | \ */ +/* 1* | *4 */ +/* \ | / */ +/* \|/ */ +/* * */ +/* 2 */ +/* */ + bulk->modification_begin(); + + stk::mesh::Part& shellPart = bulk->mesh_meta_data().declare_part_with_topology("shell_part", stk::topology::SHELL_TRI_3_ALL_FACE_SIDES); + + stk::mesh::EntityId elemId = 1; + stk::mesh::EntityIdVector nodeIds = {1, 2, 3}; + stk::mesh::declare_element(*bulk, shellPart, elemId, nodeIds); + + elemId = 2; + nodeIds = {2, 4, 3}; + stk::mesh::declare_element(*bulk, shellPart, elemId, nodeIds); + + bulk->modification_end(); + + bulk->initialize_face_adjacent_element_graph(); + + EXPECT_EQ(0u, stk::mesh::count_selected_entities(bulk->mesh_meta_data().universal_part(), bulk->buckets(stk::topology::FACE_RANK))); + + bulk->modification_begin(); + + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + const unsigned sideOrdinal = 3; + stk::mesh::PartVector emptySideParts; + stk::mesh::Entity side = bulk->declare_element_side(elem1, sideOrdinal, emptySideParts); + bulk->modification_end(); + + EXPECT_EQ(stk::topology::SHELL_SIDE_BEAM_2, bulk->bucket(side).topology()); + EXPECT_EQ(2u, bulk->num_connectivity(side, stk::topology::ELEM_RANK)); +} + +TEST(GetSides, hex8) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + + stk::io::fill_mesh("generated:1x1x1|sideset:xXyYzZ", *bulk); + + stk::mesh::Entity elem1 = 
bulk->get_entity(stk::topology::ELEM_RANK, 1); + EXPECT_TRUE(bulk->is_valid(elem1)); + EXPECT_EQ(stk::topology::HEX_8, bulk->bucket(elem1).topology()); + + EXPECT_EQ(6u, stk::mesh::num_sides(*bulk, elem1)); + + stk::mesh::EntityVector sides = stk::mesh::get_sides(*bulk, elem1); + std::vector sideOrds = stk::mesh::get_side_ordinals(*bulk, elem1); + ASSERT_EQ(6u, sides.size()); + ASSERT_EQ(6u, sideOrds.size()); + EXPECT_EQ(stk::topology::FACE_RANK, bulk->entity_rank(sides[0])); + EXPECT_EQ(stk::topology::FACE_RANK, bulk->entity_rank(sides[1])); +} + +TEST(GetSides, textmesh_shell_quad_4_EdgeSides) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); //shell-quad-4 mesh: // 6 @@ -1165,7 +1359,18 @@ TEST(CreateAndWrite, DISABLED_textmesh_shell_quad_4_EdgeSides) stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); - stk::io::write_mesh("shellq4_edge_sides.g", *bulk); + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + EXPECT_TRUE(bulk->is_valid(elem1)); + EXPECT_EQ(stk::topology::SHELL_QUAD_4, bulk->bucket(elem1).topology()); + + EXPECT_EQ(2u, stk::mesh::num_sides(*bulk, elem1)); + + stk::mesh::EntityVector sides = stk::mesh::get_sides(*bulk, elem1); + std::vector sideOrds = stk::mesh::get_side_ordinals(*bulk, elem1); + ASSERT_EQ(2u, sides.size()); + ASSERT_EQ(2u, sideOrds.size()); + EXPECT_EQ(stk::topology::EDGE_RANK, bulk->entity_rank(sides[0])); + EXPECT_EQ(stk::topology::EDGE_RANK, bulk->entity_rank(sides[1])); } TEST(CreateAndWrite, DISABLED_textmesh_shell_quad_4_FullExteriorSkin) @@ -1607,7 +1812,6 @@ TEST(Skinning, createSidesForShellQuad4Block) // 1*----*----*7 // 4 // - stk::mesh::Part& skinPart = bulk->mesh_meta_data().declare_part("mySkin"); const std::string meshDesc = "0,1,SHELL_QUAD_4, 1,4,5,2, block_1\n\ 0,2,SHELL_QUAD_4, 2,5,6,3, block_1\n\ @@ -1620,8 
+1824,42 @@ TEST(Skinning, createSidesForShellQuad4Block) stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); + auto skinPart = bulk->mesh_meta_data().get_part("surface_1"); + EXPECT_EQ(0u, stk::mesh::count_entities(*bulk, stk::topology::FACE_RANK, *skinPart)); + EXPECT_EQ(8u, stk::mesh::count_entities(*bulk, stk::topology::EDGE_RANK, *skinPart)); +} + +TEST(Skinning, createSidesForShellQuad4BlockExposedBoundary) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + std::unique_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); +//shell-quad-4 mesh: +// 6 +// 3*----*----*9 +// | E2 | E4 | +// | | | +// 2*---5*----*8 +// | E1 | E3 | +// | | | +// 1*----*----*7 +// 4 +// + stk::mesh::Part& skinPart = bulk->mesh_meta_data().declare_part("mySkin"); + const std::string meshDesc = + "0,1,SHELL_QUAD_4, 1,4,5,2, block_1\n\ + 0,2,SHELL_QUAD_4, 2,5,6,3, block_1\n\ + 0,3,SHELL_QUAD_4, 4,7,8,5, block_1\n\ + 0,4,SHELL_QUAD_4, 5,8,9,6, block_1|sideset:name=surface_1"; + + std::vector coords = {0,0,0, 0,1,0, 0,2,0, + 1,0,0, 1,1,0, 1,2,0, + 2,0,0, 2,1,0, 2,2,0}; + + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); + stk::mesh::create_exposed_block_boundary_sides(*bulk, bulk->mesh_meta_data().universal_part(), stk::mesh::PartVector{&skinPart}); EXPECT_EQ(8u, stk::mesh::count_entities(*bulk, stk::topology::FACE_RANK, skinPart)); + EXPECT_EQ(0u, stk::mesh::count_entities(*bulk, stk::topology::EDGE_RANK, skinPart)); } TEST(Skinning, createSidesForShellQuad8Block) @@ -1768,3 +2006,132 @@ TEST(CreateAndConvert, read_write_shell_4_all_face_sides) unlink(fileName.c_str()); } + +class CreateReadAndWrite : public stk::unit_test_util::MeshFixture +{ + protected: + std::string get_meshspec_single_shell_quad4_with_all_sides() { + //shell-quad-4 mesh: + // + // 4*---3* + // | E1 | + // | | + // 1*---2* + // + // + const 
std::string meshDesc = + "0,1,SHELL_QUAD_4, 1,2,3,4, block_1\n\ + |sideset:name=surface_1; data=1,1, 1,2, 1,3, 1,4, 1,5, 1,6; split=topology"; + + std::vector coords = {0,0,0, 1,0,0, 1,1,0, 0,1,0}; + + return stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords); + } + + std::string get_meshspec_four_shell_quad4_with_sideset() { + //shell-quad-4 mesh: + // 6 + // 3*----*----*9 + // | E2 | E4 | + // | | | + // 2*---5*----*8 + // | E1 | E3 | + // | | | + // 1*----*----*7 + // 4 + // + const std::string meshDesc = + "0,1,SHELL_QUAD_4, 1,4,5,2, block_1\n\ + 0,2,SHELL_QUAD_4, 2,5,6,3, block_1\n\ + 0,3,SHELL_QUAD_4, 4,7,8,5, block_1\n\ + 0,4,SHELL_QUAD_4, 5,8,9,6, block_1\ + |sideset:name=surface_1; data=1,3, 3,3, 3,4, 4,4, 4,5, 2,5, 2,6, 1,6; split=topology"; + + std::vector coords = {0,0,0, 0,1,0, 0,2,0, + 1,0,0, 1,1,0, 1,2,0, + 2,0,0, 2,1,0, 2,2,0}; + + return stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords); + } + + void create_1_shell_using_ioss_text_mesh(stk::mesh::BulkData& bulk) { + stk::io::fill_mesh("textmesh:" + get_meshspec_single_shell_quad4_with_all_sides(), bulk); + } + + void create_4_shells_using_stk_text_mesh(stk::mesh::BulkData& bulk) { + stk::unit_test_util::setup_text_mesh(bulk, get_meshspec_four_shell_quad4_with_sideset()); + } + + void create_4_shells_using_ioss_text_mesh(stk::mesh::BulkData& bulk) { + stk::io::fill_mesh("textmesh:" + get_meshspec_four_shell_quad4_with_sideset(), bulk); + } + + void check_mesh_properties(stk::mesh::BulkData& bulk, std::vector val) { + stk::mesh::EntityVector entities; + stk::mesh::get_entities(bulk, stk::topology::ELEM_RANK, entities); + + for (auto entity : entities) { + EXPECT_EQ(val[0], bulk.num_nodes(entity)) << bulk.entity_key(entity); + EXPECT_EQ(val[1], bulk.num_edges(entity)) << bulk.entity_key(entity); + EXPECT_EQ(val[2], bulk.num_faces(entity)) << bulk.entity_key(entity); + EXPECT_EQ(val[3], bulk.num_sides(entity)) << bulk.entity_key(entity); + } + + EXPECT_EQ(val[4], 
stk::mesh::count_selected_entities(bulk.mesh_meta_data().locally_owned_part(), bulk.buckets(stk::topology::EDGE_RANK))); + EXPECT_EQ(val[5], stk::mesh::count_selected_entities(bulk.mesh_meta_data().locally_owned_part(), bulk.buckets(stk::topology::FACE_RANK))); + } +}; + +TEST_F(CreateReadAndWrite, DISABLED_stk_textmesh_shell_quad_4_EdgeSides) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk1 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + std::shared_ptr bulk2 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + + std::string fileName("shell_quad4_edge_sides_test.g"); + create_4_shells_using_stk_text_mesh(*bulk1); + stk::io::write_mesh(fileName, *bulk1); + check_mesh_properties(*bulk1, {4, 2, 0, 2, 8, 0}); + + stk::io::fill_mesh(fileName, *bulk2); + check_mesh_properties(*bulk2, {4, 2, 0, 2, 8, 0}); + + unlink(fileName.c_str()); +} + +TEST_F(CreateReadAndWrite, ioss_textmesh_shell_quad_4_EdgeSides) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk1 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + std::shared_ptr bulk2 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + + std::string fileName("shell_quad4_edge_sides_test.g"); + create_4_shells_using_ioss_text_mesh(*bulk1); + stk::io::write_mesh(fileName, *bulk1); + check_mesh_properties(*bulk1, {4, 2, 0, 2, 8, 0}); + + stk::io::fill_mesh(fileName, *bulk2); + check_mesh_properties(*bulk2, {4, 2, 0, 2, 8, 0}); + + unlink(fileName.c_str()); +} + +TEST_F(CreateReadAndWrite, ioss_textmesh_shell_quad_4_FaceAndEdgeSides) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk1 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + std::shared_ptr bulk2 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + + std::string 
fileName("shell_quad4_face_and_edge_sides_test.g"); + create_1_shell_using_ioss_text_mesh(*bulk1); + stk::io::write_mesh(fileName, *bulk1); + check_mesh_properties(*bulk1, {4, 4, 2, 6, 4, 2}); + + stk::io::fill_mesh(fileName, *bulk2); + check_mesh_properties(*bulk2, {4, 4, 2, 6, 4, 2}); + + unlink(fileName.c_str()); +} diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp index cb07bb52eb81..df30626384ae 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp @@ -43,8 +43,11 @@ #include #include -template using NgpDebugger = stk::mesh::NgpFieldSyncDebugger; -template using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; +template +using NgpDebugger = stk::mesh::NgpFieldSyncDebugger; + +template +using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; void extract_warning(std::string & stdoutString, int numExpectedOccurrences, const std::string & warningString); @@ -107,6 +110,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture stk::mesh::Selector & fieldParts, unsigned numStates = 1) { + get_meta().enable_field_sync_debugger(); const T init = 1; stk::mesh::Field & field = get_meta().declare_field(rank, name, numStates); stk::mesh::put_field_on_mesh(field, fieldParts, &init); @@ -119,6 +123,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture unsigned numComponents, stk::mesh::Selector & fieldParts) { + get_meta().enable_field_sync_debugger(); unsigned numStates = 1; const std::vector init(numComponents, 1); stk::mesh::Field & field = get_meta().declare_field(rank, name, numStates); @@ -200,13 +205,13 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture template void initialize_ngp_field(stk::mesh::Field & stkField) { - 
stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::get_updated_ngp_field(stkField); } template void initialize_ngp_field(stk::mesh::FieldBase & stkField) { - stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::get_updated_ngp_field(stkField); } template @@ -393,7 +398,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture { const int component = 0; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); ngpField.sync_to_device(); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, selector, @@ -412,7 +417,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void write_vector_field_on_device(stk::mesh::FieldBase & stkField, const stk::mesh::Selector& selector, T value) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); ngpField.sync_to_device(); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, selector, @@ -434,7 +439,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void device_field_set_all(stk::mesh::Field & stkField, T value) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); ngpField.set_all(ngpMesh, value); } @@ -490,7 +495,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void read_field_on_device(stk::mesh::FieldBase & stkField, const stk::mesh::Selector& selector) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = 
stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), selector); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -568,7 +573,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void read_field_on_device_using_entity_field_data(stk::mesh::Field & stkField) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), stkField); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -592,7 +597,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void read_field_on_device_using_mesh_index(stk::mesh::Field & stkField) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), stkField); stk::mesh::EntityRank rank = ngpField.get_rank(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp index e96770e3cbe3..712c0b47d652 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include #include @@ -47,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -96,18 +99,82 @@ class NgpMeshTest : public stk::mesh::fixtures::TestHexFixture numNodesVec.copy_device_to_host(); ASSERT_EQ(8u, numNodesVec[0]); } + + void run_edge_check(unsigned 
numExpectedEdgesPerElem) + { + stk::mesh::NgpMesh& ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); + stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, get_meta().universal_part(), + KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex& entityIndex) { + stk::mesh::ConnectedEntities edges = ngpMesh.get_edges(stk::topology::ELEM_RANK, entityIndex); + NGP_EXPECT_EQ(numExpectedEdgesPerElem, edges.size()); + } + ); + } + + void delete_edge_on_each_element() + { + get_bulk().modification_begin(); + + stk::mesh::Entity elem1 = get_bulk().get_entity(stk::topology::ELEM_RANK, 1); + stk::mesh::ConnectedEntities edges = get_bulk().get_connected_entities(elem1, stk::topology::EDGE_RANK); + stk::mesh::ConnectedEntities edgeElems = get_bulk().get_connected_entities(edges[0], stk::topology::ELEM_RANK); + EXPECT_EQ(1u, edgeElems.size()); + EXPECT_EQ(elem1, edgeElems[0]); + + const stk::mesh::ConnectivityOrdinal* edgeElemOrds = get_bulk().begin_ordinals(edges[0], stk::topology::ELEM_RANK); + stk::mesh::Entity edge = edges[0]; + EXPECT_TRUE(get_bulk().destroy_relation(elem1, edge, edgeElemOrds[0])); + EXPECT_TRUE(get_bulk().destroy_entity(edge)); + + stk::mesh::Entity elem2 = get_bulk().get_entity(stk::topology::ELEM_RANK, 2); + edges = get_bulk().get_connected_entities(elem2, stk::topology::EDGE_RANK); + EXPECT_EQ(12u, edges.size()); + edgeElems = get_bulk().get_connected_entities(edges[5], stk::topology::ELEM_RANK); + EXPECT_EQ(1u, edgeElems.size()); + EXPECT_EQ(elem2, edgeElems[0]); + edgeElemOrds = get_bulk().begin_ordinals(edges[5], stk::topology::ELEM_RANK); + edge = edges[5]; + EXPECT_TRUE(get_bulk().destroy_relation(elem2, edge, edgeElemOrds[0])); + EXPECT_TRUE(get_bulk().destroy_entity(edge)); + + get_bulk().modification_end(); + } }; -TEST_F(NgpMeshTest, get_nodes_using_FastMeshIndex) +NGP_TEST_F(NgpMeshTest, get_nodes_using_FastMeshIndex) { run_get_nodes_using_FastMeshIndex_test(); } -TEST_F(NgpMeshTest, get_nodes_using_FastMeshIndex_custom_NgpMemSpace) 
+NGP_TEST_F(NgpMeshTest, get_nodes_using_FastMeshIndex_custom_NgpMemSpace) { run_get_nodes_using_FastMeshIndex_test(); } +NGP_TEST_F(NgpMeshTest, hexes_with_edges_update_connectivity) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + + setup_mesh(1,1,2); + stk::mesh::get_updated_ngp_mesh(get_bulk()); + + stk::mesh::Part& edgePart = get_meta().declare_part("edges", stk::topology::EDGE_RANK); + + stk::mesh::create_edges(get_bulk(), get_meta().universal_part(), &edgePart); + stk::mesh::get_updated_ngp_mesh(get_bulk()); + + EXPECT_EQ(20u, stk::mesh::count_entities(get_bulk(), stk::topology::EDGE_RANK, edgePart)); + + unsigned numExpectedEdgesPerElement = 12; + run_edge_check(numExpectedEdgesPerElement); + + delete_edge_on_each_element(); + EXPECT_EQ(18u, stk::mesh::count_entities(get_bulk(), stk::topology::EDGE_RANK, edgePart)); + + numExpectedEdgesPerElement = 11; + run_edge_check(numExpectedEdgesPerElement); +} + class NgpMeshRankLimit : public stk::mesh::fixtures::TestHexFixture {}; TEST_F(NgpMeshRankLimit, tooManyRanksThrowWithMessage) @@ -247,6 +314,60 @@ NGP_TEST_F(NgpMeshTest, volatileFastSharedCommMap_custom_NgpMemSpace) } } +void test_ngp_permutations_1side_2perms(const stk::mesh::BulkData& mesh, + const stk::mesh::Part& sidePart) +{ + stk::mesh::NgpMesh& ngpMesh = stk::mesh::get_updated_ngp_mesh(mesh); + + stk::mesh::EntityRank sideRank = mesh.mesh_meta_data().side_rank(); + stk::mesh::EntityVector sides; + stk::mesh::get_entities(mesh, sideRank, sidePart, sides); + EXPECT_EQ(1u, sides.size()); + EXPECT_EQ(2u, mesh.num_connectivity(sides[0], stk::topology::ELEM_RANK)); + const stk::mesh::Permutation* hostPerms = mesh.begin_permutations(sides[0], stk::topology::ELEM_RANK); + stk::mesh::Permutation expectedPerm1 = hostPerms[0]; + stk::mesh::Permutation expectedPerm2 = hostPerms[1]; + + stk::mesh::for_each_entity_run(ngpMesh, sideRank, sidePart, + KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex& sideIndex) { + 
stk::mesh::NgpMesh::Permutations perms = ngpMesh.get_permutations(sideRank, sideIndex, stk::topology::ELEM_RANK); + NGP_EXPECT_EQ(2u, perms.size()); + NGP_EXPECT_EQ(expectedPerm1, perms[0]); + NGP_EXPECT_EQ(expectedPerm2, perms[1]); + }); +} + +NGP_TEST(TestNgpMesh, permutations) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + + std::string meshDesc = + "0,1,TRI_3_2D,1,2,3,block_1\n" + "0,2,TRI_3_2D,2,4,3,block_2\n" + "|dimension:2|sideset:name=surface_1; data=1,2"; + + std::shared_ptr mesh = stk::mesh::MeshBuilder(MPI_COMM_WORLD) + .set_spatial_dimension(2).create(); + stk::unit_test_util::setup_text_mesh(*mesh, meshDesc); + + stk::mesh::EntityRank sideRank = mesh->mesh_meta_data().side_rank(); + stk::mesh::Part* sidePart = mesh->mesh_meta_data().get_part("surface_1"); + STK_ThrowAssertMsg(sidePart != nullptr, "failed to find part for surface_1"); + + stk::mesh::EntityVector sides; + stk::mesh::get_entities(*mesh, sideRank, *sidePart, sides); + EXPECT_EQ(1u, sides.size()); + EXPECT_EQ(2u, mesh->num_connectivity(sides[0], stk::topology::ELEM_RANK)); + + stk::mesh::Permutation expectedPerm1 = static_cast(0); + stk::mesh::Permutation expectedPerm2 = static_cast(1); + const stk::mesh::Permutation* permutations = mesh->begin_permutations(sides[0], stk::topology::ELEM_RANK); + EXPECT_EQ(expectedPerm1, permutations[0]); + EXPECT_EQ(expectedPerm2, permutations[1]); + + test_ngp_permutations_1side_2perms(*mesh, *sidePart); +} + namespace { double reduce_on_host(stk::mesh::BulkData& bulk) { @@ -279,6 +400,27 @@ TEST(NgpHostMesh, FieldForEachEntityReduceOnHost_fromTylerVoskuilen) EXPECT_EQ(1.0, maxZ); } +TEST(NgpDeviceMesh, dont_let_stacksize_get_out_of_control) +{ + constexpr size_t tol = 50; + +#ifdef SIERRA_MIGRATION + constexpr size_t expectedBulkDataSize = 1320; +#else + constexpr size_t expectedBulkDataSize = 1256; +#endif + EXPECT_NEAR(expectedBulkDataSize, sizeof(stk::mesh::BulkData), tol); + + constexpr size_t expectedBucketSize = 1120; 
+ EXPECT_NEAR(expectedBucketSize, sizeof(stk::mesh::Bucket), tol); + + constexpr size_t expectedDeviceMeshSize = 472; + EXPECT_NEAR(expectedDeviceMeshSize, sizeof(stk::mesh::DeviceMesh), tol); + + constexpr size_t expectedDeviceBucketSize = 264; + EXPECT_NEAR(expectedDeviceBucketSize, sizeof(stk::mesh::DeviceBucket), tol); +} + void add_elements(std::unique_ptr& bulk) { stk::mesh::MetaData& meta = bulk->mesh_meta_data(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgp.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgp.cpp index 4c869fb317de..89ef24dfc2ca 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgp.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgp.cpp @@ -16,7 +16,7 @@ namespace { -using IntDualViewType = Kokkos::DualView; +using UnsignedDualViewType = Kokkos::DualView; void test_view_of_fields(const stk::mesh::BulkData& bulk, stk::mesh::Field& field1, @@ -39,19 +39,19 @@ void test_view_of_fields(const stk::mesh::BulkData& bulk, Kokkos::deep_copy(fields, hostFields); unsigned numResults = 2; - IntDualViewType result = ngp_unit_test_utils::create_dualview("result",numResults); + UnsignedDualViewType result = ngp_unit_test_utils::create_dualview("result",numResults); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 2), KOKKOS_LAMBDA(const unsigned& i) { - result.d_view(i) = fields(i).get_ordinal() == i ? 
1 : 0; + result.d_view(i) = fields(i).get_ordinal(); }); - result.modify(); - result.sync(); + result.modify(); + result.sync(); - EXPECT_EQ(1, result.h_view(0)); - EXPECT_EQ(1, result.h_view(1)); + EXPECT_EQ(hostFields(0).get_ordinal(), result.h_view(0)); + EXPECT_EQ(hostFields(1).get_ordinal(), result.h_view(1)); #if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) for (unsigned i = 0; i < 2; ++i) { diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp index 78a62cadccb6..2ac1ae71994c 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp @@ -172,8 +172,8 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture } } - template class NgpDebugger = stk::mesh::DefaultNgpFieldSyncDebugger> - void write_scalar_host_field_on_device(stk::mesh::HostField & hostField, T value) + template class NgpDebugger = stk::mesh::DefaultNgpFieldSyncDebugger> + void write_scalar_host_field_on_device(stk::mesh::HostField & hostField, T value) { const int component = 0; stk::mesh::HostMesh hostMesh(get_bulk()); @@ -205,7 +205,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const int component = 1; // Just write to the second component stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); const stk::mesh::MetaData & meta = get_bulk().mesh_meta_data(); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), meta.locally_owned_part()); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -226,7 +226,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const int component = 1; // Just write to the second component stk::mesh::NgpMesh & ngpMesh = 
stk::mesh::get_updated_ngp_mesh(get_bulk()); const stk::mesh::MetaData & meta = get_bulk().mesh_meta_data(); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, meta.locally_owned_part(), KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex& entity) { @@ -269,7 +269,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); const stk::mesh::MetaData & meta = get_bulk().mesh_meta_data(); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), meta.locally_owned_part()); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -334,6 +334,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture template stk::mesh::Field & create_scalar_field(stk::topology::rank_t rank, const std::string & name) { + get_meta().enable_field_sync_debugger(); unsigned numStates = 1; const T init = 1; stk::mesh::Field & field = get_meta().declare_field(rank, name, numStates); @@ -344,6 +345,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture template stk::mesh::Field & create_scalar_multistate_field(stk::topology::rank_t rank, const std::string & name) { + get_meta().enable_field_sync_debugger(); unsigned numStates = 2; const T init = 1; stk::mesh::Field & field = get_meta().declare_field(rank, name, numStates); @@ -354,6 +356,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture template stk::mesh::Field & create_vector_field(stk::topology::rank_t rank, const std::string & name) { + get_meta().enable_field_sync_debugger(); unsigned numStates = 1; unsigned numScalarsPerEntity = 3; const T init[] = {1, 2, 3}; @@ -375,6 +378,7 @@ class NgpDebugFieldSync : 
public NgpDebugFieldSyncFixture const std::vector> & numElemsInEachPart) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + get_meta().enable_field_sync_debugger(); stk::mesh::Field & stkField = create_scalar_field(stk::topology::ELEM_RANK, fieldName); create_parts(numElemsInEachPart); @@ -395,6 +399,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const std::vector> & numElemsInEachPart) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + get_meta().enable_field_sync_debugger(); stk::mesh::Field & stkField = create_scalar_field(stk::topology::ELEM_RANK, fieldName); create_parts(numElemsInEachPart); @@ -416,6 +421,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const std::vector> & numElemsInEachPart) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + get_meta().enable_field_sync_debugger(); stk::mesh::Field & stkField = create_vector_field(stk::topology::ELEM_RANK, fieldName); create_parts(numElemsInEachPart); @@ -2237,7 +2243,7 @@ TEST_F(NgpDebugFieldSync, ForcedDebugger_HostField_UsageNotProblematic_UsingEnti { if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) return; stk::mesh::Field & stkField = build_mesh_with_scalar_field("doubleScalarField", {{2, "Part1"}}); - stk::mesh::HostField hostField(get_bulk(), stkField); + stk::mesh::HostField hostField(get_bulk(), stkField); testing::internal::CaptureStdout(); write_scalar_host_field_on_device(hostField, 3.14); @@ -2251,7 +2257,7 @@ TEST_F(NgpDebugFieldSync, ForcedDebugger_HostField_UsageNotProblematic_UsingBuck { if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) return; stk::mesh::Field & stkField = build_mesh_with_scalar_field("doubleScalarField", {{2, "Part1"}}); - stk::mesh::HostField hostField(get_bulk(), stkField); + stk::mesh::HostField hostField(get_bulk(), stkField); testing::internal::CaptureStdout(); write_scalar_host_field_on_device(hostField, 3.14); @@ -2267,6 +2273,7 @@ class NgpDebugFieldSync_SeparateFieldRestrictions : public NgpDebugFieldSyncFixt 
void setup_mesh_and_field_with_multiple_restrictions(const std::string& fieldName) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + get_meta().enable_field_sync_debugger(); stk::mesh::Part& part1 = get_meta().declare_part_with_topology("Part1", stk::topology::HEX_8); stk::mesh::Part& part2 = get_meta().declare_part_with_topology("Part2", stk::topology::HEX_8); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_AccessDuringMeshModification.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_AccessDuringMeshModification.cpp index 815c3f86c40b..e8cf1e4bb8d8 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_AccessDuringMeshModification.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_AccessDuringMeshModification.cpp @@ -102,7 +102,7 @@ class NgpDebugFieldSync_AccessDuringMeshModification : public NgpDebugFieldSyncF { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); const stk::mesh::MetaData & meta = get_bulk().mesh_meta_data(); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), meta.locally_owned_part()); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -1372,7 +1372,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_C declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership_with_scalar_field_write_using_entity({{2, "Part2", "Part1"}}, stkField, 3.14); @@ -1392,7 +1392,7 @@ 
TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_C declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element_with_scalar_field_write_using_entity({{3, "Part1"}, {4, "Part1"}}, stkField, 3.14); @@ -1413,7 +1413,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_D declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element_with_scalar_field_write_using_entity({2}, stkField, 3.14); @@ -1433,7 +1433,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_M declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership_with_scalar_field_write_using_entity({{2, "Part2", "Part1"}}, stkField, 3.14); @@ -1459,7 +1459,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_C declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = 
stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element_with_scalar_field_write_using_entity({{3, "Part1"}, {4, "Part1"}}, stkField, 3.14); @@ -1486,7 +1486,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_D declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element_with_scalar_field_write_using_entity({2}, stkField, 3.14); @@ -2111,7 +2111,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, TwoConsecutiveMods_Change declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); @@ -2133,7 +2133,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, TwoConsecutiveMods_Create declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element_with_scalar_field_write_using_entity({{3, "Part1"}, {4, "Part1"}}, stkField, 3.14); @@ -2155,7 +2155,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, TwoConsecutiveMods_Delete declare_scalar_field("doubleScalarField", 
{"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element_with_scalar_field_write_using_entity({2}, stkField, 3.14); @@ -2446,7 +2446,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_T declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); @@ -2469,7 +2469,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_T declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); const stk::mesh::EntityId maxIdToRead = 1; // Avoid memory corruption due to accessing old Field after new bucket allocation @@ -2493,7 +2493,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_T declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); diff --git 
a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_MeshModification.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_MeshModification.cpp index a4064c4fa6fc..d7784e7b2e12 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_MeshModification.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_MeshModification.cpp @@ -389,7 +389,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, ChangeBucket_MissingDeviceFieldUpdate declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part2", "Part1"}}); @@ -411,7 +411,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, CreateBucket_MissingDeviceFieldUpdate declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element({{3, "Part1"}}, stkField); @@ -434,7 +434,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, DeleteBucket_MissingDeviceFieldUpdate declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element({2}); @@ -455,7 +455,7 @@ 
TEST_F(NgpDebugFieldSync_MeshModification, ModifyBucket_StaleDeviceFieldCopy_Acc declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part2", "Part1"}}); @@ -482,7 +482,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, CreateBucket_StaleDeviceFieldCopy_Acc declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element({{3, "Part1"}}, stkField); @@ -510,7 +510,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, DeleteBucket_StaleDeviceFieldCopy_Acc declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element({2}); @@ -536,7 +536,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, ModifyBucket_StaleDeviceFieldCopy_Cle declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); 
modify_element_part_membership({{2, "Part2", "Part1"}}); @@ -563,7 +563,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, CreateBucket_StaleDeviceFieldCopy_Cle declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element({{3, "Part1"}}, stkField); @@ -591,7 +591,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, DeleteBucket_StaleDeviceFieldCopy_Cle declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element({2}); @@ -1138,7 +1138,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoConsecutiveMods_ChangeBucket_Chang declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); @@ -1162,7 +1162,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoConsecutiveMods_CreateBucket_Creat declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); 
testing::internal::CaptureStdout(); create_element({{3, "Part1"}}, stkField); @@ -1186,7 +1186,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoConsecutiveMods_DeleteBucket_Delet declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element({2}); @@ -1788,7 +1788,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoMods_ChangeBucket_ChangeBucket_Mis declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); @@ -1813,7 +1813,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoMods_CreateBucket_CreateBucket_Mis declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); const stk::mesh::EntityId maxIdToRead = 1; // Avoid memory corruption due to accessing old Field after new bucket allocation @@ -1839,7 +1839,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoMods_DeleteBucket_DeleteBucket_Mis declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + 
stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp index 8c7e854e1c9e..455fe72e0f43 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp @@ -45,7 +45,7 @@ class NgpDebugFieldSync_PartialAllocation : public NgpDebugFieldSyncFixture { const int component = 0; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::mesh::Selector fieldSelector(stkField); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, fieldSelector, @@ -64,7 +64,7 @@ class NgpDebugFieldSync_PartialAllocation : public NgpDebugFieldSyncFixture { const int component = 0; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), stkField); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -1324,6 +1324,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, EmptyField_MeshModification_Properly if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) return; setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); create_parts({"Part1", "Part2", "Part3"}); + get_meta().enable_field_sync_debugger(); get_meta().declare_field(stk::topology::ELEM_RANK, "doubleScalarField", 1); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = 
initialized_field("doubleScalarField"); @@ -1560,7 +1561,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part1", "Part2"}, {3, "Part1", "Part3"}}); @@ -1582,7 +1583,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership_with_scalar_field_write_using_entity({{2, "Part1", "Part2"}, {3, "Part1", "Part3"}}, stkField, 3.14); @@ -1603,7 +1604,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part1", "Part2"}, {3, "Part1", "Part3"}}); @@ -1628,7 +1629,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 
2}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); batch_modify_element_part_membership({{3, "Part3", "Part2"}}); @@ -1654,7 +1655,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership_with_scalar_field_write_using_entity({{2, "Part1", "Part2"}, {3, "Part1", "Part3"}}, stkField, 3.14); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpMeshModification.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpMeshModification.cpp index 7e70384d89f9..a9edbc02b662 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpMeshModification.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpMeshModification.cpp @@ -441,4 +441,33 @@ TEST_F(NgpBatchChangeEntityParts, failedHostAccessAfterDeviceMeshMod) } } +TEST_F(NgpBatchChangeEntityParts, impl_addPartToNode_ngpDevice) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) GTEST_SKIP(); + + build_empty_mesh(1, 1); + + stk::mesh::Part & part1 = m_meta->declare_part_with_topology("part1", stk::topology::NODE); + stk::mesh::Part & part2 = m_meta->declare_part_with_topology("part2", stk::topology::NODE); + const unsigned nodeId = 1; + const stk::mesh::Entity node1 = create_node(*m_bulk, nodeId, {&part1}); + check_bucket_layout(*m_bulk, {{{"part1"}, {nodeId}}}, stk::topology::NODE_RANK); + + DeviceEntitiesType 
entities("deviceEntities", 1); + DevicePartOrdinalsType addPartOrdinals("deviceAddParts", 1); + DevicePartOrdinalsType removePartOrdinals("deviceRemoveParts", 0); + + stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(*m_bulk); + fill_device_views_add_remove_part_from_node(entities, addPartOrdinals, removePartOrdinals, ngpMesh, + node1, &part2, nullptr); + + ngpMesh.impl_batch_change_entity_parts(entities, addPartOrdinals, removePartOrdinals); +// confirm_host_mesh_is_not_synchronized_from_device(ngpMesh); +// +// ngpMesh.sync_to_host(); +// confirm_host_mesh_is_synchronized_from_device(ngpMesh); +// +// check_bucket_layout(*m_bulk, {{{"part1", "part2"}, {nodeId}}}, stk::topology::NODE_RANK); +} + } // namespace diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestTransposePinnedMapped.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestTransposePinnedMapped.cpp index 6031e1b1ac84..d0b61df0ec13 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestTransposePinnedMapped.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestTransposePinnedMapped.cpp @@ -108,7 +108,7 @@ class TestTranspose : public ::testing::Test void setup_views(unsigned numBuckets, double overallocationFactor) { - deviceFieldData = stk::mesh::FieldDataDeviceViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "deviceFieldData"), numBuckets, ORDER_INDICES(bucketCapacity, numPerEntity)); + deviceFieldData = stk::mesh::FieldDataDeviceViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "deviceFieldData"), numBuckets, ORDER_INDICES(bucketCapacity, numPerEntity)); goldHostFieldData = stk::mesh::FieldDataHostViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "goldHostFieldData"), numBuckets, ORDER_INDICES(bucketCapacity,numPerEntity)); fill_gold_host_field_data(numBuckets); @@ -171,7 +171,7 @@ class TestTranspose : public ::testing::Test stk::mesh::FieldDataPointerHostViewType hostBucketPtrData; stk::mesh::FieldDataPointerDeviceViewType 
deviceBucketPtrData; - stk::mesh::FieldDataDeviceViewType deviceFieldData; + stk::mesh::FieldDataDeviceViewType deviceFieldData; stk::mesh::FieldDataHostViewType goldHostFieldData; stk::mesh::UnsignedViewType deviceBucketSizes; diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp index 8cd3fb2dafad..40fa498d66fb 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp @@ -1768,6 +1768,7 @@ TEST_F(NgpFieldFixture, LateFieldUsage) get_meta().enable_late_fields(); stk::mesh::Field & stkLateIntField = create_field(stk::topology::ELEM_RANK, "lateIntField"); + initialize_ngp_field(stkIntField); // Must update early fields after adding late field initialize_ngp_field(stkLateIntField); int multiplier = 2; @@ -2039,17 +2040,6 @@ TEST(DeviceField, checkSizeof) EXPECT_TRUE(sizeof(stk::mesh::DeviceField) <= expectedNumBytes); } -TEST(DeviceBucket, checkSizeof) -{ -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after 2024/06/26 - size_t expectedNumBytes = 176; -#else - size_t expectedNumBytes = 152; // Value after removing DeviceBucket::m_hostEntities -#endif - std::cout << "sizeof(stk::mesh::DeviceBucket): " << sizeof(stk::mesh::DeviceBucket) << std::endl; - EXPECT_TRUE(sizeof(stk::mesh::DeviceBucket) <= expectedNumBytes); -} - enum PartIds : int { part_1 = 1, @@ -2632,4 +2622,76 @@ TEST_F(NgpFieldUpdate, MoveBackwardForwardBackward) check_field_values(); } +class NgpFieldExecSpaceTestFixture : public stk::unit_test_util::MeshFixture +{ +public: + void setup_empty_mesh_and_field() + { + setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + + const std::vector init(1, 1); + stk::mesh::Field& field = get_meta().declare_field(stk::topology::ELEM_RANK, "", 1); + stk::mesh::put_field_on_mesh(field, get_meta().universal_part(), 1, init.data()); + } + + auto get_default_field() + { + return 
get_meta().get_field(stk::topology::ELEM_RANK, ""); + } +}; + +TEST_F(NgpFieldExecSpaceTestFixture, CheckValidMemSpace) +{ + if (get_parallel_size() != 1) GTEST_SKIP(); + setup_empty_mesh_and_field(); + auto field = get_default_field(); + + EXPECT_NO_THROW((stk::mesh::get_updated_ngp_field(*field))); + + EXPECT_NO_THROW((stk::mesh::get_updated_ngp_field(*field))); + +#ifdef STK_ENABLE_GPU + EXPECT_ANY_THROW( +#else + EXPECT_NO_THROW( +#endif + (stk::mesh::get_updated_ngp_field(*field))); +} + +TEST_F(NgpFieldExecSpaceTestFixture, CheckSameMemSpace) +{ + if (get_parallel_size() != 1) GTEST_SKIP(); + setup_empty_mesh_and_field(); + auto field = get_default_field(); + + auto& ngpField1 = stk::mesh::get_updated_ngp_field(*field); + auto& ngpField2 = stk::mesh::get_updated_ngp_field(*field); + + EXPECT_TRUE((std::is_same_v::MemSpace, stk::mesh::NgpMeshDefaultMemSpace>)); + EXPECT_TRUE((std::is_same_v::MemSpace, std::remove_reference_t::MemSpace>)); +} + +TEST_F(NgpFieldExecSpaceTestFixture, UseNonDefaultMemSpace) +{ + if (get_parallel_size() != 1) GTEST_SKIP(); + setup_empty_mesh_and_field(); + auto field = get_default_field(); + + EXPECT_NO_THROW((stk::mesh::get_updated_ngp_field(*field))); + +#ifdef STK_ENABLE_GPU + EXPECT_ANY_THROW( +#else + EXPECT_NO_THROW( +#endif + (stk::mesh::get_updated_ngp_field(*field))); + +#ifdef STK_ENABLE_GPU + EXPECT_ANY_THROW( +#else + EXPECT_NO_THROW( +#endif + (stk::mesh::get_updated_ngp_field(*field))); +} + } // namespace ngp_field_test diff --git a/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_field_copier.cpp b/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_field_copier.cpp index ec39418446e8..0089d3514845 100644 --- a/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_field_copier.cpp +++ b/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_field_copier.cpp @@ -74,7 +74,8 @@ void check_field(std::shared_ptr bulkDataPtr, stk::mesh::Fi { const stk::mesh::FieldBase& coordField = 
*(bulkDataPtr->mesh_meta_data_ptr()->coordinate_field()); - stk::mesh::Selector selector(field); + auto meshMetaDataPtr = bulkDataPtr->mesh_meta_data_ptr(); + stk::mesh::Selector selector(field & (meshMetaDataPtr->locally_owned_part() | meshMetaDataPtr->globally_shared_part())); const stk::mesh::BucketVector& buckets = bulkDataPtr->get_buckets(stk::topology::NODE_RANK, selector); for (stk::mesh::Bucket* bucket : buckets) @@ -100,9 +101,9 @@ void check_field(std::shared_ptr bulkDataPtr, stk::mesh::Fi TEST(StkFieldCopier, MiddleMeshToStk) { - std::string meshFileName1 = "generated:3x3x1|sideset:Z|bbox:0,0,0,1,1,1"; + std::string meshFileName1 = "generated:3x3x2|sideset:Z|bbox:0,0,0,1,1,1"; std::string partName1 = "surface_1"; - stk_interface::StkMeshCreator creator1(meshFileName1, "NONE", MPI_COMM_WORLD); + stk_interface::StkMeshCreator creator1(meshFileName1, "RCB", MPI_COMM_WORLD); stk_interface::MeshPart meshPart = creator1.create_mesh_from_part(partName1); mesh::FieldPtr meshField = mesh::create_field(meshPart.mesh, mesh::FieldShape(2, 0, 0), 3); @@ -121,7 +122,7 @@ TEST(StkFieldCopier, MiddleMeshToStk) TEST(StkFieldCopier, StkToMiddleMesh) { - std::string meshFileName1 = "generated:3x3x1|sideset:Z|bbox:0,0,0,1,1,1"; + std::string meshFileName1 = "generated:3x3x2|sideset:Z|bbox:0,0,0,1,1,1"; std::string partName1 = "surface_1"; stk_interface::StkMeshCreator creator1(meshFileName1, "NONE", MPI_COMM_WORLD); stk_interface::MeshPart meshPart = creator1.create_mesh_from_part(partName1); @@ -139,4 +140,4 @@ TEST(StkFieldCopier, StkToMiddleMesh) copier.copy(meshField, *stkField); check_field(creator1.get_bulk_data_ptr(), *stkField); -} \ No newline at end of file +} diff --git a/packages/stk/stk_unit_tests/stk_ngp_test/utest_VirtualFunction.cpp b/packages/stk/stk_unit_tests/stk_ngp_test/utest_VirtualFunction.cpp index 0f975a2adcb6..811691769c87 100644 --- a/packages/stk/stk_unit_tests/stk_ngp_test/utest_VirtualFunction.cpp +++ 
b/packages/stk/stk_unit_tests/stk_ngp_test/utest_VirtualFunction.cpp @@ -70,6 +70,7 @@ struct SimpleStruct { }; struct BaseStruct { + virtual ~BaseStruct() = default; virtual void set_i(const int) = 0; KOKKOS_FUNCTION virtual void print() { diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp index 25ced2655de8..8ac8f984a259 100644 --- a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp @@ -401,4 +401,44 @@ TEST(CoarseSearchCorrectness, Ngp_Local_NotQuiteEdgeOverlappingBoxes_ARBORX) device_local_runTwoBoxTest(stk::search::ARBORX, distanceBetweenBoxCenters, boxSize, expectedNumOverlap); } +TEST(CoarseSearchCorrectness, UpdateInteriorNodeBVsAtomicsIssueReproducer) +{ + std::vector> boxes(256); + + double coord_min = -2.1; + double coord_max = 2.1; + + int x_points = 5; + int y_points = 5; + int z_points = 9; + + for (int i=0; i < z_points; i++) { + double z_coord_min = coord_min + 1*i; + double z_coord_max = coord_max + 1*i; + + for (int j=0; j < y_points; j++) { + double y_coord_min = coord_min + 1*j; + double y_coord_max = coord_max + 1*j; + + for (int k=0; k < x_points; k++) { + double x_coord_min = coord_min + 1*k; + double x_coord_max = coord_max + 1*k; + + int index = k + x_points*j + x_points*y_points*i; + boxes[index] = stk::search::Box(x_coord_min, y_coord_min, z_coord_min, + x_coord_max, y_coord_max, z_coord_max); + } + } + } + + using ExecSpace = Kokkos::DefaultExecutionSpace; + stk::search::CollisionList collisions("collision_list"); + stk::search::morton_lbvh_search::value_type, ExecSpace, stk::search::Box>(boxes, boxes, collisions); + collisions.sync_from_device(); + + int numExpectedCollisions = 38125; + EXPECT_EQ(collisions.get_num_collisions(), numExpectedCollisions); + +} + } diff --git a/packages/stk/stk_unit_tests/stk_topology/topology_test_utils.hpp 
b/packages/stk/stk_unit_tests/stk_topology/topology_test_utils.hpp index 5266109ae525..04074a484175 100644 --- a/packages/stk/stk_unit_tests/stk_topology/topology_test_utils.hpp +++ b/packages/stk/stk_unit_tests/stk_topology/topology_test_utils.hpp @@ -130,7 +130,10 @@ inline void check_side_node_ordinals(stk::topology topology, const std::vector 0) ? sideTopo.num_nodes() : 1; std::vector side_node_ordinals(numSideNodes); topology.side_node_ordinals(side, side_node_ordinals.data()); - EXPECT_EQ(gold_side_node_ordinals[side], side_node_ordinals); + + for (unsigned i = 0; i < numSideNodes; ++i) { + EXPECT_EQ(gold_side_node_ordinals[side][i], side_node_ordinals[i]); + } } } diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp index 8ed599943345..10aa5aa01785 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp @@ -71,7 +71,7 @@ std::vector> get_gold_side_node_ordinals_shell_quad4() { }; } -TEST(stk_topology, shell_shell_quad4) +TEST(stk_topology, shell_quad_4) { stk::topology t = stk::topology::SHELL_QUAD_4; @@ -104,10 +104,10 @@ TEST(stk_topology, shell_shell_quad4) EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_4); EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_4); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_2); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(5), stk::topology::LINE_2); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_quad4()); check_edge_nodes(t, 
get_gold_edge_node_ordinals_shell_quad4()); @@ -159,10 +159,10 @@ void check_shell_quad_4_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_4); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_4); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_2); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(5), stk::topology::LINE_2); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); @@ -210,7 +210,7 @@ std::vector> get_gold_side_node_ordinals_shell_quad8() { }; } -TEST(stk_topology, shell_shell_quad8) +TEST(stk_topology, shell_quad_8) { stk::topology t = stk::topology::SHELL_QUAD_8; @@ -243,10 +243,10 @@ TEST(stk_topology, shell_shell_quad8) EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_8); EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_8); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_3); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(5), stk::topology::LINE_3); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_quad8()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_quad8()); @@ -298,10 +298,10 @@ void check_shell_quad_8_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_8); NGP_EXPECT_EQ(t.side_topology(1), 
stk::topology::QUAD_8); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_3); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(5), stk::topology::LINE_3); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); @@ -349,7 +349,7 @@ std::vector> get_gold_side_node_ordinals_shell_quad9() { }; } -TEST(stk_topology, shell_shell_quad9) +TEST(stk_topology, shell_quad_9) { stk::topology t = stk::topology::SHELL_QUAD_9; @@ -382,10 +382,10 @@ TEST(stk_topology, shell_shell_quad9) EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_9); EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_9); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_3); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(5), stk::topology::LINE_3); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_quad9()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_quad9()); @@ -437,10 +437,10 @@ void check_shell_quad_9_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_9); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_9); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - 
NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_3); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(5), stk::topology::LINE_3); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp index 5bf526b6ec65..ff1cad2ab650 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp @@ -76,6 +76,7 @@ TEST(stk_topology, shell_tri_3) EXPECT_TRUE(t.is_valid()); EXPECT_TRUE(t.has_homogeneous_faces()); EXPECT_TRUE(t.is_shell()); + EXPECT_TRUE(t.is_shell_with_face_sides()); //FIXME this will become false EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -101,9 +102,9 @@ TEST(stk_topology, shell_tri_3) EXPECT_EQ(t.side_topology(0), stk::topology::TRI_3); EXPECT_EQ(t.side_topology(1), stk::topology::TRI_3); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_tri3()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_tri3()); @@ -155,9 +156,9 @@ void check_shell_tri_3_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::TRI_3); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::TRI_3); - 
NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); @@ -235,9 +236,9 @@ TEST(stk_topology, shell_tri_4) EXPECT_EQ(t.side_topology(0), stk::topology::TRI_4); EXPECT_EQ(t.side_topology(1), stk::topology::TRI_4); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_tri4()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_tri4()); @@ -289,9 +290,9 @@ void check_shell_tri_4_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::TRI_4); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::TRI_4); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); @@ -369,9 +370,9 @@ TEST(stk_topology, shell_tri_6) EXPECT_EQ(t.side_topology(0), stk::topology::TRI_6); EXPECT_EQ(t.side_topology(1), stk::topology::TRI_6); - 
EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_tri6()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_tri6()); @@ -427,9 +428,9 @@ void check_shell_tri_6_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::TRI_6); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::TRI_6); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri_all_face_sides.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri_all_face_sides.cpp index eb357fe92c4a..dad3e3e13ae4 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri_all_face_sides.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri_all_face_sides.cpp @@ -35,7 +35,7 @@ #include "Kokkos_Core.hpp" // for parallel_for, KOKKOS_LAMBDA #include "gtest/gtest.h" // for AssertionResult, Message, TestPartResult #include "stk_ngp_test/ngp_test.hpp" // for NGP_EXPECT_EQ, NGP_EXPECT_FALSE, NGP_EXPECT_... 
-#include "stk_topology/topology.hpp" // for topology, topology::QUAD_4, topology::QUAD_8 +#include "stk_topology/topology.hpp" #include "topology_test_utils.hpp" // for check_edge_node_ordinals, check_edge_node_or... #include // for size_t #include // for operator<<, basic_ostream, basic_ostream<>::... @@ -78,6 +78,7 @@ TEST(stk_topology, shell_tri_3_all_face_sides) EXPECT_TRUE(t.is_valid()); EXPECT_FALSE(t.has_homogeneous_faces()); EXPECT_TRUE(t.is_shell()); + EXPECT_TRUE(t.is_shell_with_face_sides()); EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -135,6 +136,7 @@ void check_shell_tri_3_all_face_sides_on_device() NGP_EXPECT_TRUE(t.is_valid()); NGP_EXPECT_FALSE(t.has_homogeneous_faces()); NGP_EXPECT_TRUE(t.is_shell()); + NGP_EXPECT_TRUE(t.is_shell_with_face_sides()); NGP_EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); NGP_EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -221,6 +223,7 @@ TEST(stk_topology, shell_tri_4_all_face_sides) EXPECT_TRUE(t.is_valid()); EXPECT_FALSE(t.has_homogeneous_faces()); EXPECT_TRUE(t.is_shell()); + EXPECT_TRUE(t.is_shell_with_face_sides()); EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -278,6 +281,7 @@ void check_shell_tri_4_all_face_sides_on_device() NGP_EXPECT_TRUE(t.is_valid()); NGP_EXPECT_FALSE(t.has_homogeneous_faces()); NGP_EXPECT_TRUE(t.is_shell()); + NGP_EXPECT_TRUE(t.is_shell_with_face_sides()); NGP_EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); NGP_EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -364,6 +368,7 @@ TEST(stk_topology, shell_tri_6_all_face_sides) EXPECT_TRUE(t.is_valid()); EXPECT_FALSE(t.has_homogeneous_faces()); EXPECT_TRUE(t.is_shell()); + EXPECT_TRUE(t.is_shell_with_face_sides()); EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -421,6 +426,7 @@ void check_shell_tri_6_all_face_sides_on_device() NGP_EXPECT_TRUE(t.is_valid()); 
NGP_EXPECT_FALSE(t.has_homogeneous_faces()); NGP_EXPECT_TRUE(t.is_shell()); + NGP_EXPECT_TRUE(t.is_shell_with_face_sides()); NGP_EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); NGP_EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); diff --git a/packages/stk/stk_unit_tests/stk_util/diag/UnitTestParallelTimerImpl.cpp b/packages/stk/stk_unit_tests/stk_util/diag/UnitTestParallelTimerImpl.cpp new file mode 100644 index 000000000000..d28a0568e7d8 --- /dev/null +++ b/packages/stk/stk_unit_tests/stk_util/diag/UnitTestParallelTimerImpl.cpp @@ -0,0 +1,255 @@ +#include "gtest/gtest.h" +#include "stk_util/diag/ParallelTimerImpl.hpp" +#include "stk_util/diag/Timer.hpp" +#include "stk_util/diag/TimerImpl.hpp" +#include "stk_util/diag/TimerMetricTraits.hpp" +#include "stk_util/parallel/Parallel.hpp" + +namespace { +stk::diag::impl::ParallelTimer create_timer(const std::string& name, double val) +{ + stk::diag::impl::ParallelTimer timer; + timer.m_name = name; + timer.m_cpuTime.m_value = val; + timer.m_cpuTime.m_sum = val; + timer.m_cpuTime.m_min = val; + timer.m_cpuTime.m_max = val; + + return timer; +} +} + +namespace stk::diag { + +class TimerTester +{ + public: + TimerTester(Timer& timer) : + m_timer(timer) + {} + + double getCPUTime() const + { + return m_timer.getMetric().m_accumulatedLap; + } + + void setCPUTime(double val) + { + m_timer.m_timerImpl->m_cpuTime.m_accumulatedLap = val; + } + + private: + Timer& m_timer; +}; +} + +TEST(ParallelTimer, MergeSingleLevelTimers) +{ + double val1 = 1.0, val2 = 2.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + + stk::diag::impl::merge_parallel_timer(t1, t2, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val2); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val2); +} + +TEST(ParallelTimer, MergeTwoLevelTimers) +{ + double val1 = 1.0, val2 = 2.0, val3 = 3.0, val4 = 4.0; + 
stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + stk::diag::impl::ParallelTimer t3 = create_timer("timer3", val3); + stk::diag::impl::ParallelTimer t4 = create_timer("timer2", val4); + + t1.m_subtimerList.push_back(t2); + t3.m_subtimerList.push_back(t4); + + stk::diag::impl::merge_parallel_timer(t1, t3, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val3); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val3); + EXPECT_EQ(t1.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t2Merged = t1.m_subtimerList.front(); + EXPECT_EQ(t2Merged.m_cpuTime.m_value, val2); + EXPECT_EQ(t2Merged.m_cpuTime.m_sum, val2 + val4); + EXPECT_EQ(t2Merged.m_cpuTime.m_min, val2); + EXPECT_EQ(t2Merged.m_cpuTime.m_max, val4); +} + +TEST(ParallelTimer, MergeTwoLevelTimersDifferentNames) +{ + double val1 = 1.0, val2 = 2.0, val3 = 3.0, val4 = 4.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + stk::diag::impl::ParallelTimer t3 = create_timer("timer3", val3); + stk::diag::impl::ParallelTimer t4 = create_timer("timer4", val4); + + t1.m_subtimerList.push_back(t2); + t3.m_subtimerList.push_back(t4); + + stk::diag::impl::merge_parallel_timer(t1, t3, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val3); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val3); + EXPECT_EQ(t1.m_subtimerList.size(), 2U); + + stk::diag::impl::ParallelTimer t2Copy = t1.m_subtimerList.front(); + EXPECT_EQ(t2Copy.m_cpuTime.m_value, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_sum, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_min, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_max, val2); + + stk::diag::impl::ParallelTimer t4Copy = *(++t1.m_subtimerList.begin()); + EXPECT_EQ(t4Copy.m_cpuTime.m_value, val4); + 
EXPECT_EQ(t4Copy.m_cpuTime.m_sum, val4); + EXPECT_EQ(t4Copy.m_cpuTime.m_min, val4); + EXPECT_EQ(t4Copy.m_cpuTime.m_max, val4); +} + +TEST(ParallelTimer, MergeThreeLevelTimers) +{ + double val1 = 1.0, val2 = 2.0, val3 = 3.0, val4 = 4.0; + double val5 = 5.0, val6 = 6.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + stk::diag::impl::ParallelTimer t3 = create_timer("timer3", val3); + stk::diag::impl::ParallelTimer t4 = create_timer("timer1", val4); + stk::diag::impl::ParallelTimer t5 = create_timer("timer2", val5); + stk::diag::impl::ParallelTimer t6 = create_timer("timer3", val6); + + t2.m_subtimerList.push_back(t3); + t1.m_subtimerList.push_back(t2); + + t5.m_subtimerList.push_back(t6); + t4.m_subtimerList.push_back(t5); + + stk::diag::impl::merge_parallel_timer(t1, t4, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val4); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val4); + EXPECT_EQ(t1.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t2Merged = t1.m_subtimerList.front(); + EXPECT_EQ(t2Merged.m_cpuTime.m_value, val2); + EXPECT_EQ(t2Merged.m_cpuTime.m_sum, val2 + val5); + EXPECT_EQ(t2Merged.m_cpuTime.m_min, val2); + EXPECT_EQ(t2Merged.m_cpuTime.m_max, val5); + EXPECT_EQ(t2Merged.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t3Merged = t2Merged.m_subtimerList.front(); + EXPECT_EQ(t3Merged.m_cpuTime.m_value, val3); + EXPECT_EQ(t3Merged.m_cpuTime.m_sum, val3 + val6); + EXPECT_EQ(t3Merged.m_cpuTime.m_min, val3); + EXPECT_EQ(t3Merged.m_cpuTime.m_max, val6); + EXPECT_EQ(t3Merged.m_subtimerList.size(), 0U); +} + +TEST(ParallelTimer, MergeThreeLevelTimersDifferentNames) +{ + double val1 = 1.0, val2 = 2.0, val3 = 3.0, val4 = 4.0; + double val5 = 5.0, val6 = 6.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = 
create_timer("timer2", val2); + stk::diag::impl::ParallelTimer t3 = create_timer("timer3", val3); + stk::diag::impl::ParallelTimer t4 = create_timer("timer4", val4); + stk::diag::impl::ParallelTimer t5 = create_timer("timer5", val5); + stk::diag::impl::ParallelTimer t6 = create_timer("timer6", val6); + + t2.m_subtimerList.push_back(t3); + t1.m_subtimerList.push_back(t2); + + t5.m_subtimerList.push_back(t6); + t4.m_subtimerList.push_back(t5); + + stk::diag::impl::merge_parallel_timer(t1, t4, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val4); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val4); + EXPECT_EQ(t1.m_subtimerList.size(), 2U); + + stk::diag::impl::ParallelTimer t2Copy = t1.m_subtimerList.front(); + EXPECT_EQ(t2Copy.m_cpuTime.m_value, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_sum, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_min, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_max, val2); + EXPECT_EQ(t2Copy.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t5Copy = t1.m_subtimerList.back(); + EXPECT_EQ(t5Copy.m_cpuTime.m_value, val5); + EXPECT_EQ(t5Copy.m_cpuTime.m_sum, val5); + EXPECT_EQ(t5Copy.m_cpuTime.m_min, val5); + EXPECT_EQ(t5Copy.m_cpuTime.m_max, val5); + EXPECT_EQ(t5Copy.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t3Copy = t2Copy.m_subtimerList.front(); + EXPECT_EQ(t3Copy.m_cpuTime.m_value, val3); + EXPECT_EQ(t3Copy.m_cpuTime.m_sum, val3); + EXPECT_EQ(t3Copy.m_cpuTime.m_min, val3); + EXPECT_EQ(t3Copy.m_cpuTime.m_max, val3); + EXPECT_EQ(t3Copy.m_subtimerList.size(), 0U); + + stk::diag::impl::ParallelTimer t6Copy = t5Copy.m_subtimerList.front(); + EXPECT_EQ(t6Copy.m_cpuTime.m_value, val6); + EXPECT_EQ(t6Copy.m_cpuTime.m_sum, val6); + EXPECT_EQ(t6Copy.m_cpuTime.m_min, val6); + EXPECT_EQ(t6Copy.m_cpuTime.m_max, val6); + EXPECT_EQ(t6Copy.m_subtimerList.size(), 0U); +} + + +TEST(ParallelTimer, CollectTimersChunkSize1) +{ + stk::ParallelMachine comm = 
stk::parallel_machine_world(); + int commSize = stk::parallel_machine_size(comm); + int commRank = stk::parallel_machine_rank(comm); + double cpuTimeVal = commRank; + stk::diag::Timer rootTimer = stk::diag::createRootTimer("rootTimer", stk::diag::TimerSet(stk::diag::getEnabledTimerMetricsMask())); + stk::diag::TimerTester(rootTimer).setCPUTime(cpuTimeVal); + + const int maxProcsPerGather = 1; + stk::diag::impl::ParallelTimer parallelTimer = stk::diag::impl::collect_timers(rootTimer, false, comm, maxProcsPerGather); + + if (commRank == 0) + { + EXPECT_EQ(parallelTimer.m_cpuTime.m_min, 0.0); + EXPECT_EQ(parallelTimer.m_cpuTime.m_max, commSize - 1); + EXPECT_EQ(parallelTimer.m_cpuTime.m_sum, commSize * (commSize - 1) / 2.0); + } + + stk::diag::deleteRootTimer(rootTimer); +} + +TEST(ParallelTimer, CollectTimersChunkSize2) +{ + stk::ParallelMachine comm = stk::parallel_machine_world(); + int commSize = stk::parallel_machine_size(comm); + int commRank = stk::parallel_machine_rank(comm); + double cpuTimeVal = commRank + 1; + stk::diag::Timer rootTimer = stk::diag::createRootTimer("rootTimer", stk::diag::TimerSet(stk::diag::getEnabledTimerMetricsMask())); + stk::diag::TimerTester(rootTimer).setCPUTime(cpuTimeVal); + + const int maxProcsPerGather = 2; + stk::diag::impl::ParallelTimer parallelTimer = stk::diag::impl::collect_timers(rootTimer, false, comm, maxProcsPerGather); + + if (commRank == 0) + { + EXPECT_EQ(parallelTimer.m_cpuTime.m_min, 1.0); + EXPECT_EQ(parallelTimer.m_cpuTime.m_max, commSize); + EXPECT_EQ(parallelTimer.m_cpuTime.m_sum, commSize * (1 + commSize) / 2.0); + } + + stk::diag::deleteRootTimer(rootTimer); +} \ No newline at end of file diff --git a/packages/stk/stk_unit_tests/stk_util/parallel/UnitTestParallelComm.cpp b/packages/stk/stk_unit_tests/stk_util/parallel/UnitTestParallelComm.cpp index 64098c3ae44f..bdf5390c59e2 100644 --- a/packages/stk/stk_unit_tests/stk_util/parallel/UnitTestParallelComm.cpp +++ 
b/packages/stk/stk_unit_tests/stk_util/parallel/UnitTestParallelComm.cpp @@ -245,10 +245,10 @@ class DenseParallelCommTesterBase : public ParallelCommTester set_send_buffers_values(); } - void set_recv_buffer_sizes(std::vector< std::vector >& recvLists) + void set_recv_buffer_sizes(std::vector< std::vector >& rcvLists) { for (int src=0; src < commSize; ++src) { - recvLists[src].resize(this->get_size(src, myrank)); + rcvLists[src].resize(this->get_size(src, myrank)); } } @@ -256,10 +256,10 @@ class DenseParallelCommTesterBase : public ParallelCommTester virtual int get_num_recvs() override { return commSize; } - void test_results(std::vector< std::vector >& recvLists) + void test_results(std::vector< std::vector >& rcvLists) { for (int src=0; src < commSize; ++src) { - test_recv_vals(recvLists[src], src); + test_recv_vals(rcvLists[src], src); } } @@ -275,9 +275,9 @@ class DenseParallelCommTesterBase : public ParallelCommTester } } - void test_send_ranks(std::vector< std::vector >& sendLists) + void test_send_ranks(std::vector< std::vector >& sndLists) { - test_ranks_inner(sendLists); + test_ranks_inner(sndLists); } @@ -348,22 +348,22 @@ class NeighborParallelCommTesterBase : public ParallelCommTester set_send_buffers_values(); } - void set_recv_buffer_sizes(std::vector< std::vector >& recvLists) + void set_recv_buffer_sizes(std::vector< std::vector >& rcvLists) { int src1 = (myrank - 1 + commSize) % commSize; int src2 = (myrank - 2 + commSize) % commSize; - recvLists[src1].resize(this->get_size(src1, myrank)); - recvLists[src2].resize(this->get_size(src2, myrank)); + rcvLists[src1].resize(this->get_size(src1, myrank)); + rcvLists[src2].resize(this->get_size(src2, myrank)); } virtual int get_num_sends() override { return std::min(2, commSize); } virtual int get_num_recvs() override { return std::min(2, commSize); } - void test_results(std::vector< std::vector >& recvLists) + void test_results(std::vector< std::vector >& rcvLists) { for (int src=0; src < commSize; 
++src) { - test_recv_vals(recvLists[src], src); + test_recv_vals(rcvLists[src], src); } } @@ -382,10 +382,10 @@ class NeighborParallelCommTesterBase : public ParallelCommTester } } - void test_send_ranks(std::vector>& sendLists) + void test_send_ranks(std::vector>& sndLists) { - std::vector sendRanks = get_ranks(sendLists); + std::vector sendRanks = get_ranks(sndLists); int len = sendRanks.size(); int dest1 = (myrank + 1) % commSize; @@ -404,9 +404,9 @@ class NeighborParallelCommTesterBase : public ParallelCommTester } } - void test_recv_ranks(std::vector>& recvLists) + void test_recv_ranks(std::vector>& rcvLists) { - auto recvRanks = get_ranks(recvLists); + auto recvRanks = get_ranks(rcvLists); int len = recvRanks.size(); int dest1 = (myrank - 1 + commSize) % commSize; diff --git a/packages/stk/stk_unit_tests/stk_util/util/UnitTestStridedArray.cpp b/packages/stk/stk_unit_tests/stk_util/util/UnitTestStridedArray.cpp index eb5d63d7f9f5..3fdf583b1926 100644 --- a/packages/stk/stk_unit_tests/stk_util/util/UnitTestStridedArray.cpp +++ b/packages/stk/stk_unit_tests/stk_util/util/UnitTestStridedArray.cpp @@ -32,8 +32,9 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -#include "gtest/gtest.h" #include "stk_util/util/StridedArray.hpp" +#include "Kokkos_Core.hpp" +#include "gtest/gtest.h" #include TEST( StridedArray, ptr_and_size) diff --git a/packages/stk/stk_util/stk_util/Version.hpp b/packages/stk/stk_util/stk_util/Version.hpp index 4f4efaf1655d..6d7b47a1ec5a 100644 --- a/packages/stk/stk_util/stk_util/Version.hpp +++ b/packages/stk/stk_util/stk_util/Version.hpp @@ -44,7 +44,7 @@ //See the file CHANGELOG.md for a listing that shows the //correspondence between version numbers and API changes. 
-#define STK_VERSION 5210601 +#define STK_VERSION 5230200 namespace stk diff --git a/packages/stk/stk_util/stk_util/command_line/CommandLineParser.hpp b/packages/stk/stk_util/stk_util/command_line/CommandLineParser.hpp index a3c4a8c17459..a5316a474ee6 100644 --- a/packages/stk/stk_util/stk_util/command_line/CommandLineParser.hpp +++ b/packages/stk/stk_util/stk_util/command_line/CommandLineParser.hpp @@ -52,15 +52,14 @@ struct CommandLineOption class CommandLineParser { public: - enum ParseState { ParseComplete, ParseError, ParseHelpOnly, ParseVersionOnly }; - CommandLineParser() : CommandLineParser("Options") {} - explicit CommandLineParser(const std::string &usagePreamble) - : optionsSpec(usagePreamble), - parsedOptions(), - positionalIndex(0) - { - add_flag("help,h", "display this help message and exit"); - add_flag("version,v", "display version information and exit"); + virtual ~CommandLineParser() = default; + enum ParseState { ParseComplete, ParseError, ParseHelpOnly, ParseVersionOnly }; + CommandLineParser() : CommandLineParser("Options") {} + explicit CommandLineParser(const std::string &usagePreamble) + : optionsSpec(usagePreamble), parsedOptions(), positionalIndex(0) + { + add_flag("help,h", "display this help message and exit"); + add_flag("version,v", "display version information and exit"); } void disallow_unrecognized() diff --git a/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.cpp b/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.cpp new file mode 100644 index 000000000000..e8dca41f2527 --- /dev/null +++ b/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.cpp @@ -0,0 +1,254 @@ +#include "ParallelTimerImpl.hpp" +#include "stk_util/util/Marshal.hpp" + +namespace stk::diag::impl { + +ParallelTimer::ParallelTimer() + : m_name(), + m_timerMask(0), + m_subtimerLapCount(0), + m_lapCount(), + m_cpuTime(), + m_wallTime(), + m_MPICount(), + m_MPIByteCount(), + m_heapAlloc(), + m_subtimerList() +{} + +ParallelTimer::ParallelTimer(const 
ParallelTimer ¶llel_timer) + : m_name(parallel_timer.m_name), + m_timerMask(parallel_timer.m_timerMask), + m_subtimerLapCount(parallel_timer.m_subtimerLapCount), + m_lapCount(parallel_timer.m_lapCount), + m_cpuTime(parallel_timer.m_cpuTime), + m_wallTime(parallel_timer.m_wallTime), + m_MPICount(parallel_timer.m_MPICount), + m_MPIByteCount(parallel_timer.m_MPIByteCount), + m_heapAlloc(parallel_timer.m_heapAlloc), + m_subtimerList(parallel_timer.m_subtimerList) +{} + +ParallelTimer &ParallelTimer::operator=(const ParallelTimer ¶llel_timer) { + m_name = parallel_timer.m_name; + m_timerMask = parallel_timer.m_timerMask; + m_subtimerLapCount = parallel_timer.m_subtimerLapCount; + m_lapCount = parallel_timer.m_lapCount; + m_cpuTime = parallel_timer.m_cpuTime; + m_wallTime = parallel_timer.m_wallTime; + m_MPICount = parallel_timer.m_MPICount; + m_heapAlloc = parallel_timer.m_heapAlloc; + m_subtimerList = parallel_timer.m_subtimerList; + + return *this; +} + + +Writer & +ParallelTimer::dump(Writer &dout) const { + if (dout.shouldPrint()) { + dout << "ParallelTimer " << m_name << push << dendl; + dout << "m_name " << m_name << dendl; + dout << "m_timerMask " << hex << m_timerMask << dendl; + dout << "m_subtimerLapCount " << m_subtimerLapCount << dendl; + dout << "m_lapCount " << m_lapCount << dendl; + dout << "m_cpuTime " << m_cpuTime << dendl; + dout << "m_wallTime " << m_wallTime << dendl; + dout << "m_MPICount " << m_MPICount << dendl; + dout << "m_MPIByteCount " << m_MPIByteCount << dendl; + dout << "m_heapAlloc " << m_heapAlloc << dendl; + dout << "m_subtimerList " << m_subtimerList << dendl; + dout << pop; + } + return dout; +} + +void +merge_parallel_timer( + ParallelTimer & p0, + const ParallelTimer & p1, + bool checkpoint) +{ + p0.m_timerMask = p1.m_timerMask; + p0.m_subtimerLapCount += p1.m_subtimerLapCount; + p0.m_lapCount.accumulate(p1.m_lapCount, checkpoint); + p0.m_cpuTime.accumulate(p1.m_cpuTime, checkpoint); + p0.m_wallTime.accumulate(p1.m_wallTime, 
checkpoint); + p0.m_MPICount.accumulate(p1.m_MPICount, checkpoint); + p0.m_MPIByteCount.accumulate(p1.m_MPIByteCount, checkpoint); + p0.m_heapAlloc.accumulate(p1.m_heapAlloc, checkpoint); + + + for (std::list::const_iterator p1_it = p1.m_subtimerList.begin(); p1_it != p1.m_subtimerList.end(); ++p1_it) { + std::list::iterator p0_it = std::find_if(p0.m_subtimerList.begin(), p0.m_subtimerList.end(), finder((*p1_it).m_name)); + if (p0_it == p0.m_subtimerList.end()) { + p0.m_subtimerList.push_back((*p1_it)); + } + else + merge_parallel_timer(*p0_it, *p1_it, checkpoint); + } +} + +stk::Marshal &operator>>(stk::Marshal &min, ParallelTimer &t) { + min >> t.m_name >> t.m_timerMask >> t.m_subtimerLapCount + >> t.m_lapCount.m_value + >> t.m_lapCount.m_checkpoint + >> t.m_cpuTime.m_value + >> t.m_cpuTime.m_checkpoint + >> t.m_wallTime.m_value + >> t.m_wallTime.m_checkpoint + >> t.m_MPICount.m_value + >> t.m_MPICount.m_checkpoint + >> t.m_MPIByteCount.m_value + >> t.m_MPIByteCount.m_checkpoint + >> t.m_heapAlloc.m_value + >> t.m_heapAlloc.m_checkpoint; + + min >> t.m_subtimerList; + + return min; +} + + +#ifdef STK_HAS_MPI +size_t round_up_to_next_word(size_t value) +{ + const size_t SIZE_OF_WORD = 4; + size_t remainder = value % SIZE_OF_WORD; + if (remainder == 0) { + return value; + } + return value + SIZE_OF_WORD - remainder; +} +#endif + +ParallelTimer +collect_timers( + const Timer & root_timer, + bool checkpoint, + ParallelMachine comm, + const int max_procs_per_gather) +{ + Marshal mout; + mout << root_timer; + impl::ParallelTimer root_parallel_timer; + +#ifdef STK_HAS_MPI + const int parallel_root = 0 ; + const int parallel_size = parallel_machine_size(comm); + const int parallel_rank = parallel_machine_rank(comm); + + // Gather the send counts on root processor + std::string send_string(mout.str()); + int send_count = send_string.size(); + send_string.resize(round_up_to_next_word(send_count)); + int padded_send_count = send_string.size(); + + + //We need to gather the 
timer data in a number of 'cycles' where we + //only receive from a portion of the other processors each cycle. + //This is because buffer allocation-failures have been observed for + //runs on very large numbers of processors if the 'root' processor tries + //to allocate a buffer large enough to hold timing data from all other + //procesors. + //We will set an arbitrary limit for now, making sure that no more than + //a given number of processors' worth of timer data is gathered at a time. + int num_cycles = parallel_size/max_procs_per_gather; + if (parallel_size < max_procs_per_gather || num_cycles < 1) { + num_cycles = 1; + } + + std::vector recv_buffer; + + for(int ii=0; ii recv_count(parallel_size, 0); + std::vector padded_recv_count(parallel_size, 0); + + { + int result = MPI_Gather(&send_count_this_cycle, 1, MPI_INT, + recv_count.data(), 1, MPI_INT, + parallel_root, comm); + if (MPI_SUCCESS != result) { + std::ostringstream message ; + message << "stk::diag::collect_timers FAILED: send_count MPI_Gather = " << result ; + throw std::runtime_error(message.str()); + } + } + + { + int result = MPI_Gather(&padded_send_count_this_cycle, 1, MPI_INT, + padded_recv_count.data(), 1, MPI_INT, + parallel_root, comm); + if (MPI_SUCCESS != result) { + std::ostringstream message ; + message << "stk::diag::collect_timers FAILED: padded_send_count MPI_Gather = " << result ; + throw std::runtime_error(message.str()); + } + } + + // Receive counts are only non-zero on the root processor: + std::vector recv_displ(parallel_size + 1, 0); + std::vector recv_end(parallel_size + 1, 0); + + for (int i = 0 ; i < parallel_size ; ++i) { + recv_displ[i + 1] = recv_displ[i] + padded_recv_count[i] ; + recv_end[i] = recv_displ[i] + recv_count[i] ; + } + + const int recv_size = recv_displ[parallel_size] ; + + recv_buffer.assign(recv_size, 0); + + { + int result = MPI_Gatherv(send_string.data(), padded_send_count_this_cycle, MPI_CHAR, + recv_buffer.data(), padded_recv_count.data(), 
recv_displ.data(), MPI_CHAR, + parallel_root, comm); + if (MPI_SUCCESS != result) { + std::ostringstream message ; + message << "stk::diag::collect_timers FAILED: MPI_Gatherv = " << result ; + throw std::runtime_error(message.str()); + } + + std::vector parallel_timer_vector; + parallel_timer_vector.reserve(parallel_size); + + if (parallel_rank == parallel_root) { + for (int j = 0; j < parallel_size; ++j) { + int received_count = recv_displ[j+1] - recv_displ[j]; + if (received_count > 0) { + //grow parallel_timer_vector by 1: + parallel_timer_vector.resize(parallel_timer_vector.size()+1); + Marshal min(std::string(recv_buffer.data() + recv_displ[j], recv_buffer.data() + recv_end[j])); + //put this data into the last entry of parallel_timer_vector: + min >> parallel_timer_vector[parallel_timer_vector.size()-1]; + } + } + + if (parallel_rank==parallel_root && send_count_this_cycle>0) + { + root_parallel_timer = parallel_timer_vector[0]; + } + + for (size_t j = 0; j < parallel_timer_vector.size(); ++j) + { + merge_parallel_timer(root_parallel_timer, parallel_timer_vector[j], checkpoint); + } + } + } + } +#else + Marshal min(mout.str()); + min >> root_parallel_timer; + merge_parallel_timer(root_parallel_timer, root_parallel_timer, checkpoint); +#endif + + return root_parallel_timer; +} + +} \ No newline at end of file diff --git a/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.hpp b/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.hpp new file mode 100644 index 000000000000..c18de9b4774b --- /dev/null +++ b/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.hpp @@ -0,0 +1,210 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + +#ifndef STK_UTIL_DIAG_ParallelTimerImpl_hpp +#define STK_UTIL_DIAG_ParallelTimerImpl_hpp + +#include "stk_util/diag/Timer.hpp" +#include "stk_util/util/Writer.hpp" +#include "WriterExt.hpp" +#include "stk_util/util/string_case_compare.hpp" // for equal_case +#include "TimerMetricTraits.hpp" +#include +#include +#include + +namespace stk { struct Marshal; } + +namespace stk::diag { +namespace impl { + +struct ParallelTimer +{ + template + struct Metric + { + Metric() + : m_value(0), + m_sum(0.0), + m_min(std::numeric_limits::max()), + m_max(0.0) + {} + + typename MetricTraits::Type m_value; ///< Metric value + typename MetricTraits::Type m_checkpoint; ///< Metric checkpointed value + double m_sum; ///< Reduction sum + double m_min; ///< Reduction min + double m_max; ///< Reduction max + + void accumulate(const Metric &metric, bool checkpoint) { + double value = static_cast(metric.m_value); + if (checkpoint) + value -= static_cast(metric.m_checkpoint); + + m_sum += value; + m_min = std::min(m_min, value); + m_max = std::max(m_max, value); + } + + Writer &dump(Writer &dout) const { + if (dout.shouldPrint()) { + dout << "Metric<" << typeid(typename MetricTraits::Type) << ">" << push << dendl; + dout << "m_value " << m_value << dendl; + dout << "m_checkpoint " << m_value << dendl; + dout << "m_sum " << m_sum << dendl; + dout << "m_min " << m_min << dendl; + dout << "m_max " << m_max << dendl; + dout << pop; + } + return dout; + } + }; + + ParallelTimer(); + + ParallelTimer(const ParallelTimer ¶llel_timer); + + ParallelTimer &operator=(const ParallelTimer ¶llel_timer); + + template + const Metric &getMetric() const; + + std::string m_name; ///< Name of the timer + TimerMask m_timerMask; + double m_subtimerLapCount; ///< Sum of subtimer lap counts and m_lapCount + + Metric m_lapCount; ///< Number of laps accumulated + Metric m_cpuTime; ///< CPU time + Metric m_wallTime; ///< Wall time + Metric m_MPICount; ///< MPI call count + Metric m_MPIByteCount; ///< MPI byte 
count + Metric m_heapAlloc; ///< MPI byte count + + std::list m_subtimerList; ///< Sub timers + + Writer &dump(Writer &dout) const; +}; + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_lapCount; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_cpuTime; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_wallTime; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_MPICount; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_MPIByteCount; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_heapAlloc; +} + + +template +Writer &operator<<(Writer &dout, const ParallelTimer::Metric &t) { + return t.dump(dout); +} + +inline Writer &operator<<(Writer &dout, const ParallelTimer ¶llel_timer) { + return parallel_timer.dump(dout); +} + +stk::Marshal &operator>>(stk::Marshal &min, ParallelTimer &t); + +#ifdef __INTEL_COMPILER +#pragma warning(push) +#pragma warning(disable: 444) +#endif +class finder +{ +public: + finder(const std::string &name) + : m_name(name) + {} + + bool operator()(const ParallelTimer ¶llel_timer) const { + return equal_case(parallel_timer.m_name, m_name); + } + +private: + std::string m_name; +}; +#ifdef __INTEL_COMPILER +#pragma warning(pop) +#endif + +void +merge_parallel_timer( + ParallelTimer & p0, + const ParallelTimer & p1, + bool checkpoint); + +ParallelTimer +collect_timers( + const Timer & root_timer, + bool checkpoint, + ParallelMachine comm, + const int max_procs_per_gather = 64); + +} +} + +#endif diff --git a/packages/stk/stk_util/stk_util/diag/PrintTimer.cpp b/packages/stk/stk_util/stk_util/diag/PrintTimer.cpp index 209a947bf996..48b2ec3a988a 100644 --- a/packages/stk/stk_util/stk_util/diag/PrintTimer.cpp +++ 
b/packages/stk/stk_util/stk_util/diag/PrintTimer.cpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,19 +30,19 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #include "stk_util/diag/PrintTimer.hpp" #include "stk_util/diag/PrintTable.hpp" // for operator<<, PrintTable, end_col, PrintT... #include "stk_util/diag/Timer.hpp" // for getEnabledTimerMetricsMask, Timer, Time... #include "stk_util/diag/WriterExt.hpp" // for operator<< +#include "stk_util/diag/ParallelTimerImpl.hpp" #include "stk_util/environment/WallTime.hpp" // for wall_time #include "stk_util/parallel/Parallel.hpp" // for parallel_machine_rank, MPI_Gather, para... 
#include "stk_util/stk_config.h" // for STK_HAS_MPI #include "stk_util/util/Marshal.hpp" // for operator>>, Marshal, operator<< #include "stk_util/util/Writer.hpp" // for operator<<, Writer, dendl, pop, push #include "stk_util/util/WriterManip.hpp" // for hex -#include "stk_util/util/string_case_compare.hpp" // for equal_case #include // for size_t #include // for find_if, max, min #include // for unary_function @@ -54,17 +54,6 @@ #include // for string, char_traits, operator<< #include // for vector -namespace stk { namespace diag { namespace { struct ParallelTimer; } } } - -namespace stk { - -template -Marshal &operator<<(Marshal &mout, const diag::Timer::Metric &t); - -Marshal &operator<<(Marshal &mout, const diag::Timer &t); - -Marshal &operator>>(Marshal &min, diag::ParallelTimer &t); -} namespace stk { namespace diag { @@ -120,7 +109,7 @@ Percent::operator()( strout << "(" << std::setw(5) << std::setprecision(1) << std::fixed << ratio << "%)"; else if (ratio >= 10.0) strout << "(" << std::setw(5) << std::setprecision(2) << std::fixed << ratio << "%)"; - else + else strout << "(" << std::setw(5) << std::setprecision(3) << std::fixed << ratio << "%)"; } @@ -131,370 +120,6 @@ inline std::ostream &operator<<(std::ostream &os, const Percent &p) { return p(os); } -struct ParallelTimer -{ - template - struct Metric - { - Metric() - : m_value(0), - m_sum(0.0), - m_min(std::numeric_limits::max()), - m_max(0.0) - {} - - typename MetricTraits::Type m_value; ///< Metric value - typename MetricTraits::Type m_checkpoint; ///< Metric checkpointed value - double m_sum; ///< Reduction sum - double m_min; ///< Reduction min - double m_max; ///< Reduction max - - void accumulate(const Metric &metric, bool checkpoint) { - double value = static_cast(metric.m_value); - if (checkpoint) - value -= static_cast(metric.m_checkpoint); - - m_sum += value; - m_min = std::min(m_min, value); - m_max = std::max(m_max, value); - } - - Writer &dump(Writer &dout) const { - if 
(dout.shouldPrint()) { - dout << "Metric<" << typeid(typename MetricTraits::Type) << ">" << push << dendl; - dout << "m_value " << m_value << dendl; - dout << "m_checkpoint " << m_value << dendl; - dout << "m_sum " << m_sum << dendl; - dout << "m_min " << m_min << dendl; - dout << "m_max " << m_max << dendl; - dout << pop; - } - return dout; - } - }; - - ParallelTimer() - : m_name(), - m_timerMask(0), - m_subtimerLapCount(0), - m_lapCount(), - m_cpuTime(), - m_wallTime(), - m_MPICount(), - m_MPIByteCount(), - m_heapAlloc(), - m_subtimerList() - {} - - ParallelTimer(const ParallelTimer ¶llel_timer) - : m_name(parallel_timer.m_name), - m_timerMask(parallel_timer.m_timerMask), - m_subtimerLapCount(parallel_timer.m_subtimerLapCount), - m_lapCount(parallel_timer.m_lapCount), - m_cpuTime(parallel_timer.m_cpuTime), - m_wallTime(parallel_timer.m_wallTime), - m_MPICount(parallel_timer.m_MPICount), - m_MPIByteCount(parallel_timer.m_MPIByteCount), - m_heapAlloc(parallel_timer.m_heapAlloc), - m_subtimerList(parallel_timer.m_subtimerList) - {} - - ParallelTimer &operator=(const ParallelTimer ¶llel_timer) { - m_name = parallel_timer.m_name; - m_timerMask = parallel_timer.m_timerMask; - m_subtimerLapCount = parallel_timer.m_subtimerLapCount; - m_lapCount = parallel_timer.m_lapCount; - m_cpuTime = parallel_timer.m_cpuTime; - m_wallTime = parallel_timer.m_wallTime; - m_MPICount = parallel_timer.m_MPICount; - m_heapAlloc = parallel_timer.m_heapAlloc; - m_subtimerList = parallel_timer.m_subtimerList; - - return *this; - } - - template - const Metric &getMetric() const; - - std::string m_name; ///< Name of the timer - TimerMask m_timerMask; - double m_subtimerLapCount; ///< Sum of subtimer lap counts and m_lapCount - - Metric m_lapCount; ///< Number of laps accumulated - Metric m_cpuTime; ///< CPU time - Metric m_wallTime; ///< Wall time - Metric m_MPICount; ///< MPI call count - Metric m_MPIByteCount; ///< MPI byte count - Metric m_heapAlloc; ///< MPI byte count - - std::list 
m_subtimerList; ///< Sub timers - - Writer &dump(Writer &dout) const; -}; - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_lapCount; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_cpuTime; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_wallTime; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_MPICount; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_MPIByteCount; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_heapAlloc; -} - - -template -Writer &operator<<(Writer &dout, const ParallelTimer::Metric &t) { - return t.dump(dout); -} - -Writer &operator<<(Writer &dout, const ParallelTimer ¶llel_timer) { - return parallel_timer.dump(dout); -} - -Writer & -ParallelTimer::dump(Writer &dout) const { - if (dout.shouldPrint()) { - dout << "ParallelTimer " << m_name << push << dendl; - dout << "m_name " << m_name << dendl; - dout << "m_timerMask " << hex << m_timerMask << dendl; - dout << "m_subtimerLapCount " << m_subtimerLapCount << dendl; - dout << "m_lapCount " << m_lapCount << dendl; - dout << "m_cpuTime " << m_cpuTime << dendl; - dout << "m_wallTime " << m_wallTime << dendl; - dout << "m_MPICount " << m_MPICount << dendl; - dout << "m_MPIByteCount " << m_MPIByteCount << dendl; - dout << "m_heapAlloc " << m_heapAlloc << dendl; - dout << "m_subtimerList " << m_subtimerList << dendl; - dout << pop; - } - return dout; -} - -#ifdef __INTEL_COMPILER -#pragma warning(push) -#pragma warning(disable: 444) -#endif -class finder -{ -public: - finder(const std::string &name) - : m_name(name) - {} - - bool operator()(const ParallelTimer ¶llel_timer) const { - return equal_case(parallel_timer.m_name, m_name); - } - -private: - std::string m_name; -}; -#ifdef __INTEL_COMPILER -#pragma warning(pop) 
-#endif - - -void -merge_parallel_timer( - ParallelTimer & p0, - const ParallelTimer & p1, - bool checkpoint) -{ - p0.m_timerMask = p1.m_timerMask; - p0.m_subtimerLapCount += p1.m_subtimerLapCount; - p0.m_lapCount.accumulate(p1.m_lapCount, checkpoint); - p0.m_cpuTime.accumulate(p1.m_cpuTime, checkpoint); - p0.m_wallTime.accumulate(p1.m_wallTime, checkpoint); - p0.m_MPICount.accumulate(p1.m_MPICount, checkpoint); - p0.m_MPIByteCount.accumulate(p1.m_MPIByteCount, checkpoint); - p0.m_heapAlloc.accumulate(p1.m_heapAlloc, checkpoint); - - - for (std::list::const_iterator p1_it = p1.m_subtimerList.begin(); p1_it != p1.m_subtimerList.end(); ++p1_it) { - std::list::iterator p0_it = std::find_if(p0.m_subtimerList.begin(), p0.m_subtimerList.end(), finder((*p1_it).m_name)); - if (p0_it == p0.m_subtimerList.end()) { - p0.m_subtimerList.push_back((*p1_it)); - p0_it = --p0.m_subtimerList.end(); - merge_parallel_timer(*p0_it, *p1_it, checkpoint); - } - else - merge_parallel_timer(*p0_it, *p1_it, checkpoint); - } -} - -#ifdef STK_HAS_MPI -size_t round_up_to_next_word(size_t value) -{ - const size_t SIZE_OF_WORD = 4; - size_t remainder = value % SIZE_OF_WORD; - if (remainder == 0) { - return value; - } - return value + SIZE_OF_WORD - remainder; -} -#endif - -void -collect_timers( - Timer & root_timer, - ParallelTimer & parallel_timer, - bool checkpoint, - ParallelMachine comm) -{ - Marshal mout; - mout << root_timer; - -#ifdef STK_HAS_MPI - const int parallel_root = 0 ; - const int parallel_size = parallel_machine_size(comm); - const int parallel_rank = parallel_machine_rank(comm); - - // Gather the send counts on root processor - std::string send_string(mout.str()); - - ParallelTimer root_parallel_timer; - - //We need to gather the timer data in a number of 'cycles' where we - //only receive from a portion of the other processors each cycle. 
- //This is because buffer allocation-failures have been observed for - //runs on very large numbers of processors if the 'root' processor tries - //to allocate a buffer large enough to hold timing data from all other - //procesors. - //We will set an arbitrary limit for now, making sure that no more than - //64 processors' worth of timer data is gathered at a time. - const int max_procs_per_gather = 64; - int num_cycles = parallel_size/max_procs_per_gather; - if (parallel_size < max_procs_per_gather || num_cycles < 1) { - num_cycles = 1; - } - - std::vector buffer; - - for(int ii=0; ii recv_count(parallel_size, 0); - int * const recv_count_ptr = recv_count.data() ; - std::vector padded_recv_count(parallel_size, 0); - int * const padded_recv_count_ptr = padded_recv_count.data() ; - - //should this processor send on the current cycle ? If not, set send_count to 0. - if ((parallel_rank+ii)%num_cycles!=0) { - send_count = 0; - } - - { - int result = MPI_Gather(&send_count, 1, MPI_INT, - recv_count_ptr, 1, MPI_INT, - parallel_root, comm); - if (MPI_SUCCESS != result) { - std::ostringstream message ; - message << "stk::diag::collect_timers FAILED: send_count MPI_Gather = " << result ; - throw std::runtime_error(message.str()); - } - } - - { - int result = MPI_Gather(&padded_send_count, 1, MPI_INT, - padded_recv_count_ptr, 1, MPI_INT, - parallel_root, comm); - if (MPI_SUCCESS != result) { - std::ostringstream message ; - message << "stk::diag::collect_timers FAILED: padded_send_count MPI_Gather = " << result ; - throw std::runtime_error(message.str()); - } - } - - // Receive counts are only non-zero on the root processor: - std::vector recv_displ(parallel_size + 1, 0); - std::vector recv_end(parallel_size + 1, 0); - - for (int i = 0 ; i < parallel_size ; ++i) { - recv_displ[i + 1] = recv_displ[i] + padded_recv_count[i] ; - recv_end[i] = recv_displ[i] + recv_count[i] ; - } - - const int recv_size = recv_displ[parallel_size] ; - - buffer.assign(recv_size, 0); - - { - const 
char * const send_ptr = send_string.data(); - char * const recv_ptr = recv_size ? buffer.data() : nullptr; - int * const recv_displ_ptr = recv_displ.data() ; - - int result = MPI_Gatherv(const_cast(send_ptr), padded_send_count, MPI_CHAR, - recv_ptr, padded_recv_count_ptr, recv_displ_ptr, MPI_CHAR, - parallel_root, comm); -// int result = MPI_Gather(const_cast(send_ptr), padded_send_count, MPI_CHAR, -// recv_ptr, padded_send_count, MPI_CHAR, -// parallel_root, comm); - if (MPI_SUCCESS != result) { - std::ostringstream message ; - message << "stk::diag::collect_timers FAILED: MPI_Gatherv = " << result ; - throw std::runtime_error(message.str()); - } - - std::vector parallel_timer_vector; - parallel_timer_vector.reserve(parallel_size); - - if (parallel_rank == parallel_root) { - for (int j = 0; j < parallel_size; ++j) { - int received_count = recv_displ[j+1] - recv_displ[j]; - if (received_count > 0) { - //grow parallel_timer_vector by 1: - parallel_timer_vector.resize(parallel_timer_vector.size()+1); - Marshal min(std::string(recv_ptr + recv_displ[j], recv_ptr + recv_end[j])); - //put this data into the last entry of parallel_timer_vector: - min >> parallel_timer_vector[parallel_timer_vector.size()-1]; - } - } - - if (parallel_rank==parallel_root && send_count>0) root_parallel_timer = parallel_timer_vector[0]; - - for (size_t j = 0; j < parallel_timer_vector.size(); ++j) - merge_parallel_timer(root_parallel_timer, parallel_timer_vector[j], checkpoint); - } - } - } - parallel_timer = root_parallel_timer; -#endif -} - // PrintTable &printTable(PrintTable &table, MPI_Comm mpi_comm, MetricsMask metrics_mask) const; PrintTable & @@ -546,8 +171,8 @@ printSubtable( PrintTable & printSubtable( PrintTable & table, - const ParallelTimer & root_timer, - const ParallelTimer & timer, + const impl::ParallelTimer & root_timer, + const impl::ParallelTimer & timer, MetricsMask metrics_mask, int depth, bool timer_checkpoint) @@ -593,14 +218,14 @@ printSubtable( << 
justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits::format(timer.getMetric().m_max) << " " << std::setw(8) << Percent(timer.getMetric().m_max, root_timer.getMetric().m_sum) << end_col; } - else + else table << justify(PrintTable::Cell::LEFT) << indent(depth) << span << timer.m_name << end_col; table << end_row; depth++; } - for (std::list::const_iterator it = timer.m_subtimerList.begin(); it != timer.m_subtimerList.end(); ++it) + for (std::list::const_iterator it = timer.m_subtimerList.begin(); it != timer.m_subtimerList.end(); ++it) printSubtable(table, root_timer, *it, metrics_mask, depth, timer_checkpoint); return table; @@ -661,9 +286,7 @@ printTable( root_timer.accumulateSubtimerLapCounts(); - ParallelTimer parallel_timer; - - stk::diag::collect_timers(root_timer, parallel_timer, timer_checkpoint, parallel_machine); + impl::ParallelTimer parallel_timer = stk::diag::impl::collect_timers(root_timer, timer_checkpoint, parallel_machine); int parallel_rank = parallel_machine_rank(parallel_machine); if (parallel_rank == 0) { @@ -671,7 +294,7 @@ printTable( table.setAutoEndCol(false); table << end_col << end_col; - + if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits::METRIC) table << justify(PrintTable::Cell::CENTER) << MetricTraits::table_header() << end_col << justify(PrintTable::Cell::CENTER) << MetricTraits::table_header() << end_col @@ -722,7 +345,7 @@ printTable( printSubtable(table, parallel_timer, parallel_timer, metrics_mask, 0, timer_checkpoint); } - + if (timer_checkpoint) root_timer.checkpoint(); } @@ -756,15 +379,15 @@ std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask m { double startTimeToPrintTable = stk::wall_time(); stk::PrintTable print_table; - + int parallel_size = parallel_machine_size(parallel_machine); if (parallel_size == 1) printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint); else printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint, 
parallel_machine); - + os << print_table; - + double durationToPrintTable = stk::wall_time() - startTimeToPrintTable; if (parallel_machine_rank(parallel_machine) == 0) printTimeToPrintTable(os, durationToPrintTable); @@ -773,43 +396,5 @@ std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask m } // namespace diag -Marshal &operator<<(stk::Marshal &mout, const diag::Timer &t); - -template -Marshal &operator<<(Marshal &mout, const diag::Timer::Metric &t) { - mout << t.getAccumulatedLap(false) << t.getAccumulatedLap(true); - - return mout; -} - -Marshal &operator<<(Marshal &mout, const diag::Timer &t) { - mout << t.getName() << t.getTimerMask() << t.getSubtimerLapCount() - << t.getMetric() << t.getMetric() << t.getMetric() - << t.getMetric() << t.getMetric() << t.getMetric(); - - mout << t.getTimerList(); - - return mout; -} - -Marshal &operator>>(Marshal &min, diag::ParallelTimer &t) { - min >> t.m_name >> t.m_timerMask >> t.m_subtimerLapCount - >> t.m_lapCount.m_value - >> t.m_lapCount.m_checkpoint - >> t.m_cpuTime.m_value - >> t.m_cpuTime.m_checkpoint - >> t.m_wallTime.m_value - >> t.m_wallTime.m_checkpoint - >> t.m_MPICount.m_value - >> t.m_MPICount.m_checkpoint - >> t.m_MPIByteCount.m_value - >> t.m_MPIByteCount.m_checkpoint - >> t.m_heapAlloc.m_value - >> t.m_heapAlloc.m_checkpoint; - - min >> t.m_subtimerList; - - return min; -} } // namespace stk diff --git a/packages/stk/stk_util/stk_util/diag/PrintTimer.hpp b/packages/stk/stk_util/stk_util/diag/PrintTimer.hpp index fe381e7b2900..8743068c96c2 100644 --- a/packages/stk/stk_util/stk_util/diag/PrintTimer.hpp +++ b/packages/stk/stk_util/stk_util/diag/PrintTimer.hpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. 
-// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,7 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #ifndef STK_UTIL_DIAG_PrintTimer_hpp #define STK_UTIL_DIAG_PrintTimer_hpp diff --git a/packages/stk/stk_util/stk_util/diag/Timer.cpp b/packages/stk/stk_util/stk_util/diag/Timer.cpp index ef85027841ba..2f43b4e378b7 100644 --- a/packages/stk/stk_util/stk_util/diag/Timer.cpp +++ b/packages/stk/stk_util/stk_util/diag/Timer.cpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,13 +30,13 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// +// #include "stk_util/diag/Timer.hpp" +#include "stk_util/diag/TimerImpl.hpp" #include "stk_util/diag/WriterExt.hpp" // for operator<< #include "stk_util/stk_config.h" // for STK_HAS_MPI #include "stk_util/util/Writer.hpp" // for operator<<, Writer, dendl, pop, push -#include "stk_util/util/string_case_compare.hpp" // for equal_case #include // for find_if #include // for exception #include // for unary_function @@ -47,22 +47,8 @@ namespace stk { namespace diag { -namespace { - MetricsMask s_enabledMetricsMask = METRICS_LAP_COUNT | METRICS_CPU_TIME | METRICS_WALL_TIME; ///< Bit mask of enabled metrics -template -typename MetricTraits::Type -value_now() { - if (MetricTraits::METRIC & getEnabledTimerMetricsMask()) - return MetricTraits::value_now(); - else - return 0; -} - -} // namespace - - MetricsMask getEnabledTimerMetricsMask() { return s_enabledMetricsMask; @@ -75,284 +61,6 @@ setEnabledTimerMetricsMask( s_enabledMetricsMask = timer_mask | METRICS_LAP_COUNT; } - -/** - * Class TimerImpl is the core timer class. The Timer class is a - * wrapper around TimerImpl so that the buried references can be constructed more easily. - * - * Each timer has a lap counter, cpu timer, wall timer and other metrics. Each time a timer is - * started, the cpu start time, wall start time and other metrics, set to the process' current - * values. When the timer is stopped, the lap counter is incremented, and the cpu, wall, and other - * values are accumulated with the difference between now and the start time. - * - * Each timer may have a list of subordinate timers. The relationship is purely - * hierarchical in that a there is no timing relationship assumed between the timers other - * than the grouping. There is no relation between the starting and stopping of parent - * and subordinate timers. 
- * - * The subordinate timers are stored as pointers to a new timer on the heap, since the - * calling function will be receiving a reference to this memory which can never change - * location. The subordinate timers are not sorted in the list as they should very - * rarely be created or looked up by name, rather the calling function stores the - * reference via the Timer class. - * - */ -class TimerImpl -{ - friend class Timer; - -public: - static void updateRootTimer(TimerImpl *root_timer); - - static Timer createRootTimer(const std::string &name, const TimerSet &timer_set); - - static void deleteRootTimer(TimerImpl *root_timer); - - static void findTimer(TimerImpl *timer, std::vector &path_tail_vector, std::vector &found_timers); - -private: - /** - * Static function reg returns a reference to an existing timer or newly - * created timer of the specified name which is subordinate to the - * parent timer. - * - * @return a TimerImpl reference to the timer with the - * specified name that is subordinate to the - * parent timer. - */ - static TimerImpl *reg(const std::string &name, TimerMask timer_mask, TimerImpl *parent_timer, const TimerSet &timer_set) { - return parent_timer->addSubtimer(name, timer_mask, timer_set); - } - - /** - * Creates a new Timer instance. - * - * @param name a std::string const reference to the name of - * the timer. - * - */ - TimerImpl(const std::string &name, TimerMask timer_mask, TimerImpl *parent_timer, const TimerSet &timer_set); - - /** - * Destroys a TimerImpl instance. - * - */ - ~TimerImpl(); - - TimerImpl(const TimerImpl &TimerImpl); - TimerImpl &operator=(const TimerImpl &TimerImpl); - - /** - * Class finder is a binary predicate for finding a subordinate timer. - * - * Note that the subordinate timer is an unsorted list as there are very few timers - * created and should rarely be looked up by name. 
- */ -#ifdef __INTEL_COMPILER -#pragma warning(push) -#pragma warning(disable: 444) -#endif - class finder - { - public: - explicit finder(const std::string &name) - : m_name(name) - {} - - bool operator()(Timer timer) const { - return equal_case(timer.getName(), m_name); - } - - private: - std::string m_name; - }; -#ifdef __INTEL_COMPILER -#pragma warning(pop) -#endif - -public: - /** - * Member function getName returns the name of the timer. - * - * @return a std::string const reference to the timer's - * name. - */ - const std::string &getName() const { - return m_name; - } - - /** - * Member function getTimerMask returns the timer mask of the timer. - * - * @return a TimerMask value to the timer mask. - */ - TimerMask getTimerMask() const { - return m_timerMask; - } - - /** - * Member function getTimerSet returns the timer set of the timer. - * - * @return a TimerSet const reference to the timer set. - */ - const TimerSet &getTimerSet() const { - return m_timerSet; - } - - /** - * Member function shouldRecord returns true if any of the specified timer - * bit masks are set in the enable timer bit mask. - */ - bool shouldRecord() const { - return m_timerSet.shouldRecord(m_timerMask) && s_enabledMetricsMask; - } - - /** - * Member function getSubtimerLapCount returns the subtimer lap counter. - * - * @return a Counter value of the subtimer lap counter. - */ - double getSubtimerLapCount() const { - return m_subtimerLapCount; - } - - void setSubtimerLapCount(double value) { - m_subtimerLapCount = value; - } - - /** - * Member function getLapCount returns the lap counter metric. The lap - * count metric is the number of times the stop function has been executed. - * - * @return a CounterMetric const reference of the lap counter - * metric. - */ - template - const Timer::Metric &getMetric() const; - - /** - * Member function getTimerList returns the subtimers associated with - * this timer. - * - * @return a TimerList const reference to the sub - * time list. 
- */ - const TimerList &getTimerList() const { - return m_subtimerList; - } - - TimerList::iterator begin() { - return m_subtimerList.begin(); - } - - TimerList::const_iterator begin() const { - return m_subtimerList.begin(); - } - - TimerList::iterator end() { - return m_subtimerList.end(); - } - - TimerList::const_iterator end() const { - return m_subtimerList.end(); - } - - /** - * Member function reset resets the accumulated time and lap times. - * - */ - void reset(); - - /** - * Member function checkpoint checkpoints the timer and all subtimers. - * - */ - void checkpoint() const; - - /** - * Member function start sets the start timer. - * - * @return a TimerImpl reference to the timer. - */ - TimerImpl &start(); - - /** - * Member function lap sets the stop timer. - * - * @return a TimerImpl reference to the timer. - */ - TimerImpl &lap(); - - /** - * Member function stop sets the stop timer and sums the just completed lap - * time to the timer. - * - * @return a TimerImpl reference to the timer. - */ - TimerImpl &stop(); - - /** - * Member function accumulateSubtimerLapCounts sums the lap counter of all - * subordinate timers. This is used to determin which timers have been activated at all. - * - * @return an int value of the number of subordinate - * timer laps. - */ - double accumulateSubtimerLapCounts() const; - - Timer getSubtimer(const std::string &name); - -public: - /** - * Member function dump writes the timer to the specified - * diagnostic writer. - * - * @param dout a Writer variable reference to write the timer to. - * - * @return a Writer reference to dout. - */ - Writer &dump(Writer &dout) const; - -private: - /** - * Member function addSubtimer returns a reference to an existing or new - * subtimer with the specified name. - * - * @param name a std::string value of the timer's name. - * - * @param timer_mask a TimerMask value of the class of the timer. - * - * @return a TimerImpl reference to the timer with - * specified name. 
- */ - TimerImpl *addSubtimer(const std::string &name, TimerMask timer_mask, const TimerSet &timer_set); - TimerImpl & child_notifies_of_start(); - TimerImpl & child_notifies_of_stop(); - -private: - std::string m_name; ///< Name of the timer - TimerMask m_timerMask; ///< Bit mask to enable timer - TimerImpl * m_parentTimer; ///< Parent timer - mutable double m_subtimerLapCount; ///< Sum of subtimer lap counts and m_lapCount - unsigned m_lapStartCount; ///< Number of pending lap stops - unsigned m_activeChildCount; ///< How many children timers have been started - bool m_childCausedStart; ///< Was this timer started because a child was started? - - TimerList m_subtimerList; ///< List of subordinate timers - - const TimerSet & m_timerSet; ///< Timer enabled mask - Timer::Metric m_lapCount; ///< Number of laps accumulated - Timer::Metric m_cpuTime; ///< CPU time - Timer::Metric m_wallTime; ///< Wall time - Timer::Metric m_MPICount; ///< MPI call count - Timer::Metric m_MPIByteCount; ///< MPI byte count - Timer::Metric m_heapAlloc; ///< Heap allocated -}; - -inline Writer &operator<<(Writer &dout, const TimerImpl &timer) { - return timer.dump(dout); -} - void updateRootTimer( Timer root_timer) @@ -379,321 +87,6 @@ deleteRootTimer( } -TimerImpl::TimerImpl( - const std::string & name, - TimerMask timer_mask, - TimerImpl * parent_timer, - const TimerSet & timer_set) - : m_name(name), - m_timerMask(timer_mask), - m_parentTimer(parent_timer), - m_subtimerLapCount(0.0), - m_lapStartCount(0), - m_activeChildCount(0), - m_childCausedStart(false), - m_subtimerList(), - m_timerSet(timer_set) -{} - - -TimerImpl::~TimerImpl() -{ - try { - for (TimerList::iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) - delete (*it).m_timerImpl; - } - catch (std::exception &) { - } -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_lapCount; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_cpuTime; -} - 
- -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_wallTime; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_MPICount; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_MPIByteCount; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_heapAlloc; -} - - -void -TimerImpl::reset() -{ - m_lapStartCount = 0; - m_childCausedStart = false; - m_activeChildCount = 0; - - m_lapCount.reset(); - m_cpuTime.reset(); - m_wallTime.reset(); - m_MPICount.reset(); - m_MPIByteCount.reset(); - m_heapAlloc.reset(); -} - - -Timer -TimerImpl::getSubtimer( - const std::string & name) -{ - TimerList::iterator it = std::find_if(m_subtimerList.begin(), m_subtimerList.end(), finder(name)); - - if (it == m_subtimerList.end()) - throw std::runtime_error("Timer not found"); - else - return *it; -} - - -TimerImpl * -TimerImpl::addSubtimer( - const std::string & name, - TimerMask timer_mask, - const TimerSet & timer_set) -{ - TimerList::iterator it = std::find_if(m_subtimerList.begin(), m_subtimerList.end(), finder(name)); - - if (it == m_subtimerList.end()) { - TimerImpl *timer_impl = new TimerImpl(name, timer_mask, this, timer_set); - m_subtimerList.push_back(Timer(timer_impl)); - return timer_impl; - } - else - return (*it).m_timerImpl; -} - - -TimerImpl & -TimerImpl::start() -{ - if (shouldRecord()) { - if (m_lapStartCount == 0) { - ++m_lapStartCount; - m_lapCount.m_lapStart = m_lapCount.m_lapStop; - - m_cpuTime.m_lapStop = m_cpuTime.m_lapStart = value_now(); - m_wallTime.m_lapStop = m_wallTime.m_lapStart = value_now(); - m_MPICount.m_lapStop = m_MPICount.m_lapStart = value_now(); - m_MPIByteCount.m_lapStop = m_MPIByteCount.m_lapStart = value_now(); - m_heapAlloc.m_lapStop = m_heapAlloc.m_lapStart = value_now(); - if(m_parentTimer) - m_parentTimer->child_notifies_of_start(); - } - } - - return *this; -} - - -TimerImpl & -TimerImpl::lap() -{ - if (shouldRecord()) 
{ - if (m_lapStartCount > 0) { - m_cpuTime.m_lapStop = value_now(); - m_wallTime.m_lapStop = value_now(); - m_MPICount.m_lapStop = value_now(); - m_MPIByteCount.m_lapStop = value_now(); - m_heapAlloc.m_lapStop = value_now(); - } - } - - return *this; -} - -TimerImpl & TimerImpl::child_notifies_of_start() -{ - //Start only if not already started and this isn't a root timer - if(m_lapStartCount == 0 && m_parentTimer) - { - start(); - m_childCausedStart = true; - } - m_activeChildCount++; - - return *this; -} - -TimerImpl & TimerImpl::child_notifies_of_stop() -{ - m_activeChildCount--; - if(m_activeChildCount == 0 && m_childCausedStart) - { - stop(); - } - return *this; -} - -TimerImpl & -TimerImpl::stop() -{ - if (shouldRecord()) { - if (m_lapStartCount > 0) { - m_lapStartCount = 0; - m_lapCount.m_lapStop++; - m_childCausedStart = false; - m_activeChildCount = 0; - - m_cpuTime.m_lapStop = value_now(); - m_wallTime.m_lapStop = value_now(); - m_MPICount.m_lapStop = value_now(); - m_MPIByteCount.m_lapStop = value_now(); - m_heapAlloc.m_lapStop = value_now(); - - m_lapCount.addLap(); - m_cpuTime.addLap(); - m_wallTime.addLap(); - m_MPICount.addLap(); - m_MPIByteCount.addLap(); - m_heapAlloc.addLap(); - if(m_parentTimer) - m_parentTimer->child_notifies_of_stop(); - } - } - - return *this; -} - - -double -TimerImpl::accumulateSubtimerLapCounts() const -{ - m_subtimerLapCount = m_lapCount.getAccumulatedLap(false); - - for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) - (*it).m_timerImpl->accumulateSubtimerLapCounts(); - - for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) - m_subtimerLapCount += (*it).m_timerImpl->m_subtimerLapCount; - - return m_subtimerLapCount; -} - - -void -TimerImpl::checkpoint() const -{ - m_lapCount.checkpoint(); - m_cpuTime.checkpoint(); - m_wallTime.checkpoint(); - m_MPICount.checkpoint(); - m_MPIByteCount.checkpoint(); - m_heapAlloc.checkpoint(); - - for 
(TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) - (*it).m_timerImpl->checkpoint(); -} - - -void -TimerImpl::updateRootTimer(TimerImpl *root_timer) -{ - root_timer->m_lapCount.m_lapStop = value_now(); - root_timer->m_cpuTime.m_lapStop = value_now(); - root_timer->m_wallTime.m_lapStop = value_now(); - root_timer->m_MPICount.m_lapStop = value_now(); - root_timer->m_MPIByteCount.m_lapStop = value_now(); - root_timer->m_heapAlloc.m_lapStop = value_now(); - - root_timer->m_lapCount.m_accumulatedLap = root_timer->m_lapCount.m_lapStop - root_timer->m_lapCount.m_lapStart; - root_timer->m_cpuTime.m_accumulatedLap = root_timer->m_cpuTime.m_lapStop - root_timer->m_cpuTime.m_lapStart; - root_timer->m_wallTime.m_accumulatedLap = root_timer->m_wallTime.m_lapStop - root_timer->m_wallTime.m_lapStart; - root_timer->m_MPICount.m_accumulatedLap = root_timer->m_MPICount.m_lapStop - root_timer->m_MPICount.m_lapStart; - root_timer->m_MPIByteCount.m_accumulatedLap = root_timer->m_MPIByteCount.m_lapStop - root_timer->m_MPIByteCount.m_lapStart; - root_timer->m_heapAlloc.m_accumulatedLap = root_timer->m_heapAlloc.m_lapStop - root_timer->m_heapAlloc.m_lapStart; -} - - - -Timer -TimerImpl::createRootTimer( - const std::string & name, - const TimerSet & timer_set) -{ - TimerImpl *timer_impl = new TimerImpl(name, 0, 0, timer_set); - return Timer(timer_impl); -} - - -void -TimerImpl::deleteRootTimer( - TimerImpl * root_timer) -{ - delete root_timer; -} - - -void -TimerImpl::findTimer( - TimerImpl * timer, - std::vector & path_tail_vector, - std::vector & found_timers) -{ - if (timer->begin() == timer->end()) { // at leaf - } - else - for (TimerList::const_iterator it = timer->begin(); it != timer->end(); ++it) - findTimer((*it).m_timerImpl, path_tail_vector, found_timers); -} - - -Writer & -TimerImpl::dump( - Writer & dout) const -{ - if (dout.shouldPrint()) { - dout << "TimerImpl" << push << dendl; - dout << "m_name, " << m_name << dendl; - dout << 
"m_timerMask, " << m_timerMask << dendl; - dout << "m_subtimerLapCount, " << m_subtimerLapCount << dendl; - dout << "m_lapStartCount, " << m_lapStartCount << dendl; - - dout << "m_lapCount, " << m_lapCount << dendl; - dout << "m_cpuTime, " << m_cpuTime << dendl; - dout << "m_wallTime, " << m_wallTime << dendl; - dout << "m_MPICount, " << m_MPICount << dendl; - dout << "m_MPIByteCount, " << m_MPIByteCount << dendl; - dout << "m_heapAlloc, " << m_heapAlloc << dendl; - - dout << "m_subtimerList, " << m_subtimerList << dendl; - dout << pop; - } - - return dout; -} Timer::~Timer() {} @@ -765,25 +158,25 @@ Timer::begin() { return m_timerImpl->begin(); } - + TimerList::const_iterator Timer::begin() const { return m_timerImpl->begin(); } - + TimerList::iterator Timer::end() { return m_timerImpl->end(); } - + TimerList::const_iterator Timer::end() const { return m_timerImpl->end(); } - + double Timer::accumulateSubtimerLapCounts() const { return m_timerImpl->accumulateSubtimerLapCounts(); @@ -891,9 +284,9 @@ TimeBlockSynchronized::stop() namespace sierra { namespace Diag { -// +// // SierraRootTimer member functions: -// +// SierraRootTimer::SierraRootTimer() : m_sierraTimer(stk::diag::createRootTimer("Sierra", sierraTimerSet())) { } @@ -1057,14 +450,14 @@ TimerParser::parse( m_metricsSetMask = 0; m_metricsMask = 0; m_optionMask = getEnabledTimerMask(); - + m_optionMask = OptionMaskParser::parse(option_mask); setEnabledTimerMask(m_optionMask); - + if (m_metricsSetMask != 0) stk::diag::setEnabledTimerMetricsMask(m_metricsMask); - + return m_optionMask; } diff --git a/packages/stk/stk_util/stk_util/diag/Timer.hpp b/packages/stk/stk_util/stk_util/diag/Timer.hpp index 466c06d52e75..f4f9c391d4e0 100644 --- a/packages/stk/stk_util/stk_util/diag/Timer.hpp +++ b/packages/stk/stk_util/stk_util/diag/Timer.hpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// 
// * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,7 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #ifndef STK_UTIL_DIAG_Timer_hpp #define STK_UTIL_DIAG_Timer_hpp @@ -38,6 +38,7 @@ #include "stk_util/diag/Option.hpp" // for OptionMask, OptionMaskParser, OptionMaskP... #include "stk_util/diag/TimerMetricTraits.hpp" // for MetricTraits, MetricsMask, CPUTime (ptr o... 
#include "stk_util/environment/FormatTime.hpp" // for TimeFormat +#include "stk_util/util/Marshal.hpp" #include "stk_util/parallel/Parallel.hpp" // for ParallelMachine, ompi_communicator_t #include // for size_t #include // for list @@ -182,6 +183,7 @@ class Timer friend class TimerImpl; friend class TimeBlock; friend class TimeBlockSynchronized; + friend class TimerTester; friend void updateRootTimer(Timer); friend Timer createRootTimer(const std::string &, const TimerSet &); friend void deleteRootTimer(Timer); @@ -469,6 +471,53 @@ class Timer TimerImpl * m_timerImpl; ///< Reference to the actual timer }; +template +Marshal &operator<<(Marshal &mout, const Timer::Metric &t) { + mout << t.getAccumulatedLap(false) << t.getAccumulatedLap(true); + + return mout; +} + +inline Marshal &operator<<(Marshal &mout, const Timer &t) { + mout << t.getName() << t.getTimerMask() << t.getSubtimerLapCount() + << t.getMetric() << t.getMetric() << t.getMetric() + << t.getMetric() << t.getMetric() << t.getMetric(); + + mout << t.getTimerList(); + + return mout; +} + +/** + * @brief Function operator<< writes a timer to the diagnostic stream. + * + * @param dout a Writer reference to the diagnostic writer to print + * to. + * + * @param timer a Timer::Metric const reference to the timer + * to print. + * + * @return a Writer reference to dout. + */ +template +inline Writer &operator<<(Writer &dout, const Timer::Metric &timer) { + return timer.dump(dout); +} + +/** + * Function operator<< writes a timer metric to the diagnostic stream. + * + * @param dout a Writer reference to the diagnostic writer to print + * to. + * + * @param timer a Timer::Metric const reference to the timer + * to print. + * + * @return a Writer reference to dout. + */ +inline Writer &operator<<(Writer &dout, const Timer &timer) { + return timer.dump(dout); +} /** @@ -604,36 +653,6 @@ class TimeBlockSynchronized }; -/** - * @brief Function operator<< writes a timer to the diagnostic stream. 
- * - * @param dout a Writer reference to the diagnostic writer to print - * to. - * - * @param timer a Timer::Metric const reference to the timer - * to print. - * - * @return a Writer reference to dout. - */ -template -inline Writer &operator<<(Writer &dout, const Timer::Metric &timer) { - return timer.dump(dout); -} - -/** - * Function operator<< writes a timer metric to the diagnostic stream. - * - * @param dout a Writer reference to the diagnostic writer to print - * to. - * - * @param timer a Timer::Metric const reference to the timer - * to print. - * - * @return a Writer reference to dout. - */ -inline Writer &operator<<(Writer &dout, const Timer &timer) { - return timer.dump(dout); -} } // namespace diag } // namespace stk @@ -780,14 +799,14 @@ class TimerParser : public OptionMaskParser * @param arg a std::string const reference to the argument * values. */ - virtual void parseArg(const std::string &name, const std::string &arg) const; + virtual void parseArg(const std::string &name, const std::string &arg) const; mutable stk::diag::MetricsMask m_metricsSetMask; mutable stk::diag::MetricsMask m_metricsMask; }; -class SierraRootTimer +class SierraRootTimer { public: SierraRootTimer(); @@ -795,7 +814,7 @@ class SierraRootTimer stk::diag::Timer & sierraTimer(); private: - stk::diag::Timer m_sierraTimer; + stk::diag::Timer m_sierraTimer; }; } // namespace Diag diff --git a/packages/stk/stk_util/stk_util/diag/TimerImpl.cpp b/packages/stk/stk_util/stk_util/diag/TimerImpl.cpp new file mode 100644 index 000000000000..39181c702ab5 --- /dev/null +++ b/packages/stk/stk_util/stk_util/diag/TimerImpl.cpp @@ -0,0 +1,333 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + +#include "stk_util/diag/TimerImpl.hpp" +#include "stk_util/diag/Timer.hpp" + +namespace stk::diag { + +namespace { + +template +typename MetricTraits::Type +value_now() { + if (MetricTraits::METRIC & getEnabledTimerMetricsMask()) + return MetricTraits::value_now(); + else + return 0; +} + +} // namespace + + +TimerImpl::TimerImpl( + const std::string & name, + TimerMask timer_mask, + TimerImpl * parent_timer, + const TimerSet & timer_set) + : m_name(name), + m_timerMask(timer_mask), + m_parentTimer(parent_timer), + m_subtimerLapCount(0.0), + m_lapStartCount(0), + m_activeChildCount(0), + m_childCausedStart(false), + m_subtimerList(), + m_timerSet(timer_set) +{} + + +TimerImpl::~TimerImpl() +{ + try { + for (TimerList::iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) + delete (*it).m_timerImpl; + } + catch (std::exception &) { + } +} + +bool TimerImpl::shouldRecord() const { + return m_timerSet.shouldRecord(m_timerMask) && getEnabledTimerMetricsMask(); +} + +void +TimerImpl::reset() +{ + m_lapStartCount = 0; + m_childCausedStart = false; + m_activeChildCount = 0; + + m_lapCount.reset(); + m_cpuTime.reset(); + m_wallTime.reset(); + m_MPICount.reset(); + m_MPIByteCount.reset(); + m_heapAlloc.reset(); +} + + +Timer +TimerImpl::getSubtimer( + const std::string & name) +{ + TimerList::iterator it = std::find_if(m_subtimerList.begin(), m_subtimerList.end(), finder(name)); + + if (it == m_subtimerList.end()) + throw std::runtime_error("Timer not found"); + else + return *it; +} + + +TimerImpl * +TimerImpl::addSubtimer( + const std::string & name, + TimerMask timer_mask, + const TimerSet & timer_set) +{ + TimerList::iterator it = std::find_if(m_subtimerList.begin(), m_subtimerList.end(), finder(name)); + + if (it == m_subtimerList.end()) { + TimerImpl *timer_impl = new TimerImpl(name, timer_mask, this, timer_set); + m_subtimerList.push_back(Timer(timer_impl)); + return timer_impl; + } + else + return (*it).m_timerImpl; +} + + +TimerImpl & 
+TimerImpl::start() +{ + if (shouldRecord()) { + if (m_lapStartCount == 0) { + ++m_lapStartCount; + m_lapCount.m_lapStart = m_lapCount.m_lapStop; + + m_cpuTime.m_lapStop = m_cpuTime.m_lapStart = value_now(); + m_wallTime.m_lapStop = m_wallTime.m_lapStart = value_now(); + m_MPICount.m_lapStop = m_MPICount.m_lapStart = value_now(); + m_MPIByteCount.m_lapStop = m_MPIByteCount.m_lapStart = value_now(); + m_heapAlloc.m_lapStop = m_heapAlloc.m_lapStart = value_now(); + if(m_parentTimer) + m_parentTimer->child_notifies_of_start(); + } + } + + return *this; +} + + +TimerImpl & +TimerImpl::lap() +{ + if (shouldRecord()) { + if (m_lapStartCount > 0) { + m_cpuTime.m_lapStop = value_now(); + m_wallTime.m_lapStop = value_now(); + m_MPICount.m_lapStop = value_now(); + m_MPIByteCount.m_lapStop = value_now(); + m_heapAlloc.m_lapStop = value_now(); + } + } + + return *this; +} + +TimerImpl & TimerImpl::child_notifies_of_start() +{ + //Start only if not already started and this isn't a root timer + if(m_lapStartCount == 0 && m_parentTimer) + { + start(); + m_childCausedStart = true; + } + m_activeChildCount++; + + return *this; +} + +TimerImpl & TimerImpl::child_notifies_of_stop() +{ + m_activeChildCount--; + if(m_activeChildCount == 0 && m_childCausedStart) + { + stop(); + } + return *this; +} + +TimerImpl & +TimerImpl::stop() +{ + if (shouldRecord()) { + if (m_lapStartCount > 0) { + m_lapStartCount = 0; + m_lapCount.m_lapStop++; + m_childCausedStart = false; + m_activeChildCount = 0; + + m_cpuTime.m_lapStop = value_now(); + m_wallTime.m_lapStop = value_now(); + m_MPICount.m_lapStop = value_now(); + m_MPIByteCount.m_lapStop = value_now(); + m_heapAlloc.m_lapStop = value_now(); + + m_lapCount.addLap(); + m_cpuTime.addLap(); + m_wallTime.addLap(); + m_MPICount.addLap(); + m_MPIByteCount.addLap(); + m_heapAlloc.addLap(); + if(m_parentTimer) + m_parentTimer->child_notifies_of_stop(); + } + } + + return *this; +} + + +double +TimerImpl::accumulateSubtimerLapCounts() const +{ + 
m_subtimerLapCount = m_lapCount.getAccumulatedLap(false); + + for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) + (*it).m_timerImpl->accumulateSubtimerLapCounts(); + + for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) + m_subtimerLapCount += (*it).m_timerImpl->m_subtimerLapCount; + + return m_subtimerLapCount; +} + + +void +TimerImpl::checkpoint() const +{ + m_lapCount.checkpoint(); + m_cpuTime.checkpoint(); + m_wallTime.checkpoint(); + m_MPICount.checkpoint(); + m_MPIByteCount.checkpoint(); + m_heapAlloc.checkpoint(); + + for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) + (*it).m_timerImpl->checkpoint(); +} + + +void +TimerImpl::updateRootTimer(TimerImpl *root_timer) +{ + root_timer->m_lapCount.m_lapStop = value_now(); + root_timer->m_cpuTime.m_lapStop = value_now(); + root_timer->m_wallTime.m_lapStop = value_now(); + root_timer->m_MPICount.m_lapStop = value_now(); + root_timer->m_MPIByteCount.m_lapStop = value_now(); + root_timer->m_heapAlloc.m_lapStop = value_now(); + + root_timer->m_lapCount.m_accumulatedLap = root_timer->m_lapCount.m_lapStop - root_timer->m_lapCount.m_lapStart; + root_timer->m_cpuTime.m_accumulatedLap = root_timer->m_cpuTime.m_lapStop - root_timer->m_cpuTime.m_lapStart; + root_timer->m_wallTime.m_accumulatedLap = root_timer->m_wallTime.m_lapStop - root_timer->m_wallTime.m_lapStart; + root_timer->m_MPICount.m_accumulatedLap = root_timer->m_MPICount.m_lapStop - root_timer->m_MPICount.m_lapStart; + root_timer->m_MPIByteCount.m_accumulatedLap = root_timer->m_MPIByteCount.m_lapStop - root_timer->m_MPIByteCount.m_lapStart; + root_timer->m_heapAlloc.m_accumulatedLap = root_timer->m_heapAlloc.m_lapStop - root_timer->m_heapAlloc.m_lapStart; +} + + + +Timer +TimerImpl::createRootTimer( + const std::string & name, + const TimerSet & timer_set) +{ + TimerImpl *timer_impl = new TimerImpl(name, 0, 0, timer_set); + return 
Timer(timer_impl); +} + + +void +TimerImpl::deleteRootTimer( + TimerImpl * root_timer) +{ + delete root_timer; +} + + +void +TimerImpl::findTimer( + TimerImpl * timer, + std::vector & path_tail_vector, + std::vector & found_timers) +{ + if (timer->begin() == timer->end()) { // at leaf + } + else + for (TimerList::const_iterator it = timer->begin(); it != timer->end(); ++it) + findTimer((*it).m_timerImpl, path_tail_vector, found_timers); +} + + +Writer & +TimerImpl::dump( + Writer & dout) const +{ + if (dout.shouldPrint()) { + dout << "TimerImpl" << push << dendl; + dout << "m_name, " << m_name << dendl; + dout << "m_timerMask, " << m_timerMask << dendl; + dout << "m_subtimerLapCount, " << m_subtimerLapCount << dendl; + dout << "m_lapStartCount, " << m_lapStartCount << dendl; + + dout << "m_lapCount, " << m_lapCount << dendl; + dout << "m_cpuTime, " << m_cpuTime << dendl; + dout << "m_wallTime, " << m_wallTime << dendl; + dout << "m_MPICount, " << m_MPICount << dendl; + dout << "m_MPIByteCount, " << m_MPIByteCount << dendl; + dout << "m_heapAlloc, " << m_heapAlloc << dendl; + + dout << "m_subtimerList, " << m_subtimerList << dendl; + dout << pop; + } + + return dout; +} + + + +} \ No newline at end of file diff --git a/packages/stk/stk_util/stk_util/diag/TimerImpl.hpp b/packages/stk/stk_util/stk_util/diag/TimerImpl.hpp new file mode 100644 index 000000000000..e17493e51d5f --- /dev/null +++ b/packages/stk/stk_util/stk_util/diag/TimerImpl.hpp @@ -0,0 +1,370 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef STK_UTIL_DIAG_TimerImpl_hpp +#define STK_UTIL_DIAG_TimerImpl_hpp + +#include "stk_util/diag/TimerMetricTraits.hpp" +#include "stk_util/util/string_case_compare.hpp" // for equal_case +#include "stk_util/diag/Timer.hpp" +#include "stk_util/util/Writer.hpp" // for operator<<, Writer, dendl, pop, push +#include "stk_util/diag/WriterExt.hpp" // for operator<< + + +namespace stk::diag { + + +/** + * Class TimerImpl is the core timer class. 
The Timer class is a + * wrapper around TimerImpl so that the buried references can be constructed more easily. + * + * Each timer has a lap counter, cpu timer, wall timer and other metrics. Each time a timer is + * started, the cpu start time, wall start time and other metrics are set to the process' current + * values. When the timer is stopped, the lap counter is incremented, and the cpu, wall, and other + * values are accumulated with the difference between now and the start time. + * + * Each timer may have a list of subordinate timers. The relationship is purely + * hierarchical in that there is no timing relationship assumed between the timers other + * than the grouping. There is no relation between the starting and stopping of parent + * and subordinate timers. + * + * The subordinate timers are stored as pointers to a new timer on the heap, since the + * calling function will be receiving a reference to this memory which can never change + * location. The subordinate timers are not sorted in the list as they should very + * rarely be created or looked up by name, rather the calling function stores the + * reference via the Timer class. + * + */ +class TimerImpl +{ + friend class Timer; + friend class TimerTester; + +public: + static void updateRootTimer(TimerImpl *root_timer); + + static Timer createRootTimer(const std::string &name, const TimerSet &timer_set); + + static void deleteRootTimer(TimerImpl *root_timer); + + static void findTimer(TimerImpl *timer, std::vector &path_tail_vector, std::vector &found_timers); + +private: + /** + * Static function reg returns a reference to an existing timer or newly + * created timer of the specified name which is subordinate to the + * parent timer. + * + * @return a TimerImpl reference to the timer with the + * specified name that is subordinate to the + * parent timer.
+ */ + static TimerImpl *reg(const std::string &name, TimerMask timer_mask, TimerImpl *parent_timer, const TimerSet &timer_set) { + return parent_timer->addSubtimer(name, timer_mask, timer_set); + } + + /** + * Creates a new Timer instance. + * + * @param name a std::string const reference to the name of + * the timer. + * + */ + TimerImpl(const std::string &name, TimerMask timer_mask, TimerImpl *parent_timer, const TimerSet &timer_set); + + /** + * Destroys a TimerImpl instance. + * + */ + ~TimerImpl(); + + TimerImpl(const TimerImpl &TimerImpl); + TimerImpl &operator=(const TimerImpl &TimerImpl); + + /** + * Class finder is a binary predicate for finding a subordinate timer. + * + * Note that the subordinate timer is an unsorted list as there are very few timers + * created and should rarely be looked up by name. + */ +#ifdef __INTEL_COMPILER +#pragma warning(push) +#pragma warning(disable: 444) +#endif + class finder + { + public: + explicit finder(const std::string &name) + : m_name(name) + {} + + bool operator()(Timer timer) const { + return equal_case(timer.getName(), m_name); + } + + private: + std::string m_name; + }; +#ifdef __INTEL_COMPILER +#pragma warning(pop) +#endif + +public: + /** + * Member function getName returns the name of the timer. + * + * @return a std::string const reference to the timer's + * name. + */ + const std::string &getName() const { + return m_name; + } + + /** + * Member function getTimerMask returns the timer mask of the timer. + * + * @return a TimerMask value to the timer mask. + */ + TimerMask getTimerMask() const { + return m_timerMask; + } + + /** + * Member function getTimerSet returns the timer set of the timer. + * + * @return a TimerSet const reference to the timer set. + */ + const TimerSet &getTimerSet() const { + return m_timerSet; + } + + /** + * Member function shouldRecord returns true if any of the specified timer + * bit masks are set in the enable timer bit mask. 
+ */ + bool shouldRecord() const; + + /** + * Member function getSubtimerLapCount returns the subtimer lap counter. + * + * @return a Counter value of the subtimer lap counter. + */ + double getSubtimerLapCount() const { + return m_subtimerLapCount; + } + + void setSubtimerLapCount(double value) { + m_subtimerLapCount = value; + } + + /** + * Member function getLapCount returns the lap counter metric. The lap + * count metric is the number of times the stop function has been executed. + * + * @return a CounterMetric const reference of the lap counter + * metric. + */ + template + const Timer::Metric &getMetric() const; + + /** + * Member function getTimerList returns the subtimers associated with + * this timer. + * + * @return a TimerList const reference to the sub + * time list. + */ + const TimerList &getTimerList() const { + return m_subtimerList; + } + + TimerList::iterator begin() { + return m_subtimerList.begin(); + } + + TimerList::const_iterator begin() const { + return m_subtimerList.begin(); + } + + TimerList::iterator end() { + return m_subtimerList.end(); + } + + TimerList::const_iterator end() const { + return m_subtimerList.end(); + } + + /** + * Member function reset resets the accumulated time and lap times. + * + */ + void reset(); + + /** + * Member function checkpoint checkpoints the timer and all subtimers. + * + */ + void checkpoint() const; + + /** + * Member function start sets the start timer. + * + * @return a TimerImpl reference to the timer. + */ + TimerImpl &start(); + + /** + * Member function lap sets the stop timer. + * + * @return a TimerImpl reference to the timer. + */ + TimerImpl &lap(); + + /** + * Member function stop sets the stop timer and sums the just completed lap + * time to the timer. + * + * @return a TimerImpl reference to the timer. + */ + TimerImpl &stop(); + + /** + * Member function accumulateSubtimerLapCounts sums the lap counter of all + * subordinate timers. 
This is used to determine which timers have been activated at all. + * + * @return a double value of the number of subordinate + * timer laps. + */ + double accumulateSubtimerLapCounts() const; + + Timer getSubtimer(const std::string &name); + +public: + /** + * Member function dump writes the timer to the specified + * diagnostic writer. + * + * @param dout a Writer variable reference to write the timer to. + * + * @return a Writer reference to dout. + */ + Writer &dump(Writer &dout) const; + +private: + /** + * Member function addSubtimer returns a reference to an existing or new + * subtimer with the specified name. + * + * @param name a std::string value of the timer's name. + * + * @param timer_mask a TimerMask value of the class of the timer. + * + * @return a TimerImpl reference to the timer with + * specified name. + */ + TimerImpl *addSubtimer(const std::string &name, TimerMask timer_mask, const TimerSet &timer_set); + TimerImpl & child_notifies_of_start(); + TimerImpl & child_notifies_of_stop(); + +private: + std::string m_name; ///< Name of the timer + TimerMask m_timerMask; ///< Bit mask to enable timer + TimerImpl * m_parentTimer; ///< Parent timer + mutable double m_subtimerLapCount; ///< Sum of subtimer lap counts and m_lapCount + unsigned m_lapStartCount; ///< Number of pending lap stops + unsigned m_activeChildCount; ///< How many children timers have been started + bool m_childCausedStart; ///< Was this timer started because a child was started?
+ + TimerList m_subtimerList; ///< List of subordinate timers + + const TimerSet & m_timerSet; ///< Timer enabled mask + Timer::Metric m_lapCount; ///< Number of laps accumulated + Timer::Metric m_cpuTime; ///< CPU time + Timer::Metric m_wallTime; ///< Wall time + Timer::Metric m_MPICount; ///< MPI call count + Timer::Metric m_MPIByteCount; ///< MPI byte count + Timer::Metric m_heapAlloc; ///< Heap allocated +}; + +inline Writer &operator<<(Writer &dout, const TimerImpl &timer) { + return timer.dump(dout); +} + + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_lapCount; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_cpuTime; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_wallTime; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_MPICount; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_MPIByteCount; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_heapAlloc; +} + + +} + +#endif \ No newline at end of file diff --git a/packages/stk/stk_util/stk_util/environment/EnvData.cpp b/packages/stk/stk_util/stk_util/environment/EnvData.cpp index b09aff4f1650..6a27223777d0 100644 --- a/packages/stk/stk_util/stk_util/environment/EnvData.cpp +++ b/packages/stk/stk_util/stk_util/environment/EnvData.cpp @@ -63,7 +63,6 @@ namespace stk { m_inputFileRequired(true), m_checkSubCycle(false), m_checkSmRegion(false), - m_isZapotec(false), m_worldComm(MPI_COMM_NULL), m_parallelComm(MPI_COMM_NULL), m_parallelSize(-1), diff --git a/packages/stk/stk_util/stk_util/environment/EnvData.hpp b/packages/stk/stk_util/stk_util/environment/EnvData.hpp index 21ba461baa46..7afce19069a5 100644 --- a/packages/stk/stk_util/stk_util/environment/EnvData.hpp +++ b/packages/stk/stk_util/stk_util/environment/EnvData.hpp @@ -108,7 +108,6 @@ struct EnvData 
bool m_inputFileRequired; bool m_checkSubCycle; bool m_checkSmRegion; - bool m_isZapotec; MPI_Comm m_worldComm; diff --git a/packages/stk/stk_util/stk_util/environment/Scheduler.cpp b/packages/stk/stk_util/stk_util/environment/Scheduler.cpp index 94c49b1a56df..00d1837b7ea4 100644 --- a/packages/stk/stk_util/stk_util/environment/Scheduler.cpp +++ b/packages/stk/stk_util/stk_util/environment/Scheduler.cpp @@ -196,7 +196,7 @@ bool Scheduler::internal_is_it_time(Time time) // called multiple times with the same argument, it will return the // same response. - assert(time >= lastTime_); + STK_ThrowAssertMsg(time >= lastTime_, "time = " << time << ", lastTime_ = " << lastTime_); // If this is a restart, then calculate what the lastTime_ setting would // have been for this scheduler (based only on start time and deltas). diff --git a/packages/stk/stk_util/stk_util/ngp/NgpSpaces.hpp b/packages/stk/stk_util/stk_util/ngp/NgpSpaces.hpp index c0dc9a2d8f34..12bc0522d186 100644 --- a/packages/stk/stk_util/stk_util/ngp/NgpSpaces.hpp +++ b/packages/stk/stk_util/stk_util/ngp/NgpSpaces.hpp @@ -72,6 +72,8 @@ using MemSpace = Kokkos::HIPSpace; using MemSpace = ExecSpace::memory_space; #endif +using HostMemSpace = HostExecSpace::memory_space; + #ifdef KOKKOS_ENABLE_HIP template using RangePolicy = Kokkos::RangePolicy>; diff --git a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp index c2b9f9ded275..fe5e52134cdd 100644 --- a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp +++ b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp @@ -42,7 +42,7 @@ //In Sierra, STK_VERSION_STRING is provided on the compile line by bake. //For Trilinos stk snapshots, the following macro definition gets populated with //the real version string by the trilinos_snapshot.sh script. 
-#define STK_VERSION_STRING "5.23.1-605-g31b54b7f" +#define STK_VERSION_STRING "5.23.2-429-g07a311ce" #endif namespace stk { diff --git a/packages/stk/stk_util/stk_util/util/FPExceptions.hpp b/packages/stk/stk_util/stk_util/util/FPExceptions.hpp index 3d65d0a6017a..e2f94a533d09 100644 --- a/packages/stk/stk_util/stk_util/util/FPExceptions.hpp +++ b/packages/stk/stk_util/stk_util/util/FPExceptions.hpp @@ -32,13 +32,20 @@ constexpr bool have_errexcept() #endif } +constexpr int FE_EXCEPT_CHECKS = FE_ALL_EXCEPT & ~FE_INEXACT; + std::string get_fe_except_string(int fe_except_bitmask); inline void clear_fp_errors() { if constexpr (have_errexcept()) { - std::feclearexcept(FE_ALL_EXCEPT); + // experimental results show calling std::feclearexcept is *very* + // expensive, so don't call it unless needed. + if (std::fetestexcept(FE_EXCEPT_CHECKS) > 0) + { + std::feclearexcept(FE_EXCEPT_CHECKS); + } } else if constexpr (have_errno()) { errno = 0; @@ -49,7 +56,7 @@ inline void throw_or_warn_on_fp_error(const char* fname = nullptr, bool warn=fal { if constexpr (have_errexcept()) { - int fe_except_bitmask = std::fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT); + int fe_except_bitmask = std::fetestexcept(FE_EXCEPT_CHECKS); if (fe_except_bitmask != 0) { std::string msg = std::string(fname ?
fname : "") + " raised floating point error(s): " + get_fe_except_string(fe_except_bitmask); @@ -76,6 +83,7 @@ inline void throw_or_warn_on_fp_error(const char* fname = nullptr, bool warn=fal } } } + } inline void warn_on_fp_error(const char* fname = nullptr, std::ostream& os = std::cerr) diff --git a/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp b/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp index 567e4f875024..f905bb7f171d 100644 --- a/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp +++ b/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp @@ -44,17 +44,14 @@ class NgpVector { using HostSpace = Kokkos::DefaultHostExecutionSpace; public: - NgpVector(const std::string &n) : NgpVector(n, 0) - { - } - NgpVector() : NgpVector(get_default_name()) - { - } - NgpVector(const std::string &n, size_t s) - : mSize(s), - deviceVals(Kokkos::view_alloc(Kokkos::WithoutInitializing, n), mSize), - hostVals(Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceVals)) - { + virtual ~NgpVector() = default; + NgpVector(const std::string &n) : NgpVector(n, 0) {} + NgpVector() : NgpVector(get_default_name()) {} + NgpVector(const std::string &n, size_t s) + : mSize(s), + deviceVals(Kokkos::view_alloc(Kokkos::WithoutInitializing, n), mSize), + hostVals(Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceVals)) + { } NgpVector(size_t s) : NgpVector(get_default_name(), s) { diff --git a/packages/stk/stk_util/stk_util/util/StridedArray.hpp b/packages/stk/stk_util/stk_util/util/StridedArray.hpp index 69881b38abeb..0e19d0de940d 100644 --- a/packages/stk/stk_util/stk_util/util/StridedArray.hpp +++ b/packages/stk/stk_util/stk_util/util/StridedArray.hpp @@ -36,7 +36,7 @@ #include #include -#include +#include "Kokkos_Macros.hpp" namespace stk {