diff --git a/packages/amesos2/cmake/Amesos2_config.h.in b/packages/amesos2/cmake/Amesos2_config.h.in index 273f9f461c57..4a6e76ce8f67 100644 --- a/packages/amesos2/cmake/Amesos2_config.h.in +++ b/packages/amesos2/cmake/Amesos2_config.h.in @@ -116,4 +116,3 @@ #ifdef HAVE_AMESOS2_ZOLTAN2CORE # define HAVE_AMESOS2_ZOLTAN2 #endif -#cmakedefine HAVE_AMESOS2_GALERI diff --git a/packages/amesos2/cmake/Dependencies.cmake b/packages/amesos2/cmake/Dependencies.cmake index b8bc60738faf..9c5d753ee8ee 100644 --- a/packages/amesos2/cmake/Dependencies.cmake +++ b/packages/amesos2/cmake/Dependencies.cmake @@ -5,7 +5,7 @@ SET(LIB_REQUIRED_DEP_PACKAGES Teuchos Tpetra TrilinosSS Kokkos) SET(LIB_OPTIONAL_DEP_PACKAGES Epetra EpetraExt ShyLU_NodeBasker ShyLU_NodeTacho) SET(TEST_REQUIRED_DEP_PACKAGES) -SET(TEST_OPTIONAL_DEP_PACKAGES ShyLU_NodeBasker ShyLU_NodeTacho Kokkos TrilinosSS Xpetra Zoltan2Core Galeri) +SET(TEST_OPTIONAL_DEP_PACKAGES ShyLU_NodeBasker ShyLU_NodeTacho Kokkos TrilinosSS Xpetra Zoltan2Core) # SET(LIB_REQUIRED_DEP_TPLS SuperLU) SET(LIB_REQUIRED_DEP_TPLS ) SET(LIB_OPTIONAL_DEP_TPLS MPI SuperLU SuperLUMT SuperLUDist LAPACK UMFPACK PARDISO_MKL CSS_MKL ParMETIS METIS Cholmod MUMPS STRUMPACK CUSPARSE CUSOLVER) diff --git a/packages/amesos2/example/SimpleSolve.cpp b/packages/amesos2/example/SimpleSolve.cpp index 7a722431aa66..a1b3472ad7d8 100644 --- a/packages/amesos2/example/SimpleSolve.cpp +++ b/packages/amesos2/example/SimpleSolve.cpp @@ -23,9 +23,6 @@ #include #include #include -#include -#include -#include #include #include @@ -34,10 +31,7 @@ #include "Amesos2.hpp" #include "Amesos2_Version.hpp" -#if defined(HAVE_AMESOS2_XPETRA) && defined(HAVE_AMESOS2_GALERI) - #include "Galeri_XpetraMaps.hpp" - #include "Galeri_XpetraProblemFactory.hpp" -#endif + int main(int argc, char *argv[]) { Tpetra::ScopeGuard tpetraScope(&argc,&argv); @@ -46,7 +40,6 @@ int main(int argc, char *argv[]) { typedef Tpetra::Map<>::local_ordinal_type LO; typedef Tpetra::Map<>::global_ordinal_type GO; - typedef Tpetra::Map MAP; typedef Tpetra::CrsMatrix MAT; typedef Tpetra::MultiVector MV; @@ -55,24 +48,9 @@ int main(int argc, char *argv[]) { using Teuchos::RCP; using Teuchos::rcp; - bool verbose = false; - GO nx = 1; - std::string GaleriName3D {"Laplace3D"}; - std::string solvername("Superlu"); - std::string xml_filename(""); - Teuchos::CommandLineProcessor cmdp(false,true); - cmdp.setOption("verbose","quiet",&verbose,"Print messages and results."); - cmdp.setOption("solvername",&solvername,"Name of solver."); - cmdp.setOption("xml_filename",&xml_filename,"XML Filename for Solver parameters."); - cmdp.setOption("nx",&nx,"Dimension of 3D problem."); - cmdp.setOption ("galeriMatrixName", &GaleriName3D, "Name of 3D Galeri Matrix"); - if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { - return -1; - } - // Before we do anything, check that SuperLU is enabled - if( !Amesos2::query(solvername) ){ - std::cerr << solvername << " not enabled. Exiting..." << std::endl; + if( !Amesos2::query("SuperLU") ){ + std::cerr << "SuperLU not enabled. Exiting..." << std::endl; return EXIT_SUCCESS; // Otherwise CTest will pick it up as // failure, which it isn't really } @@ -88,115 +66,82 @@ int main(int argc, char *argv[]) { const size_t numVectors = 1; - RCP A; - RCP map; - if (nx > 0) { - #if defined(HAVE_AMESOS2_XPETRA) && defined(HAVE_AMESOS2_GALERI) - typedef Galeri::Xpetra::Problem Galeri_t; - - Teuchos::ParameterList galeriList; - Tpetra::global_size_t nGlobalElements = nx * nx * nx; - galeriList.set("nx", nx); - galeriList.set("ny", nx); - galeriList.set("nz", nx); - if (GaleriName3D == "Elasticity3D") { - GO mx = 1; - galeriList.set("mx", mx); - galeriList.set("my", mx); - galeriList.set("mz", mx); - nGlobalElements *= 3; - } - map = rcp(new MAP(nGlobalElements, 0, comm)); - RCP galeriProblem = - Galeri::Xpetra::BuildProblem - (GaleriName3D, map, galeriList); - A = galeriProblem->BuildMatrix(); - #else - std::cerr << "Galeri or Xpetra not enabled. Exiting..." << std::endl; - return EXIT_SUCCESS; // Otherwise CTest will pick it up as - #endif - } else { - // create a Map - global_size_t nrows = 6; - map = rcp( new MAP(nrows,0,comm) ); - - RCP A = rcp( new MAT(map,3) ); // max of three entries in a row - - /* - * We will solve a system with a known solution, for which we will be using - * the following matrix: - * - * [ [ 7, 0, -3, 0, -1, 0 ] - * [ 2, 8, 0, 0, 0, 0 ] - * [ 0, 0, 1, 0, 0, 0 ] - * [ -3, 0, 0, 5, 0, 0 ] - * [ 0, -1, 0, 0, 4, 0 ] - * [ 0, 0, 0, -2, 0, 6 ] ] - * - */ - // Construct matrix - if( myRank == 0 ){ - A->insertGlobalValues(0,tuple(0,2,4),tuple(7,-3,-1)); - A->insertGlobalValues(1,tuple(0,1),tuple(2,8)); - A->insertGlobalValues(2,tuple(2),tuple(1)); - A->insertGlobalValues(3,tuple(0,3),tuple(-3,5)); - A->insertGlobalValues(4,tuple(1,4),tuple(-1,4)); - A->insertGlobalValues(5,tuple(3,5),tuple(-2,6)); - } - A->fillComplete(); + // create a Map + global_size_t nrows = 6; + RCP > map + = rcp( new Tpetra::Map(nrows,0,comm) ); + + RCP A = rcp( new MAT(map,3) ); // max of three entries in a row + + /* + * We will solve a system with a known solution, for which we will be using + * the following matrix: + * + * [ [ 7, 0, -3, 0, -1, 0 ] + * [ 2, 8, 0, 0, 0, 0 ] + * [ 0, 0, 1, 0, 0, 0 ] + * [ -3, 0, 0, 5, 0, 0 ] + * [ 0, -1, 0, 0, 4, 0 ] + * [ 0, 0, 0, -2, 0, 6 ] ] + * + */ + // Construct matrix + if( myRank == 0 ){ + A->insertGlobalValues(0,tuple(0,2,4),tuple(7,-3,-1)); + A->insertGlobalValues(1,tuple(0,1),tuple(2,8)); + A->insertGlobalValues(2,tuple(2),tuple(1)); + A->insertGlobalValues(3,tuple(0,3),tuple(-3,5)); + A->insertGlobalValues(4,tuple(1,4),tuple(-1,4)); + A->insertGlobalValues(5,tuple(3,5),tuple(-2,6)); } + A->fillComplete(); - // Create X + // Create random X RCP X = rcp(new MV(map,numVectors)); - X->putScalar(1); - - /* Create B */ - RCP B = rcp(new MV(map,numVectors)); - A->apply(*X, *B); X->randomize(); - // Create solver interface with Amesos2 factory method - RCP > solver = Amesos2::create(solvername, A, X, B); - if (xml_filename != "") { - Teuchos::ParameterList test_params = - Teuchos::ParameterXMLFileReader(xml_filename).getParameters(); - Teuchos::ParameterList& amesos2_params = test_params.sublist("Amesos2"); - solver->setParameters( Teuchos::rcpFromRef(amesos2_params) ); + /* Create B + * + * Use RHS: + * + * [[-7] + * [18] + * [ 3] + * [17] + * [18] + * [28]] + */ + RCP B = rcp(new MV(map,numVectors)); + int data[6] = {-7,18,3,17,18,28}; + for( int i = 0; i < 6; ++i ){ + if( B->getMap()->isNodeGlobalElement(i) ){ + B->replaceGlobalValue(i,0,data[i]); + } } - RCP stackedTimer; - stackedTimer = rcp(new Teuchos::StackedTimer("Amesos2 SimpleSolve-File")); - Teuchos::TimeMonitor::setStackedTimer(stackedTimer); - { - solver->symbolicFactorization().numericFactorization().solve(); - } - stackedTimer->stopBaseTimer(); - { - Teuchos::StackedTimer::OutputOptions options; - options.num_histogram=3; - options.print_warnings = false; - options.output_histogram = true; - options.output_fraction=true; - options.output_minmax = true; - stackedTimer->report(std::cout, comm, options); - } - if (verbose) { - /* Print the solution - * - * Should be: - * - * [[1] - * [1] - * [1] - * [1] - * [1] - * [1]] - */ - RCP fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(out)); - *fos << "Solution :" << std::endl; - X->describe(*fos,Teuchos::VERB_EXTREME); - *fos << std::endl; - } + // Create solver interface to Superlu with Amesos2 factory method + RCP > solver = Amesos2::create("Superlu", A, X, B); + + solver->symbolicFactorization().numericFactorization().solve(); + + + /* Print the solution + * + * Should be: + * + * [[1] + * [2] + * [3] + * [4] + * [5] + * [6]] + */ + RCP fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(out)); + + *fos << "Solution :" << std::endl; + X->describe(*fos,Teuchos::VERB_EXTREME); + *fos << std::endl; + // We are done. return 0; } diff --git a/packages/amesos2/src/Amesos2_CssMKL_def.hpp b/packages/amesos2/src/Amesos2_CssMKL_def.hpp index 724b551d95a5..b0191f7c7921 100644 --- a/packages/amesos2/src/Amesos2_CssMKL_def.hpp +++ b/packages/amesos2/src/Amesos2_CssMKL_def.hpp @@ -264,7 +264,7 @@ namespace Amesos2 { } return( 0 ); - } +} template @@ -470,7 +470,7 @@ CssMKL::loadA_impl(EPhase current_phase) #if 1 // Only reinex GIDs css_rowmap_ = this->matrixA_->getRowMap(); // use original map to redistribute vectors in solve - Teuchos::RCP > contig_mat = this->matrixA_->reindex(css_contig_rowmap_, css_contig_colmap_, current_phase); + Teuchos::RCP > contig_mat = this->matrixA_->reindex(css_contig_rowmap_, css_contig_colmap_); #else // Redistribued matrixA into contiguous GIDs Teuchos::RCP > contig_mat = this->matrixA_->get(ptrInArg(*css_rowmap_)); diff --git a/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_decl.hpp index 59aa203dddc0..c75d3c42c035 100644 --- a/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_decl.hpp @@ -27,11 +27,6 @@ #ifdef HAVE_AMESOS2_EPETRAEXT #include #endif -#include -#include -#ifdef HAVE_MPI -# include -#endif #include "Amesos2_EpetraRowMatrix_AbstractMatrixAdapter_decl.hpp" #include "Amesos2_MatrixAdapter_decl.hpp" @@ -63,196 +58,22 @@ namespace Amesos2 { typedef Epetra_CrsMatrix matrix_t; private: typedef AbstractConcreteMatrixAdapter super_t; + Epetra_CrsMatrix> super_t; public: // 'import' superclass types - typedef super_t::scalar_t scalar_t; - typedef super_t::local_ordinal_t local_ordinal_t; - typedef super_t::global_ordinal_t global_ordinal_t; - typedef super_t::node_t node_t; - typedef super_t::global_size_t global_size_t; - - typedef Tpetra::Map map_t; - typedef ConcreteMatrixAdapter type; + typedef super_t::scalar_t scalar_t; + typedef super_t::local_ordinal_t local_ordinal_t; + typedef super_t::global_ordinal_t global_ordinal_t; + typedef super_t::node_t node_t; + typedef super_t::global_size_t global_size_t; + + typedef ConcreteMatrixAdapter type; ConcreteMatrixAdapter(RCP m); - // get & reindex - RCP > get_impl(const Teuchos::Ptr map, EDistribution distribution = ROOTED) const; - RCP > reindex_impl(Teuchos::RCP &contigRowMap, - Teuchos::RCP &contigColMap, - const EPhase current_phase) const; - - // gather - template - local_ordinal_t gather_impl(KV_S& nzvals, KV_GO& indices, KV_GS& pointers, - host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls, - host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t, - bool column_major, EPhase current_phase) const - { - local_ordinal_t ret = -1; - { - auto rowMap = this->getRowMap(); - auto colMap = this->getColMap(); -#ifdef HAVE_MPI - auto comm = rowMap->getComm(); - auto nRanks = comm->getSize(); - auto myRank = comm->getRank(); -#else - RCP> comm = Teuchos::createSerialComm(); - int nRanks = 1; - int myRank = 0; -#endif // HAVE_MPI - - int *lclRowptr = nullptr; - int *lclColind = nullptr; - double *lclNzvals = nullptr; - this->mat_->ExtractCrsDataPointers(lclRowptr, lclColind, lclNzvals); - - int nRows = this->mat_->NumGlobalRows(); - int myNRows = this->mat_->NumMyRows(); - int myNnz = this->mat_->NumMyNonzeros(); - if(current_phase == PREORDERING || current_phase == SYMBFACT) { - // workspace for column major - KV_GS pointers_t; - KV_GO indices_t; - - // gether rowptr - Kokkos::resize(recvCounts, nRanks); - Kokkos::resize(recvDispls, nRanks+1); - { -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::RCP< Teuchos::Time > gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(rowptr)"); - Teuchos::TimeMonitor GatherTimer(*gatherTime); -#endif - Teuchos::gather (&myNRows, 1, recvCounts.data(), 1, 0, *comm); - if (myRank == 0) { - Kokkos::resize(recvDispls, nRanks+1); - recvDispls(0) = 0; - for (int p = 1; p <= nRanks; p++) { - recvDispls(p) = recvDispls(p-1) + recvCounts(p-1); - } - if (recvDispls(nRanks) != nRows) { - TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Amesos2_TpetraCrsMatrix_MatrixAdapter::gather_impl : mismatch between gathered(local nrows) and global nrows."); - } - } else { - for (int p = 0; p < nRanks; p++) { - recvCounts(p) = 0; - recvDispls(p) = 0; - } - recvDispls(nRanks) = 0; - } - if (myRank == 0 && column_major) { - Kokkos::resize(pointers_t, nRows+1); - } else if (myRank != 0) { - Kokkos::resize(pointers_t, 2); - } - int *pointers_ = (myRank != 0 || column_major ? pointers_t.data() : pointers.data()); - Teuchos::gatherv (&lclRowptr[1], myNRows, &pointers_[1], - recvCounts.data(), recvDispls.data(), - 0, *comm); - if (myRank == 0) { - // shift to global pointers - pointers_[0] = 0; - recvCounts(0) = pointers_[recvDispls(1)]; - local_ordinal_t displs = recvCounts(0); - for (int p = 1; p < nRanks; p++) { - // save recvCounts from pth MPI - recvCounts(p) = pointers_[recvDispls(p+1)]; - // shift pointers for pth MPI to global - for (int i = 1+recvDispls(p); i <= recvDispls(p+1); i++) { - pointers_[i] += displs; - } - displs += recvCounts(p); - } - ret = pointers_[nRows]; - } - } - { -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::RCP< Teuchos::Time > gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(colind)"); - Teuchos::TimeMonitor GatherTimer(*gatherTime); -#endif - // gather colinds & nzvals - if (myRank == 0) { - recvDispls(0) = 0; - for (int p = 0; p < nRanks; p++) { - recvDispls(p+1) = recvDispls(p) + recvCounts(p); - } - } - // -- convert to global colids - KV_GO lclColind_ ("localColind_", myNnz); - for (int i = 0; i < int(myNnz); i++) lclColind_(i) = colMap->getGlobalElement((lclColind[i])); - if (column_major) { - Kokkos::resize(indices_t, indices.extent(0)); - Teuchos::gatherv (lclColind_.data(), myNnz, indices_t.data(), - recvCounts.data(), recvDispls.data(), - 0, *comm); - } else { - Teuchos::gatherv (lclColind_.data(), myNnz, indices.data(), - recvCounts.data(), recvDispls.data(), - 0, *comm); - } - } - if (myRank == 0 && column_major) { -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::RCP< Teuchos::Time > gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(transpose index)"); - Teuchos::TimeMonitor GatherTimer(*gatherTime); -#endif - // Map to transpose - Kokkos::resize(transpose_map, ret); - // Transopose to convert to CSC - for (int i=0; i<=nRows; i++) { - pointers(i) = 0; - } - for (int k=0; k gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(nzvals)"); - Teuchos::TimeMonitor GatherTimer(*gatherTime); -#endif - if (column_major) { - Kokkos::resize(nzvals_t, nzvals.extent(0)); - Teuchos::gatherv (lclNzvals, myNnz, nzvals_t.data(), - recvCounts.data(), recvDispls.data(), - 0, *comm); - } else { - Teuchos::gatherv (lclNzvals, myNnz, nzvals.data(), - recvCounts.data(), recvDispls.data(), - 0, *comm); - } - } - if (myRank == 0 && column_major) { - // Insert Numerical values to transopose matrix - ret = pointers(nRows); - for (int k=0; k(*comm, 0, 1, &ret); - } - return ret; - } + RCP > get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; + RCP > reindex_impl(Teuchos::RCP> &contigRowMap, + Teuchos::RCP> &contigColMap) const; //! Print a description of this adapter to the given output stream void @@ -268,4 +89,4 @@ namespace Amesos2 { } // end namespace Amesos2 -#endif // AMESOS2_EPETRACRSMATRIX_MATRIXADAPTER_DECL_HPP +#endif // AMESOS2_EPETRACRSMATRIX_MATRIXADAPTER_DECL_HPP diff --git a/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_def.hpp index f49201c0824d..d256ab9864d5 100644 --- a/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_def.hpp @@ -26,7 +26,7 @@ namespace Amesos2 { {} Teuchos::RCP > - ConcreteMatrixAdapter::get_impl(const Teuchos::Ptr map, EDistribution distribution) const + ConcreteMatrixAdapter::get_impl(const Teuchos::Ptr > map, EDistribution distribution) const { using Teuchos::as; using Teuchos::rcp; @@ -78,9 +78,8 @@ namespace Amesos2 { } Teuchos::RCP > - ConcreteMatrixAdapter::reindex_impl(Teuchos::RCP &contigRowMap, - Teuchos::RCP &contigColMap, - const EPhase /*current_phase*/) const + ConcreteMatrixAdapter::reindex_impl(Teuchos::RCP > &contigRowMap, + Teuchos::RCP > &contigColMap) const { #if defined(HAVE_AMESOS2_EPETRAEXT) using Teuchos::RCP; @@ -109,7 +108,6 @@ namespace Amesos2 { #endif } - void ConcreteMatrixAdapter::describe (Teuchos::FancyOStream& os, const Teuchos::EVerbosityLevel verbLevel) const diff --git a/packages/amesos2/src/Amesos2_EpetraRowMatrix_AbstractMatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_EpetraRowMatrix_AbstractMatrixAdapter_decl.hpp index df78798afa75..d97d060cc53f 100644 --- a/packages/amesos2/src/Amesos2_EpetraRowMatrix_AbstractMatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_EpetraRowMatrix_AbstractMatrixAdapter_decl.hpp @@ -57,14 +57,13 @@ namespace Amesos2 { public: // Give our base class access to our private implementation functions friend class MatrixAdapter< DerivedMat >; - + typedef MatrixTraits::scalar_t scalar_t; typedef MatrixTraits::local_ordinal_t local_ordinal_t; typedef MatrixTraits::global_ordinal_t global_ordinal_t; typedef MatrixTraits::node_t node_t; typedef DerivedMat matrix_t; - typedef Tpetra::Map map_t; private: typedef MatrixAdapter< DerivedMat > super_t; @@ -81,18 +80,18 @@ namespace Amesos2 { AbstractConcreteMatrixAdapter(RCP m); - public: // these functions should technically be private + public: // these functions should technically be private // implementation functions void getGlobalRowCopy_impl(global_ordinal_t row, - const Teuchos::ArrayView& indices, - const Teuchos::ArrayView& vals, - size_t& nnz) const; + const Teuchos::ArrayView& indices, + const Teuchos::ArrayView& vals, + size_t& nnz) const; void getGlobalColCopy_impl(global_ordinal_t col, - const Teuchos::ArrayView& indices, - const Teuchos::ArrayView& vals, - size_t& nnz) const; + const Teuchos::ArrayView& indices, + const Teuchos::ArrayView& vals, + size_t& nnz) const; template void getGlobalRowCopy_kokkos_view_impl(global_ordinal_t row, @@ -122,13 +121,19 @@ namespace Amesos2 { // Brunt of the work is put on the implementation for converting // their maps to a Tpetra::Map - const RCP + const RCP > getMap_impl() const; - const RCP + const RCP > getRowMap_impl() const; - const RCP + const RCP > getColMap_impl() const; const RCP > getComm_impl() const; @@ -140,15 +145,7 @@ namespace Amesos2 { // Because instantiation of the subclasses could be wildly // different (cf subclasses of Epetra_RowMatrix), this method // hands off implementation to the adapter for the subclass - RCP get_impl(const Teuchos::Ptr map, EDistribution distribution = ROOTED) const; - RCP reindex_impl(Teuchos::RCP &contigRowMap, - Teuchos::RCP &contigColMap, - const EPhase current_phase) const; - template - local_ordinal_t gather_impl(KV_S& nzvals, KV_GO& indices, KV_GS& pointers, - host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls, - host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t, - bool column_major, EPhase current_phase) const; + RCP get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; using spmtx_ptr_t = typename MatrixTraits::sparse_ptr_type; using spmtx_idx_t = typename MatrixTraits::sparse_idx_type; @@ -185,4 +182,4 @@ namespace Amesos2 { } // end namespace Amesos2 -#endif // AMESOS2_EPETRAROWMATRIX_ABSTRACTMATRIXADAPTER_DECL_HPP +#endif // AMESOS2_EPETRAROWMATRIX_ABSTRACTMATRIXADAPTER_DECL_HPP diff --git a/packages/amesos2/src/Amesos2_EpetraRowMatrix_AbstractMatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_EpetraRowMatrix_AbstractMatrixAdapter_def.hpp index 10d7c3ff0dd6..34a35f2eb4ed 100644 --- a/packages/amesos2/src/Amesos2_EpetraRowMatrix_AbstractMatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_EpetraRowMatrix_AbstractMatrixAdapter_def.hpp @@ -335,39 +335,6 @@ namespace Amesos2 { #endif } - template - RCP > - AbstractConcreteMatrixAdapter::reindex_impl(Teuchos::RCP &contigRowMap, - Teuchos::RCP &contigColMap, - const EPhase current_phase) const - { - // Delegate implementation to subclass -#ifdef __CUDACC__ - // NVCC doesn't seem to like the static_cast, even though it is valid - return dynamic_cast*>(this)->reindex_impl(contigRowMap, contigColMap, current_phase); -#else - return static_cast*>(this)->reindex_impl(contigRowMap, contigColMap, current_phase); -#endif - } - - template - template - typename AbstractConcreteMatrixAdapter::local_ordinal_t - AbstractConcreteMatrixAdapter::gather_impl(KV_S& nzvals, KV_GO& indices, KV_GS& pointers, - host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls, - host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t, - bool column_major, EPhase current_phase) const - { -#ifdef __CUDACC__ - // NVCC doesn't seem to like the static_cast, even though it is valid - return dynamic_cast*>(this)->gather_impl(nzvals, indices, pointers, recvCounts, recvDispls, transpose_map, nzvals_t, - column_major, current_phase); -#else - return static_cast*>(this)->gather_impl(nzvals, indices, pointers, recvCounts, recvDispls, transpose_map, nzvals_t, - column_major, current_phase); -#endif - } - template typename AbstractConcreteMatrixAdapter ::spmtx_ptr_t diff --git a/packages/amesos2/src/Amesos2_KLU2_def.hpp b/packages/amesos2/src/Amesos2_KLU2_def.hpp index 1244a262230f..b786c324b393 100644 --- a/packages/amesos2/src/Amesos2_KLU2_def.hpp +++ b/packages/amesos2/src/Amesos2_KLU2_def.hpp @@ -432,9 +432,6 @@ bool KLU2::loadA_impl(EPhase current_phase) { using Teuchos::as; -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_); -#endif if(current_phase == SOLVE)return(false); @@ -443,53 +440,40 @@ KLU2::loadA_impl(EPhase current_phase) } else { - // Only the root image needs storage allocated - if( this->root_ ) { - if (host_nzvals_view_.extent(0) != this->globalNumNonZeros_) - Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_); - if (host_rows_view_.extent(0) != this->globalNumNonZeros_) - Kokkos::resize(host_rows_view_, this->globalNumNonZeros_); - if (host_col_ptr_view_.extent(0) != (this->globalNumRows_ + 1)) - Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1); - } - local_ordinal_type nnz_ret = 0; - bool gather_supported = (this->matrixA_->getComm()->getSize() > 1 && (std::is_same::value || std::is_same::value)); - { #ifdef HAVE_AMESOS2_TIMERS - Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ ); + Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_); #endif - if (gather_supported) { - bool column_major = true; - if (!is_contiguous_) { - auto contig_mat = this->matrixA_->reindex(this->contig_rowmap_, this->contig_colmap_, current_phase); - nnz_ret = contig_mat->gather(host_nzvals_view_, host_rows_view_, host_col_ptr_view_, this->recvCounts, this->recvDispls, this->transpose_map, this->nzvals_t, - column_major, current_phase); - } else { - nnz_ret = this->matrixA_->gather(host_nzvals_view_, host_rows_view_, host_col_ptr_view_, this->recvCounts, this->recvDispls, this->transpose_map, this->nzvals_t, - column_major, current_phase); - } - // gather failed (e.g., not implemened for KokkosCrsMatrix) - // in case of the failure, it falls back to the original "do_get" - if (nnz_ret < 0) gather_supported = false; - } - if (!gather_supported) { - Util::get_ccs_helper_kokkos_view< - MatrixAdapter,host_value_type_array,host_ordinal_type_array,host_ordinal_type_array> - ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_, nnz_ret, - (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED, - ARBITRARY, - this->rowIndexBase_); - } - } - // gather return the total nnz_ret on every MPI process - if (gather_supported || this->root_) { - TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as(this->globalNumNonZeros_), - std::runtime_error, - "Amesos2_KLU2 loadA_impl: Did not get the expected number of non-zero vals(" - +std::to_string(nnz_ret)+" vs "+std::to_string(this->globalNumNonZeros_)+")"); - } + // Only the root image needs storage allocated + if( this->root_ ) { + host_nzvals_view_ = host_value_type_array( + Kokkos::ViewAllocateWithoutInitializing("host_nzvals_view_"), this->globalNumNonZeros_); + host_rows_view_ = host_ordinal_type_array( + Kokkos::ViewAllocateWithoutInitializing("host_rows_view_"), this->globalNumNonZeros_); + host_col_ptr_view_ = host_ordinal_type_array( + Kokkos::ViewAllocateWithoutInitializing("host_col_ptr_view_"), this->globalNumRows_ + 1); + } + + local_ordinal_type nnz_ret = 0; + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ ); +#endif + + Util::get_ccs_helper_kokkos_view< + MatrixAdapter,host_value_type_array,host_ordinal_type_array,host_ordinal_type_array> + ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_, nnz_ret, + (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED, + ARBITRARY, + this->rowIndexBase_); + } + + if( this->root_ ) { + TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as(this->globalNumNonZeros_), + std::runtime_error, + "Did not get the expected number of non-zero vals"); + } } //end else single_process_optim_check = false diff --git a/packages/amesos2/src/Amesos2_KokkosCrsMatrix_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_KokkosCrsMatrix_MatrixAdapter_decl.hpp index 648dc640405d..80087aa7e9ea 100644 --- a/packages/amesos2/src/Amesos2_KokkosCrsMatrix_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_KokkosCrsMatrix_MatrixAdapter_decl.hpp @@ -51,7 +51,6 @@ namespace Amesos2 { typedef typename MatrixTraits::global_ordinal_t global_ordinal_t; typedef typename MatrixTraits::node_t node_t; typedef typename MatrixTraits::global_size_t global_size_t; - typedef Tpetra::Map map_t; typedef no_special_impl get_crs_spec; typedef no_special_impl get_ccs_spec; typedef ConcreteMatrixAdapter type; @@ -59,10 +58,7 @@ namespace Amesos2 { ConcreteMatrixAdapter(Teuchos::RCP m); - Teuchos::RCP > get_impl(const Teuchos::Ptr map, EDistribution distribution = ROOTED) const; - Teuchos::RCP > reindex_impl(Teuchos::RCP &contigRowMap, - Teuchos::RCP &contigColMap, - const EPhase current_phase) const; + Teuchos::RCP > get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; const Teuchos::RCP > getComm_impl() const; global_size_t getGlobalNumRows_impl() const; @@ -92,13 +88,19 @@ namespace Amesos2 { global_size_t getRowIndexBase() const; global_size_t getColumnIndexBase() const; - const Teuchos::RCP + const Teuchos::RCP > getMap_impl() const; - const Teuchos::RCP + const Teuchos::RCP > getRowMap_impl() const; - const Teuchos::RCP + const Teuchos::RCP > getColMap_impl() const; // implementation functions @@ -119,11 +121,6 @@ namespace Amesos2 { KV_S & vals, size_t& nnz) const; - template - LocalOrdinal gather_impl(KV_S& nzvals, KV_GO& indices, KV_GS& pointers, - host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls, - host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t, - bool column_major, EPhase current_phase) const; }; } // end namespace Amesos2 diff --git a/packages/amesos2/src/Amesos2_KokkosCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_KokkosCrsMatrix_MatrixAdapter_def.hpp index 17cc125eb5b1..7182d091ae54 100644 --- a/packages/amesos2/src/Amesos2_KokkosCrsMatrix_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_KokkosCrsMatrix_MatrixAdapter_def.hpp @@ -47,18 +47,18 @@ namespace Amesos2 { } template - const Teuchos::RCP>::local_ordinal_t, - typename MatrixTraits>::global_ordinal_t, - typename MatrixTraits>::node_t> > + const RCP>::local_ordinal_t, + typename MatrixTraits>::global_ordinal_t, + typename MatrixTraits>::node_t> > ConcreteMatrixAdapter>::getRowMap_impl() const { return Teuchos::null; // not going to use this right now - serial } template - const Teuchos::RCP>::local_ordinal_t, - typename MatrixTraits>::global_ordinal_t, - typename MatrixTraits>::node_t> > + const RCP>::local_ordinal_t, + typename MatrixTraits>::global_ordinal_t, + typename MatrixTraits>::node_t> > ConcreteMatrixAdapter>::getColMap_impl() const { return Teuchos::null; // not going to use this right now - serial @@ -104,9 +104,9 @@ namespace Amesos2 { template const Teuchos::RCP>::local_ordinal_t, - typename MatrixTraits>::global_ordinal_t, - typename MatrixTraits>::node_t> > - ConcreteMatrixAdapter< + typename MatrixTraits>::global_ordinal_t, + typename MatrixTraits>::node_t> > + ConcreteMatrixAdapter< KokkosSparse::CrsMatrix>::getMap_impl() const { return( Teuchos::null ); @@ -176,29 +176,6 @@ namespace Amesos2 { "Please contact the Amesos2 developers." ); } - template - Teuchos::RCP>> - ConcreteMatrixAdapter< - KokkosSparse::CrsMatrix - >::reindex_impl(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap, const EPhase current_phase) const - { - TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "KokkosCrsMatrixAdapter has not implemented reindex_impl."); - return RCP (this); - } - - template - template - LocalOrdinal - ConcreteMatrixAdapter< - KokkosSparse::CrsMatrix - >::gather_impl(KV_S& nzvals, KV_GO& indices, KV_GS& pointers, - host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls, - host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t, - bool column_major, EPhase current_phase) const - { - //TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "KokkosCrsMatrixAdapter has not been implemented gather_impl."); - return -1; - } } // end namespace Amesos2 #endif // AMESOS2_KOKKOS_CRSMATRIX_MATRIXADAPTER_DEF_HPP diff --git a/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp index a2fe7608da69..812e6ac7321e 100644 --- a/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp @@ -219,14 +219,7 @@ namespace Amesos2 { /// Reindex the GIDs such that they are contiguous without gaps (0, .., n-1) /// This is called in loadA for the matrix with (DISTRIBUTED_NO_OVERLAP && !is_contiguous_) - Teuchos::RCP reindex(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap, const EPhase current_phase) const; - - /// Gather matrix to MPI-0 - template - local_ordinal_t gather(KV_S& nzvals, KV_GO& indices, KV_GS& pointers, - host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls, - host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t, - bool column_major, EPhase current_phase) const; + Teuchos::RCP reindex(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap) const; /// Returns a short description of this Solver std::string description() const; diff --git a/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp index 987da75cfec6..14fa5dd3305a 100644 --- a/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp @@ -516,22 +516,11 @@ namespace Amesos2 { template < class Matrix > Teuchos::RCP > - MatrixAdapter::reindex(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap, const EPhase current_phase) const + MatrixAdapter::reindex(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap) const { - return static_cast(this)->reindex_impl(contigRowMap, contigColMap, current_phase); + return static_cast(this)->reindex_impl(contigRowMap, contigColMap); } - template < class Matrix > - template - typename MatrixAdapter::local_ordinal_t - MatrixAdapter::gather(KV_S& nzvals, KV_GO& indices, KV_GS& pointers, - host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls, - host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t, - bool column_major, EPhase current_phase) const - { - return static_cast(this)->gather_impl(nzvals, indices, pointers, recvCounts, recvDispls, transpose_map, nzvals_t, - column_major, current_phase); - } template Teuchos::RCP > diff --git a/packages/amesos2/src/Amesos2_ShyLUBasker_decl.hpp b/packages/amesos2/src/Amesos2_ShyLUBasker_decl.hpp index c2f33b243e58..f0bd89f7fef2 100644 --- a/packages/amesos2/src/Amesos2_ShyLUBasker_decl.hpp +++ b/packages/amesos2/src/Amesos2_ShyLUBasker_decl.hpp @@ -182,7 +182,8 @@ class ShyLUBasker : public SolverCore mutable host_solve_array_t bValues_; int ldb_; - /*Handle for ShyLUBasker object*/ + /*Handle for ShyLUBasker object*/ + #if defined( HAVE_AMESOS2_KOKKOS ) && defined( KOKKOS_ENABLE_OPENMP ) /* typedef typename node_type::device_type kokkos_device; diff --git a/packages/amesos2/src/Amesos2_ShyLUBasker_def.hpp b/packages/amesos2/src/Amesos2_ShyLUBasker_def.hpp index 9b2a64cee1b9..6a7195f6b412 100644 --- a/packages/amesos2/src/Amesos2_ShyLUBasker_def.hpp +++ b/packages/amesos2/src/Amesos2_ShyLUBasker_def.hpp @@ -306,35 +306,31 @@ ShyLUBasker::solve_impl( const bool ShyluBaskerTransposeRequest = this->control_.useTranspose_; const bool initialize_data = true; const bool do_not_initialize_data = false; - { -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_); - Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_); -#endif - if ( single_proc_optimization() && nrhs == 1 ) { - // no msp creation - Util::get_1d_copy_helper_kokkos_view, - host_solve_array_t>::do_get(initialize_data, B, bValues_, as(ld_rhs)); + if ( single_proc_optimization() && nrhs == 1 ) { - Util::get_1d_copy_helper_kokkos_view, - host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as(ld_rhs)); + // no msp creation + Util::get_1d_copy_helper_kokkos_view, + host_solve_array_t>::do_get(initialize_data, B, bValues_, as(ld_rhs)); - } // end if ( single_proc_optimization() && nrhs == 1 ) - else { - Util::get_1d_copy_helper_kokkos_view, - host_solve_array_t>::do_get(initialize_data, B, bValues_, - as(ld_rhs), - (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED, - this->rowIndexBase_); + Util::get_1d_copy_helper_kokkos_view, + host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as(ld_rhs)); - // See Amesos2_Tacho_def.hpp for notes on why we 'get' x here. - Util::get_1d_copy_helper_kokkos_view, - host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, - as(ld_rhs), - (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED, - this->rowIndexBase_); - } + } // end if ( single_proc_optimization() && nrhs == 1 ) + else { + + Util::get_1d_copy_helper_kokkos_view, + host_solve_array_t>::do_get(initialize_data, B, bValues_, + as(ld_rhs), + (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED, + this->rowIndexBase_); + + // See Amesos2_Tacho_def.hpp for notes on why we 'get' x here. + Util::get_1d_copy_helper_kokkos_view, + host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, + as(ld_rhs), + (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED, + this->rowIndexBase_); } if ( this->root_ ) { // do solve @@ -345,6 +341,7 @@ ShyLUBasker::solve_impl( else ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues, true); } + /* All processes should have the same error code */ Teuchos::broadcast(*(this->getComm()), 0, &ierr); @@ -354,16 +351,12 @@ ShyLUBasker::solve_impl( TEUCHOS_TEST_FOR_EXCEPTION( ierr == -1, std::runtime_error, "Could not alloc needed working memory for solve" ); - { -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_); - Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_); -#endif - Util::put_1d_data_helper_kokkos_view< - MultiVecAdapter,host_solve_array_t>::do_put(X, xValues_, - as(ld_rhs), - (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED); - } + + Util::put_1d_data_helper_kokkos_view< + MultiVecAdapter,host_solve_array_t>::do_put(X, xValues_, + as(ld_rhs), + (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED); + return(ierr); } @@ -588,42 +581,25 @@ ShyLUBasker::loadA_impl(EPhase current_phase) } local_ordinal_type nnz_ret = 0; - bool gather_supported = (this->matrixA_->getComm()->getSize() > 1 && (std::is_same::value || std::is_same::value)); { #ifdef HAVE_AMESOS2_TIMERS Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ ); #endif - if (gather_supported) { - bool column_major = true; - if (!is_contiguous_) { - auto contig_mat = this->matrixA_->reindex(this->contig_rowmap_, this->contig_colmap_, current_phase); - nnz_ret = contig_mat->gather(nzvals_view_, rowind_view_, colptr_view_, this->recvCounts, this->recvDispls, this->transpose_map, this->nzvals_t, - column_major, current_phase); - } else { - nnz_ret = this->matrixA_->gather(nzvals_view_, rowind_view_, colptr_view_, this->recvCounts, this->recvDispls, this->transpose_map, this->nzvals_t, - column_major, current_phase); - } - // gather failed (e.g., not implemened for KokkosCrsMatrix) - // in case of the failure, it falls back to the original "do_get" - if (nnz_ret < 0) gather_supported = false; - } - if (!gather_supported) { - Util::get_ccs_helper_kokkos_view< - MatrixAdapter, host_value_type_array, host_ordinal_type_array, host_ordinal_type_array> - ::do_get(this->matrixA_.ptr(), nzvals_view_, rowind_view_, colptr_view_, nnz_ret, - (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED, - ARBITRARY, - this->rowIndexBase_); // copies from matrixA_ to ShyLUBasker ConcreteSolver cp, ri, nzval members - } + + Util::get_ccs_helper_kokkos_view< + MatrixAdapter, host_value_type_array, host_ordinal_type_array, host_ordinal_type_array> + ::do_get(this->matrixA_.ptr(), nzvals_view_, rowind_view_, colptr_view_, nnz_ret, + (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED, + ARBITRARY, + this->rowIndexBase_); // copies from matrixA_ to ShyLUBasker ConcreteSolver cp, ri, nzval members } - // gather return the total nnz_ret on every MPI process - if (gather_supported || this->root_) { + if( this->root_ ){ TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as(this->globalNumNonZeros_), std::runtime_error, - "Amesos2_ShyLUBasker loadA_impl: Did not get the expected number of non-zero vals(" - +std::to_string(nnz_ret)+" vs "+std::to_string(this->globalNumNonZeros_)+")"); + "Amesos2_ShyLUBasker loadA_impl: Did not get the expected number of non-zero vals"); } + } //end alternative path return true; } diff --git a/packages/amesos2/src/Amesos2_SolverCore_decl.hpp b/packages/amesos2/src/Amesos2_SolverCore_decl.hpp index edc166f691b7..567d6df16537 100644 --- a/packages/amesos2/src/Amesos2_SolverCore_decl.hpp +++ b/packages/amesos2/src/Amesos2_SolverCore_decl.hpp @@ -474,21 +474,6 @@ namespace Amesos2 { /// Number of process images in the matrix communicator int nprocs_; - /// Contiguous GID map for reindex - typedef Tpetra::Map map_type; - Teuchos::RCP contig_rowmap_; - Teuchos::RCP contig_colmap_; - - /// Communication pattern for gather - typedef Kokkos::DefaultHostExecutionSpace HostExecSpaceType; - typedef Kokkos::View host_ordinal_type_array; - typedef Kokkos::View< scalar_type*, HostExecSpaceType> host_scalar_type_array; - host_ordinal_type_array recvCounts; - host_ordinal_type_array recvDispls; - host_ordinal_type_array transpose_map; - host_scalar_type_array nzvals_t; }; // End class Amesos2::SolverCore diff --git a/packages/amesos2/src/Amesos2_Tacho_def.hpp b/packages/amesos2/src/Amesos2_Tacho_def.hpp index 5503dac163dd..e4f1bd98566b 100644 --- a/packages/amesos2/src/Amesos2_Tacho_def.hpp +++ b/packages/amesos2/src/Amesos2_Tacho_def.hpp @@ -154,10 +154,10 @@ TachoSolver::solve_impl(const Teuchos::Ptrroot_ ) { // Do solve! - // Bump up the workspace size if needed -#ifdef HAVE_AMESOS2_TIMERS +#ifdef HAVE_AMESOS2_TIMER Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_); #endif + // Bump up the workspace size if needed if (workspace_.extent(0) < this->globalNumRows_ || workspace_.extent(1) < nrhs) { workspace_ = device_solve_array_t( Kokkos::ViewAllocateWithoutInitializing("t"), this->globalNumRows_, nrhs); diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp index 06b7139925e1..9718c89e1025 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp @@ -67,7 +67,7 @@ namespace Amesos2 { typedef Tpetra::CrsMatrix matrix_t; + Node> matrix_t; private: typedef AbstractConcreteMatrixAdapter< Tpetra::RowMatrix, matrix_t> super_t; public: // 'import' superclass types - typedef typename super_t::scalar_t scalar_t; - typedef typename super_t::local_ordinal_t local_ordinal_t; - typedef typename super_t::global_ordinal_t global_ordinal_t; - typedef typename super_t::node_t node_t; - typedef typename super_t::global_size_t global_size_t; + typedef typename super_t::scalar_t scalar_t; + typedef typename super_t::local_ordinal_t local_ordinal_t; + typedef typename super_t::global_ordinal_t global_ordinal_t; + typedef typename super_t::node_t node_t; + typedef typename super_t::global_size_t global_size_t; - typedef Tpetra::Map map_t; - typedef ConcreteMatrixAdapter type; - - typedef Kokkos::DefaultHostExecutionSpace HostExecSpaceType; + typedef ConcreteMatrixAdapter type; + typedef Tpetra::Map map_t; ConcreteMatrixAdapter(RCP m); RCP > get_impl(const Teuchos::Ptr map, EDistribution distribution = ROOTED) const; - RCP > reindex_impl(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap, const EPhase current_phase) const; - - template - LocalOrdinal gather_impl(KV_S& nzvals, KV_GO& indices, KV_GS& pointers, - host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls, - host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t, - bool column_major, EPhase current_phase) const; + RCP > reindex_impl(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap) const; //! Print a description of this adapter to the given output stream void diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp index 07dec268c48b..9bf58d9d82df 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp @@ -94,8 +94,7 @@ namespace Amesos2 { ConcreteMatrixAdapter< Tpetra::CrsMatrix >::reindex_impl(Teuchos::RCP &contigRowMap, - Teuchos::RCP &contigColMap, - const EPhase current_phase) const + Teuchos::RCP &contigColMap) const { typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type; auto rowMap = this->mat_->getRowMap(); @@ -108,18 +107,20 @@ namespace Amesos2 { Teuchos::TimeMonitor ReindexTimer(*reindexTimer); #endif - GlobalOrdinal indexBase = rowMap->getIndexBase(); - GlobalOrdinal numDoFs = this->mat_->getGlobalNumRows(); - LocalOrdinal nRows = this->mat_->getLocalNumRows(); - LocalOrdinal nCols = colMap->getLocalNumElements(); + global_ordinal_t indexBase = rowMap->getIndexBase(); + global_ordinal_t numDoFs = this->mat_->getGlobalNumRows(); + local_ordinal_t nRows = this->mat_->getLocalNumRows(); + local_ordinal_t nCols = colMap->getLocalNumElements(); RCP contiguous_t_mat; - // check when to recompute contigRowMap & contigColMap - if(current_phase == PREORDERING || current_phase == SYMBFACT) { + // if-checks when to recompute contigRowMap & contigColMap + // TODO: this is currentlly based on the global matrix dimesions + if (contigRowMap->getGlobalNumElements() != numDoFs || contigColMap->getGlobalNumElements() != numDoFs) { auto tmpMap = rcp (new contiguous_map_type (numDoFs, nRows, indexBase, rowComm)); global_ordinal_t frow = tmpMap->getMinGlobalIndex(); // Create new GID list for RowMap + typedef Kokkos::DefaultHostExecutionSpace HostExecSpaceType; Kokkos::View rowIndexList ("indexList", nRows); for (local_ordinal_t k = 0; k < nRows; k++) { rowIndexList(k) = frow+k; @@ -155,263 +156,9 @@ namespace Amesos2 { auto exporter = this->mat_->getCrsGraph()->getExporter(); contiguous_t_mat = rcp( new matrix_t(lclMatrix, contigRowMap, contigColMap, contigRowMap, contigColMap, importer,exporter)); } - - // Return new matrix adapter return rcp (new ConcreteMatrixAdapter (contiguous_t_mat)); } - - template - template - LocalOrdinal - ConcreteMatrixAdapter< - Tpetra::CrsMatrix - >::gather_impl(KV_S& nzvals, KV_GO& indices, KV_GS& pointers, - host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls, - host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t, - bool column_major, EPhase current_phase) const - { - LocalOrdinal ret = 0; - { -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::RCP< Teuchos::Time > gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather"); - Teuchos::TimeMonitor GatherTimer(*gatherTime); -#endif - auto rowMap = this->mat_->getRowMap(); - auto colMap = this->mat_->getColMap(); - auto comm = rowMap->getComm(); - auto nRanks = comm->getSize(); - auto myRank = comm->getRank(); - - global_ordinal_t nRows = this->mat_->getGlobalNumRows(); - auto lclMatrix = this->mat_->getLocalMatrixDevice(); - - // check when to recompute communication patterns - if(current_phase == PREORDERING || current_phase == SYMBFACT) { - // grab rowptr and colind on host - auto lclRowptr_d = lclMatrix.graph.row_map; - auto lclColind_d = lclMatrix.graph.entries; - auto lclRowptr = Kokkos::create_mirror_view(lclRowptr_d); - auto lclColind = Kokkos::create_mirror_view(lclColind_d); - Kokkos::deep_copy(lclRowptr, lclRowptr_d); - Kokkos::deep_copy(lclColind, lclColind_d); - - // map from global to local - host_ordinal_type_array perm_g2l; - // workspace to transpose - KV_GS pointers_t; - KV_GO indices_t; - // communication counts & displacements - LocalOrdinal myNRows = this->mat_->getLocalNumRows(); - Kokkos::resize(recvCounts, nRanks); - Kokkos::resize(recvDispls, nRanks+1); - bool need_to_perm = false; - { -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::RCP< Teuchos::Time > gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(rowptr)"); - Teuchos::TimeMonitor GatherTimer(*gatherTime); -#endif - Teuchos::gather (&myNRows, 1, recvCounts.data(), 1, 0, *comm); - if (myRank == 0) { - Kokkos::resize(recvDispls, nRanks+1); - recvDispls(0) = 0; - for (int p = 1; p <= nRanks; p++) { - recvDispls(p) = recvDispls(p-1) + recvCounts(p-1); - } - if (recvDispls(nRanks) != nRows) { - TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Amesos2_TpetraCrsMatrix_MatrixAdapter::gather_impl : mismatch between gathered(local nrows) and global nrows."); - } - } else { - for (int p = 0; p < nRanks; p++) { - recvCounts(p) = 0; - recvDispls(p) = 0; - } - recvDispls(nRanks) = 0; - } - // gether g2l perm - { - host_ordinal_type_array lclMap; - Kokkos::resize(lclMap, myNRows); - if (myRank == 0) { - Kokkos::resize(perm_g2l, nRows); - } else { - Kokkos::resize(perm_g2l, 1); - } - for (int i=0; i < myNRows; i++) { - lclMap(i) = rowMap->getGlobalElement(i); - } - Teuchos::gatherv (lclMap.data(), myNRows, perm_g2l.data(), - recvCounts.data(), recvDispls.data(), - 0, *comm); - if (myRank == 0) { - for (int i=0; i < nRows; i++) { - if (i != perm_g2l(i)) need_to_perm = true; - } - } - } - // gether rowptr - // - making sure same ordinal type - KV_GS lclRowptr_ ("localRowptr_", lclRowptr.extent(0)); - for (int i = 0; i < int(lclRowptr.extent(0)); i++) lclRowptr_(i) = lclRowptr(i); - if (myRank == 0 && (column_major || need_to_perm)) { - Kokkos::resize(pointers_t, nRows+1); - } else if (myRank != 0) { - Kokkos::resize(pointers_t, 2); - } - LocalOrdinal *pointers_ = ((myRank != 0 || (column_major || need_to_perm)) ? pointers_t.data() : pointers.data()); - Teuchos::gatherv (&lclRowptr_(1), myNRows, &pointers_[1], - recvCounts.data(), recvDispls.data(), - 0, *comm); - - if (myRank == 0) { - // shift to global pointers - pointers_[0] = 0; - recvCounts(0) = pointers_[recvDispls(1)]; - LocalOrdinal displs = recvCounts(0); - for (int p = 1; p < nRanks; p++) { - // skip "Empty" submatrix (no rows) - // recvCounts(p) is zero, while pointers_[recvDispls(p+1)] now contains nnz from p-1 - if (recvDispls(p+1) > recvDispls(p)) { - // save recvCounts from pth MPI - recvCounts(p) = pointers_[recvDispls(p+1)]; - // shift pointers for pth MPI to global - for (int i = 1+recvDispls(p); i <= recvDispls(p+1); i++) { - pointers_[i] += displs; - } - displs += recvCounts(p); - } - } - ret = pointers_[nRows]; - } - } - { -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::RCP< Teuchos::Time > gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(colind)"); - Teuchos::TimeMonitor GatherTimer(*gatherTime); -#endif - // gather colinds - if (myRank == 0) { - recvDispls(0) = 0; - for (int p = 0; p < nRanks; p++) { - recvDispls(p+1) = recvDispls(p) + recvCounts(p); - } - } - // -- convert to global colids - KV_GO lclColind_ ("localColind_", lclColind.extent(0)); - for (int i = 0; i < int(lclColind.extent(0)); i++) lclColind_(i) = colMap->getGlobalElement((lclColind(i))); - if (column_major || need_to_perm) { - Kokkos::resize(indices_t, indices.extent(0)); - Teuchos::gatherv (lclColind_.data(), lclColind_.extent(0), indices_t.data(), - recvCounts.data(), recvDispls.data(), - 0, *comm); - } else { - Teuchos::gatherv (lclColind_.data(), lclColind_.extent(0), indices.data(), - recvCounts.data(), recvDispls.data(), - 0, *comm); - } - } - if (myRank == 0) { -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::RCP< Teuchos::Time > gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(transpose index)"); - Teuchos::TimeMonitor GatherTimer(*gatherTime); -#endif - if (column_major) { - // Map to transpose - Kokkos::resize(transpose_map, ret); - // Transopose to convert to CSC - for (int i=0; i<=nRows; i++) { - pointers(i) = 0; - } - for (int k=0; k gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(nzvals)"); - Teuchos::TimeMonitor GatherTimer(*gatherTime); -#endif - // grab numerical values on host - auto lclNzvals_d = lclMatrix.values; - auto lclNzvals = Kokkos::create_mirror_view(lclNzvals_d);; - Kokkos::deep_copy(lclNzvals, lclNzvals_d); - - // gather nzvals - if (transpose_map.extent(0) > 0) { - Kokkos::resize(nzvals_t, nzvals.extent(0)); - Teuchos::gatherv (reinterpret_cast (lclNzvals.data()), lclNzvals.extent(0), - reinterpret_cast (nzvals_t.data()), recvCounts.data(), recvDispls.data(), - 0, *comm); - } else { - Teuchos::gatherv (reinterpret_cast (lclNzvals.data()), lclNzvals.extent(0), - reinterpret_cast (nzvals.data()), recvCounts.data(), recvDispls.data(), - 0, *comm); - } - } - if (myRank == 0) { - // Insert Numerical values to transopose matrix - ret = pointers(nRows); -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::RCP< Teuchos::Time > gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(transpose values)"); - Teuchos::TimeMonitor GatherTimer(*gatherTime); -#endif - if (transpose_map.extent(0) > 0) { - for (int k=0; k(*comm, 0, 1, &ret); - } - return ret; - } - - template get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; - RCP reindex_impl(Teuchos::RCP> &contigRowMap, Teuchos::RCP> &contigColMap, const EPhase current_phase) const; - - template - local_ordinal_t gather_impl(KV_S& nzvals, KV_GO& indices, KV_GS& pointers, - host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls, - host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t, - bool column_major, EPhase current_phase) const; + RCP reindex_impl(Teuchos::RCP> &contigRowMap, Teuchos::RCP> &contigColMap) const; template void getSparseRowPtr_kokkos_view(KV & view) const { diff --git a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp index a6d477940689..9b8a0ec16d68 100644 --- a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp @@ -334,35 +334,16 @@ namespace Amesos2 { RCP > AbstractConcreteMatrixAdapter< Tpetra::RowMatrix, DerivedMat - >::reindex_impl(Teuchos::RCP> &contigRowMap, Teuchos::RCP> &contigColMap, const EPhase current_phase) const + >::reindex_impl(Teuchos::RCP> &contigRowMap, Teuchos::RCP> &contigColMap) const { #ifdef __CUDACC__ // NVCC doesn't seem to like the static_cast, even though it is valid - return dynamic_cast*>(this)->reindex_impl(contigRowMap, contigColMap, current_phase); + return dynamic_cast*>(this)->reindex_impl(contigRowMap, contigColMap); #else - return static_cast*>(this)->reindex_impl(contigRowMap, contigColMap, current_phase); + return static_cast*>(this)->reindex_impl(contigRowMap, contigColMap); #endif } - template - template - LocalOrdinal - AbstractConcreteMatrixAdapter< - Tpetra::RowMatrix, DerivedMat - >::gather_impl(KV_S& nzvals, KV_GO& indices, KV_GS& pointers, - host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls, - host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t, - bool column_major, EPhase current_phase) const - { -#ifdef __CUDACC__ - // NVCC doesn't seem to like the static_cast, even though it is valid - return dynamic_cast*>(this)->gather_impl(nzvals, indices, pointers, recvCounts, recvDispls, transpose_map, nzvals_t, - column_major, current_phase); -#else - return static_cast*>(this)->gather_impl(nzvals, indices, pointers, recvCounts, recvDispls, transpose_map, nzvals_t, - column_major, current_phase); -#endif - } } // end namespace Amesos2 #endif // AMESOS2_TPETRAROWMATRIX_ABSTRACTMATRIXADAPTER_DEF_HPP diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_inc.hpp index 3f13e1e33d01..59c1163b212e 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_inc.hpp @@ -88,7 +88,7 @@ namespace BaskerNS init_value(thread_start, num_threads+1, (Int) BASKER_MAX_IDX); int nt = nfactor_domain_error(thread_start); - if(nt == BASKER_SUCCESS) + if((nt == BASKER_SUCCESS)) { FREE_INT_1DARRAY(thread_start); break; @@ -188,7 +188,7 @@ namespace BaskerNS (Int) BASKER_MAX_IDX); int nt = nfactor_sep_error(thread_start); //printf("AFTER SEP ERROR %d \n", nt); - if(nt == BASKER_SUCCESS) + if((nt == BASKER_SUCCESS)) { FREE_INT_1DARRAY(thread_start); break; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp index c4d352d349f4..e9cddba1b3bb 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp @@ -1161,7 +1161,7 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2) } return info_scotch; } else if(Options.verbose == BASKER_TRUE) { - printf( "\n part_scotch done (num_threads = %d,%lu)\n",num_threads,part_tree.leaf_nnz.extent(0) ); + printf( "\n part_scotch done (num_threads = %d,%d)\n",num_threads,part_tree.leaf_nnz.extent(0) ); //for (Int i = 0; i < num_threads; i++) printf( " nnz_leaf[%d] = %d\n",i,part_tree.leaf_nnz[i] ); printf( "\n" ); } nd_flag = BASKER_TRUE; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_order_scotch.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_order_scotch.hpp index 4fe7179cffd1..e30606385847 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_order_scotch.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_order_scotch.hpp @@ -1344,7 +1344,7 @@ namespace BaskerNS if(ws(m_tree_p) == 0) { //Not assigned yet - if(_tree(s_tree_p) == otree(m_tree_p)) + if((_tree(s_tree_p) == otree(m_tree_p))) { #ifdef BASKER_DEBUG_ORDER_SCOTCH printf("same case\n"); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp index af889c5b77b1..7a91f6c8a577 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp @@ -719,7 +719,7 @@ int Basker::sfactor() #endif BASKER_MATRIX *MV = &M; - if(Options.symmetric == BASKER_TRUE) + if((Options.symmetric == BASKER_TRUE)) { #ifdef BASKER_DEBUG_SFACTOR printf("symmetric\n"); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp index 0ea6467be927..784df704eb59 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp @@ -1352,7 +1352,7 @@ namespace BaskerNS double gperm_time = 0.0; Kokkos::Timer timer_gperm; #endif - if(Options.same_pattern == BASKER_TRUE) + if((Options.same_pattern == BASKER_TRUE)) { if(same_pattern_flag == BASKER_FALSE) { diff --git a/packages/teuchos/comm/src/Teuchos_CommHelpers.cpp b/packages/teuchos/comm/src/Teuchos_CommHelpers.cpp index bf4d8cf8d701..6172eb3d72b8 100644 --- a/packages/teuchos/comm/src/Teuchos_CommHelpers.cpp +++ b/packages/teuchos/comm/src/Teuchos_CommHelpers.cpp @@ -941,6 +941,7 @@ isend (const ArrayRCP >& sendBuffer, #endif // HAVE_TEUCHOS_COMPLEX #endif // if 0 + // Specialization for Ordinal=int and Packet=double. template<> void @@ -1009,20 +1010,6 @@ isend (const ArrayRCP& sendBuffer, return isendImpl (sendBuffer, destRank, tag, comm); } -template<> -void -gatherv (const double sendBuf[], - const int sendCount, - double recvBuf[], - const int recvCounts[], - const int displs[], - const int root, - const Comm& comm) -{ - gathervImpl (sendBuf, sendCount, recvBuf, recvCounts, displs, root, comm); -} - - // Specialization for Ordinal=int and Packet=float. template<> void @@ -1091,19 +1078,6 @@ isend (const ArrayRCP& sendBuffer, return isendImpl (sendBuffer, destRank, tag, comm); } -template<> -void -gatherv (const float sendBuf[], - const int sendCount, - float recvBuf[], - const int recvCounts[], - const int displs[], - const int root, - const Comm& comm) -{ - gathervImpl (sendBuf, sendCount, recvBuf, recvCounts, displs, root, comm); -} - // Specialization for Ordinal=int and Packet=long long. template<> diff --git a/packages/teuchos/comm/src/Teuchos_CommHelpers.hpp b/packages/teuchos/comm/src/Teuchos_CommHelpers.hpp index 06f217070518..136eef265080 100644 --- a/packages/teuchos/comm/src/Teuchos_CommHelpers.hpp +++ b/packages/teuchos/comm/src/Teuchos_CommHelpers.hpp @@ -1574,16 +1574,6 @@ isend (const ArrayRCP& sendBuffer, const int tag, const Comm& comm); -template<> -TEUCHOSCOMM_LIB_DLL_EXPORT void -gatherv (const double sendBuf[], - const int sendCount, - double recvBuf[], - const int recvCounts[], - const int displs[], - const int root, - const Comm& comm); - // Specialization for Ordinal=int and Packet=float. template<> TEUCHOSCOMM_LIB_DLL_EXPORT void @@ -1623,16 +1613,6 @@ isend (const ArrayRCP& sendBuffer, const int tag, const Comm& comm); -template<> -TEUCHOSCOMM_LIB_DLL_EXPORT void -gatherv (const float sendBuf[], - const int sendCount, - float recvBuf[], - const int recvCounts[], - const int displs[], - const int root, - const Comm& comm); - // Specialization for Ordinal=int and Packet=long long. template<> TEUCHOSCOMM_LIB_DLL_EXPORT void