From 0aee00546706c6574531ca56720fc332c2c32814 Mon Sep 17 00:00:00 2001 From: Ian Halim Date: Wed, 31 Jul 2024 18:16:24 -0600 Subject: [PATCH 01/20] Tpetra: TAFC Converted to use Kokkos Kokkos versions of doPosts(), doPostsAllToAll(), and doPostsNbrAllToAllV() added to Tpetra_Details_DistributorActor.hpp. Kokkos version of doPosts() added to Tpetra_Distributor.hpp. Tpetra_CrsMatrix_def.hpp edited to use these new methods. Some syncs have been removed as they are now superfluous. Signed-off-by: Ian Halim --- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 90 +-- .../src/Tpetra_Details_DistributorActor.hpp | 652 +++++++++++++++++- .../tpetra/core/src/Tpetra_Distributor.hpp | 89 ++- 3 files changed, 757 insertions(+), 74 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index f0eef6b3b32e..a88b5ca649ba 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -47,6 +47,7 @@ #include "KokkosBlas1_scal.hpp" #include "KokkosSparse_getDiagCopy.hpp" #include "KokkosSparse_spmv.hpp" +#include "Kokkos_StdAlgorithms.hpp" #include #include @@ -8301,24 +8302,16 @@ CrsMatrix:: << std::endl; std::cerr << os.str (); } - // Make sure that host has the latest version, since we're - // using the version on host. If host has the latest - // version, syncing to host does nothing. 
- destMat->numExportPacketsPerLID_.sync_host (); - Teuchos::ArrayView numExportPacketsPerLID = - getArrayViewFromDualView (destMat->numExportPacketsPerLID_); - destMat->numImportPacketsPerLID_.sync_host (); - Teuchos::ArrayView numImportPacketsPerLID = - getArrayViewFromDualView (destMat->numImportPacketsPerLID_); - + destMat->numExportPacketsPerLID_.sync_device(); + auto numExportPacketsPerLID = destMat->numExportPacketsPerLID_.view_device(); + auto numImportPacketsPerLID = destMat->numImportPacketsPerLID_.view_device(); if (verbose) { std::ostringstream os; os << *verbosePrefix << "Calling 3-arg doReversePostsAndWaits" << std::endl; std::cerr << os.str (); } - Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1, - destMat->numImportPacketsPerLID_.view_host()); + Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1, numImportPacketsPerLID); if (verbose) { std::ostringstream os; os << *verbosePrefix << "Finished 3-arg doReversePostsAndWaits" @@ -8326,34 +8319,26 @@ CrsMatrix:: std::cerr << os.str (); } - size_t totalImportPackets = 0; - for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) { - totalImportPackets += numImportPacketsPerLID[i]; - } + size_t totalImportPackets = Kokkos::Experimental::reduce(typename Node::execution_space(), numImportPacketsPerLID); // Reallocation MUST go before setting the modified flag, // because it may clear out the flags. destMat->reallocImportsIfNeeded (totalImportPackets, verbose, verbosePrefix.get ()); destMat->imports_.modify_host (); - auto hostImports = destMat->imports_.view_host(); - // This is a legacy host pack/unpack path, so use the host - // version of exports_. 
- destMat->exports_.sync_host (); - auto hostExports = destMat->exports_.view_host(); + auto deviceImports = destMat->imports_.view_device(); + auto deviceExports = destMat->exports_.view_device(); if (verbose) { std::ostringstream os; - os << *verbosePrefix << "Calling 4-arg doReversePostsAndWaits" + os << *verbosePrefix << "Calling 4-arg doReversePostsAndWaitsKokkos" << std::endl; std::cerr << os.str (); } - Distor.doReversePostsAndWaits (hostExports, - numExportPacketsPerLID, - hostImports, - numImportPacketsPerLID); + destMat->imports_.sync_device(); + Distor.doReversePostsAndWaitsKokkos (deviceExports, numExportPacketsPerLID, deviceImports, numImportPacketsPerLID); if (verbose) { std::ostringstream os; - os << *verbosePrefix << "Finished 4-arg doReversePostsAndWaits" + os << *verbosePrefix << "Finished 4-arg doReversePostsAndWaitsKokkos" << std::endl; std::cerr << os.str (); } @@ -8396,23 +8381,16 @@ CrsMatrix:: << std::endl; std::cerr << os.str (); } - // Make sure that host has the latest version, since we're - // using the version on host. If host has the latest - // version, syncing to host does nothing. 
- destMat->numExportPacketsPerLID_.sync_host (); - Teuchos::ArrayView numExportPacketsPerLID = - getArrayViewFromDualView (destMat->numExportPacketsPerLID_); - destMat->numImportPacketsPerLID_.sync_host (); - Teuchos::ArrayView numImportPacketsPerLID = - getArrayViewFromDualView (destMat->numImportPacketsPerLID_); + destMat->numExportPacketsPerLID_.sync_device (); + auto numExportPacketsPerLID = destMat->numExportPacketsPerLID_.view_device(); + auto numImportPacketsPerLID = destMat->numImportPacketsPerLID_.view_device(); if (verbose) { std::ostringstream os; os << *verbosePrefix << "Calling 3-arg doPostsAndWaits" << std::endl; std::cerr << os.str (); } - Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1, - destMat->numImportPacketsPerLID_.view_host()); + Distor.doPostsAndWaits(numExportPacketsPerLID, 1, numImportPacketsPerLID); if (verbose) { std::ostringstream os; os << *verbosePrefix << "Finished 3-arg doPostsAndWaits" @@ -8420,34 +8398,26 @@ CrsMatrix:: std::cerr << os.str (); } - size_t totalImportPackets = 0; - for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) { - totalImportPackets += numImportPacketsPerLID[i]; - } + size_t totalImportPackets = Kokkos::Experimental::reduce(typename Node::execution_space(), numImportPacketsPerLID); // Reallocation MUST go before setting the modified flag, // because it may clear out the flags. destMat->reallocImportsIfNeeded (totalImportPackets, verbose, verbosePrefix.get ()); destMat->imports_.modify_host (); - auto hostImports = destMat->imports_.view_host(); - // This is a legacy host pack/unpack path, so use the host - // version of exports_. 
- destMat->exports_.sync_host (); - auto hostExports = destMat->exports_.view_host(); + auto deviceImports = destMat->imports_.view_device(); + auto deviceExports = destMat->exports_.view_device(); if (verbose) { std::ostringstream os; - os << *verbosePrefix << "Calling 4-arg doPostsAndWaits" + os << *verbosePrefix << "Calling 4-arg doPostsAndWaitsKokkos" << std::endl; std::cerr << os.str (); } - Distor.doPostsAndWaits (hostExports, - numExportPacketsPerLID, - hostImports, - numImportPacketsPerLID); + destMat->imports_.sync_device (); + Distor.doPostsAndWaitsKokkos (deviceExports, numExportPacketsPerLID, deviceImports, numImportPacketsPerLID); if (verbose) { std::ostringstream os; - os << *verbosePrefix << "Finished 4-arg doPostsAndWaits" + os << *verbosePrefix << "Finished 4-arg doPostsAndWaitsKokkos" << std::endl; std::cerr << os.str (); } @@ -8494,12 +8464,6 @@ CrsMatrix:: Teuchos::Array RemotePids; if (runOnHost) { Teuchos::Array TargetPids; - // Backwards compatibility measure. We'll use this again below. - - // TODO JHU Need to track down why numImportPacketsPerLID_ has not been corrently marked as modified on host (which it has been) - // TODO JHU somewhere above, e.g., call to Distor.doPostsAndWaits(). - // TODO JHU This only becomes apparent as we begin to convert TAFC to run on device. - destMat->numImportPacketsPerLID_.modify_host(); //FIXME # ifdef HAVE_TPETRA_MMM_TIMINGS RCP tmCopySPRdata = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC unpack-count-resize + copy same-perm-remote data")))); @@ -8691,14 +8655,6 @@ CrsMatrix:: } else { // run on device - - // Backwards compatibility measure. We'll use this again below. - - // TODO JHU Need to track down why numImportPacketsPerLID_ has not been corrently marked as modified on host (which it has been) - // TODO JHU somewhere above, e.g., call to Distor.doPostsAndWaits(). - // TODO JHU This only becomes apparent as we begin to convert TAFC to run on device. 
- destMat->numImportPacketsPerLID_.modify_host(); //FIXME - # ifdef HAVE_TPETRA_MMM_TIMINGS RCP tmCopySPRdata = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC unpack-count-resize + copy same-perm-remote data")))); # endif diff --git a/packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp b/packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp index 9b021ac53e9b..24e8351a6133 100644 --- a/packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp @@ -22,6 +22,7 @@ #include "Teuchos_Time.hpp" #include "Kokkos_TeuchosCommAdapters.hpp" +#include "Kokkos_StdAlgorithms.hpp" #ifdef HAVE_TPETRA_MPI #include "mpi.h" @@ -53,6 +54,13 @@ class DistributorActor { const ImpView &imports, const Teuchos::ArrayView& numImportPacketsPerLID); + template + void doPostsAndWaitsKokkos(const DistributorPlan& plan, + const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + template void doPosts(const DistributorPlan& plan, const ExpView& exports, @@ -66,6 +74,27 @@ class DistributorActor { const ImpView &imports, const Teuchos::ArrayView& numImportPacketsPerLID); + template + void doPostsKokkos(const DistributorPlan& plan, + const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + + template + void doPostsAllToAllKokkos( + const DistributorPlan &plan, const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + + template + void doPostsNbrAllToAllVKokkos( + const DistributorPlan &plan, const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + void doWaits(const DistributorPlan& plan); bool isReady() const; @@ -147,6 +176,22 
@@ void DistributorActor::doPostsAndWaits(const DistributorPlan& plan, doWaits(plan); } + +template +void DistributorActor::doPostsAndWaitsKokkos(const DistributorPlan& plan, + const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) +{ + static_assert(areKokkosViews, + "Data arrays for DistributorActor::doPostsAndWaitsKokkos must be Kokkos::Views"); + static_assert(areKokkosViews, + "Num packets arrays for DistributorActor::doPostsAndWaitsKokkos must be Kokkos::Views"); + doPostsKokkos(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + doWaits(plan); +} + template using HostAccessibility = Kokkos::SpaceAccessibility; @@ -760,6 +805,140 @@ void DistributorActor::doPostsAllToAll( << "\"."); } +template +void DistributorActor::doPostsAllToAllKokkos( + const DistributorPlan &plan, const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) { + TEUCHOS_TEST_FOR_EXCEPTION( + !plan.getIndicesTo().is_null(), std::runtime_error, + "Send Type=\"Alltoall\" only works for fast-path communication."); + + using size_type = Teuchos::Array::size_type; + using ExpExecSpace = typename ExpPacketsView::execution_space; + using ImpExecSpace = typename ImpPacketsView::execution_space; + + auto comm = plan.getComm(); + Kokkos::View sendcounts("sendcounts", comm->getSize()); + Kokkos::View sdispls("sdispls", comm->getSize()); + Kokkos::View recvcounts("recvcounts", comm->getSize()); + Kokkos::View rdispls("rdispls", comm->getSize()); + + auto sendcounts_d = Kokkos::create_mirror_view(ExpExecSpace(), sendcounts); + auto sdispls_d = Kokkos::create_mirror_view(ExpExecSpace(), sdispls); + auto recvcounts_d = Kokkos::create_mirror_view(ImpExecSpace(), recvcounts); + auto rdispls_d = Kokkos::create_mirror_view(ImpExecSpace(), rdispls); + + auto getStartsTo = 
Kokkos::Compat::getKokkosViewDeepCopy(plan.getStartsTo()); + auto getLengthsTo = Kokkos::Compat::getKokkosViewDeepCopy(plan.getLengthsTo()); + auto getProcsTo = Kokkos::Compat::getKokkosViewDeepCopy(plan.getProcsTo()); + + size_t curPKToffset = 0; + Kokkos::parallel_scan(Kokkos::RangePolicy(0, plan.getNumSends()), KOKKOS_LAMBDA(const size_t pp, size_t& offset, bool is_final) { + sdispls_d(getProcsTo(pp)) = offset; + size_t numPackets = 0; + for (size_t j = getStartsTo(pp); j < getStartsTo(pp) + getLengthsTo(pp); ++j) { + numPackets += numExportPacketsPerLID(j); + } + sendcounts_d(getProcsTo(pp)) = static_cast(numPackets); + offset += numPackets; + }, curPKToffset); + + int overflow; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, plan.getNumSends()), KOKKOS_LAMBDA(const size_t pp, int& index) { + if(sendcounts_d(getProcsTo(pp)) < 0) { + index = pp+1; + } + }, overflow); + + // numPackets is converted down to int, so make sure it can be represented + TEUCHOS_TEST_FOR_EXCEPTION(overflow, std::logic_error, + "Tpetra::Distributor::doPostsKokkos(4 args, Kokkos): " + "Send count for send " + << overflow-1 << " is too large " + "to be represented as int."); + + const size_type actualNumReceives = + Teuchos::as(plan.getNumReceives()) + + Teuchos::as(plan.hasSelfMessage() ? 
1 : 0); + + auto getLengthsFrom = Kokkos::Compat::getKokkosViewDeepCopy(plan.getLengthsFrom()); + auto getProcsFrom = Kokkos::Compat::getKokkosViewDeepCopy(plan.getProcsFrom()); + + Kokkos::View curLIDoffset("curLIDoffset", actualNumReceives); + Kokkos::parallel_scan(Kokkos::RangePolicy(0, actualNumReceives), KOKKOS_LAMBDA(const size_type i, size_t& offset, bool is_final) { + if(is_final) curLIDoffset(i) = offset; + offset += getLengthsFrom(i); + }); + + Kokkos::parallel_scan(Kokkos::RangePolicy(0, actualNumReceives), KOKKOS_LAMBDA(const size_type i, size_t& curBufferOffset, bool is_final) { + size_t totalPacketsFrom_i = 0; + for(size_t j = 0; j < getLengthsFrom(i); j++) { + totalPacketsFrom_i += numImportPacketsPerLID(curLIDoffset(i) + j); + } + + if(is_final) rdispls_d(getProcsFrom(i)) = curBufferOffset; + if(is_final) recvcounts_d(getProcsFrom(i)) = static_cast(totalPacketsFrom_i); + curBufferOffset += totalPacketsFrom_i; + }); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, actualNumReceives), KOKKOS_LAMBDA(const size_type i, int& index) { + if(recvcounts_d(getProcsFrom(i)) < 0) { + index = i+1; + } + }, overflow); + + // totalPacketsFrom_i is converted down to int, so make sure it can be + // represented + TEUCHOS_TEST_FOR_EXCEPTION(overflow, std::logic_error, + "Tpetra::Distributor::doPostsKokkos(4 args, Kokkos): " + "Recv count for receive " + << overflow-1 << " is too large " + "to be represented as int."); + + Kokkos::deep_copy(sendcounts, sendcounts_d); + Kokkos::deep_copy(sdispls, sdispls_d); + Kokkos::deep_copy(recvcounts, recvcounts_d); + Kokkos::deep_copy(rdispls, rdispls_d); + + Teuchos::RCP> mpiComm = + Teuchos::rcp_dynamic_cast>(comm); + Teuchos::RCP> rawComm = + mpiComm->getRawMpiComm(); + using T = typename ExpView::non_const_value_type; + MPI_Datatype rawType = ::Tpetra::Details::MpiTypeTraits::getType(T()); + +#if defined(HAVE_TPETRACORE_MPI_ADVANCE) + if (Details::DISTRIBUTOR_MPIADVANCE_ALLTOALL == plan.getSendType()) { + MPIX_Comm 
*mpixComm = *plan.getMPIXComm(); + TEUCHOS_TEST_FOR_EXCEPTION(!mpixComm, std::runtime_error, + "MPIX_Comm is null in doPostsAllToAll \"" + << __FILE__ << ":" << __LINE__); + + const int err = MPIX_Alltoallv( + exports.data(), sendcounts.data(), sdispls.data(), rawType, + imports.data(), recvcounts.data(), rdispls.data(), rawType, mpixComm); + + TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, + "MPIX_Alltoallv failed with error \"" + << Teuchos::mpiErrorCodeToString(err) + << "\"."); + + return; + } +#endif // HAVE_TPETRACORE_MPI_ADVANCE + + const int err = MPI_Alltoallv( + exports.data(), sendcounts.data(), sdispls.data(), rawType, + imports.data(), recvcounts.data(), rdispls.data(), rawType, (*rawComm)()); + + TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, + "MPI_Alltoallv failed with error \"" + << Teuchos::mpiErrorCodeToString(err) + << "\"."); +} + #if defined(HAVE_TPETRACORE_MPI_ADVANCE) template void DistributorActor::doPostsNbrAllToAllV( @@ -840,6 +1019,117 @@ void DistributorActor::doPostsNbrAllToAllV( << Teuchos::mpiErrorCodeToString(err) << "\"."); } + +template +void DistributorActor::doPostsNbrAllToAllVKokkos( + const DistributorPlan &plan, const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) { + TEUCHOS_TEST_FOR_EXCEPTION( + !plan.getIndicesTo().is_null(), std::runtime_error, + "Send Type=\"Alltoall\" only works for fast-path communication."); + + const Teuchos_Ordinal numSends = plan.getProcsTo().size(); + const Teuchos_Ordinal numRecvs = plan.getProcsFrom().size(); + + auto comm = plan.getComm(); + Kokkos::View sendcounts("sendcounts", comm->getSize()); + Kokkos::View sdispls("sdispls", comm->getSize()); + Kokkos::View recvcounts("recvcounts", comm->getSize()); + Kokkos::View rdispls("rdispls", comm->getSize()); + + auto sendcounts_d = Kokkos::create_mirror_view(ExpExecSpace(), sendcounts); + auto sdispls_d = 
Kokkos::create_mirror_view(ExpExecSpace(), sdispls); + auto recvcounts_d = Kokkos::create_mirror_view(ImpExecSpace(), recvcounts); + auto rdispls_d = Kokkos::create_mirror_view(ImpExecSpace(), rdispls); + + auto getStartsTo = Kokkos::Compat::getKokkosViewDeepCopy(plan.getStartsTo()); + auto getLengthsTo = Kokkos::Compat::getKokkosViewDeepCopy(plan.getLengthsTo()); + + Teuchos::RCP> mpiComm = + Teuchos::rcp_dynamic_cast>(comm); + Teuchos::RCP> rawComm = + mpiComm->getRawMpiComm(); + using T = typename ExpView::non_const_value_type; + using ExpExecSpace = typename ExpPacketsView::execution_space; + using ImpExecSpace = typename ImpPacketsView::execution_space; + MPI_Datatype rawType = ::Tpetra::Details::MpiTypeTraits::getType(T()); + + // unlike standard alltoall, entry `i` in sdispls and sendcounts + // refer to the ith participating rank, rather than rank i + Kokkos::parallel_scan(Kokkos::RangePolicy(0, numSends), KOKKOS_LAMBDA(const Teuchos_Ordinal pp, size_t& curPKToffset, bool is_final) { + sdispls_d(pp) = curPKToffset; + size_t numPackets = 0; + for (size_t j = getStartsTo(pp); j < getStartsTo(pp) + getLengthsTo(pp); ++j) { + numPackets += numExportPacketsPerLID(j); + } + sendcounts_d(pp) = static_cast(numPackets); + curPKToffset += numPackets; + }); + + int overflow; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, numSends), KOKKOS_LAMBDA(const Teuchos_Ordinal pp, int& index) { + if(sendcounts_d(pp) < 0) { + index = i+1; + } + }, overflow); + + // numPackets is converted down to int, so make sure it can be represented + TEUCHOS_TEST_FOR_EXCEPTION(overflow, std::logic_error, + "Tpetra::Distributor::doPostsKokkos(4 args, Kokkos): " + "Send count for send " + << overflow-1 << " is too large " + "to be represented as int."); + + auto getLengthsFrom = Kokkos::Compat::getKokkosViewDeepCopy(plan.getLengthsFrom()); + + Kokkos::View curLIDoffset("curLIDoffset", numRecvs); + Kokkos::parallel_scan(Kokkos::RangePolicy(0, numRecvs), KOKKOS_LAMBDA(const Teuchos_Ordinal i, 
size_t& offset, bool is_final) { + if(is_final) curLIDoffset(i) = offset; + offset += getLengthsFrom(i); + }); + + Kokkos::parallel_scan(Kokkos::RangePolicy(0, numRecvs), KOKKOS_LAMBDA(const Teuchos_Ordinal i, size_t& curBufferOffset, bool is_final) { + rdispls_d(i) = curBufferOffset; + size_t totalPacketsFrom_i = 0; + for(size_t j = 0; j < getLengthsFrom(i); j++) { + totalPacketsFrom_i += numImportPacketsPerLID(curLIDoffset(i) + j); + } + + recvcounts_d(i) = static_cast(totalPacketsFrom_i); + curBufferOffset += totalPacketsFrom_i; + }); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, numRecvs), KOKKOS_LAMBDA(const Teuchos_Ordinal i, int& index) { + if(recvcounts_d(pp) < 0) { + index = i+1; + } + }, overflow); + + // totalPacketsFrom_i is converted down to int, so make sure it can be + // represented + TEUCHOS_TEST_FOR_EXCEPTION(overflow, std::logic_error, + "Tpetra::Distributor::doPostsKokkos(4 args, Kokkos): " + "Recv count for receive " + << overflow-1 << ") is too large " + "to be represented as int."); + + Kokkos::deep_copy(sendcounts, sendcounts_d); + Kokkos::deep_copy(sdispls, sdispls_d); + Kokkos::deep_copy(recvcounts, recvcounts_d); + Kokkos::deep_copy(rdispls, rdispls_d); + + MPIX_Comm *mpixComm = *plan.getMPIXComm(); + const int err = MPIX_Neighbor_alltoallv( + exports.data(), sendcounts.data(), sdispls.data(), rawType, + imports.data(), recvcounts.data(), rdispls.data(), rawType, mpixComm); + + TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, + "MPIX_Neighbor_alltoallv failed with error \"" + << Teuchos::mpiErrorCodeToString(err) + << "\"."); +} #endif // HAVE_TPETRACORE_MPI_ADVANCE #endif // HAVE_TPETRA_MPI // clang-format off @@ -1107,16 +1397,16 @@ void DistributorActor::doPosts(const DistributorPlan& plan, // This buffer is long enough for only one message at a time. // Thus, we use DISTRIBUTOR_SEND always in this case, regardless - // of sendType requested by user. + // of sendType requested by user. 
// This code path formerly errored out with message: - // Tpetra::Distributor::doPosts(4-arg, Kokkos): + // Tpetra::Distributor::doPosts(4-arg, Kokkos): // The "send buffer" code path // doesn't currently work with nonblocking sends. // Now, we opt to just do the communication in a way that works. #ifdef HAVE_TPETRA_DEBUG if (sendType != Details::DISTRIBUTOR_SEND) { if (plan.getComm()->getRank() == 0) - std::cout << "The requested Tpetra send type " + std::cout << "The requested Tpetra send type " << DistributorSendTypeEnumToString(sendType) << " requires Distributor data to be ordered by" << " the receiving processor rank. Since these" @@ -1125,7 +1415,7 @@ void DistributorActor::doPosts(const DistributorPlan& plan, } #endif - Kokkos::View sendArray ("sendArray", + Kokkos::View sendArray ("sendArray", maxNumPackets); Array indicesOffsets (numExportPacketsPerLID.size(), 0); @@ -1180,6 +1470,360 @@ void DistributorActor::doPosts(const DistributorPlan& plan, } } +template +void DistributorActor::doPostsKokkos(const DistributorPlan& plan, + const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) +{ + static_assert(areKokkosViews, + "Data arrays for DistributorActor::doPostsKokkos must be Kokkos::Views"); + static_assert(areKokkosViews, + "Num packets arrays for DistributorActor::doPostsKokkos must be Kokkos::Views"); + using Teuchos::Array; + using Teuchos::as; + using Teuchos::ireceive; + using Teuchos::isend; + using Teuchos::send; + using Teuchos::TypeNameTraits; + using std::endl; + using Kokkos::Compat::create_const_view; + using Kokkos::Compat::create_view; + using Kokkos::Compat::subview_offset; + using Kokkos::Compat::deep_copy_offset; + using ExpExecSpace = typename ExpPacketsView::execution_space; + using ImpExecSpace = typename ImpPacketsView::execution_space; + typedef Array::size_type size_type; + typedef ExpView exports_view_type; + typedef ImpView 
imports_view_type; + +#ifdef KOKKOS_ENABLE_CUDA + static_assert (! std::is_same::value && + ! std::is_same::value, + "Please do not use Tpetra::Distributor with UVM " + "allocations. See GitHub issue #1088."); +#endif // KOKKOS_ENABLE_CUDA + +#ifdef KOKKOS_ENABLE_SYCL + static_assert (! std::is_same::value && + ! std::is_same::value, + "Please do not use Tpetra::Distributor with SharedUSM " + "allocations. See GitHub issue #1088 (corresponding to CUDA)."); +#endif // KOKKOS_ENABLE_SYCL + +#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS + Teuchos::TimeMonitor timeMon (*timer_doPosts4KV_); +#endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS + + // Run-time configurable parameters that come from the input + // ParameterList set by setParameterList(). + const Details::EDistributorSendType sendType = plan.getSendType(); + +#ifdef HAVE_TPETRA_MPI + // All-to-all communication layout is quite different from + // point-to-point, so we handle it separately. + if (sendType == Details::DISTRIBUTOR_ALLTOALL) { + doPostsAllToAllKokkos(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + return; + } +#ifdef HAVE_TPETRACORE_MPI_ADVANCE + else if (sendType == Details::DISTRIBUTOR_MPIADVANCE_ALLTOALL) + { + doPostsAllToAllKokkos(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + return; + } else if (sendType == Details::DISTRIBUTOR_MPIADVANCE_NBRALLTOALLV) { + doPostsNbrAllToAllVKokkos(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + return; + } +#endif + +#else // HAVE_TPETRA_MPI + if (plan.hasSelfMessage()) { + size_t packetsPerSend; + Kokkos::parallel_reduce(Kokkos::RangePolicy(plan.getStartsTo()[0], plan.getStartsTo()[0]+plan.getLengthsTo()[0]), KOKKOS_LAMBDA(const size_t j, size_t& packets) { + packets += numExportPacketsPerLID(j); + }, packetsPerSend); + + deep_copy_offset(imports, exports, (size_t)0, (size_t)0, packetsPerSend); + } +#endif // HAVE_TPETRA_MPI + + const int myProcID = plan.getComm()->getRank (); + size_t 
selfReceiveOffset = 0; + +#ifdef HAVE_TPETRA_DEBUG + // Different messages may have different numbers of packets. + size_t totalNumImportPackets = Kokkos::Experimental::reduce(ImpExecSpace(), numImportPacketsPerLID); + TEUCHOS_TEST_FOR_EXCEPTION( + imports.extent (0) < totalNumImportPackets, std::runtime_error, + "Tpetra::Distributor::doPostsKokkos(4 args, Kokkos): The 'imports' array must have " + "enough entries to hold the expected number of import packets. " + "imports.extent(0) = " << imports.extent (0) << " < " + "totalNumImportPackets = " << totalNumImportPackets << "."); + TEUCHOS_TEST_FOR_EXCEPTION + (requests_.size () != 0, std::logic_error, "Tpetra::Distributor::" + "doPostsKokkos(4 args, Kokkos): Process " << myProcID << ": requests_.size () = " + << requests_.size () << " != 0."); +#endif // HAVE_TPETRA_DEBUG + // Distributor uses requests_.size() as the number of outstanding + // nonblocking message requests, so we resize to zero to maintain + // this invariant. + // + // getNumReceives() does _not_ include the self message, if there is + // one. Here, we do actually send a message to ourselves, so we + // include any self message in the "actual" number of receives to + // post. + // + // NOTE (mfh 19 Mar 2012): Epetra_MpiDistributor::DoPosts() + // doesn't (re)allocate its array of requests. That happens in + // CreateFromSends(), ComputeRecvs_(), DoReversePosts() (on + // demand), or Resize_(). + const size_type actualNumReceives = as (plan.getNumReceives()) + + as (plan.hasSelfMessage() ? 1 : 0); + requests_.resize (0); + + // Post the nonblocking receives. It's common MPI wisdom to post + // receives before sends. In MPI terms, this means favoring + // adding to the "posted queue" (of receive requests) over adding + // to the "unexpected queue" (of arrived messages not yet matched + // with a receive). 
+ { +#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS + Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4KV_recvs_); +#endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS + + size_t curBufferOffset = 0; + size_t curLIDoffset = 0; + for (size_type i = 0; i < actualNumReceives; ++i) { + size_t totalPacketsFrom_i = 0; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, plan.getLengthsFrom()[i]), KOKKOS_LAMBDA(const size_t j, size_t& total) { + total += numImportPacketsPerLID(curLIDoffset+j); + }, totalPacketsFrom_i); + // totalPacketsFrom_i is converted down to int, so make sure it can be represented + TEUCHOS_TEST_FOR_EXCEPTION(totalPacketsFrom_i > size_t(INT_MAX), + std::logic_error, "Tpetra::Distributor::doPostsKokkos(3 args, Kokkos): " + "Recv count for receive " << i << " (" << totalPacketsFrom_i << ") is too large " + "to be represented as int."); + curLIDoffset += plan.getLengthsFrom()[i]; + if (plan.getProcsFrom()[i] != myProcID && totalPacketsFrom_i) { + // If my process is receiving these packet(s) from another + // process (not a self-receive), and if there is at least + // one packet to receive: + // + // 1. Set up the persisting view (recvBuf) into the imports + // array, given the offset and size (total number of + // packets from process getProcsFrom()[i]). + // 2. Start the Irecv and save the resulting request. + imports_view_type recvBuf = + subview_offset (imports, curBufferOffset, totalPacketsFrom_i); + requests_.push_back (ireceive (recvBuf, plan.getProcsFrom()[i], + mpiTag_, *plan.getComm())); + } + else { // Receiving these packet(s) from myself + selfReceiveOffset = curBufferOffset; // Remember the offset + } + curBufferOffset += totalPacketsFrom_i; + } + } + +#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS + Teuchos::TimeMonitor timeMonSends (*timer_doPosts4KV_sends_); +#endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS + + // setup views containing starting-offsets into exports for each send, + // and num-packets-to-send for each send. 
+ Kokkos::View sendPacketOffsets("sendPacketOffsets", plan.getNumSends()); + Kokkos::View packetsPerSend("packetsPerSend", plan.getNumSends()); + auto sendPacketOffsets_d = Kokkos::create_mirror_view(ExpExecSpace(), sendPacketOffsets); + auto packetsPerSend_d = Kokkos::create_mirror_view(ExpExecSpace(), packetsPerSend); + + auto starts = Kokkos::Compat::getKokkosViewDeepCopy(plan.getStartsTo()); + auto lengths = Kokkos::Compat::getKokkosViewDeepCopy(plan.getLengthsTo()); + + Kokkos::parallel_scan(Kokkos::RangePolicy(0, plan.getNumSends()), KOKKOS_LAMBDA(const size_t pp, size_t& curPKToffset, bool final_pass) { + if(final_pass) sendPacketOffsets_d(pp) = curPKToffset; + size_t numPackets = 0; + for(size_t j = starts(pp); j < starts(pp) + lengths(pp); j++) { + numPackets += numExportPacketsPerLID(j); + } + if(final_pass) packetsPerSend_d(pp) = numPackets; + curPKToffset += numPackets; + }); + + size_t maxNumPackets; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, plan.getNumSends()), KOKKOS_LAMBDA(const size_t pp, size_t& max) { + if(packetsPerSend_d(pp) > max) { + max = packetsPerSend_d(pp); + } + }, Kokkos::Max(maxNumPackets)); + + // numPackets will be used as a message length, so make sure it can be represented as int + TEUCHOS_TEST_FOR_EXCEPTION(maxNumPackets > size_t(INT_MAX), + std::logic_error, "Tpetra::Distributor::doPostsKokkos(4 args, Kokkos): " + "numPackets = " << maxNumPackets << " is too large " + "to be represented as int."); + + Kokkos::deep_copy(sendPacketOffsets, sendPacketOffsets_d); + Kokkos::deep_copy(packetsPerSend, packetsPerSend_d); + + // setup scan through getProcsTo() list starting with higher numbered procs + // (should help balance message traffic) + size_t numBlocks = plan.getNumSends() + plan.hasSelfMessage(); + size_t procIndex = 0; + while ((procIndex < numBlocks) && (plan.getProcsTo()[procIndex] < myProcID)) { + ++procIndex; + } + if (procIndex == numBlocks) { + procIndex = 0; + } + + size_t selfNum = 0; + size_t selfIndex = 0; + if 
(plan.getIndicesTo().is_null()) { + +#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS + Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4KV_sends_fast_); +#endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS + + // Data are already blocked (laid out) by process, so we don't + // need a separate send buffer (besides the exports array). + for (size_t i = 0; i < numBlocks; ++i) { + size_t p = i + procIndex; + if (p > (numBlocks - 1)) { + p -= numBlocks; + } + + if (plan.getProcsTo()[p] != myProcID && packetsPerSend[p] > 0) { + exports_view_type tmpSend = + subview_offset(exports, sendPacketOffsets[p], packetsPerSend[p]); + + if (sendType == Details::DISTRIBUTOR_ISEND) { + exports_view_type tmpSendBuf = + subview_offset (exports, sendPacketOffsets[p], packetsPerSend[p]); + requests_.push_back (isend (tmpSendBuf, plan.getProcsTo()[p], + mpiTag_, *plan.getComm())); + } + else { // DISTRIBUTOR_SEND + send (tmpSend, + as (tmpSend.size ()), + plan.getProcsTo()[p], mpiTag_, *plan.getComm()); + } + } + else { // "Sending" the message to myself + selfNum = p; + } + } + + if (plan.hasSelfMessage()) { + deep_copy_offset(imports, exports, selfReceiveOffset, + sendPacketOffsets[selfNum], packetsPerSend[selfNum]); + } + } + else { // data are not blocked by proc, use send buffer + +#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS + Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4KV_sends_slow_); +#endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS + + // FIXME (mfh 05 Mar 2013) This may be broken for Isend. + typedef typename ExpView::non_const_value_type Packet; + typedef typename ExpView::array_layout Layout; + typedef typename ExpView::device_type Device; + typedef typename ExpView::memory_traits Mem; + + // This buffer is long enough for only one message at a time. + // Thus, we use DISTRIBUTOR_SEND always in this case, regardless + // of sendType requested by user. 
+ // This code path formerly errored out with message: + // Tpetra::Distributor::doPostsKokkos(4-arg, Kokkos): + // The "send buffer" code path + // doesn't currently work with nonblocking sends. + // Now, we opt to just do the communication in a way that works. +#ifdef HAVE_TPETRA_DEBUG + if (sendType != Details::DISTRIBUTOR_SEND) { + if (plan.getComm()->getRank() == 0) + std::cout << "The requested Tpetra send type " + << DistributorSendTypeEnumToString(sendType) + << " requires Distributor data to be ordered by" + << " the receiving processor rank. Since these" + << " data are not ordered, Tpetra will use Send" + << " instead." << std::endl; + } +#endif + + Kokkos::View sendArray ("sendArray", + maxNumPackets); + + Kokkos::View indicesOffsets ("indicesOffsets", numExportPacketsPerLID.extent(0)); + size_t ioffset = 0; + Kokkos::parallel_scan(Kokkos::RangePolicy(0, numExportPacketsPerLID.extent(0)), KOKKOS_LAMBDA(const size_t j, size_t& offset, bool is_final) { + if(is_final) indicesOffsets(j) = offset; + offset += numExportPacketsPerLID(j); + }, ioffset); + + for (size_t i = 0; i < numBlocks; ++i) { + size_t p = i + procIndex; + if (p > (numBlocks - 1)) { + p -= numBlocks; + } + + if (plan.getProcsTo()[p] != myProcID) { + size_t j = plan.getStartsTo()[p]; + size_t numPacketsTo_p = 0; + //mirror in case execspaces are different + auto sendArrayMirror = Kokkos::create_mirror_view_and_copy(ExpExecSpace(), sendArray); + auto exportsMirror = Kokkos::create_mirror_view_and_copy(ExpExecSpace(), exports); + Kokkos::parallel_scan(Kokkos::RangePolicy(0, plan.getLengthsTo()[p]), KOKKOS_LAMBDA(const size_t k, size_t& offset, bool is_final) { + if(is_final) { + const size_t dst_end = offset + numExportPacketsPerLID(j + k); + const size_t src_end = indicesOffsets(j + k) + numExportPacketsPerLID(j + k); + auto dst_sub = Kokkos::subview(sendArrayMirror, Kokkos::make_pair(offset, dst_end)); + auto src_sub = Kokkos::subview(exportsMirror, Kokkos::make_pair(indicesOffsets(j + k), 
src_end)); + Kokkos::Experimental::local_deep_copy(dst_sub, src_sub); + } + offset += numExportPacketsPerLID(j + k); + }, numPacketsTo_p); + Kokkos::deep_copy(sendArray, sendArrayMirror); + typename ExpView::execution_space().fence(); + + if (numPacketsTo_p > 0) { + ImpView tmpSend = + subview_offset(sendArray, size_t(0), numPacketsTo_p); + + send (tmpSend, + as (tmpSend.size ()), + plan.getProcsTo()[p], mpiTag_, *plan.getComm()); + } + } + else { // "Sending" the message to myself + selfNum = p; + selfIndex = plan.getStartsTo()[p]; + } + } + + if (plan.hasSelfMessage()) { + //mirror in case execspaces are different + auto importsMirror = Kokkos::create_mirror_view_and_copy(ExpExecSpace(), imports); + auto exportsMirror = Kokkos::create_mirror_view_and_copy(ExpExecSpace(), exports); + size_t temp; + Kokkos::parallel_scan(Kokkos::RangePolicy(0, plan.getLengthsTo()[selfNum]), KOKKOS_LAMBDA(const size_t k, size_t& offset, bool is_final) { + if(is_final) { + const size_t dst_end = selfReceiveOffset + offset + numExportPacketsPerLID(selfIndex + k); + const size_t src_end = indicesOffsets(selfIndex + k) + numExportPacketsPerLID(selfIndex + k); + auto dst_sub = Kokkos::subview(importsMirror, Kokkos::make_pair(selfReceiveOffset + offset, dst_end)); + auto src_sub = Kokkos::subview(exportsMirror, Kokkos::make_pair(indicesOffsets(selfIndex + k), src_end)); + Kokkos::Experimental::local_deep_copy(dst_sub, src_sub); + } + offset += numExportPacketsPerLID(selfIndex + k); + }, temp); + Kokkos::deep_copy(imports, importsMirror); + selfIndex += plan.getLengthsTo()[selfNum]; + selfReceiveOffset += temp; + } + } +} + } } diff --git a/packages/tpetra/core/src/Tpetra_Distributor.hpp b/packages/tpetra/core/src/Tpetra_Distributor.hpp index c0c31a0f8b54..a8beece8ee9d 100644 --- a/packages/tpetra/core/src/Tpetra_Distributor.hpp +++ b/packages/tpetra/core/src/Tpetra_Distributor.hpp @@ -23,6 +23,7 @@ #include "KokkosCompat_View.hpp" #include "Kokkos_Core.hpp" #include 
"Kokkos_TeuchosCommAdapters.hpp" +#include "Kokkos_StdAlgorithms.hpp" #include #include #include @@ -426,6 +427,13 @@ namespace Tpetra { const ImpView &imports, const Teuchos::ArrayView& numImportPacketsPerLID); + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + doPostsAndWaitsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + /// \brief Post the data for a forward plan, but do not execute the waits yet. /// /// Call this overload when you have the same number of Packets @@ -480,6 +488,13 @@ namespace Tpetra { const Teuchos::ArrayView& numExportPacketsPerLID, const ImpView &imports, const Teuchos::ArrayView& numImportPacketsPerLID); + + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + doPostsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); /// \brief Execute the reverse communication plan. /// @@ -501,7 +516,14 @@ namespace Tpetra { const Teuchos::ArrayView& numExportPacketsPerLID, const ImpView &imports, const Teuchos::ArrayView& numImportPacketsPerLID); - + + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + doReversePostsAndWaitsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + /// \brief Post the data for a reverse plan, but do not execute the waits yet. 
/// /// This method takes the same arguments as the three-argument @@ -522,7 +544,14 @@ namespace Tpetra { const Teuchos::ArrayView& numExportPacketsPerLID, const ImpView &imports, const Teuchos::ArrayView& numImportPacketsPerLID); - + + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + doReversePostsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + //@} //! @name Implementation of Teuchos::Describable //@{ @@ -640,6 +669,16 @@ namespace Tpetra { actor_.doPostsAndWaits(plan_, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); } + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + Distributor:: + doPostsAndWaitsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) + { + actor_.doPostsAndWaitsKokkos(plan_, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + } template typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type @@ -661,6 +700,17 @@ namespace Tpetra { { actor_.doPosts(plan_, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); } + + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + Distributor:: + doPostsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) + { + actor_.doPostsKokkos(plan_, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + } template typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type @@ -685,6 +735,19 @@ namespace Tpetra { numImportPacketsPerLID); doReverseWaits (); } + + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + Distributor:: + 
doReversePostsAndWaitsKokkos (const ExpView& exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView& imports, + const ImpPacketsView &numImportPacketsPerLID) + { + doReversePostsKokkos (exports, numExportPacketsPerLID, imports, + numImportPacketsPerLID); + doReverseWaits (); + } template typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type @@ -723,7 +786,27 @@ namespace Tpetra { reverseDistributor_->doPosts (exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); } - + + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + Distributor:: + doReversePostsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) + { + // FIXME (mfh 29 Mar 2012) WHY? + TEUCHOS_TEST_FOR_EXCEPTION( + ! plan_.getIndicesTo().is_null(), std::runtime_error, + "Tpetra::Distributor::doReversePosts(3 args): Can only do " + "reverse communication when original data are blocked by process."); + if (reverseDistributor_.is_null ()) { + createReverseDistributor (); + } + reverseDistributor_->doPostsKokkos (exports, numExportPacketsPerLID, + imports, numImportPacketsPerLID); + } + template void Distributor:: computeSends(const Teuchos::ArrayView& importGIDs, From f08be9cf358395a322c05871d9d17e38bb32a177 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Fri, 18 Oct 2024 13:35:15 -0600 Subject: [PATCH 02/20] RBILUK: Use new KK::sptrsv block support instead of KK::trsv Signed-off-by: James Foucar --- .../src/Ifpack2_Experimental_RBILUK_decl.hpp | 2 + .../src/Ifpack2_Experimental_RBILUK_def.hpp | 90 ++++++++++++------- 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp index 5e8378c027cb..2052c6e530e2 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp +++ 
b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp @@ -336,6 +336,8 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix //! The inverse of the diagonal Teuchos::RCP D_block_inverse_; + + Kokkos::View tmp_; }; diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp index f68d8d96a793..e50a71649527 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp @@ -18,7 +18,7 @@ #include "Ifpack2_LocalFilter.hpp" #include "Ifpack2_Utilities.hpp" #include "Ifpack2_RILUK.hpp" -#include "KokkosSparse_trsv.hpp" +#include "KokkosSparse_sptrsv.hpp" //#define IFPACK2_RBILUK_INITIAL //#define IFPACK2_RBILUK_INITIAL_NOKK @@ -194,6 +194,11 @@ void RBILUK::allocate_L_and_U_blocks () U_block_->setAllToScalar (STM::zero ()); D_block_->setAllToScalar (STM::zero ()); + // Allocate temp space for apply + if (this->isKokkosKernelsSpiluk_) { + const auto numRows = L_block_->getLocalNumRows(); + tmp_ = decltype(tmp_)("RBILUK::tmp_", numRows * blockSize_); + } } this->isAllocated_ = true; } @@ -1070,7 +1075,7 @@ apply (const Tpetra::MultiVectorgetCrsGraph().getLocalRowPtrsHost(); - auto L_entries_host = L_block_->getCrsGraph().getLocalIndicesHost(); - auto U_row_ptrs_host = U_block_->getCrsGraph().getLocalRowPtrsHost(); - auto U_entries_host = U_block_->getCrsGraph().getLocalIndicesHost(); - auto L_values_host = L_block_->getValuesHost(); - auto U_values_host = U_block_->getValuesHost(); - - row_map_type* L_row_ptrs_host_ri = reinterpret_cast(&L_row_ptrs_host); - index_type* L_entries_host_ri = reinterpret_cast(&L_entries_host); - row_map_type* U_row_ptrs_host_ri = reinterpret_cast(&U_row_ptrs_host); - index_type* U_entries_host_ri = reinterpret_cast(&U_entries_host); - values_type* L_values_host_ri = reinterpret_cast(&L_values_host); - values_type* U_values_host_ri = reinterpret_cast(&U_values_host); + // 
Kokkos kernels impl. + auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly); + auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite); + + auto lclL = L_block_->getLocalMatrixDevice(); + auto L_rowmap = lclL.graph.row_map; + auto L_entries = lclL.graph.entries; + auto L_values = lclL.values; + + auto lclU = U_block_->getLocalMatrixDevice(); + auto U_rowmap = lclU.graph.row_map; + auto U_entries = lclU.graph.entries; + auto U_values = lclU.values; const auto numRows = L_block_->getLocalNumRows(); - local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries_host.size(), *L_values_host_ri, *L_row_ptrs_host_ri, *L_entries_host_ri, blockSize_); - local_matrix_host_type U_block_local_host("U_block_local_host", numRows, numRows, U_entries_host.size(), *U_values_host_ri, *U_row_ptrs_host_ri, *U_entries_host_ri, blockSize_); + local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries.size(), L_values, L_rowmap, L_entries, blockSize_); + local_matrix_host_type U_block_local_host("U_block_local_host", numRows, numRows, U_entries.size(), U_values, U_rowmap, U_entries, blockSize_); if (mode == Teuchos::NO_TRANS) { - KokkosSparse::trsv("L", "N", "N", L_block_local_host, X_view, Y_view); - KokkosSparse::trsv("U", "N", "N", U_block_local_host, Y_view, Y_view); - KokkosBlas::axpby(alpha, Y_view, beta, Y_view); + KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_RP; + { + KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_); + KokkosSparse::Experimental::sptrsv_symbolic(KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values); + Kokkos::fence(); + + const LO numVecs = X.getNumVectors(); + for (LO vec = 0; vec < numVecs; ++vec) { + auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec); + auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec); + KokkosSparse::Experimental::sptrsv_solve(KernelHandle_.getRawPtr(), 
L_rowmap, L_entries, L_values, X_view, tmp_); + } + Kokkos::fence(); + + KernelHandle_->destroy_sptrsv_handle(); + } + + { + KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_); + KokkosSparse::Experimental::sptrsv_symbolic(KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values); + Kokkos::fence(); + + const LO numVecs = X.getNumVectors(); + for (LO vec = 0; vec < numVecs; ++vec) { + auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec); + KokkosSparse::Experimental::sptrsv_solve(KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view); + } + Kokkos::fence(); + + KernelHandle_->destroy_sptrsv_handle(); + } + + KokkosBlas::axpby(alpha, Y_views, beta, Y_views); } else { - KokkosSparse::trsv("U", "T", "N", U_block_local_host, X_view, Y_view); - KokkosSparse::trsv("L", "T", "N", L_block_local_host, Y_view, Y_view); - KokkosBlas::axpby(alpha, Y_view, beta, Y_view); + TEUCHOS_TEST_FOR_EXCEPTION( + true, std::runtime_error, + "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm"); } //Y.getWrappedDualView().sync(); From 0a6fbd74207fb8d98675eb9ad470032166609537 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Tue, 22 Oct 2024 12:30:17 -0600 Subject: [PATCH 03/20] Remove unused host matrices Signed-off-by: James Foucar --- packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp index e50a71649527..a0736e8e9047 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp @@ -1108,8 +1108,6 @@ apply (const Tpetra::MultiVectorgetLocalNumRows(); - local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries.size(), L_values, L_rowmap, L_entries, blockSize_); - local_matrix_host_type 
U_block_local_host("U_block_local_host", numRows, numRows, U_entries.size(), U_values, U_rowmap, U_entries, blockSize_); if (mode == Teuchos::NO_TRANS) { KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_RP; From 5c2d10b920da60e2c652da54c49a5ff25fbd443f Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Thu, 24 Oct 2024 15:56:44 -0600 Subject: [PATCH 04/20] config-specs: set Kokkos_CoreUnitTest_Cuda1 to run serial attempt to resolve the cuda_graph.diamond subtest failure in nightly integration testing track Signed-off-by: Nathan Ellingwood --- packages/framework/ini-files/config-specs.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index 2c0ce84d57bf..de052bca3530 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1248,6 +1248,7 @@ opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF [CUDA11-RUN-SERIAL-TESTS] +opt-set-cmake-var Kokkos_CoreUnitTest_Cuda1_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var KokkosKernels_sparse_cuda_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var KokkosKernels_batched_dla_cuda_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var Intrepid2_unit-test_MonolithicExecutable_Intrepid2_Tests_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON From a38eb95298e24aaddff9447d6825de6dabffd7ba Mon Sep 17 00:00:00 2001 From: James Foucar Date: Fri, 25 Oct 2024 12:40:36 -0600 Subject: [PATCH 05/20] Fix tmp_ type, Kokkos::complex vs. 
std::complex mismatch Signed-off-by: James Foucar --- packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp index 2052c6e530e2..84f5d421c6cd 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp @@ -337,7 +337,7 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix //! The inverse of the diagonal Teuchos::RCP D_block_inverse_; - Kokkos::View tmp_; + Kokkos::View tmp_; }; From a93e316f5190895ea107857f272bbd2165ff0d91 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Fri, 25 Oct 2024 15:53:44 -0600 Subject: [PATCH 06/20] Move sptrsv handle setup out of apply Signed-off-by: James Foucar --- .../src/Ifpack2_Experimental_RBILUK_decl.hpp | 2 ++ .../src/Ifpack2_Experimental_RBILUK_def.hpp | 34 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp index 84f5d421c6cd..ed918212c44d 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp @@ -165,6 +165,8 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix // kk_handle_type;//test Teuchos::RCP KernelHandle_; + Teuchos::RCP L_Sptrsv_KernelHandle_; + Teuchos::RCP U_Sptrsv_KernelHandle_; //@} diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp index a0736e8e9047..650f0f3ed58a 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp @@ -327,12 +327,21 @@ void RBILUK::initialize () if (this->isKokkosKernelsSpiluk_) { 
this->KernelHandle_ = Teuchos::rcp (new kk_handle_type ()); + const auto numRows = this->A_local_->getLocalNumRows(); KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1, - this->A_local_->getLocalNumRows(), + numRows, 2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1), 2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1), blockSize_); this->Graph_->initialize(KernelHandle_); // this calls spiluk_symbolic + + this->L_Sptrsv_KernelHandle_ = Teuchos::rcp (new kk_handle_type ()); + this->U_Sptrsv_KernelHandle_ = Teuchos::rcp (new kk_handle_type ()); + + KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1; + + this->L_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_); + this->U_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_); } else { this->Graph_->initialize (); @@ -919,6 +928,10 @@ void RBILUK::compute () KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), this->LevelOfFill_, A_local_rowmap, A_local_entries, A_local_values, L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values ); + + // Now call symbolic for sptrsvs + KokkosSparse::Experimental::sptrsv_symbolic(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values); + KokkosSparse::Experimental::sptrsv_symbolic(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values); } } // Stop timing @@ -1107,39 +1120,24 @@ apply (const Tpetra::MultiVectorgetLocalNumRows(); - if (mode == Teuchos::NO_TRANS) { - KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_RP; { - KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_); - KokkosSparse::Experimental::sptrsv_symbolic(KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values); - Kokkos::fence(); - const LO numVecs = X.getNumVectors(); for (LO vec = 0; vec < numVecs; 
++vec) { auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec); auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec); - KokkosSparse::Experimental::sptrsv_solve(KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_); + KokkosSparse::Experimental::sptrsv_solve(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_); } Kokkos::fence(); - - KernelHandle_->destroy_sptrsv_handle(); } { - KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_); - KokkosSparse::Experimental::sptrsv_symbolic(KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values); - Kokkos::fence(); - const LO numVecs = X.getNumVectors(); for (LO vec = 0; vec < numVecs; ++vec) { auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec); - KokkosSparse::Experimental::sptrsv_solve(KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view); + KokkosSparse::Experimental::sptrsv_solve(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view); } Kokkos::fence(); - - KernelHandle_->destroy_sptrsv_handle(); } KokkosBlas::axpby(alpha, Y_views, beta, Y_views); From 643ff700bff96963ce106549c9d08610fe9bade0 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Fri, 25 Oct 2024 16:00:23 -0600 Subject: [PATCH 07/20] Remove fences Signed-off-by: James Foucar --- packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp index 650f0f3ed58a..592d4dcfafd2 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp @@ -1128,7 +1128,6 @@ apply (const Tpetra::MultiVector Date: Thu, 17 Oct 2024 13:08:11 -0600 Subject: [PATCH 08/20] ShyLU - Basker : replace View-of-Views with std::vector-of-Views Signed-off-by: iyamazaki --- .../shylu_node/basker/src/shylubasker_def.hpp | 45 +-- 
.../basker/src/shylubasker_error_manager.hpp | 162 +++++------ .../basker/src/shylubasker_matrix_decl.hpp | 3 + .../basker/src/shylubasker_matrix_def.hpp | 9 +- .../basker/src/shylubasker_nfactor_blk.hpp | 144 +++++----- .../src/shylubasker_nfactor_blk_inc.hpp | 272 +++++++++--------- .../basker/src/shylubasker_nfactor_col.hpp | 266 ++++++++--------- .../basker/src/shylubasker_nfactor_col2.hpp | 84 +++--- .../src/shylubasker_nfactor_col_inc.hpp | 266 ++++++++--------- .../basker/src/shylubasker_nfactor_diag.hpp | 66 ++--- .../basker/src/shylubasker_order.hpp | 16 +- .../basker/src/shylubasker_sfactor.hpp | 189 +++++++----- .../basker/src/shylubasker_sfactor_inc.hpp | 52 ++-- .../basker/src/shylubasker_solve_rhs.hpp | 16 +- .../basker/src/shylubasker_solve_rhs_tr.hpp | 16 +- .../basker/src/shylubasker_structs.hpp | 7 +- .../basker/src/shylubasker_tree.hpp | 33 ++- .../basker/src/shylubasker_types.hpp | 158 +++++----- .../basker/src/shylubasker_util.hpp | 255 ++++++++-------- 19 files changed, 1089 insertions(+), 970 deletions(-) diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp index c1b92347a094..c7b9d66311ab 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp @@ -92,16 +92,8 @@ namespace BaskerNS BASKER_INLINE void Basker::Finalize() { - //finalize all matrices - A.Finalize(); - At.Finalize(); //??? 
is At even used - BTF_A.Finalize(); - BTF_C.Finalize(); - BTF_B.Finalize(); - BTF_D.Finalize(); - BTF_E.Finalize(); - //finalize array of 2d matrics + // Actual Finalize is called by destructor FREE_MATRIX_2DARRAY(AVM, tree.nblks); FREE_MATRIX_2DARRAY(ALM, tree.nblks); @@ -120,7 +112,6 @@ namespace BaskerNS //Thread Array FREE_THREAD_1DARRAY(thread_array); - basker_barrier.Finalize(); //S (Check on this) FREE_INT_2DARRAY(S, tree.nblks); @@ -187,12 +178,6 @@ namespace BaskerNS FREE_ENTRY_1DARRAY(x_view_ptr_scale); FREE_ENTRY_1DARRAY(y_view_ptr_scale); - - //Structures - part_tree.Finalize(); - tree.Finalize(); - stree.Finalize(); - stats.Finalize(); }//end Finalize() @@ -239,7 +224,7 @@ namespace BaskerNS //Option = 2, BTF BASKER if(option == 1) - { + { default_order(); } else if(option == 2) @@ -475,12 +460,16 @@ namespace BaskerNS //Find BTF ordering if(btf_order2() != BASKER_SUCCESS) { + if(Options.verbose == BASKER_TRUE) + { + printf("Basker Ordering Failed \n"); fflush(stdout); + } return BASKER_ERROR; } if(Options.verbose == BASKER_TRUE) { - printf("Basker Ordering Found \n"); + printf("Basker Ordering Found \n"); fflush(stdout); } /*if((Options.btf == BASKER_TRUE) && (btf_tabs_offset != 0)) @@ -512,7 +501,7 @@ namespace BaskerNS if(symb_flag == BASKER_TRUE) { if(Options.verbose == BASKER_TRUE) { - printf("BASKER: YOU CANNOT RERUN SFACTOR\n"); + printf("BASKER: YOU CANNOT RERUN SFACTOR\n"); fflush(stdout); } return BASKER_ERROR; } @@ -547,7 +536,7 @@ namespace BaskerNS if(Options.verbose == BASKER_TRUE) { - printf(" == Basker Symbolic Done ==\n\n"); + printf(" == Basker Symbolic Done ==\n\n"); fflush(stdout); } #ifdef BASKER_TIMER @@ -1573,7 +1562,7 @@ namespace BaskerNS #endif } - // ---------------------------------------------------------------------------------------------- + // ---------------------------------------------------------------------------------------------- // 'sort' rows of BTF_A into ND structure #if 0 for (Int i = 0; i < BTF_A.nnz; ++i) 
{ @@ -1621,6 +1610,7 @@ namespace BaskerNS symmetric_sfactor(); if(Options.verbose == BASKER_TRUE) { std::cout<< " > Basker Factor: Time for symbolic after ND on a big block A: " << nd_symbolic_timer.seconds() << std::endl; + fflush(stdout); } Kokkos::Timer nd_last_dense_timer; @@ -1628,16 +1618,23 @@ namespace BaskerNS btf_last_dense(flag); if(Options.verbose == BASKER_TRUE) { std::cout<< " > Basker Factor: Time for last-dense after ND on a big block A: " << nd_last_dense_timer.seconds() << std::endl; + fflush(stdout); } #ifdef BASKER_KOKKOS // ---------------------------------------------------------------------------------------------- // Allocate & Initialize blocks + #ifdef BASKER_PARALLEL_INIT_FACTOR kokkos_sfactor_init_factor iF(this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iF); Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_factor(p); + } + #endif /*kokkos_sfactor_init_workspace iWS(flag, this); @@ -1950,10 +1947,16 @@ namespace BaskerNS }*/ Kokkos::Timer nd_setup2_timer; +#ifdef BASKER_PARALLEL_INIT_WORKSPACE kokkos_sfactor_init_workspace iWS(flag, this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iWS); Kokkos::fence(); +#else + for (Int p = 0; p < num_threads; p++) { + this->t_init_workspace(flag, p); + } +#endif if(Options.verbose == BASKER_TRUE) { std::cout<< " > Basker Factor: Time for workspace allocation after ND on a big block A: " << nd_setup2_timer.seconds() << std::endl; } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp index a6e1f5c41e91..84cbb8b801b7 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp @@ -46,66 +46,66 @@ namespace BaskerNS for(Int ti = 0; ti < num_threads; ti++) { //Note: jdb we can make this into a switch - if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) + 
if(thread_array[ti].error_type == BASKER_ERROR_NOERROR) { threads_start(ti) = BASKER_MAX_IDX; continue; - } else if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) + } else if(thread_array[ti].error_type == BASKER_ERROR_SINGULAR) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREAD: " << ti - << " DOMBLK SINGULAR: blk=" << thread_array(ti).error_blk + << " DOMBLK SINGULAR: blk=" << thread_array[ti].error_blk << std::endl; } return BASKER_ERROR; - } else if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) + } else if(thread_array[ti].error_type == BASKER_ERROR_NOMALLOC) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREAD: " << ti - << " DOMBLK NOMALLOC : blk=" << thread_array(ti).error_blk + << " DOMBLK NOMALLOC : blk=" << thread_array[ti].error_blk << std::endl; } return BASKER_ERROR; - } else if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) + } else if(thread_array[ti].error_type == BASKER_ERROR_REMALLOC) { - BASKER_ASSERT(thread_array(ti).error_blk >= 0, "nfactor_dom_error error_blk"); + BASKER_ASSERT(thread_array[ti].error_blk >= 0, "nfactor_dom_error error_blk"); if(Options.verbose == BASKER_TRUE) { std::cout << " > THREAD: " << ti - << " DOMBLK MALLOC : blk=" << thread_array(ti).error_blk - << " subblk=" << thread_array(ti).error_subblk - << " newsize=" << thread_array(ti).error_info + << " DOMBLK MALLOC : blk=" << thread_array[ti].error_blk + << " subblk=" << thread_array[ti].error_subblk + << " newsize=" << thread_array[ti].error_info << std::endl; } //If on diagonal, want to compare L and U Int resize_L = BASKER_MAX_IDX; Int resize_U = BASKER_MAX_IDX; - if(thread_array(ti).error_subblk != BASKER_MAX_IDX) + if(thread_array[ti].error_subblk != BASKER_MAX_IDX) { - BASKER_ASSERT(thread_array(ti).error_info > 0, "L) newsize not big enough"); - resize_L = thread_array(ti).error_info; + BASKER_ASSERT(thread_array[ti].error_info > 0, "L) newsize not big enough"); + resize_L = thread_array[ti].error_info; //if L is 
already bigger and U, //We will want re size U as, well - if(thread_array(ti).error_subblk == 0) + if(thread_array[ti].error_subblk == 0) { - Int blkcol = thread_array(ti).error_blk; + Int blkcol = thread_array[ti].error_blk; Int blkUrow = LU_size(blkcol)-1; - if(LL(blkcol)(0).nnz >= - LU(blkcol)(blkUrow).nnz) + if(LL[blkcol][0].nnz >= + LU[blkcol][blkUrow].nnz) { - resize_U = thread_array(ti).error_info; + resize_U = thread_array[ti].error_info; } }//if - a domain } //We don't care about the other way since, //L is already checked before U. - if(thread_array(ti).error_subblk == -1) + if(thread_array[ti].error_subblk == -1) { - resize_U = thread_array(ti).error_info; + resize_U = thread_array[ti].error_info; } //Resize L, if resize_L != -1 (meaning realloc-L is requested) @@ -116,7 +116,7 @@ namespace BaskerNS std::cout << " ++ resize L( tid = " << ti << " ): new size = " << resize_L << std::endl; } BASKER_MATRIX &L = - LL(thread_array(ti).error_blk)(thread_array(ti).error_subblk); + LL[thread_array[ti].error_blk][thread_array[ti].error_subblk]; REALLOC_INT_1DARRAY(L.row_idx, L.nnz, resize_L); @@ -142,7 +142,7 @@ namespace BaskerNS std::cout << " ++ resize U( tid = " << ti << " ): new size = " << resize_U << std::endl; } BASKER_MATRIX &U = - LU(thread_array(ti).error_blk)(0); + LU[thread_array[ti].error_blk][0]; REALLOC_INT_1DARRAY(U.row_idx, U.nnz, resize_U); @@ -153,7 +153,7 @@ namespace BaskerNS U.nnz = resize_U; //Still need to clear pend BASKER_MATRIX &L = - LL(thread_array(ti).error_blk)(0); + LL[thread_array[ti].error_blk][0]; L.clear_pend(); } @@ -163,11 +163,11 @@ namespace BaskerNS { //Clear workspace, whole column for(Int sb = 0; - sb < LL_size(thread_array(ti).error_blk); + sb < LL_size(thread_array[ti].error_blk); sb++) { BASKER_MATRIX &SL = - LL(thread_array(ti).error_blk)(sb); + LL[thread_array[ti].error_blk][sb]; for(Int i = 0; i < SL.iws_size*SL.iws_mult; ++i) { SL.iws(i) = (Int) 0; @@ -198,13 +198,13 @@ namespace BaskerNS }//for - sb (subblks) 
}//if ws is filled - threads_start(ti) = thread_array(ti).error_blk; + threads_start(ti) = thread_array[ti].error_blk; //Reset - thread_array(ti).error_type = BASKER_ERROR_NOERROR; - thread_array(ti).error_blk = BASKER_MAX_IDX; - thread_array(ti).error_info = BASKER_MAX_IDX; + thread_array[ti].error_type = BASKER_ERROR_NOERROR; + thread_array[ti].error_blk = BASKER_MAX_IDX; + thread_array[ti].error_info = BASKER_MAX_IDX; nthread_remalloc++; }//if REMALLOC @@ -231,26 +231,26 @@ namespace BaskerNS for(Int ti = 0; ti < num_threads; ti++) { //Note: jdb we can make this into a switch - if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) + if(thread_array[ti].error_type == BASKER_ERROR_NOERROR) { thread_start(ti) = BASKER_MAX_IDX; continue; } - else if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) + else if(thread_array[ti].error_type == BASKER_ERROR_SINGULAR) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREAD: " << ti - << " SEPBLK SINGULAR: blk=" << thread_array(ti).error_blk + << " SEPBLK SINGULAR: blk=" << thread_array[ti].error_blk << std::endl; } return BASKER_ERROR; - } else if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) + } else if(thread_array[ti].error_type == BASKER_ERROR_NOMALLOC) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREADS: " << ti - << " SEPBLK NOMALLOC: blk=" << thread_array(ti).error_blk + << " SEPBLK NOMALLOC: blk=" << thread_array[ti].error_blk << std::endl; } return BASKER_ERROR; @@ -260,22 +260,22 @@ namespace BaskerNS Int error_sep_lvl = BASKER_MAX_IDX; for(Int l = 1; l < tree.nlvls+1; l++) { - if(thread_array(ti).error_blk == S(l)(ti)) + if(thread_array[ti].error_blk == S[l][ti]) { error_sep_lvl = l; break; } } - if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) + if(thread_array[ti].error_type == BASKER_ERROR_REMALLOC) { - BASKER_ASSERT(thread_array(ti).error_blk >= 0, "nfactor_SEP_error error_blk"); + BASKER_ASSERT(thread_array[ti].error_blk >= 0, "nfactor_SEP_error error_blk"); 
if(Options.verbose == BASKER_TRUE) { std::cout << " > THREADS: " << ti - << " SEPBLK MALLOC: blk=" << thread_array(ti).error_blk - << " subblk=" << thread_array(ti).error_subblk - << " newsize=" << thread_array(ti).error_info + << " SEPBLK MALLOC: blk=" << thread_array[ti].error_blk + << " subblk=" << thread_array[ti].error_subblk + << " newsize=" << thread_array[ti].error_info << std::endl; std::cout << " > SEPLVL: " << error_sep_lvl << std::endl; } @@ -283,9 +283,9 @@ namespace BaskerNS //If on diagonal, want to compare L and U Int resize_L = BASKER_MAX_IDX; Int resize_U = BASKER_MAX_IDX; - if(thread_array(ti).error_subblk <= -1) + if(thread_array[ti].error_subblk <= -1) { - resize_L = thread_array(ti).error_info; + resize_L = thread_array[ti].error_info; if(Options.verbose == BASKER_TRUE) { std::cout << " ++ L size: " << resize_L << std::endl; @@ -293,9 +293,9 @@ namespace BaskerNS } //We don't care about the other way since, //L is already checked before U. - if(thread_array(ti).error_subblk > -1) + if(thread_array[ti].error_subblk > -1) { - resize_U = thread_array(ti).error_info; + resize_U = thread_array[ti].error_info; if(Options.verbose == BASKER_TRUE) { std::cout << " ++ U size: " << resize_U << std::endl; @@ -305,9 +305,9 @@ namespace BaskerNS //Resize L, if resize_L != -1 (meaning realloc-L is requested) if(resize_L != BASKER_MAX_IDX) { - const Int tsb = (-1*thread_array(ti).error_subblk)-1; + const Int tsb = (-1*thread_array[ti].error_subblk)-1; BASKER_MATRIX &L = - LL(thread_array(ti).error_blk)(tsb); + LL[thread_array[ti].error_blk][tsb]; REALLOC_INT_1DARRAY(L.row_idx, L.nnz, resize_L); @@ -322,9 +322,9 @@ namespace BaskerNS //Resize U, if resize_U != -1 (meaning realloc-U is requested) if(resize_U != BASKER_MAX_IDX) { - const Int tsb = thread_array(ti).error_subblk; + const Int tsb = thread_array[ti].error_subblk; BASKER_MATRIX &U = - LU(thread_array(ti).error_blk)(tsb); + LU[thread_array[ti].error_blk][tsb]; REALLOC_INT_1DARRAY(U.row_idx, U.nnz, 
resize_U); @@ -346,13 +346,13 @@ namespace BaskerNS //Though this could be done in parallel in the future for(Int p = 0; p < num_threads; p++) { - Int blk = S(0)(p); + Int blk = S[0][p]; //if(LL(blk)(0).w_fill == BASKER_TRUE) { //Clear workspace, whole column for(Int sb = 0; sb < LL_size(blk); sb++) { - BASKER_MATRIX &SL = LL(blk)(sb); + BASKER_MATRIX &SL = LL[blk][sb]; for(Int i = 0; i < SL.iws_size*SL.iws_mult; ++i) { SL.iws(i) = (Int) 0; @@ -369,10 +369,10 @@ namespace BaskerNS Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A for(Int p = 0; p < num_threads; p++) { - Int blk = S(error_sep_lvl)(p); + Int blk = S[error_sep_lvl][p]; //if(LL(blk)(0).w_fill == BASKER_TRUE) { - BASKER_MATRIX &TM = LL(blk)(0); + BASKER_MATRIX &TM = LL[blk][0]; //printf( " > p=%d: scol_top = %d, scol = %d, ncol = %d\n",p,scol_top,TM.scol,TM.ncol ); for(Int i = scol_top + TM.scol; i < scol_top + (TM.scol+TM.ncol); i++) { @@ -386,7 +386,7 @@ namespace BaskerNS //Note, will have to clear the perm in all sep blk in that level //Clear permuation BASKER_MATRIX &SL = - LL(thread_array(ti).error_blk)(0); + LL[thread_array[ti].error_blk][0]; //printf( " + scol_top = %d, srow = %d, nrowl = %d\n",scol_top,SL.srow,SL.nrow ); for(Int i = scol_top + SL.srow; i < scol_top + (SL.srow+SL.nrow); i++) { @@ -394,12 +394,12 @@ namespace BaskerNS gperm(i) = BASKER_MAX_IDX; }//for--to clear perm - thread_start(ti) = thread_array(ti).error_blk; + thread_start(ti) = thread_array[ti].error_blk; //Reset - thread_array(ti).error_type = BASKER_ERROR_NOERROR; - thread_array(ti).error_blk = BASKER_MAX_IDX; - thread_array(ti).error_info = BASKER_MAX_IDX; + thread_array[ti].error_type = BASKER_ERROR_NOERROR; + thread_array[ti].error_blk = BASKER_MAX_IDX; + thread_array[ti].error_info = BASKER_MAX_IDX; for(Int i = 0; i < num_threads; i++) { @@ -451,9 +451,9 @@ namespace BaskerNS Int btab = btf_tabs_offset; for(Int ti = 0; ti < num_threads; ti++) { - Int c = thread_array(ti).error_blk; + Int c 
= thread_array[ti].error_blk; //Note: jdb we can make this into a switch - if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) + if(thread_array[ti].error_type == BASKER_ERROR_NOERROR) { if (c >= btab) { thread_start(ti) = BASKER_MAX_IDX; @@ -463,7 +463,7 @@ namespace BaskerNS continue; }//end if NOERROR - if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) + if(thread_array[ti].error_type == BASKER_ERROR_SINGULAR) { if(Options.verbose == BASKER_TRUE) { @@ -474,7 +474,7 @@ namespace BaskerNS return BASKER_ERROR; }//end if SINGULAR - if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) + if(thread_array[ti].error_type == BASKER_ERROR_NOMALLOC) { std::cout << "ERROR_THREADS: " << ti << " DIAGBLK NOMALLOC blk=" << c @@ -482,16 +482,16 @@ namespace BaskerNS return BASKER_ERROR; }//end if NOMALLOC - if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) + if(thread_array[ti].error_type == BASKER_ERROR_REMALLOC) { - Int liwork = thread_array(ti).iws_size*thread_array(ti).iws_mult; - Int lework = thread_array(ti).ews_size*thread_array(ti).ews_mult; + Int liwork = thread_array[ti].iws_size*thread_array[ti].iws_mult; + Int lework = thread_array[ti].ews_size*thread_array[ti].ews_mult; BASKER_ASSERT(c >= 0, "nfactor_diag_error error_blk"); if(Options.verbose == BASKER_TRUE) { std::cout << " > THREADS: " << ti << " DIAGBLK MALLOC blk=" << c - << " newsize=" << thread_array(ti).error_info + << " newsize=" << thread_array[ti].error_info << " for both L( " << c << " ) and U( " << c << " )" << std::endl; @@ -504,24 +504,24 @@ namespace BaskerNS for(Int i = 0; i < liwork; i++) { - thread_array(ti).iws(i) = (Int) 0; + thread_array[ti].iws(i) = (Int) 0; } for(Int i = 0; i < lework; i++) { - thread_array(ti).ews(i) = zero; + thread_array[ti].ews(i) = zero; } //Resize L - BASKER_MATRIX &L = (c >= btab ? LBTF(c-btab) : L_D(c)); + BASKER_MATRIX &L = (c >= btab ? 
LBTF[c-btab] : L_D[c]); L.clear_pend(); REALLOC_INT_1DARRAY(L.row_idx, L.nnz, - thread_array(ti).error_info); + thread_array[ti].error_info); REALLOC_ENTRY_1DARRAY(L.val, L.nnz, - thread_array(ti).error_info); - L.mnnz = thread_array(ti).error_info; - L.nnz = thread_array(ti).error_info; + thread_array[ti].error_info); + L.mnnz = thread_array[ti].error_info; + L.nnz = thread_array[ti].error_info; for(Int i = 0; i < L.ncol; i++) { L.col_ptr(i) = 0; @@ -533,15 +533,15 @@ namespace BaskerNS } //Resize U - BASKER_MATRIX &U = (c >= btab ? UBTF(c-btab) : U_D(c)); + BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); REALLOC_INT_1DARRAY(U.row_idx, U.nnz, - thread_array(ti).error_info); + thread_array[ti].error_info); REALLOC_ENTRY_1DARRAY(U.val, U.nnz, - thread_array(ti).error_info); - U.mnnz = thread_array(ti).error_info; - U.nnz = thread_array(ti).error_info; + thread_array[ti].error_info); + U.mnnz = thread_array[ti].error_info; + U.nnz = thread_array[ti].error_info; for(Int i = 0; i < U.ncol; i++) { U.col_ptr(i) = 0; @@ -561,9 +561,9 @@ namespace BaskerNS } //Reset - thread_array(ti).error_type = BASKER_ERROR_NOERROR; - thread_array(ti).error_blk = BASKER_MAX_IDX; - thread_array(ti).error_info = BASKER_MAX_IDX; + thread_array[ti].error_type = BASKER_ERROR_NOERROR; + thread_array[ti].error_blk = BASKER_MAX_IDX; + thread_array[ti].error_info = BASKER_MAX_IDX; nthread_remalloc++; @@ -593,7 +593,7 @@ namespace BaskerNS { for(Int ti = 0; ti < num_threads; ti++) { - thread_array(ti).error_type = BASKER_ERROR_NOERROR; + thread_array[ti].error_type = BASKER_ERROR_NOERROR; } } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp index 02a896d957c0..4bbd86507d9d 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp @@ -95,6 +95,9 @@ namespace BaskerNS BASKER_INLINE int fill(); + 
BASKER_INLINE + void init_ptr(); + BASKER_INLINE void init_inc_lvl(); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp index 4f12887c87ed..e40361e6f988 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp @@ -328,7 +328,7 @@ namespace BaskerNS if(nnz == _nnz) { copy_vec(_row_idx, _nnz, row_idx); - copy_vec(_val,_nnz, val); + copy_vec(_val, _nnz, val); } else { @@ -498,6 +498,13 @@ namespace BaskerNS return 0; } + template + BASKER_INLINE + void BaskerMatrix::init_ptr() + { + for (Int i = 0; i < ncol+1; i ++) col_ptr(i) = 0; + } + template BASKER_INLINE void BaskerMatrix::convert2D diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp index 499e00edd417..6613d992dbc2 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp @@ -150,14 +150,14 @@ namespace BaskerNS const Mag normA_blk = BTF_A.anorm; Int b = S[0][kid]; //Which blk from schedule - BASKER_MATRIX &L = LL(b)(0); - BASKER_MATRIX &U = LU(b)(LU_size(b)-1); - BASKER_MATRIX &M = ALM(b)(0); //A->blk + BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &U = LU[b][LU_size(b)-1]; + BASKER_MATRIX &M = ALM[b][0]; //A->blk #ifdef BASKER_2DL //printf("Accessing blk: %d kid: %d \n", b, kid); - INT_1DARRAY ws = LL(b)(0).iws; - ENTRY_1DARRAY X = LL(b)(0).ews; - Int ws_size = LL(b)(0).iws_size; + INT_1DARRAY ws = LL[b][0].iws; + ENTRY_1DARRAY X = LL[b][0].ews; + Int ws_size = LL[b][0].iws_size; #else //else if BASKER_2DL INT_1DARRAY ws = thread_array[kid].iws; ENTRY_1DARRAY X = thread_array[kid].ews; @@ -577,11 +577,11 @@ namespace BaskerNS } } if (!explicit_pivot) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_SINGULAR; - 
thread_array(kid).error_blk = b; - thread_array(kid).error_subblk = 0; - thread_array(kid).error_info = k; + thread_array[kid].error_blk = b; + thread_array[kid].error_subblk = 0; + thread_array[kid].error_info = k; return BASKER_ERROR; } } @@ -676,17 +676,17 @@ namespace BaskerNS (int)kid, (long)b, (long)llnnz, (long)lnnz, (long)lcnt, (int)lnnz, (int)M.nrow, (long)newsize); } - thread_array(kid).error_blk = b; - thread_array(kid).error_subblk = 0; + thread_array[kid].error_blk = b; + thread_array[kid].error_subblk = 0; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } @@ -701,17 +701,17 @@ namespace BaskerNS (int)kid, (long)b, (long)uunnz, (long)unnz+ucnt, (long)k, (int)uunnz, (int)M.nrow, (int)newsize); } - thread_array(kid).error_blk = b; - thread_array(kid).error_subblk = -1; + thread_array[kid].error_blk = b; + thread_array[kid].error_subblk = -1; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } @@ -981,10 +981,10 @@ namespace BaskerNS ) { //Setup variables - const Int wsb = S(0)(kid); + const Int wsb = S[0][kid]; - INT_1DARRAY ws = LL(wsb)(l).iws; - const Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + const Int ws_size = LL[wsb][l].iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -1011,18 +1011,18 @@ namespace BaskerNS ) { const 
Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A - const Int b = S(lvl)(kid); + const Int b = S[lvl][kid]; //const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); - const Int U_col = S(lvl)(kid); + BASKER_MATRIX &L = LL[b][0]; + const Int U_col = S[lvl][kid]; Int U_row = LU_size(U_col)-1; if(lvl > 0) { //U_row = (lvl==1)?(kid%2):S(l)(kid)%LU_size(U_col); } - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; //const Int brow = L.srow; @@ -1128,14 +1128,14 @@ namespace BaskerNS { //Setup variables - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_g = L.srow + scol_top; // global offset - INT_1DARRAY ws = LL(wsb)(l).iws; - const Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + const Int ws_size = LL[wsb][l].iws_size; //Int *color = &(ws[0]); Int *pattern = &(ws(ws_size)); @@ -1279,12 +1279,12 @@ namespace BaskerNS { //Setup variables - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; #ifdef BASKER_2DL - INT_1DARRAY ws = LL(wsb)(l).iws; - const Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + const Int ws_size = LL[wsb][l].iws_size; #else INT_1DARRAY ws = thread_array[kid].iws; Int ws_size = thread_array[kid].iws_size; @@ -1452,13 +1452,13 @@ namespace BaskerNS Int k, Int top, Int xnnz) { - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; #ifdef BASKER_2DL - INT_1DARRAY ws = LL(wsb)(l).iws; - ENTRY_1DARRAY X = LL(wsb)(l).ews; - Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; 
+ ENTRY_1DARRAY X = LL[wsb][l].ews; + Int ws_size = LL[wsb][l].iws_size; #else INT_1DARRAY ws = thread_array[kid].iws; ENTRY_1DARRAY X = thread_array[kid].ews; @@ -1534,10 +1534,10 @@ namespace BaskerNS Int X_col, Int X_row, Int k, Entry pivot) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; //const Int ws_size = LL(X_col)(X_row).iws_size; //const Int p_size = LL(X_col)(X_row).p_size; @@ -1608,7 +1608,7 @@ namespace BaskerNS #endif //LL[X_col][X_row].p_size = 0; - LL(X_col)(X_row).p_size = 0; + LL[X_col][X_row].p_size = 0; return 0; }//end t_dense_offdiag_mov_L() @@ -1623,12 +1623,12 @@ namespace BaskerNS Int X_col, Int X_row, Int k, Entry pivot) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - const Int ws_size = LL(X_col)(X_row).iws_size; - const Int p_size = LL(X_col)(X_row).p_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + const Int ws_size = LL[X_col][X_row].iws_size; + const Int p_size = LL[X_col][X_row].p_size; #ifdef BASKER_DEBUG_NFACTOR_BLK @@ -1658,17 +1658,17 @@ namespace BaskerNS (long)blkcol, (long)blkrow, (long)kid, (long)llnnz, (long)lnnz, (long)p_size ); } - thread_array(kid).error_blk = blkcol; - thread_array(kid).error_subblk = blkrow; + thread_array[kid].error_blk = blkcol; + thread_array[kid].error_subblk = blkrow; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; 
return BASKER_ERROR; } //BASKER_ASSERT(0==1, "REALLOC LOWER BLOCK\n"); @@ -1714,7 +1714,7 @@ namespace BaskerNS } #endif - LL(X_col)(X_row).p_size = 0; + LL[X_col][X_row].p_size = 0; return 0; }//end t_offdiag_mov_L() @@ -1733,17 +1733,17 @@ namespace BaskerNS BASKER_BOOL A_option) { //Note: need to add support for offdiag permuation - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int nnz = LL(X_col)(X_row).p_size; + Int nnz = LL[X_col][X_row].p_size; //printf( " t_dense_back_solve_offdiag( LL(%d,%d) and ALM(%d,%d)\n", blkcol,blkrow,blkcol,blkrow ); #ifdef BASKER_DEBUG_NFACTOR_BLK - Int ws_size = LL(X_col)(X_row).iws_size; + Int ws_size = LL[X_col][X_row].iws_size; const Int brow = L.srow; const Int bcol = L.scol; printf("\n\n"); @@ -1832,7 +1832,7 @@ namespace BaskerNS #ifdef BASKER_2DL //LL[X_col][X_row].p_size = nnz; - LL(X_col)(X_row).p_size = nnz; + LL[X_col][X_row].p_size = nnz; #endif //Debug @@ -1878,14 +1878,14 @@ namespace BaskerNS { //Note: need to add support for offdiag permuation - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL(X_col)(X_row).iws_size; - Int nnz = LL(X_col)(X_row).p_size; + Int ws_size = LL[X_col][X_row].iws_size; + Int nnz = LL[X_col][X_row].p_size; //const Int brow = L.srow; //const Int bcol = L.scol; @@ -2057,7 +2057,7 @@ namespace BaskerNS printf("kid %d Ending nnz: %d \n",kid, nnz); #endif //LL[X_col][X_row].p_size = nnz; - LL(X_col)(X_row).p_size = nnz; 
+ LL[X_col][X_row].p_size = nnz; #endif //Debug diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp index 1fb5dc3fcc2b..48dae30f95c9 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp @@ -130,14 +130,14 @@ namespace BaskerNS BASKER_INLINE int Basker::t_nfactor_blk_inc_lvl(Int kid) { - Int b = S(0)(kid); //Which blk from schedule - BASKER_MATRIX &L = LL(b)(0); - BASKER_MATRIX &U = LU(b)(LU_size(b)-1); - BASKER_MATRIX &M = ALM(b)(0); //A->blk + Int b = S[0][kid]; //Which blk from schedule + BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &U = LU[b][LU_size(b)-1]; + BASKER_MATRIX &M = ALM[b][0]; //A->blk - INT_1DARRAY ws = LL(b)(0).iws; - ENTRY_1DARRAY X = LL(b)(0).ews; - Int ws_size = LL(b)(0).iws_size; + INT_1DARRAY ws = LL[b][0].iws; + ENTRY_1DARRAY X = LL[b][0].ews; + Int ws_size = LL[b][0].iws_size; Int brow = L.srow; //begining row Int lval = 0; @@ -384,10 +384,10 @@ namespace BaskerNS << pivot << endl; cout << "lcnt: " << lcnt << endl; } - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_SINGULAR; - thread_array(kid).error_blk = b; - thread_array(kid).error_info = k; + thread_array[kid].error_blk = b; + thread_array[kid].error_info = k; return BASKER_ERROR; } @@ -410,17 +410,17 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_blk = b; - thread_array(kid).error_subblk = 0; - thread_array(kid).error_info = newsize; + thread_array[kid].error_blk = b; + thread_array[kid].error_subblk = 0; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } @@ -441,17 +441,17 @@ namespace BaskerNS 
if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_blk = b; - thread_array(kid).error_subblk = -1; - thread_array(kid).error_info = newsize; + thread_array[kid].error_blk = b; + thread_array[kid].error_subblk = -1; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } @@ -665,13 +665,13 @@ namespace BaskerNS { //Setup variables - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; const Int brow = L.srow; - INT_1DARRAY ws = LL(wsb)(l).iws; - const Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + const Int ws_size = LL[wsb][l].iws_size; //Int *color = &(ws[0]); Int *pattern = &(ws(ws_size)); @@ -936,12 +936,12 @@ namespace BaskerNS ) { //Setup variables - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; - INT_1DARRAY ws = LL(wsb)(l).iws; - const Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + const Int ws_size = LL[wsb][l].iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -985,13 +985,13 @@ namespace BaskerNS //Will want to make this backward in the future //Setup variables - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; const Int brow = L.srow; - INT_1DARRAY ws = LL(wsb)(l).iws; - const Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + const Int ws_size = LL[wsb][l].iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -1353,12 +1353,12 @@ namespace BaskerNS //We 
note that this can be fixed to be faster - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); - INT_1DARRAY ws = LL(wsb)(l).iws; - ENTRY_1DARRAY X = LL(wsb)(l).ews; - const Int ws_size = LL(wsb)(l).iws_size; + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; + INT_1DARRAY ws = LL[wsb][l].iws; + ENTRY_1DARRAY X = LL[wsb][l].ews; + const Int ws_size = LL[wsb][l].iws_size; Int brow = L.srow; @@ -1441,12 +1441,12 @@ namespace BaskerNS { //We note that this can be fixed to be faster - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); - INT_1DARRAY ws = LL(wsb)(l).iws; - ENTRY_1DARRAY X = LL(wsb)(l).ews; - const Int ws_size = LL(wsb)(l).iws_size; + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; + INT_1DARRAY ws = LL[wsb][l].iws; + ENTRY_1DARRAY X = LL[wsb][l].ews; + const Int ws_size = LL[wsb][l].iws_size; Int brow = L.srow; Int *color = &(ws(0)); @@ -1555,14 +1555,14 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; - Int nnz = LL(X_col)(X_row).p_size; + Int nnz = LL[X_col][X_row].p_size; #ifdef BASKER_DEBUG_NFACTOR_BLK printf("t_back_solve_diag, kid: %d blkcol: %d blkrow: %d \n", @@ -1696,7 +1696,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL(X_col)(X_row).p_size = nnz; + LL[X_col][X_row].p_size = nnz; #endif return; @@ -1717,14 +1717,14 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = 
ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; - Int nnz = LL(X_col)(X_row).p_size; + Int nnz = LL[X_col][X_row].p_size; //Int brow = L.srow; //Int bcol = L.scol; @@ -1869,14 +1869,14 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; - Int nnz = LL(X_col)(X_row).p_size; + Int nnz = LL[X_col][X_row].p_size; Int brow = L.srow; Int bcol = L.scol; @@ -2065,12 +2065,12 @@ namespace BaskerNS Int k, Entry pivot ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - const Int ws_size = LL(X_col)(X_row).iws_size; - const Int p_size = LL(X_col)(X_row).p_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + const Int ws_size = LL[X_col][X_row].iws_size; + const Int p_size = LL[X_col][X_row].p_size; #ifdef BASKER_DEBUG_NFACTOR_BLK @@ -2105,18 +2105,18 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_blk = blkcol; - thread_array(kid).error_subblk = blkrow; - 
thread_array(kid).error_info = newsize; + thread_array[kid].error_blk = blkcol; + thread_array[kid].error_subblk = blkrow; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } @@ -2155,14 +2155,14 @@ namespace BaskerNS //Fix later if(Options.same_pattern == BASKER_FALSE) { - for(Int i = 0; i < LL(X_col)(X_row).nrow; i++) + for(Int i = 0; i < LL[X_col][X_row].nrow; i++) { stack[i] = BASKER_MAX_IDX; } } L.col_ptr(k+1) = lnnz; - LL(X_col)(X_row).p_size = 0; + LL[X_col][X_row].p_size = 0; return 0; }//end t_offdiag_mov_L_inc_lvl() @@ -2729,8 +2729,8 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; /* @@ -2740,27 +2740,27 @@ namespace BaskerNS LP_col, LP_row, kid); */ - BASKER_MATRIX *UPP = &LU(UP_col)(0); + BASKER_MATRIX *UPP = &LU[UP_col][0]; if(UP_row != BASKER_MAX_IDX) { - UPP = &(LU(UP_col)(UP_row)); + UPP = &(LU[UP_col][UP_row]); } BASKER_MATRIX &UP = *(UPP); - BASKER_MATRIX *LPP = &LU(LP_col)(0); + BASKER_MATRIX *LPP = &LU[LP_col][0]; if(LP_row != BASKER_MAX_IDX) { - LPP = &(LL(LP_col)(LP_row)); + LPP = &(LL[LP_col][LP_row]); } BASKER_MATRIX &LP = *(LPP); - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; - Int nnz = LL(X_col)(X_row).p_size; + Int nnz = LL[X_col][X_row].p_size; @@ -2948,7 +2948,7 @@ namespace BaskerNS }//over all nonzero in left - LL(X_col)(X_row).p_size = nnz; + LL[X_col][X_row].p_size = nnz; return; @@ -2969,14 +2969,14 @@ namespace BaskerNS Int x_size, Int x_offset, BASKER_BOOL A_option) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = 
LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; - Int nnz = LL(X_col)(X_row).p_size; + Int nnz = LL[X_col][X_row].p_size; //const Int brow = L.srow; //const Int bcol = L.scol; @@ -3106,7 +3106,7 @@ namespace BaskerNS */ - Int temp = INC_LVL_TEMP(k_i+LL(blkcol)(0).srow) + L.inc_lvl(j) + 1; + Int temp = INC_LVL_TEMP(k_i+LL[blkcol][0].srow) + L.inc_lvl(j) + 1; /* printf("lower row: %d kid: %d inc: %d %d %d j: %d \n", @@ -3183,7 +3183,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL(X_col)(X_row).p_size = nnz; + LL[X_col][X_row].p_size = nnz; #endif //Debug @@ -3219,11 +3219,11 @@ namespace BaskerNS Int k, Entry pivot ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - const Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + const Int ws_size = LL[X_col][X_row].iws_size; //const Int p_size = LL(X_col)(X_row).p_size; //NDE - warning: unused @@ -3296,7 +3296,7 @@ namespace BaskerNS } L.col_ptr(k+1) = lnnz; - LL(X_col)(X_row).p_size = 0; + LL[X_col][X_row].p_size = 0; return 0; }//end t_dense_offdiag_mov_L_inv_lvl() @@ -3315,12 +3315,12 @@ namespace BaskerNS const BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; //Int nnz = LL(X_col)(X_row).p_size; //Int 
brow = L.srow; @@ -3439,11 +3439,11 @@ namespace BaskerNS Int x_size, Int x_offset ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; //Int nnz = LL(X_col)(X_row).p_size; //const Int brow = L.srow; //Not used @@ -3576,11 +3576,11 @@ namespace BaskerNS Int x_size, Int x_offset ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; //Int nnz = LL(X_col)(X_row).p_size; //const Int brow = L.srow; //Not used @@ -3758,16 +3758,16 @@ namespace BaskerNS BASKER_MATRIX *B; if(lower == BASKER_TRUE) { - B = &(ALM(blkcol)(blkrow)); + B = &(ALM[blkcol][blkrow]); } else { - B = &(AVM(blkcol)(blkrow)); + B = &(AVM[blkcol][blkrow]); } BASKER_MATRIX &M = *B; //BASKER_MATRIX &M = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL(X_col)(X_row).iws; - const Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + const Int ws_size = LL[X_col][X_row].iws_size; Int *color = &(ws(0)); Int *pattern = &(color[ws_size]); @@ -3840,9 +3840,9 @@ namespace BaskerNS ) { - const Int my_idx = S(0)(kid); + const Int my_idx = S[0][kid]; const Int team_leader = find_leader(kid,sl); - const Int leader_idx = S(0)(team_leader); + const Int leader_idx = S[0][team_leader]; //Int loop_col_idx = S(l)(kid); //printf("Reduce col fill called, kid: %d leader: %d \n", @@ -3857,12 +3857,12 @@ namespace BaskerNS for(Int blk = l+1; blk < endblk; ++blk) { // ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; //NDE - warning: unused - 
INT_1DARRAY &wsL = LL(leader_idx)(blk).iws; + INT_1DARRAY &wsL = LL[leader_idx][blk].iws; //Int p_sizeL = LL(leader_idx)(blk).p_size; - Int ws_sizeL = LL(leader_idx)(blk).iws_size; + Int ws_sizeL = LL[leader_idx][blk].iws_size; // ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; //NDE - warning: unused - INT_1DARRAY &ws = LL(my_idx)(blk).iws; - const Int ws_size = LL(my_idx)(blk).iws_size; + INT_1DARRAY &ws = LL[my_idx][blk].iws; + const Int ws_size = LL[my_idx][blk].iws_size; //Int p_size = LL(my_idx)(blk).p_size; Int *color = &(ws[0]); Int *pattern = &(color[ws_size]); @@ -3875,7 +3875,7 @@ namespace BaskerNS Int *stackL = &(patternL[ws_sizeL]); //over all nnnz found - for(Int jj = 0; jj < LL(my_idx)(blk).nrow; ++jj) + for(Int jj = 0; jj < LL[my_idx][blk].nrow; ++jj) { //if(kid==3) // { @@ -3941,12 +3941,12 @@ namespace BaskerNS //printf("===========T ADD ORIG FILL CALLED\n"); const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S(lvl)(leader_id); + const Int L_col = S[lvl][leader_id]; Int L_row = 0; //const Int U_col = S(lvl)(leader_id); //Int U_row = LU_size(U_col)-1; //Int X_col = S(0)(leader_id); - Int X_col = S(0)(kid); + Int X_col = S[0][kid]; Int X_row = l+1; @@ -3977,7 +3977,7 @@ namespace BaskerNS //Int L_row = 0; //const Int U_col = S(lvl)(leader_id); //Int U_row = LU_size(U_col)-1; - Int X_col = S(0)(leader_id); + Int X_col = S[0][leader_id]; Int X_row = l+1; //printf("=***== fill MY ID: %d LEADER ID: %d ===** \n", @@ -3987,7 +3987,7 @@ namespace BaskerNS { Int bl = l+1; - Int A_col = S(lvl)(kid); + Int A_col = S[lvl][kid]; /* printf("leader_id: %d kid: %d lvl: %d l: %d blk: %d %d \n", @@ -3996,16 +3996,16 @@ namespace BaskerNS */ Int my_row_leader = find_leader(kid, lvl-1); Int my_new_row = - S(bl)(kid) - S(0)(my_row_leader); + S[bl][kid] - S[0][my_row_leader]; - Int A_row = (lvl==l)?(2):S(bl)(kid)%(LU_size(A_col)); - if((S(bl)(kid)>14) && - (S(bl)(kid)>LU_size(A_col)) && + Int A_row = 
(lvl==l)?(2):S[bl][kid]%(LU_size(A_col)); + if((S[bl](kid)>14) && + (S[bl](kid)>LU_size(A_col)) && (lvl != 1)) { - Int tm = (S(bl)(kid)+1)/16; - A_row = ((S(bl)(kid)+1)-(tm*16))%LU_size(A_col); + Int tm = (S[bl][kid]+1)/16; + A_row = ((S[bl][kid]+1)-(tm*16))%LU_size(A_col); } /* diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp index 650bc77a8de6..9c77c1f38994 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp @@ -134,12 +134,12 @@ namespace BaskerNS double barrier_time = 0; #endif - Int U_col = S(lvl)(kid); + Int U_col = S[lvl][kid]; Int U_row = 0; - const Int scol = LU(U_col)(U_row).scol; - const Int ecol = LU(U_col)(U_row).ecol; - const Int ncol = LU(U_col)(U_row).ncol; + const Int scol = LU[U_col][U_row].scol; + const Int ecol = LU[U_col][U_row].ecol; + const Int ncol = LU[U_col][U_row].ncol; //for(Int k = scol; k < ecol; k++) //might have to use k+scol for barrier @@ -460,15 +460,15 @@ namespace BaskerNS const Entry zero (0.0); //Get needed variables - const Int L_col = S(l)(kid); - const Int U_col = S(lvl)(kid); + const Int L_col = S[l][kid]; + const Int U_col = S[lvl][kid]; - Int my_row_leader = S(0)(find_leader(kid,lvl-1)); + Int my_row_leader = S[0][find_leader(kid,lvl-1)]; //Int my_new_row = // L_col - my_row_leader; Int U_row = L_col - my_row_leader; - const Int X_col = S(0)(kid); + const Int X_col = S[0][kid]; const Int X_row = l; //X_row = lower(L) //const Int col_idx_offset = 0; //we might be able to remove @@ -480,7 +480,7 @@ namespace BaskerNS #endif //end get needed variables// - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; //Ask C++ guru if this is ok BASKER_MATRIX *Bp; @@ -488,7 +488,7 @@ namespace BaskerNS //if(sep_flg == BASKER_FALSE) if(l == 0) { - Bp = &(AVM(U_col)(U_row)); + Bp = &(AVM[U_col][U_row]); //bbcol = Bp->scol; 
} else @@ -503,9 +503,9 @@ namespace BaskerNS // kid, X_col, X_row); - INT_1DARRAY ws = LL(X_col)(X_row).iws; - const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + INT_1DARRAY ws = LL[X_col][X_row].iws; + const Int ws_size = LL[X_col][X_row].iws_size; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_a = U.srow; // offset within A @@ -649,17 +649,17 @@ namespace BaskerNS Int newsize = (unnz+U.nrow) * 1.2 ; - thread_array(kid).error_blk = U_col; - thread_array(kid).error_subblk = U_row; + thread_array[kid].error_blk = U_col; + thread_array[kid].error_subblk = U_row; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; }//if/else realloc } @@ -741,10 +741,10 @@ namespace BaskerNS std::cout << "----Error--- kid = " << kid << ": extra L[" << j << "]=" << X[j] << " with gperm( " << brow_g << " + " << j << " ) = " << t << std::endl; - thread_array(kid).error_type = BASKER_ERROR_OTHER; - thread_array(kid).error_blk = lvl; - thread_array(kid).error_subblk = l; - thread_array(kid).error_info = k; + thread_array[kid].error_type = BASKER_ERROR_OTHER; + thread_array[kid].error_blk = lvl; + thread_array[kid].error_subblk = l; + thread_array[kid].error_info = k; info = BASKER_ERROR; //BASKER_ASSERT(t != BASKER_MAX_IDX, "lower entry in U"); #endif @@ -864,14 +864,14 @@ namespace BaskerNS int lteam_size = pow(2, l); #ifdef BASKER_2DL - Int L_col = S(l)(my_leader); + Int L_col = S[l][my_leader]; Int L_row = 0; - Int U_col = S(lvl)(kid); - Int U_row = (lvl==1)?(kid%2):S(l)(kid)%LU_size(U_col); - Int X_col = 
S(0)(my_leader); + Int U_col = S[lvl][kid]; + Int U_row = (lvl==1)?(kid%2):S[l][kid]%LU_size(U_col); + Int X_col = S[0][my_leader]; Int X_row = l; //this will change for us Int col_idx_offset = 0; - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; const Int bcol = U.scol; #else BASKER_ASSERT(0==1, "t_upper_col_factor_offdiag, only work with with 2D layout"); @@ -1066,11 +1066,11 @@ namespace BaskerNS const Mag normA_blk = BTF_A.anorm; //Get needed variables - const Int L_col = S(lvl)(kid); + const Int L_col = S[lvl][kid]; const Int L_row = 0; - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; const Int U_row = LU_size(U_col)-1; - const Int X_col = S(0)(kid); + const Int X_col = S[0][kid]; //Int col_idx_offset = 0; //can we get rid of now? #ifdef BASKER_DEBUG_NFACTOR_COL @@ -1080,10 +1080,10 @@ namespace BaskerNS #endif //end get needed variables - BASKER_MATRIX &L = LL(L_col)(L_row); - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &L = LL[L_col][L_row]; + BASKER_MATRIX &U = LU[U_col][U_row]; - BASKER_MATRIX &B = thread_array(kid).C; + BASKER_MATRIX &B = thread_array[kid].C; #ifdef BASKER_DEBUG_NFACTOR_COL if(kid >= 0) @@ -1098,9 +1098,9 @@ namespace BaskerNS //B.print(); - INT_1DARRAY ws = LL(X_col)(l+1).iws; - const Int ws_size = LL(X_col)(l+1).iws_size; - ENTRY_1DARRAY X = LL(X_col)(l+1).ews; + INT_1DARRAY ws = LL[X_col][l+1].iws; + const Int ws_size = LL[X_col][l+1].iws_size; + ENTRY_1DARRAY X = LL[X_col][l+1].ews; Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_a = U.srow; // offset within A @@ -1327,10 +1327,10 @@ namespace BaskerNS X(maxindex) = pivot; } else { // replace-tiny-pivot not requested, or the current column is structurally empty after elimination - thread_array(kid).error_type = BASKER_ERROR_SINGULAR; - thread_array(kid).error_blk = L_col; - thread_array(kid).error_subblk = -1; - thread_array(kid).error_info = k; + thread_array[kid].error_type = 
BASKER_ERROR_SINGULAR; + thread_array[kid].error_blk = L_col; + thread_array[kid].error_subblk = -1; + thread_array[kid].error_info = k; return BASKER_ERROR; } } else if (Options.replace_tiny_pivot && normA_blk > abs(zero) && abs(pivot) < normA_blk * sqrt(eps)) { @@ -1374,17 +1374,17 @@ namespace BaskerNS //cout << " > L_col = " << L_col << " L_row = " << L_row << endl; } - thread_array(kid).error_blk = L_col; - thread_array(kid).error_subblk = -1; + thread_array[kid].error_blk = L_col; + thread_array[kid].error_subblk = -1; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } } @@ -1399,17 +1399,17 @@ namespace BaskerNS << endl; } - thread_array(kid).error_blk = U_col; - thread_array(kid).error_subblk = U_row; + thread_array[kid].error_blk = U_col; + thread_array[kid].error_subblk = U_row; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } } @@ -1640,20 +1640,20 @@ namespace BaskerNS const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S(lvl)(leader_id); + const Int L_col = S[lvl][leader_id]; Int L_row = 0; - const Int U_col = S(lvl)(leader_id); + const Int U_col = S[lvl][leader_id]; Int U_row = LU_size(U_col)-1; - Int X_col = S(0)(leader_id); + Int X_col = S[0][leader_id]; Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? 
- BASKER_MATRIX &L = LL(L_col)(L_row); - BASKER_MATRIX &U = LU(U_col)(U_row); //U.fill(); + BASKER_MATRIX &L = LL[L_col][L_row]; + BASKER_MATRIX &U = LU[U_col][U_row]; //U.fill(); - INT_1DARRAY ws = LL(X_col)(X_row).iws; - const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + INT_1DARRAY ws = LL[X_col][X_row].iws; + const Int ws_size = LL[X_col][X_row].iws_size; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; const Int bcol = U.scol; @@ -1743,15 +1743,15 @@ namespace BaskerNS //Setup - Int A_col = S(lvl)(kid); - Int A_row = (lvl==1)?(2):S(l+1)(kid)%(LU_size(A_col)); + Int A_col = S[lvl][kid]; + Int A_row = (lvl==1)?(2):S[l+1][kid]%(LU_size(A_col)); - BASKER_MATRIX &B = AVM(A_col)(A_col); + BASKER_MATRIX &B = AVM[A_col][A_col]; - const Int my_idx = S(0)(kid); + const Int my_idx = S[0][kid]; team_leader = find_leader(kid, l); - const Int leader_idx = S(0)(team_leader); - Int loop_col_idx = S(l)(kid); + const Int leader_idx = S[0][team_leader]; + Int loop_col_idx = S[l][kid]; #ifdef BASKER_DEBUG_NFACTOR_COL printf("Called t_blk_col_copy_atomic kid: %d " , kid); @@ -1769,17 +1769,17 @@ namespace BaskerNS //Split over threads (leader and nonleader) for(Int blk=l+1; blk Accumulate the update from (l-1)th level: // LU(U_col)(U_row) -= L(U_col)(l-1) * U(l-1)(U_row) t_add_extend(thread, kid, lvl, l-1, k, - LU(U_col)(U_row).scol, + LU[U_col][U_row].scol, BASKER_FALSE); if(kid%((Int)pow(2, l)) == 0) @@ -248,9 +248,9 @@ namespace BaskerNS // printf("[3] barrier test, kid: %d leader: %d b_size: %d lvl: %d \n", // kid, my_leader, b_size, lvl); t_basker_barrier(thread, kid, my_leader, - b_size, 3, LU(U_col)(U_row).scol, 0); + b_size, 3, LU[U_col][U_row].scol, 0); for(Int ti = 0; ti < num_threads; ti++) { - if (thread_array(kid).error_type != BASKER_SUCCESS) { + if (thread_array[kid].error_type != BASKER_SUCCESS) { info = BASKER_ERROR; } } @@ -287,7 +287,7 @@ namespace BaskerNS printf( " kid=%d: calling t_add_extend(k=%d/%d)\n",kid,k,ncol ); 
fflush(stdout); #endif t_add_extend(thread, kid,lvl,lvl-1, k, - LU(U_col)(U_row).scol, + LU[U_col][U_row].scol, BASKER_TRUE); } #ifdef BASKER_TIMER @@ -336,7 +336,7 @@ namespace BaskerNS t_basker_barrier(thread, kid, my_leader, b_size, 4, k, lvl-1); for(Int tid = 0; tid < num_threads; tid++) { - if (thread_array(tid).error_type != BASKER_SUCCESS) { + if (thread_array[tid].error_type != BASKER_SUCCESS) { info = BASKER_ERROR; } } @@ -395,7 +395,7 @@ namespace BaskerNS #ifdef BASKER_TIMER double time_factot = timer.seconds(); if((kid%(Int)(pow(2,lvl))) == 0) { - const Int L_col = S(lvl)(kid); + const Int L_col = S[lvl][kid]; const Int L_row = LU_size(U_col)-1; printf("Time Lower-Col(%d): %lf, n = %d, nnz(L) = %d, nnz(U) = %d \n", (int)kid, time_factot, @@ -446,7 +446,7 @@ namespace BaskerNS #endif //This will do the correct spmv - if(thread_array(kid).error_type == BASKER_ERROR_NOERROR) { + if(thread_array[kid].error_type == BASKER_ERROR_NOERROR) { t_upper_col_factor_offdiag2(kid, lvl, sl,l, k, lower); } //Barrier--Start @@ -461,7 +461,7 @@ namespace BaskerNS //Barrier--End if(kid%((Int)pow(2,sl)) == 0 && - thread_array(kid).error_type == BASKER_ERROR_NOERROR) { + thread_array[kid].error_type == BASKER_ERROR_NOERROR) { t_dense_blk_col_copy_atomic2(kid, my_leader, lvl, sl, l, k, lower); } @@ -477,7 +477,7 @@ namespace BaskerNS #endif }//over all sublevels - if(thread_array(kid).error_type == BASKER_ERROR_NOERROR) { + if(thread_array[kid].error_type == BASKER_ERROR_NOERROR) { t_dense_copy_update_matrix2(kid, my_leader, lvl, l, k); } }//end t_add_add @@ -507,15 +507,15 @@ namespace BaskerNS return; } - Int my_row_leader = S(0)(find_leader(kid,lvl-1)); - const Int L_col = S(sl)(my_leader); - const Int U_col = S(lvl)(kid); - const Int X_col = S(0)(my_leader); + Int my_row_leader = S[0][find_leader(kid,lvl-1)]; + const Int L_col = S[sl][my_leader]; + const Int U_col = S[lvl][kid]; + const Int X_col = S[0][my_leader]; Int L_row = l-sl+1; //Might have to think about th Int 
U_row = L_col-my_row_leader; Int X_row = l+1; //this will change for us - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; #ifdef BASKER_DEBUG_NFACTOR_COL2 if(L_row >= LL_size(L_col)) { @@ -588,10 +588,10 @@ namespace BaskerNS //Setup //printf("DEBUG, kid: %d k: %d A_col: %d A_row: %d \n", // kid, k, A_col, A_row); - const Int my_idx = S(0)(kid); + const Int my_idx = S[0][kid]; //should remove either as a paramter or here Int team_leader = find_leader(kid, sl); - const Int leader_idx = S(0)(team_leader); + const Int leader_idx = S[0][team_leader]; #ifdef BASKER_DEBUG_NFACTOR_COL2 if(lower == BASKER_TRUE) { @@ -609,10 +609,10 @@ namespace BaskerNS Int endblk = (lower)?(LL_size(my_idx)):(l+2); for(Int blk = l+1; blk < endblk; ++blk) { - ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; - Int p_sizeL = LL(leader_idx)(blk).p_size; - ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; - INT_1DARRAY &ws = LL(my_idx)(blk).iws; + ENTRY_1DARRAY &XL = LL[leader_idx][blk].ews; + Int p_sizeL = LL[leader_idx][blk].p_size; + ENTRY_1DARRAY &X = LL[my_idx][blk].ews; + INT_1DARRAY &ws = LL[my_idx][blk].iws; Int *color = &(ws[0]); //printf( " + t_dense_blk_col_copy_atomic2(kid=%d: LL(%d)(%d) += LL(%d)(%d)\n",kid,leader_idx, blk,my_idx,blk); @@ -629,7 +629,7 @@ namespace BaskerNS #endif //over all nnnz found - for(Int jj = 0; jj < LL(my_idx)(blk).nrow; ++jj) + for(Int jj = 0; jj < LL[my_idx][blk].nrow; ++jj) { color[jj] = 0; #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -677,7 +677,7 @@ namespace BaskerNS //This can be removed in the future if(kid != team_leader) { - LL(my_idx)(blk).p_size = 0; + LL[my_idx][blk].p_size = 0; } else { @@ -685,7 +685,7 @@ namespace BaskerNS printf("SETTING PS: %d L:%d %d kid: %d\n", p_sizeL, leader_idx, blk, kid); #endif - LL(leader_idx)(blk).p_size = p_sizeL; + LL[leader_idx][blk].p_size = p_sizeL; //p_size = 0; //not needed }//over all blks } @@ -709,8 +709,8 @@ namespace BaskerNS //printf("\n\n\n\n"); const Entry zero (0.0); - const Int leader_idx = 
S(0)(kid); - BASKER_MATRIX &C = thread_array(kid).C; + const Int leader_idx = S[0][kid]; + BASKER_MATRIX &C = thread_array[kid].C; Int nnz = 0; //Over each blk @@ -724,10 +724,10 @@ namespace BaskerNS // X += B(:, k) { Int bl = l+1; - Int A_col = S(lvl)(kid); + Int A_col = S[lvl][kid]; - Int my_row_leader = S(0)(find_leader(kid,lvl-1)); - Int A_row = S(bl)(kid) - my_row_leader; + Int my_row_leader = S[0][find_leader(kid,lvl-1)]; + Int A_row = S[bl][kid] - my_row_leader; BASKER_MATRIX *Bp; if(A_row != (LU_size(A_col)-1)) @@ -735,12 +735,12 @@ namespace BaskerNS //printf("upper picked, kid: %d \n", kid); //printf("up: %d %d kid: %d \n", // A_col, A_row, kid); - Bp = &(AVM(A_col)(A_row)); + Bp = &(AVM[A_col][A_row]); } else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM(A_col)(0)); + Bp = &(ALM[A_col][0]); } #ifdef BASKER_DEBUG_NFACTOR_COL2 printf("copy, kid: %d bl: %d A: %d %d \n", @@ -749,7 +749,7 @@ namespace BaskerNS // X += B(:, k) BASKER_MATRIX &B = *Bp; - ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; + ENTRY_1DARRAY X = LL[leader_idx][bl].ews; //printf( " -- t_dense_copy_update_matrix2(kid=%d: LL(%d)(%d) += B)\n",kid,leader_idx,bl ); //printf("ADDING UPDATES TO B\n"); //B.info(); @@ -800,9 +800,9 @@ namespace BaskerNS //For recounting patterns in dense blk //Need better sparse update - ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; - INT_1DARRAY ws = LL(leader_idx)(bl).iws; - const Int nrow = LL(leader_idx)(bl).nrow; + ENTRY_1DARRAY X = LL[leader_idx][bl].ews; + INT_1DARRAY ws = LL[leader_idx][bl].iws; + const Int nrow = LL[leader_idx][bl].nrow; Int *color = &(ws(0)); #ifdef BASKER_DEBUG_NFACTOR_COL2 printf("moving, kid: %d A: %d %d %d %d p_size: %d \n", @@ -875,18 +875,18 @@ namespace BaskerNS const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S(lvl)(leader_id); - const Int U_col = S(lvl)(leader_id); + const Int L_col = S[lvl][leader_id]; + const Int U_col = S[lvl][leader_id]; Int L_row = 0; Int U_row = 
LU_size(U_col)-1; - Int X_col = S(0)(leader_id); + Int X_col = S[0][leader_id]; Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; pivot = U.tpivot; //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp index 1425385d9f2e..ee72c5d32c7b 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp @@ -83,7 +83,7 @@ namespace BaskerNS ) { - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; Int U_row = 0; //const Int scol = LU(U_col)(U_row).scol; @@ -101,7 +101,7 @@ namespace BaskerNS //for(Int k = 0; k < 1; ++k) - for(Int k = 0; k < LU(U_col)(U_row).ncol; ++k) + for(Int k = 0; k < LU[U_col][U_row].ncol; ++k) { #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -148,7 +148,7 @@ namespace BaskerNS //barrier k = 0 usedl1 t_basker_barrier_inc_lvl(thread,kid,my_leader, - b_size, 0, LU(U_col)(U_row).scol, 0); + b_size, 0, LU[U_col][U_row].scol, 0); //printf("1 kid: %d error_leader: %d lvl: %d \n", kid, error_leader, lvl); BASKER_BOOL error_flag = BASKER_FALSE; basker_barrier.ExitGet(error_leader, error_flag); @@ -172,7 +172,7 @@ namespace BaskerNS { //for(Int k = 2; k < 3; ++k) - for(Int k = 0; k < LU(U_col)(U_row).ncol; ++k) + for(Int k = 0; k < LU[U_col][U_row].ncol; ++k) { #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -181,7 +181,7 @@ namespace BaskerNS #endif t_add_extend_inc_lvl(thread, kid,lvl,l-1, k, - LU(U_col)(U_row).scol, + LU[U_col][U_row].scol, BASKER_FALSE); //where to start again @@ -234,7 +234,7 @@ namespace BaskerNS // printf("[3] barrier test, kid: %d leader: %d b_size: %d lvl: %d \n", // kid, my_leader, b_size, lvl); t_basker_barrier_inc_lvl(thread, kid, my_leader, - b_size, 7, LU(U_col)(U_row).scol, 0); + b_size, 7, 
LU[U_col][U_row].scol, 0); #ifdef BASKER_DEBUG_NFACTOR_COL_INC if(kid == 0) @@ -248,7 +248,7 @@ namespace BaskerNS //if(lvl < 2) { //for(Int k=0; k < 1; ++k) - for(Int k = 0; k < LU(U_col)(U_row).ncol; ++k) + for(Int k = 0; k < LU[U_col][U_row].ncol; ++k) { #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -259,7 +259,7 @@ namespace BaskerNS //printf("test: %d \n", LU(U_col)(U_row).scol); t_add_extend_inc_lvl(thread, kid,lvl,lvl-1, k, - LU(U_col)(U_row).scol, + LU[U_col][U_row].scol, BASKER_TRUE); Entry pivot = 0; if((kid%(Int)(pow(2,lvl))) == 0) @@ -577,12 +577,12 @@ namespace BaskerNS ) { l = l+1; - Int my_token = S(l)(kid); + Int my_token = S[l][kid]; Int my_loc = kid; while((my_loc > 0)) { my_loc--; - if(S(l)(my_loc) != my_token) + if(S[l][my_loc] != my_token) { my_loc++; break; @@ -615,14 +615,14 @@ namespace BaskerNS //Get needed variables - const Int L_col = S(l)(kid); + const Int L_col = S[l][kid]; // const Int L_row = 0; //NDE - warning: unused - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; Int my_row_leader = find_leader(kid,lvl-1); //Int my_new_row = // L_col - S(0)(my_row_leader); - Int U_row = L_col - S(0)(my_row_leader); + Int U_row = L_col - S[0][my_row_leader]; /* Int U_row = (lvl==1)?(kid%2):S(l)(kid)%LU_size(U_col); @@ -642,7 +642,7 @@ namespace BaskerNS //U_row = my_new_row; - const Int X_col = S(0)(kid); + const Int X_col = S[0][kid]; const Int X_row = l; //X_row = lower(L) //const Int col_idx_offset = 0; //we might be able to remove @@ -654,13 +654,13 @@ namespace BaskerNS //end get needed variables// //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; //Ask C++ guru if this is ok BASKER_MATRIX *Bp; if(l == 0) { - Bp = &(AVM(U_col)(U_row)); + Bp = &(AVM[U_col][U_row]); } else { @@ -674,9 +674,9 @@ namespace BaskerNS // } //B.print(); - INT_1DARRAY ws = LL(X_col)(X_row).iws; - const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = 
LL(X_col)(X_row).ews; + INT_1DARRAY ws = LL[X_col][X_row].iws; + const Int ws_size = LL[X_col][X_row].iws_size; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; const Int brow = U.srow; //const Int bcol = U.scol; @@ -920,18 +920,18 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { //printf("HERE\n"); - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_blk = U_col; - thread_array(kid).error_subblk = U_row; - thread_array(kid).error_info = newsize; + thread_array[kid].error_blk = U_col; + thread_array[kid].error_subblk = U_row; + thread_array[kid].error_info = newsize; return BASKER_ERROR; }//if/else realloc }//if need to realloc @@ -1086,26 +1086,26 @@ namespace BaskerNS // kid, lvl, sl, l); } - const Int L_col = S(sl)(my_leader); + const Int L_col = S[sl][my_leader]; Int L_row = l-sl+1; //Might have to think about th - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - L_col - S(0)(my_row_leader); + L_col - S[0][my_row_leader]; // Int U_row = my_new_row; Int U_row = - (lvl==1)?(kid%2):S(sl)(kid)%LU_size(U_col); - if((S(sl)(kid) > 14) && - (S(sl)(kid) > LU_size(U_col)) && + (lvl==1)?(kid%2):S[sl][kid]%LU_size(U_col); + if((S[sl][kid] > 14) && + (S[sl][kid] > LU_size(U_col)) && (lvl != 1)) { //printf("lower offdiag new num, %d %d \n", // S(sl)(kid), LU_size(U_col)); - Int tm = (S(sl)(kid)+1)/16; - U_row = ((S(sl)(kid)+1) - (tm*16))%LU_size(U_col); + Int tm = (S[sl][kid]+1)/16; + U_row = ((S[sl][kid]+1) - (tm*16))%LU_size(U_col); } //printf("UFF kid:%d U: %d %d new: %d leader: %d %d lvl: %d l: %d sl: %d \n", @@ -1116,12 +1116,12 @@ namespace BaskerNS //JDB PASS TEST U_row = my_new_row; - const Int X_col = S(0)(my_leader); + const Int X_col = S[0][my_leader]; Int X_row = l+1; //this will change for us //Int 
col_idx_offset = 0; - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; //const Int bcol = U.scol; #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -1256,31 +1256,31 @@ namespace BaskerNS return; } - const Int L_col = S(sl)(my_leader); + const Int L_col = S[sl][my_leader]; Int L_row = l-sl+1; //Might have to think about th - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - L_col - S(0)(my_row_leader); + L_col - S[0][my_row_leader]; Int U_row = 0; U_row = my_new_row; - const Int X_col = S(0)(my_leader); + const Int X_col = S[0][my_leader]; Int X_row = l+1; //this will change for us Int col_idx_offset = 0; - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; //Need to give them the output pattern - Int U_pattern_col = S(lvl)(kid); + Int U_pattern_col = S[lvl][kid]; Int my_pattern_leader = find_leader_inc_lvl(kid,l); - Int U_pattern_row = S(l+1)(my_pattern_leader) - - S(0)(my_row_leader); + Int U_pattern_row = S[l+1][my_pattern_leader] - + S[0][my_row_leader]; /* printf("Test mypleader: %d myrowleader: %d kid: %d\n", @@ -1292,7 +1292,7 @@ namespace BaskerNS */ - Int L_pattern_col = S(lvl)(kid); + Int L_pattern_col = S[lvl][kid]; Int L_pattern_row = BASKER_MAX_IDX; if(lower == BASKER_TRUE) { @@ -1418,26 +1418,26 @@ namespace BaskerNS return; } - const Int L_col = S(sl)(my_leader); + const Int L_col = S[sl][my_leader]; Int L_row = l-sl+1; //Might have to think about th - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - L_col - S(0)(my_row_leader); + L_col - S[0][my_row_leader]; // Int U_row = my_new_row; Int U_row = - (lvl==1)?(kid%2):S(sl)(kid)%LU_size(U_col); - if((S(sl)(kid) > 14) && - (S(sl)(kid) > LU_size(U_col)) && + (lvl==1)?(kid%2):S[sl][kid]%LU_size(U_col); + if((S[sl][kid] > 14) && + (S[sl][kid] > LU_size(U_col)) && (lvl != 1)) { - Int tm = (S(sl)(kid)+1)/16; - U_row = 
((S(sl)(kid)+1) - (tm*16))%LU_size(U_col); + Int tm = (S[sl][kid]+1)/16; + U_row = ((S[sl][kid]+1) - (tm*16))%LU_size(U_col); } // printf("lowerspmv kid: %d U: %d %d new %d leader: %d %d lvl: %d %d %d \n", @@ -1448,12 +1448,12 @@ namespace BaskerNS U_row = my_new_row; - const Int X_col = S(0)(my_leader); + const Int X_col = S[0][my_leader]; Int X_row = l+1; //this will change for us Int col_idx_offset = 0; - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; //const Int bcol = U.scol; #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -1538,8 +1538,8 @@ namespace BaskerNS ) { - const Int leader_idx = S(0)(kid); - BASKER_MATRIX &C = thread_array(kid).C; + const Int leader_idx = S[0][kid]; + BASKER_MATRIX &C = thread_array[kid].C; Int nnz = 0; // Int gbrow = 0; //NDE - warning: unused @@ -1549,11 +1549,11 @@ namespace BaskerNS { //Copy B -> C Int bl = l+1; - Int A_col = S(lvl)(kid); + Int A_col = S[lvl][kid]; Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - S(bl)(kid) - S(0)(my_row_leader); + S[bl][kid] - S[0][my_row_leader]; Int A_row = 0; A_row = my_new_row; @@ -1564,12 +1564,12 @@ namespace BaskerNS //printf("upper picked, kid: %d \n", kid); //printf("up: %d %d kid: %d \n", // A_col, A_row, kid); - Bp = &(AVM(A_col)(A_row)); + Bp = &(AVM[A_col][A_row]); } else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM(A_col)(0)); + Bp = &(ALM[A_col][0]); } BASKER_MATRIX &B = *Bp; //printf("ADDING UPDATES TO B\n"); @@ -1580,10 +1580,10 @@ namespace BaskerNS //return; //Int team_leader = find_leader(kid, l); //Not used - ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; - INT_1DARRAY ws = LL(leader_idx)(bl).iws; + ENTRY_1DARRAY X = LL[leader_idx][bl].ews; + INT_1DARRAY ws = LL[leader_idx][bl].iws; Int *color = &(ws(0)); - LL(leader_idx)(bl).p_size = 0; + LL[leader_idx][bl].p_size = 0; //Get the columns pattern Int U_pattern_col = A_col; @@ -1606,7 +1606,7 @@ namespace BaskerNS //Copy into C - BASKER_MATRIX &Up = LU(U_pattern_col)(U_pattern_row); + 
BASKER_MATRIX &Up = LU[U_pattern_col][U_pattern_row]; for(Int i = Up.col_ptr(k); i < Up.col_ptr(k+1); i++) { const Int j = Up.row_idx(i); @@ -1620,7 +1620,7 @@ namespace BaskerNS //if there is a L if(L_pattern_row != BASKER_MAX_IDX) { - BASKER_MATRIX &Lp = LL(L_pattern_col)(L_pattern_row); + BASKER_MATRIX &Lp = LL[L_pattern_col][L_pattern_row]; for(Int i = Lp.col_ptr(k)+1; i < Lp.col_ptr(k+1);i++) { const Int j = Lp.row_idx(i); @@ -1653,8 +1653,8 @@ namespace BaskerNS ) { - const Int leader_idx = S(0)(kid); - BASKER_MATRIX &C = thread_array(kid).C; + const Int leader_idx = S[0][kid]; + BASKER_MATRIX &C = thread_array[kid].C; Int nnz = 0; Int gbrow = 0; @@ -1672,24 +1672,24 @@ namespace BaskerNS { //Copy B -> C Int bl = l+1; - Int A_col = S(lvl)(kid); + Int A_col = S[lvl][kid]; Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - S(bl)(kid) - S(0)(my_row_leader); + S[bl][kid] - S[0][my_row_leader]; //Int A_row = my_new_row; - Int A_row = (lvl==1)?(2):S(bl)(kid)%(LU_size(A_col)); - if((S(bl)(kid) > 14) && - (S(bl)(kid) > LU_size(A_col)) && + Int A_row = (lvl==1)?(2):S[bl][kid]%(LU_size(A_col)); + if((S[bl][kid] > 14) && + (S[bl][kid] > LU_size(A_col)) && (lvl != 1)) { //printf("test cm %d %d %d \n", // kid, S(bl)(kid), LU_size(A_col)); - Int tm = (S(bl)(kid)+1)/16; - A_row = ((S(bl)(kid)+1) - (tm*16))%LU_size(A_col); + Int tm = (S[bl][kid]+1)/16; + A_row = ((S[bl][kid]+1) - (tm*16))%LU_size(A_col); } @@ -1708,12 +1708,12 @@ namespace BaskerNS //printf("upper picked, kid: %d \n", kid); //printf("up: %d %d kid: %d \n", // A_col, A_row, kid); - Bp = &(AVM(A_col)(A_row)); + Bp = &(AVM[A_col][A_row]); } else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM(A_col)(0)); + Bp = &(ALM[A_col][0]); } BASKER_MATRIX &B = *Bp; //printf("ADDING UPDATES TO B\n"); @@ -1724,8 +1724,8 @@ namespace BaskerNS //return; //Int team_leader = find_leader(kid, l); //Not used - ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; - INT_1DARRAY ws = LL(leader_idx)(bl).iws; + ENTRY_1DARRAY 
X = LL[leader_idx][bl].ews; + INT_1DARRAY ws = LL[leader_idx][bl].iws; //const Int brow = LL(leader_idx)(bl).srow; //const Int nrow = LL(leader_idx)(bl).nrow; //Int p_size = LL(leader_idx)(bl).p_size; @@ -1789,11 +1789,11 @@ namespace BaskerNS //Int CM_idx = kid; - ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; - INT_1DARRAY ws = LL(leader_idx)(bl).iws; - const Int ws_size = LL(leader_idx)(bl).ews_size; + ENTRY_1DARRAY X = LL[leader_idx][bl].ews; + INT_1DARRAY ws = LL[leader_idx][bl].iws; + const Int ws_size = LL[leader_idx][bl].ews_size; // const Int brow = LL(leader_idx)(bl).srow; //NU //NDE - warning: unused - const Int nrow = LL(leader_idx)(bl).nrow; + const Int nrow = LL[leader_idx][bl].nrow; //Int p_size = LL(leader_idx)(bl).p_size; //For recounting patterns in dense blk @@ -1883,12 +1883,12 @@ namespace BaskerNS ) { //Get needed variables - const Int L_col = S(lvl)(kid); + const Int L_col = S[lvl][kid]; const Int L_row = 0; - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; const Int U_row = LU_size(U_col)-1; - const Int X_col = S(0)(kid); + const Int X_col = S[0][kid]; //Int col_idx_offset = 0; //can we get rid of now? 
@@ -1902,10 +1902,10 @@ namespace BaskerNS #endif //end get needed variables - BASKER_MATRIX &L = LL(L_col)(L_row); - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &L = LL[L_col][L_row]; + BASKER_MATRIX &U = LU[U_col][U_row]; - BASKER_MATRIX &B = thread_array(kid).C; + BASKER_MATRIX &B = thread_array[kid].C; #ifdef BASKER_DEBUG_NFACTOR_COL if(kid >= 0) @@ -1926,9 +1926,9 @@ namespace BaskerNS } */ - INT_1DARRAY ws = LL(X_col)(l+1).iws; - const Int ws_size = LL(X_col)(l+1).iws_size; - ENTRY_1DARRAY X = LL(X_col)(l+1).ews; + INT_1DARRAY ws = LL[X_col][l+1].iws; + const Int ws_size = LL[X_col][l+1].iws_size; + ENTRY_1DARRAY X = LL[X_col][l+1].ews; const Int brow = U.srow; //const Int bcol = U.scol; @@ -2201,17 +2201,17 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_blk = L_col; - thread_array(kid).error_subblk = -1; - thread_array(kid).error_info = newsize; + thread_array[kid].error_blk = L_col; + thread_array[kid].error_subblk = -1; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } } @@ -2229,16 +2229,16 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; } else { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_blk = U_col; - thread_array(kid).error_subblk = U_row; - thread_array(kid).error_info = newsize; + thread_array[kid].error_blk = U_col; + thread_array[kid].error_subblk = U_row; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } } @@ -2462,20 +2462,20 @@ namespace BaskerNS const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S(lvl)(leader_id); + const Int L_col = 
S[lvl][leader_id]; Int L_row = 0; - const Int U_col = S(lvl)(leader_id); + const Int U_col = S[lvl][leader_id]; Int U_row = LU_size(U_col)-1; - Int X_col = S(0)(leader_id); + Int X_col = S[0][leader_id]; Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; + INT_1DARRAY ws = LL[X_col][X_row].iws; //const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; //const Int brow = U.srow; //const Int bcol = U.scol; @@ -2585,18 +2585,18 @@ namespace BaskerNS //const Int lteam_size = pow(2,l+1); //NDE - warning: unused // const Int L_col = S(lvl)(leader_id); //NDE - warning: unused // Int L_row = 0; //NDE - warning: unused - const Int U_col = S(lvl)(leader_id); + const Int U_col = S[lvl][leader_id]; Int U_row = LU_size(U_col)-1; - Int X_col = S(0)(leader_id); + Int X_col = S[0][leader_id]; Int X_row = l+1; //Int col_idx_offset = 0; //can get rid of?//NDE - warning: unused //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; + INT_1DARRAY ws = LL[X_col][X_row].iws; //const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; if(kid == leader_id) { @@ -2621,11 +2621,11 @@ namespace BaskerNS const BASKER_BOOL lower ) { - const Int my_idx = S(0)(kid); + const Int my_idx = S[0][kid]; //should remove either as a paramter or here Int team_leader = find_leader(kid, sl); - const Int leader_idx = S(0)(team_leader); + const Int leader_idx = S[0][team_leader]; //If I an not a leader, then need to copy over if(kid != team_leader) @@ -2636,15 +2636,15 @@ namespace BaskerNS { //const Int blk = l+1; - ENTRY_1DARRAY &XL = 
LL(leader_idx)(blk).ews; + ENTRY_1DARRAY &XL = LL[leader_idx][blk].ews; // INT_1DARRAY &wsL = LL(leader_idx)(blk).iws; //NDE - warning: unused // Int p_sizeL = LL(leader_idx)(blk).p_size; //NDE - warning: unused // Int ws_sizeL = LL(leader_idx)(blk).iws_size; //NDE - warning: unused - ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; - INT_1DARRAY &ws = LL(my_idx)(blk).iws; + ENTRY_1DARRAY &X = LL[my_idx][blk].ews; + INT_1DARRAY &ws = LL[my_idx][blk].iws; // const Int ws_size = LL(my_idx)(blk).iws_size; //NDE - warning: unused //Int p_size = LL(my_idx)(blk).p_size; - LL(my_idx)(blk).p_size = 0; + LL[my_idx][blk].p_size = 0; Int *color = &(ws[0]); // Int *pattern = &(color[ws_size]); //NDE - warning: unused // Int *stack = &(pattern[ws_size]); //NDE - warning: unused @@ -2682,7 +2682,7 @@ namespace BaskerNS - Int U_pattern_col = S(lvl)(kid); + Int U_pattern_col = S[lvl][kid]; Int U_pattern_row = BASKER_MAX_IDX; if(blk == l+1) @@ -2691,11 +2691,11 @@ namespace BaskerNS //S(0)(find_leader(kid,lvl)); //U_pattern_row = S(l+1)(kid) - //S(0)(my_pattern_leader); - U_pattern_row = S(l+1)(kid) - - S(0)(find_leader(kid,lvl-1)); + U_pattern_row = S[l+1][kid] - + S[0][find_leader(kid,lvl-1)]; } - Int L_pattern_col = S(lvl)(kid); + Int L_pattern_col = S[lvl][kid]; Int L_pattern_row = BASKER_MAX_IDX; if(lower == BASKER_TRUE) { @@ -2716,7 +2716,7 @@ namespace BaskerNS if(U_pattern_row != BASKER_MAX_IDX) { - BASKER_MATRIX &UP = LU(U_pattern_col)(U_pattern_row); + BASKER_MATRIX &UP = LU[U_pattern_col][U_pattern_row]; for(Int jj = UP.col_ptr(k); jj < UP.col_ptr(k+1); @@ -2730,7 +2730,7 @@ namespace BaskerNS }//if UPattern if(L_pattern_row != BASKER_MAX_IDX) { - BASKER_MATRIX &LP = LL(L_pattern_col)(L_pattern_row); + BASKER_MATRIX &LP = LL[L_pattern_col][L_pattern_row]; for(Int jj = LP.col_ptr(k); jj < LP.col_ptr(k+1); jj++) @@ -2769,11 +2769,11 @@ namespace BaskerNS //BASKER_MATRIX &B = AVM(A_col)(A_col); - const Int my_idx = S(0)(kid); + const Int my_idx = S[0][kid]; //should remove either as 
a paramter or here Int team_leader = find_leader(kid, sl); - const Int leader_idx = S(0)(team_leader); + const Int leader_idx = S[0][team_leader]; //Int loop_col_idx = S(l)(kid); NU //#ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -2807,13 +2807,13 @@ namespace BaskerNS { //const Int blk = l+1; - ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; + ENTRY_1DARRAY &XL = LL[leader_idx][blk].ews; // INT_1DARRAY &wsL = LL(leader_idx)(blk).iws; //NDE - warning: unused - Int p_sizeL = LL(leader_idx)(blk).p_size; + Int p_sizeL = LL[leader_idx][blk].p_size; // Int ws_sizeL = LL(leader_idx)(blk).iws_size; //NDE - warning: unused - ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; - INT_1DARRAY &ws = LL(my_idx)(blk).iws; - const Int ws_size = LL(my_idx)(blk).iws_size; + ENTRY_1DARRAY &X = LL[my_idx][blk].ews; + INT_1DARRAY &ws = LL[my_idx][blk].iws; + const Int ws_size = LL[my_idx][blk].iws_size; //Int p_size = LL(my_idx)(blk).p_size; Int *color = &(ws[0]); Int *pattern = &(color[ws_size]); @@ -2845,7 +2845,7 @@ namespace BaskerNS #endif //over all nnnz found - for(Int jj = 0; jj < LL(my_idx)(blk).nrow; ++jj) + for(Int jj = 0; jj < LL[my_idx][blk].nrow; ++jj) { color[jj] = 0; @@ -2910,7 +2910,7 @@ namespace BaskerNS //This can be removed in the future if(kid != team_leader) { - LL(my_idx)(blk).p_size = 0; + LL[my_idx][blk].p_size = 0; } else { @@ -2918,7 +2918,7 @@ namespace BaskerNS printf("SETTING PS: %d L:%d %d kid: %d\n", p_sizeL, leader_idx, blk, kid); #endif - LL(leader_idx)(blk).p_size = p_sizeL; + LL[leader_idx][blk].p_size = p_sizeL; //p_size = 0; NOT USED }//over all blks } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp index ccbd5a33b827..dc59708fe158 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp @@ -258,8 +258,8 @@ namespace BaskerNS Int btab = btf_tabs_offset; BASKER_MATRIX &M = (c >= 
btab ? BTF_C : BTF_D); - BASKER_MATRIX &U = (c >= btab ? UBTF(c-btab) : U_D(c)); - BASKER_MATRIX &L = (c >= btab ? LBTF(c-btab) : L_D(c)); + BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); + BASKER_MATRIX &L = (c >= btab ? LBTF[c-btab] : L_D[c]); Int k = btf_tabs(c); Int bcol = M.scol; @@ -294,9 +294,9 @@ namespace BaskerNS printf("Error: NaN diag in single factor\n"); } } - thread_array(kid).error_type = BASKER_ERROR_SINGULAR; - thread_array(kid).error_blk = c; - thread_array(kid).error_info = k; + thread_array[kid].error_type = BASKER_ERROR_SINGULAR; + thread_array[kid].error_blk = c; + thread_array[kid].error_info = k; return BASKER_ERROR; } @@ -336,8 +336,8 @@ namespace BaskerNS Int btab = btf_tabs_offset; BASKER_MATRIX &M = (c >= btab ? BTF_C : BTF_D); - BASKER_MATRIX &U = (c >= btab ? UBTF(c-btab) : U_D(c)); - BASKER_MATRIX &L = (c >= btab ? LBTF(c-btab) : L_D(c)); + BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); + BASKER_MATRIX &L = (c >= btab ? LBTF[c-btab] : L_D[c]); Int bcol = M.scol; //JDB: brow hack: fix. 
@@ -373,9 +373,9 @@ namespace BaskerNS Mag rmin_ (0.0); //workspace - Int ws_size = thread_array(kid).iws_size; - INT_1DARRAY ws = thread_array(kid).iws; - ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array[kid].iws; + ENTRY_1DARRAY X = thread_array[kid].ews; Int *color = &(ws(0)); Int *pattern = &(color[ws_size]); @@ -580,9 +580,9 @@ namespace BaskerNS << " Column: " << k << std::endl; } - thread_array(kid).error_type = BASKER_ERROR_NAN; - thread_array(kid).error_blk = c; - thread_array(kid).error_info = k; + thread_array[kid].error_type = BASKER_ERROR_NAN; + thread_array[kid].error_blk = c; + thread_array[kid].error_info = k; return BASKER_ERROR; } absv = abs(value); @@ -714,9 +714,9 @@ namespace BaskerNS pivot = normA_blk * eps; X(maxindex) = pivot; } else { - thread_array(kid).error_type = BASKER_ERROR_SINGULAR; - thread_array(kid).error_blk = c; - thread_array(kid).error_info = k; + thread_array[kid].error_type = BASKER_ERROR_SINGULAR; + thread_array[kid].error_blk = c; + thread_array[kid].error_info = k; return BASKER_ERROR; } } @@ -780,16 +780,16 @@ namespace BaskerNS (long)btf_tabs(c), (long)btf_tabs(c+1), (long)(btf_tabs(c+1)-btf_tabs(c))); } - thread_array(kid).error_blk = c; + thread_array[kid].error_blk = c; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } } @@ -804,16 +804,16 @@ namespace BaskerNS printf("blk: %ld column: %ld \n", (long)c, (long)k); } - thread_array(kid).error_blk = c; + thread_array[kid].error_blk = c; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = 
BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } } @@ -991,8 +991,8 @@ namespace BaskerNS ) { //printf("=======LOCAL REACH BTF SHORT CALLED (pattern[top=%d - 1] = %d) =====\n",(int)top, (int)j); - INT_1DARRAY ws = thread_array(kid).iws; - Int ws_size = thread_array(kid).iws_size; + INT_1DARRAY ws = thread_array[kid].iws; + Int ws_size = thread_array[kid].iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -1014,8 +1014,8 @@ namespace BaskerNS { //printf("=======LOCAL REACH BTF CALLED =====\n"); - INT_1DARRAY ws = thread_array(kid).iws; - Int ws_size = thread_array(kid).iws_size; + INT_1DARRAY ws = thread_array[kid].iws; + Int ws_size = thread_array[kid].iws_size; /*{ printf("ws_size: %d \n", ws_size); @@ -1144,8 +1144,8 @@ namespace BaskerNS ) { - INT_1DARRAY ws = thread_array(kid).iws; - Int ws_size = thread_array(kid).iws_size; + INT_1DARRAY ws = thread_array[kid].iws; + Int ws_size = thread_array[kid].iws_size; /* printf("ws_size: %d \n", ws_size); @@ -1289,9 +1289,9 @@ namespace BaskerNS { const Entry zero (0.0); - INT_1DARRAY ws = thread_array(kid).iws; - ENTRY_1DARRAY X = thread_array(kid).ews; - Int ws_size = thread_array(kid).iws_size; + INT_1DARRAY ws = thread_array[kid].iws; + ENTRY_1DARRAY X = thread_array[kid].ews; + Int ws_size = thread_array[kid].iws_size; Int brow = L.srow; Int *color = &(ws(0)); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp index 69d06a6bd72e..82ea04be3754 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp @@ -1096,11 +1096,19 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2) find_2D_convert(BTF_A); //now 
we can fill submatrices #ifdef BASKER_KOKKOS - kokkos_order_init_2D iO(this); - Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); - Kokkos::fence(); + #ifdef BASKER_PARALLEL_INIT_2D + kokkos_order_init_2D iO(this); + Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); + Kokkos::fence(); + #else + bool alloc = true; + //bool keep_zeros = true; + for (Int p = 0; p < num_threads; p++) { + this->t_init_2DA(p, alloc, keep_zeros); + } + #endif #else - //Comeback + //Comeback #endif #ifdef BASKER_TIMER double init_2d_time = scotch_timer.seconds(); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp index cc20d3b21e78..fd11208ea309 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp @@ -117,9 +117,11 @@ namespace BaskerNS // thread.team_rank()); Int kid = basker->t_get_kid(thread); #endif + printf( " * kokkos_sfactor_init_factor(%d) *\n",kid ); fflush(stdout); basker->t_init_factor(kid); + printf( " * kokkos_sfactor_init_factor(%d) done *\n",kid ); fflush(stdout); //This needs to be done earlier in ordering now //basker->t_init_2DA(kid); @@ -159,7 +161,7 @@ int Basker::sfactor() printf("Total NNZ: %ld \n", (long)global_nnz); printf(" > blk_matching = %d\n", (int)Options.blk_matching ); printf("----------------------------------\n"); - printf("\n"); + printf("\n"); fflush(stdout); } } @@ -169,28 +171,45 @@ int Basker::sfactor() } //Allocate Factorspace - //printf(" >> kokkos_sfactor_init_factor( btf_tabs_offset = %d, allocate_nd_workspace = %d ) <<\n", - // btf_tabs_offset,allocate_nd_workspace); + #ifdef BASKER_TIMER + printf(" >> kokkos_sfactor_init_factor( btf_tabs_offset = %d, allocate_nd_workspace = %d ) <<\n", + btf_tabs_offset,allocate_nd_workspace); fflush(stdout); + #endif if(btf_tabs_offset != 0 && allocate_nd_workspace) { #ifdef BASKER_KOKKOS + #ifdef BASKER_PARALLEL_INIT_FACTOR 
kokkos_sfactor_init_factor iF(this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iF); Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_factor(p); + } + #endif #else #endif } + #ifdef BASKER_TIMER + printf(" >> kokkos_sfactor_workspace <<\n"); fflush(stdout); + #endif //if(btf_tabs_offset != 0) { //Allocate workspace #ifdef BASKER_KOKKOS + #ifdef BASKER_PARALLEL_INIT_WORKSPACE typedef Kokkos::TeamPolicy TeamPolicy; kokkos_sfactor_init_workspace iWS(setup_flag, this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iWS); Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_workspace(setup_flag, p); + } + #endif #endif } @@ -292,7 +311,9 @@ int Basker::sfactor() double time2 = 0.0; double time3 = 0.0; Kokkos::Timer timer1; + Kokkos::Timer timer2; timer.reset(); + timer2.reset(); #endif //split_num = num_threads/2; @@ -303,7 +324,7 @@ int Basker::sfactor() printf("\n --------------- OVER DOMS ---------------\n"); printf("\n"); } - #define SHYLU_BASKER_STREE_LIST + //#define SHYLU_BASKER_STREE_LIST std::vector stree_list (num_threads); #ifdef SHYLU_BASKER_STREE_LIST Kokkos::parallel_for( @@ -313,7 +334,7 @@ int Basker::sfactor() for(Int p = 0; p < num_threads; ++p) #endif { - Int blk = S(0)(p); + Int blk = S[0][p]; if(Options.verbose == BASKER_TRUE) { printf(" ============= DOMAIN BLK (p=%d) ============\n",(int)p); @@ -323,34 +344,34 @@ int Basker::sfactor() //printf("\n\n STREE SIZE: %d \n", AL[blk][0].ncol); //printf("Here 0\n"); //Find nnz_counts for leafs - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[p]; - e_tree (ALM(blk)(0), stree_p, 1); + e_tree (ALM[blk][0], stree_p, 1); #else - e_tree (ALM(blk)(0), stree, 1); + e_tree (ALM[blk][0], stree, 1); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1_2 += timer1.seconds(); timer1.reset(); 
#endif #ifdef SHYLU_BASKER_STREE_LIST - post_order(ALM(blk)(0), stree_p); + post_order(ALM[blk][0], stree_p); #else - post_order(ALM(blk)(0), stree); + post_order(ALM[blk][0], stree); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1_3 += timer1.seconds(); timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - col_count (ALM(blk)(0), stree_p); + col_count (ALM[blk][0], stree_p); #else - col_count (ALM(blk)(0), stree); + col_count (ALM[blk][0], stree); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1 += timer1.seconds(); #endif @@ -362,17 +383,17 @@ int Basker::sfactor() printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,0); printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,(int)LU_size(blk)-1); } - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - leaf_assign_nnz(LL(blk)(0), stree_p, 0); - leaf_assign_nnz(LU(blk)(LU_size(blk)-1), stree_p, 0); + leaf_assign_nnz(LL[blk][0], stree_p, 0); + leaf_assign_nnz(LU[blk][LU_size(blk)-1], stree_p, 0); #else - leaf_assign_nnz(LL(blk)(0), stree, 0); - leaf_assign_nnz(LU(blk)(LU_size(blk)-1), stree, 0); + leaf_assign_nnz(LL[blk][0], stree, 0); + leaf_assign_nnz(LU[blk][LU_size(blk)-1], stree, 0); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time2 += timer1.seconds(); #endif } @@ -380,22 +401,26 @@ int Basker::sfactor() ); Kokkos::fence(); #endif + #ifdef BASKER_TIMER + double dom_time = timer2.seconds(); + std::cout << " DOMAIN BLKs done : " << dom_time << std::endl; + #endif for(Int p = 0; p < num_threads; ++p) { //Do off diag - Int blk = S(0)(p); + Int blk = S[0][p]; #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[p]; #endif for(Int l =0; l < tree.nlvls; l++) { - Int U_col = S(l+1)(p); + Int U_col = S[l+1][p]; //Note: Need to think more about this flow //Should be subtracted by 
how many times in the //future - Int my_row_leader = S(0)(find_leader(p,l)); + Int my_row_leader = S[0][find_leader(p,l)]; //Int my_new_row = // blk - my_row_leader; Int U_row = blk-my_row_leader; @@ -416,10 +441,10 @@ int Basker::sfactor() timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - U_blk_sfactor(AVM(U_col)(U_row), stree_p, + U_blk_sfactor(AVM[U_col][U_row], stree_p, gScol[l], gSrow[glvl], off_diag); #else - U_blk_sfactor(AVM(U_col)(U_row), stree, + U_blk_sfactor(AVM[U_col][U_row], stree, gScol[l], gSrow[glvl], off_diag); #endif #ifdef BASKER_TIMER @@ -448,11 +473,11 @@ int Basker::sfactor() //printf( " U_assign_nnz(LU(%d,%d))\n",U_col,U_row ); double fill_factor = BASKER_DOM_NNZ_OVER+Options.user_fill; #ifdef SHYLU_BASKER_STREE_LIST - U_assign_nnz(LU(U_col)(U_row), stree_p, fill_factor, 0); - L_assign_nnz(LL(blk)(l+1), stree_p, fill_factor, 0); + U_assign_nnz(LU[U_col][U_row], stree_p, fill_factor, 0); + L_assign_nnz(LL[blk][l+1], stree_p, fill_factor, 0); #else - U_assign_nnz(LU(U_col)(U_row), stree, fill_factor, 0); - L_assign_nnz(LL(blk)(l+1), stree, fill_factor, 0); + U_assign_nnz(LU[U_col][U_row], stree, fill_factor, 0); + L_assign_nnz(LL[blk][l+1], stree, fill_factor, 0); #endif #ifdef BASKER_TIMER time2 += timer1.seconds(); @@ -484,13 +509,17 @@ int Basker::sfactor() //over all the seps in a lvle #ifdef SHYLU_BASKER_STREE_LIST + //printf( " parallel for \n" ); Kokkos::parallel_for( "permute_col", p, KOKKOS_LAMBDA(const int pp) #else + //printf( " serial for \n" ); for(Int pp = 0; pp < p; pp++) #endif { - //printf( " -- level = %d separator = %d --\n",lvl,pp ); + #ifdef BASKER_TIMER + printf( " -- level = %d/%d separator = %d/%d --\n",lvl,tree.nlvls, pp,p ); fflush(stdout); + #endif //S blks Int ppp; ppp = pp*pow(tree.nparts, lvl+1); @@ -505,43 +534,50 @@ int Basker::sfactor() (long)U_col, (long)U_row, (long)lvl, (long)pp); #endif - Int U_col = S(lvl+1)(ppp); + Int U_col = S[lvl+1][ppp]; Int U_row = 0; //S_blk_sfactor(AL[U_col][U_row], stree, 
//gScol[lvl], gSrow[pp]); - //printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM(U_col)(U_row).nrow,ALM(U_col)(U_row).ncol,ALM(U_col)(U_row).nnz ); + #ifdef BASKER_TIMER + printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM[U_col][U_row].nrow,ALM[U_col][U_row].ncol,ALM[U_col][U_row].nnz ); fflush(stdout); + #endif #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[pp]; - S_blk_sfactor(ALM(U_col)(U_row), stree_p, - gScol(lvl), gSrow(pp)); + S_blk_sfactor(ALM[U_col][U_row], stree_p, + gScol[lvl], gSrow[pp]); #else - S_blk_sfactor(ALM(U_col)(U_row), stree, - gScol(lvl), gSrow(pp)); + S_blk_sfactor(ALM[U_col][U_row], stree, + gScol[lvl], gSrow[pp]); + #endif + #ifdef BASKER_TIMER + printf( " >>> -> nnz = %d\n",ALM[U_col][U_row].nnz ); fflush(stdout); #endif - //printf( " >>> -> nnz = %d\n",ALM(U_col)(U_row).nnz ); //S_assign_nnz(LL[U_col][U_row], stree, 0); if(Options.verbose == BASKER_TRUE) { - printf( " >> S_assign_nnz( LL(%d,%d) )\n",(int)U_col,(int)U_row ); + printf( " >> S_assign_nnz( LL(%d,%d) )\n",(int)U_col,(int)U_row ); fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST - S_assign_nnz(LL(U_col)(U_row), stree_p, 0); + S_assign_nnz(LL[U_col][U_row], stree_p, 0); #else - S_assign_nnz(LL(U_col)(U_row), stree, 0); + S_assign_nnz(LL[U_col][U_row], stree, 0); #endif //S_assign_nnz(LU[U_col][LU_size[U_col]-1], stree,0); //printf( " >>> S_assign_nnz( LU(%d,%d) )\n",U_col,LU_size(U_col)-1 ); if(Options.verbose == BASKER_TRUE) { - printf( " ++ S_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)LU_size(U_col)-1); + printf( " ++ S_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)LU_size(U_col)-1); fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST - S_assign_nnz(LU(U_col)(LU_size(U_col)-1), stree_p, 0); + S_assign_nnz(LU[U_col][LU_size(U_col)-1], stree_p, 0); #else - S_assign_nnz(LU(U_col)(LU_size(U_col)-1), stree, 0); + S_assign_nnz(LU[U_col][LU_size(U_col)-1], stree, 0); + #endif + #ifdef BASKER_TIMER + printf( 
" >>> -> nnz = %d\n",LU[U_col][LU_size(U_col)-1].nnz); fflush(stdout); #endif } #ifdef SHYLU_BASKER_STREE_LIST @@ -557,19 +593,20 @@ int Basker::sfactor() Int ppp; ppp = pp*pow(tree.nparts, lvl+1); - Int U_col = S(lvl+1)(ppp); + Int U_col = S[lvl+1][ppp]; Int U_row = 0; Int inner_blk = U_col; for(Int l = lvl+1; l < tree.nlvls; l++) { - U_col = S(l+1)(ppp); - U_row = S(lvl+1)(ppp)%LU_size(U_col); + //printf( " --- pp = %d/%d, l = %d/%d ---\n",pp,p, l,tree.nlvls ); fflush(stdout); + U_col = S[l+1][ppp]; + U_row = S[lvl+1][ppp]%LU_size(U_col); - Int my_row_leader = S(0)(find_leader(ppp,l)); + Int my_row_leader = S[0][find_leader(ppp,l)]; //Int my_new_row = // S(lvl+1)(ppp) - my_row_leader; - U_row = S(lvl+1)(ppp) - my_row_leader; + U_row = S[lvl+1][ppp] - my_row_leader; #ifdef BASKER_DEBUG_SFACTOR printf("offida sep, lvl: %d l: %d U_col: %d U_row: %d \n", lvl, l, U_col, U_row); @@ -578,11 +615,11 @@ int Basker::sfactor() Int off_diag = 1; #ifdef SHYLU_BASKER_STREE_LIST - U_blk_sfactor(AVM(U_col)(U_row), stree_p, - gScol(l), gSrow(pp), off_diag); + U_blk_sfactor(AVM[U_col][U_row], stree_p, + gScol[l], gSrow[pp], off_diag); #else - U_blk_sfactor(AVM(U_col)(U_row), stree, - gScol(l), gSrow(pp), off_diag); + U_blk_sfactor(AVM[U_col][U_row], stree, + gScol[l], gSrow[pp], off_diag); #endif //In symmetric will not need @@ -598,14 +635,15 @@ int Basker::sfactor() { printf( " ++ leaf_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)U_row); printf( " ++ leaf_assign_nnz(LL(%d, %d))\n",(int)inner_blk,(int)(l-lvl)); + fflush(stdout); } double fill_factor = BASKER_SEP_NNZ_OVER+Options.user_fill; #ifdef SHYLU_BASKER_STREE_LIST - U_assign_nnz(LU(U_col)(U_row), stree_p, fill_factor, 0); - L_assign_nnz(LL(inner_blk)(l-lvl), stree_p, fill_factor, 0); + U_assign_nnz(LU[U_col][U_row], stree_p, fill_factor, 0); + L_assign_nnz(LL[inner_blk][l-lvl], stree_p, fill_factor, 0); #else - U_assign_nnz(LU(U_col)(U_row), stree, fill_factor, 0); - L_assign_nnz(LL(inner_blk)(l-lvl), stree, fill_factor, 0); + 
U_assign_nnz(LU[U_col][U_row], stree, fill_factor, 0); + L_assign_nnz(LL[inner_blk][l-lvl], stree, fill_factor, 0); #endif //printf("Here 1 \n"); } @@ -625,6 +663,9 @@ int Basker::sfactor() FREE(gScol); FREE(gSrow); + #ifdef BASKER_TIMER + std::cout << " >> symmetric_sfactor done << " << std::endl; + #endif return 0; }//end symmetric_symbolic() @@ -1151,7 +1192,6 @@ int Basker::sfactor() BASKER_SYMBOLIC_TREE &ST ) { -printf( " col_count:: view \n" ); //Still like to find a way to do this without transpose BASKER_MATRIX Mt; matrix_transpose(MV, Mt); @@ -2419,6 +2459,9 @@ printf( " col_count:: view \n" ); //printf("number of blks: %d \n", // btf_nblks-btf_tabs_offset); #endif + #ifdef BASKER_TIMER + printf( " > btf_last_dense(%s) <\n",(flag ? "true" : "false") ); fflush(stdout); + #endif Int max_blk_size = 0; #if defined(BASKER_SPLIT_A) @@ -2440,7 +2483,7 @@ printf( " col_count:: view \n" ); nnz = lblk_size*lblk_size; } //printf( " LBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); - L_D(i).init_matrix("LBFT", + L_D[i].init_matrix("LBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2448,9 +2491,9 @@ printf( " col_count:: view \n" ); nnz); //For pruning - L_D(i).init_pend(); + L_D[i].init_pend(); - U_D(i).init_matrix("UBFT", + U_D[i].init_matrix("UBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2459,6 +2502,9 @@ printf( " col_count:: view \n" ); }//over all blks } #endif + #ifdef BASKER_TIMER + printf( " > top blocks done <\n" ); fflush(stdout); + #endif //Malloc L and U #ifdef BASKER_DEBUG_SFACTOR @@ -2487,7 +2533,7 @@ printf( " col_count:: view \n" ); nnz = lblk_size*lblk_size; } //printf( " LBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); - LBTF(i-btf_tabs_offset).init_matrix("LBFT", + LBTF[i-btf_tabs_offset].init_matrix("LBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2496,10 +2542,10 @@ printf( " col_count:: view \n" ); //For pruning //printf( " LBTF(%d).init_pend()\n",(int)(i-btf_tabs_offset) ); - LBTF(i-btf_tabs_offset).init_pend(); + 
LBTF[i-btf_tabs_offset].init_pend(); //printf( " UBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); - UBTF(i-btf_tabs_offset).init_matrix("UBFT", + UBTF[i-btf_tabs_offset].init_matrix("UBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2511,6 +2557,9 @@ printf( " col_count:: view \n" ); //MALLOC workspace }//over all blks } + #ifdef BASKER_TIMER + printf( " > left blocks done <\n" ); fflush(stdout); + #endif //JDB: This needs to be fixed max_blk_size = BTF_D.nrow + BTF_C.nrow; @@ -2525,14 +2574,14 @@ printf( " col_count:: view \n" ); for(Int i = 0 ; i < num_threads; i++) { - thread_array(i).iws_size = max_blk_size; - thread_array(i).ews_size = max_blk_size; + thread_array[i].iws_size = max_blk_size; + thread_array[i].ews_size = max_blk_size; //BASKER_ASSERT((thread_array(i).iws_size*thread_array(i).iws_mult) > 0, "Basker btf_last_dense assert: sfactor threads iws > 0 failed"); //BASKER_ASSERT((thread_array(i).ews_size*thread_array(i).ews_mult) > 0, "Basker btf_last_dense assert: sfactor threads ews > 0 failed"); if (max_blk_size > 0) { - MALLOC_INT_1DARRAY(thread_array(i).iws, thread_array(i).iws_size*thread_array(i).iws_mult); - MALLOC_ENTRY_1DARRAY(thread_array(i).ews, thread_array(i).ews_size*thread_array(i).ews_mult); + MALLOC_INT_1DARRAY(thread_array[i].iws, thread_array[i].iws_size*thread_array[i].iws_mult); + MALLOC_ENTRY_1DARRAY(thread_array[i].ews, thread_array[i].ews_size*thread_array[i].ews_mult); } #ifdef BASKER_DEBUG_SFACTOR printf("Malloc Thread: %d iws: %d \n", @@ -2545,8 +2594,12 @@ printf( " col_count:: view \n" ); } } + #ifdef BASKER_TIMER + printf( " > btf_last_dense done <\n" ); + #endif }//end btf_last_dense() }//end namespace Bakser +#undef BASKER_TIMER #endif//endif BASKER_SFACTOR_NEWFRM_HPP diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp index 64c041a6536c..ec7774a43f13 100644 --- 
a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp @@ -99,20 +99,20 @@ namespace BaskerNS for(Int p=0; p < num_threads; ++p) { - Int blk = S(0)(p); - sfactor_nd_dom_estimate(ALM(blk)(0), - LL(blk)(0), - LU(blk)(LU_size(blk)-1)); + Int blk = S[0][p]; + sfactor_nd_dom_estimate(ALM[blk][0], + LL[blk][0], + LU[blk][LU_size(blk)-1]); for(Int l=0; l < tree.nlvls; l++) { - Int U_col = S(l+1)(p); + Int U_col = S[l+1][p]; Int my_row_leader = find_leader(p,l); Int my_new_row = - blk - S(0)(my_row_leader); + blk - S[0][my_row_leader]; - Int U_row = (l==0)?(p%2):S(0)(p)%LU_size(U_col); + Int U_row = (l==0)?(p%2):S[0][p]%LU_size(U_col); if((blk > 14) && (blk > LU_size(U_col)) && (l!=0)) @@ -124,11 +124,11 @@ namespace BaskerNS //JDB TEST PASSED U_row = my_new_row; - sfactor_nd_upper_estimate(AVM(U_col)(U_row), - LU(U_col)(U_row)); + sfactor_nd_upper_estimate(AVM[U_col][U_row], + LU[U_col][U_row]); - sfactor_nd_lower_estimate(ALM(blk)(l+1), - LL(blk)(l+1)); + sfactor_nd_lower_estimate(ALM[blk][l+1], + LL[blk][l+1]); } // end for l @@ -138,41 +138,41 @@ namespace BaskerNS for(Int pp=0; pp < pow(tree.nparts, tree.nlvls-lvl-1); pp++) { Int ppp = pp*pow(tree.nparts, lvl+1); - Int U_col = S(lvl+1)(ppp); + Int U_col = S[lvl+1][ppp]; Int U_row = 0; - sfactor_nd_sep_estimate(ALM(U_col)(U_row), - LL(U_col)(U_row), - LU(U_col)(LU_size(U_col)-1)); + sfactor_nd_sep_estimate(ALM[U_col][U_row], + LL[U_col][U_row], + LU[U_col][LU_size(U_col)-1]); Int innerblk = U_col; for(Int l = lvl+1; l < tree.nlvls; l++) { - U_col = S(l+1)(ppp); + U_col = S[l+1][ppp]; Int my_row_leader = find_leader(ppp,l); Int my_new_row = - S(lvl+1)(ppp) - S(0)(my_row_leader); + S[lvl+1][ppp] - S[0][my_row_leader]; - U_row = S(lvl+1)(ppp)%LU_size(U_col); - if((S(lvl+1)(ppp) > 14) && - (S(lvl+1)(ppp) > LU_size(U_col)) + U_row = S[lvl+1][ppp]%LU_size(U_col); + if((S[lvl+1][ppp] > 14) && + (S[lvl+1][ppp] > LU_size(U_col)) ) { - Int tm = 
(S(lvl+1)(ppp)+1)/16; - U_row = ((S(lvl+1)(ppp)+1) - + Int tm = (S[lvl+1][ppp]+1)/16; + U_row = ((S[lvl+1][ppp]+1) - (tm*16))%LU_size(U_col); } //JDB TEST PASS U_row = my_new_row; - sfactor_nd_sep_upper_estimate(AVM(U_col)(U_row), - LU(U_col)(U_row)); + sfactor_nd_sep_upper_estimate(AVM[U_col][U_row], + LU[U_col][U_row]); sfactor_nd_sep_lower_estimate( - ALM(innerblk)(l-lvl), - LL(innerblk)(l-lvl)); + ALM[innerblk][l-lvl], + LL[innerblk][l-lvl]); }//for - l }//for -p diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp index b01d3ec72632..b2fa1204cd86 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp @@ -293,7 +293,7 @@ namespace BaskerNS for(Int b = nblks_c-1; b>= 0; b--) { //---Lower solve - BASKER_MATRIX &LC = LBTF(b); + BASKER_MATRIX &LC = LBTF[b]; #ifdef BASKER_DEBUG_SOLVE_RHS printf("\n\n btf b=%ld (%d x %d), LBTF(%d)\n", (long)b, (int)LC.nrow, (int)LC.ncol, (int)b); #endif @@ -303,7 +303,7 @@ namespace BaskerNS //printVec(y,gn); - BASKER_MATRIX &UC = UBTF(b); + BASKER_MATRIX &UC = UBTF[b]; //U(C)\x -> y upper_tri_solve(UC,x,y); @@ -420,7 +420,7 @@ namespace BaskerNS for(Int b = btf_top_tabs_offset-1; b>= 0; b--) { //L(C)\x -> y - BASKER_MATRIX &LC = L_D(b); + BASKER_MATRIX &LC = L_D[b]; lower_tri_solve(LC, x, y); #ifdef BASKER_DEBUG_SOLVE_RHS printf( "\n after L solve (b=%d)\n",b ); fflush(stdout); @@ -429,7 +429,7 @@ namespace BaskerNS #endif //U(C)\y -> x - BASKER_MATRIX &UC = U_D(b); + BASKER_MATRIX &UC = U_D[b]; upper_tri_solve(UC, y, x); #ifdef BASKER_DEBUG_SOLVE_RHS printf( "\n after U solve\n" ); fflush(stdout); @@ -476,7 +476,7 @@ namespace BaskerNS //Forward solve on A for(Int b = 0; b < tree.nblks; ++b) { - BASKER_MATRIX &L = LL(b)(0); + BASKER_MATRIX &L = LL[b][0]; //L\x -> y lower_tri_solve(L, x, y, scol_top); @@ -500,7 +500,7 @@ namespace BaskerNS //Update offdiag 
for(Int bb = 1; bb < LL_size(b); ++bb) { - BASKER_MATRIX &LD = LL(b)(bb); + BASKER_MATRIX &LD = LL[b][bb]; //x = LD*y; #ifdef BASKER_DEBUG_SOLVE_RHS char filename[200]; @@ -549,7 +549,7 @@ namespace BaskerNS #endif //U\y -> x - BASKER_MATRIX &U = LU(b)(LU_size(b)-1); + BASKER_MATRIX &U = LU[b][LU_size(b)-1]; upper_tri_solve(U, y, x, scol_top); // NDE: y , x positions swapped... // seems role of x and y changed... #ifdef BASKER_DEBUG_SOLVE_RHS @@ -568,7 +568,7 @@ namespace BaskerNS #endif //y = UB*x; - BASKER_MATRIX &UB = LU(b)(bb); + BASKER_MATRIX &UB = LU[b][bb]; neg_spmv(UB, x, y, scol_top); #ifdef BASKER_DEBUG_SOLVE_RHS diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp index f950e9bd6132..bfd6e2460062 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp @@ -346,10 +346,10 @@ namespace BaskerNS // Update off-diag in the block-row before the diag solve for(int bb = LL_size(b)-1; bb > 0; bb--) { - BASKER_MATRIX &LD = LL(b)(bb); + BASKER_MATRIX &LD = LL[b][bb]; neg_spmv_perm_tr(LD, x, y, scol_top); // update y as mod. rhs, x as solution } - BASKER_MATRIX &L = LL(b)(0); + BASKER_MATRIX &L = LL[b][0]; if (L.nrow != 0 && L.ncol != 0) // Avoid degenerate case e.g. empty block following nd-partitioning lower_tri_solve_tr(L, y, x, scol_top); // x and y should be equal after in M range... 
} @@ -373,10 +373,10 @@ namespace BaskerNS for(Int bb = 0; bb < LU_size(b)-1; bb++) { // update offdiag corresponding to the block-row - BASKER_MATRIX &UB = LU(b)(bb); + BASKER_MATRIX &UB = LU[b][bb]; neg_spmv_tr(UB, x, y, scol_top); } - BASKER_MATRIX &U = LU(b)(LU_size(b)-1); + BASKER_MATRIX &U = LU[b][LU_size(b)-1]; if (U.nrow != 0 && U.ncol != 0) // Avoid degenerate case upper_tri_solve_tr(U, x, y, scol_top); } @@ -410,7 +410,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of { for(Int b = 0; b < btf_top_tabs_offset; b++) { - BASKER_MATRIX &UC = U_D(b); + BASKER_MATRIX &UC = U_D[b]; if ( b > 0 ) spmv_BTF_tr(b, BTF_D, x, y, false); @@ -418,7 +418,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of if (UC.nrow != 0 && UC.ncol != 0) // Avoid degenerate case upper_tri_solve_tr(UC, x, y); - BASKER_MATRIX &LC = L_D(b); + BASKER_MATRIX &LC = L_D[b]; if (LC.nrow != 0 && LC.ncol != 0) // Avoid degenerate case lower_tri_solve_tr(LC, x, y); @@ -462,7 +462,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of if (nblks_c > 0) { Int offset = 0; for(Int b = 0; b < nblks_c; b++) { - BASKER_MATRIX &UC = UBTF(b); + BASKER_MATRIX &UC = UBTF[b]; // Update off-diag // Update X with Y @@ -472,7 +472,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of if (UC.nrow != 0 && UC.ncol != 0) // Avoid degenerate case upper_tri_solve_tr(UC,x,y); - BASKER_MATRIX &LC = LBTF(b); + BASKER_MATRIX &LC = LBTF[b]; if (LC.nrow != 0 && LC.ncol != 0) // Avoid degenerate case lower_tri_solve_tr(LC,x,y); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp index 1248d7472b0e..bd5bc82efdbc 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp @@ -54,7 +54,7 @@ namespace BaskerNS #ifndef BASKER_KOKKOS 
FREE_INT_1DARRAY(iws); FREE_ENTRY_1DARRAY(ews); - C.Finalize(); + //C.Finalize(); #endif } @@ -129,13 +129,12 @@ namespace BaskerNS BASKER_INLINE ~basker_tree() { - //Finalize(); + Finalize(); }//end ~basker_tree BASKER_INLINE void Finalize() { - //printf("basker_tree Finalize todo \n"); if(nroots > 0) { FREE_INT_1DARRAY(roots); @@ -267,7 +266,7 @@ namespace BaskerNS ~basker_symbolic_tree() { - //Finalize(); + Finalize(); }//end ~basker_symbolic_tree BASKER_INLINE diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp index be4c146e9c83..81e3c78c7f9c 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp @@ -827,16 +827,16 @@ namespace BaskerNS Int U_view_size = (U_view_count(i) > 0 ? U_view_count(i) : 1); if (U_view_size > 0) { - MALLOC_MATRIX_1DARRAY(AVM(i), U_view_size); - MALLOC_MATRIX_1DARRAY(LU(i), U_view_size); + MALLOC_MATRIX_1DARRAY(AVM[i], U_view_size); + MALLOC_MATRIX_1DARRAY(LU[i], U_view_size); } //Malloc AL subarray // NOTE: size at least one to allow empty block Int L_view_size = (L_view_count(i) > 0 ? 
L_view_count(i): 1); if (L_view_size > 0) { - MALLOC_MATRIX_1DARRAY(ALM(i), L_view_size); - MALLOC_MATRIX_1DARRAY(LL(i), L_view_size); + MALLOC_MATRIX_1DARRAY(ALM[i], L_view_size); + MALLOC_MATRIX_1DARRAY(LL[i], L_view_size); } LU_size(i) = U_view_count(i); @@ -1056,7 +1056,7 @@ namespace BaskerNS (r_idx < tree.nblks && tree.row_tabs(r_idx+1) == tree.row_tabs(r_idx))) // skip empty blocks { if((L_row+1 < LL_size(L_col)) && - (tree.row_tabs(r_idx+1) == ALM(L_col)(L_row+1).srow)) + (tree.row_tabs(r_idx+1) == ALM[L_col][L_row+1].srow)) { //printf( " > ALM(%d)(%d).srow = %d, row_tab(%d) = %d\n",L_col,L_row+1,ALM(L_col)(L_row+1).srow, r_idx+1,tree.row_tabs(r_idx+1) ); L_row++; @@ -1071,7 +1071,7 @@ namespace BaskerNS (r_idx < tree.nblks && tree.row_tabs(r_idx+1) == tree.row_tabs(r_idx))) // skip empty blocks { if((U_row+1 < LU_size(U_col)) && - (tree.row_tabs(r_idx+1) == AVM(U_col)(U_row+1).srow)) + (tree.row_tabs(r_idx+1) == AVM[U_col][U_row+1].srow)) { //printf( " + AVM(%d)(%d).srow = %d, row_tab(%d) = %d\n",U_col,U_row+1,AVM(U_col)(U_row+1).srow, r_idx+1,tree.row_tabs(r_idx+1) ); U_row++; @@ -1095,8 +1095,8 @@ namespace BaskerNS //Get Matrix Ref - BASKER_MATRIX &Ltemp = ALM(L_col)(L_row); - BASKER_MATRIX &Utemp = AVM(U_col)(U_row); + BASKER_MATRIX &Ltemp = ALM[L_col][L_row]; + BASKER_MATRIX &Utemp = AVM[U_col][U_row]; Int bcol = Ltemp.scol; //diag blk @@ -1162,11 +1162,11 @@ namespace BaskerNS for(Int sb = 0; sb < LL_size(b); ++sb) { //printf( " ALM(%d)(%d).clean_col()\n",b,sb ); - ALM(b)(sb).clean_col(); + ALM[b][sb].clean_col(); } for(Int sb = 0; sb < LU_size(b); ++sb) { - AVM(b)(sb).clean_col(); + AVM[b][sb].clean_col(); } }//for - over all blks @@ -1178,6 +1178,7 @@ namespace BaskerNS BASKER_INLINE int Basker::sfactor_copy() { + printf( " .. 
sfactor_copy ..\n" ); fflush(stdout); //Reorder A; //Match order if(match_flag == BASKER_TRUE) @@ -1322,9 +1323,15 @@ namespace BaskerNS #ifdef BASKER_KOKKOS BASKER_BOOL keep_zeros = BASKER_FALSE; BASKER_BOOL alloc = alloc_BTFA; //BASKER_FALSE; - kokkos_order_init_2D iO(this, alloc, keep_zeros); // t_init_2DA; fill row_idx, vals into ALM, AVM calling convert2D - Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); - Kokkos::fence(); + #ifdef BASKER_PARALLEL_INIT_2D + kokkos_order_init_2D iO(this, alloc, keep_zeros); // t_init_2DA; fill row_idx, vals into ALM, AVM calling convert2D + Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); + Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_2DA(p, alloc, keep_zeros); + } + #endif #else //Comeback #endif diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp index 6009e346f73b..8ea5c54c8e89 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp @@ -144,17 +144,18 @@ enum BASKER_INCOMPLETE_CODE #define BASKER_KOKKOS_NOINIT Kokkos::ViewAllocateWithoutInitializing #define INT_RANK2DARRAY Kokkos::View #define INT_1DARRAY Kokkos::View -#define INT_2DARRAY Kokkos::View #define ENTRY_1DARRAY Kokkos::View -#define ENTRY_2DARRAY Kokkos::View #define BOOL_1DARRAY Kokkos::View #define BOOL_2DARRAY Kokkos::View -#define MATRIX_1DARRAY Kokkos::View -#define MATRIX_2DARRAY Kokkos::View -#define MATRIX_VIEW_1DARRAY Kokkos::View -#define MATRIX_VIEW_2DARRAY Kokkos::View -#define THREAD_1DARRAY Kokkos::View -#define THREAD_2DARRAY Kokkos::View + +#define INT_2DARRAY std::vector +#define ENTRY_2DARRAY std::vector +#define MATRIX_1DARRAY std::vector +#define MATRIX_2DARRAY std::vector +#define MATRIX_VIEW_1DARRAY std::vector +#define MATRIX_VIEW_2DARRAY std::vector +#define THREAD_1DARRAY std::vector +#define THREAD_2DARRAY std::vector #define 
INT_1DARRAY_PAIRS Kokkos::View*, BASKER_EXE_SPACE> //Macro Memory Calls @@ -163,7 +164,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC malloc_pairs_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = INT_1DARRAY_PAIRS(BASKER_KOKKOS_NOINIT("pairs_1d"),s); \ + /*a = INT_1DARRAY_PAIRS(BASKER_KOKKOS_NOINIT("pairs_1d"),s);*/ \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -172,7 +174,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC int_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = INT_1DARRAY(BASKER_KOKKOS_NOINIT("int_1d"),s); \ + /*a = INT_1DARRAY(BASKER_KOKKOS_NOINIT("int_1d"),s);*/ \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -181,7 +184,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s0>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \ BASKER_ASSERT(s1>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \ - a = INT_RANK2DARRAY(BASKER_KOKKOS_NOINIT("int_rank2d"),s0,s1); \ + /*a = INT_RANK2DARRAY(BASKER_KOKKOS_NOINIT("int_rank2d"),s0,s1);*/ \ + Kokkos::resize(a, s0,s1); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } @@ -189,7 +193,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0,"BASKER ASSERT MALLOC int_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = INT_2DARRAY("int_2d",s); \ + /*a = INT_2DARRAY("int_2d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -198,7 +203,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = ENTRY_1DARRAY(BASKER_KOKKOS_NOINIT("entry_1d"),s); \ + /*a = ENTRY_1DARRAY(BASKER_KOKKOS_NOINIT("entry_1d"),s);*/ \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -207,7 +213,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_2d: size to alloc >= 0 fails"); \ if (s 
> 0) { \ - a = ENTRY_2DARRAY("entry_2d",s); \ + /*a = ENTRY_2DARRAY("entry_2d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -216,7 +223,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = BOOL_1DARRAY(BASKER_KOKKOS_NOINIT("bool_1d"), s); \ + /*a = BOOL_1DARRAY(BASKER_KOKKOS_NOINIT("bool_1d"), s);*/ \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -225,7 +233,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = BOOL_2DARRAY("bool_2d", s); \ + /*a = BOOL_2DARRAY("bool_2d", s);*/ \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -234,7 +243,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_1DARRAY("matrix_1d",s); \ + /*a = MATRIX_1DARRAY("matrix_1d",s)*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -243,7 +253,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_2DARRAY("matrix_2d",s); \ + /*a = MATRIX_2DARRAY("matrix_2d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -252,7 +263,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_VIEW_1DARRAY("matrix_view_1d",s); \ + /*a = MATRIX_VIEW_1DARRAY("matrix_view_1d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -261,7 +273,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_VIEW_2DARRAY("matrix_view_2d",s); \ + /*a = 
MATRIX_VIEW_2DARRAY("matrix_view_2d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -270,7 +283,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = THREAD_1DARRAY("thread_1d",s); \ + /*a = THREAD_1DARRAY("thread_1d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -279,9 +293,10 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = THREAD_2DARRAY("thread_2d",s); \ + /*a = THREAD_2DARRAY("thread_2d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ - throw std::bad_alloc(); \ + throw std::bad_alloc(); \ } \ } //RESIZE (with copy) @@ -334,77 +349,92 @@ enum BASKER_INCOMPLETE_CODE #define FREE(a) BASKER_NO_OP -#define FREE_INT_1DARRAY_PAIRS(a) \ - { \ - a = INT_1DARRAY_PAIRS(); \ +#define FREE_INT_1DARRAY_PAIRS(a) \ + { \ + /*a = INT_1DARRAY_PAIRS();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_INT_1DARRAY(a) \ - { \ - a = INT_1DARRAY(); \ +#define FREE_INT_1DARRAY(a) \ + { \ + /*a = INT_1DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_INT_RANK2DARRAY(a) \ - { \ - a = INT_RANK2DARRAY(); \ +#define FREE_INT_RANK2DARRAY(a) \ + { \ + /*a = INT_RANK2DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_INT_2DARRAY(a,n) \ - { \ - a = INT_2DARRAY(); \ +#define FREE_INT_2DARRAY(a,n) \ + { \ + /*a = INT_2DARRAY();*/ \ + a.resize(0); \ } -#define FREE_ENTRY_1DARRAY(a) \ - { \ - a = ENTRY_1DARRAY(); \ +#define FREE_ENTRY_1DARRAY(a) \ + { \ + /*a = ENTRY_1DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_ENTRY_2DARRAY(a,n) \ - { \ - a = ENTRY_2DARRAY(); \ +#define FREE_ENTRY_2DARRAY(a,n) \ + { \ + /*a = ENTRY_2DARRAY();*/ \ + a.resize(0); \ } -#define FREE_BOOL_1DARRAY(a) \ - { \ - a = BOOL_1DARRAY(); \ +#define FREE_BOOL_1DARRAY(a) \ + { \ + /*a = BOOL_1DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_BOOL_2DARRAY(a,n) \ 
- { \ - a = BOOL_2DARRAY(); \ +#define FREE_BOOL_2DARRAY(a,n) \ + { \ + /*a = BOOL_2DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_MATRIX_1DARRAY(a) \ - { \ - a = MATRIX_1DARRAY(); \ +#define FREE_MATRIX_1DARRAY(a) \ + { \ + /*a = MATRIX_1DARRAY();*/ \ + a.resize(0); \ } -#define FREE_MATRIX_2DARRAY(a,n) \ - { \ - a = MATRIX_2DARRAY(); \ +#define FREE_MATRIX_2DARRAY(a,n) \ + { \ + /*a = MATRIX_2DARRAY();*/ \ + a.resize(0); \ } #define FREE_MATRIX_VIEW_1DARRAY(a) \ - { \ - a = MATRIX_VIEW_1DARRAY(); \ + { \ + /*a = MATRIX_VIEW_1DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_MATRIX_VIEW_2DARRAY(a,n) \ - { \ - a = MATRIX_VIEW_2DARRAY(); \ +#define FREE_MATRIX_VIEW_2DARRAY(a,n) \ + { \ + /*a = MATRIX_VIEW_2DARRAY();*/ \ + a.resize(0); \ } #define FREE_THREAD_1DARRAY(a) \ - { \ - a = THREAD_1DARRAY(); \ + { \ + /*a = THREAD_1DARRAY();*/ \ + a.resize(0); \ } -#define FREE_THREAD_2DARRAY(a,n) \ - { \ - a = TRHEAD_2DARRAY(); \ +#define FREE_THREAD_2DARRAY(a,n) \ + { \ + /*a = TRHEAD_2DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#else +#else // not BASKER_KOKKOS + //Execution Space #define BASKER_EXE_SPACE void* //ReMacro Basker Classes diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp index 130f62ea6127..9cf52f3db66d 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp @@ -252,11 +252,11 @@ namespace BaskerNS typedef Kokkos::TeamPolicy TeamPolicy; typedef typename TeamPolicy::member_type TeamMember; Kokkos::parallel_for( - TeamPolicy(Exe_Space::thread_pool_size(),1), - KOKKOS_LAMBDA(const TeamMember& thread) + TeamPolicy(Exe_Space::thread_pool_size(),1), + KOKKOS_LAMBDA(const TeamMember& thread) #else #pragma omp parallel - #endif + #endif { #ifdef BASKER_KOKKOS if(kid == thread.league_rank()) @@ -291,12 +291,11 @@ namespace BaskerNS #ifdef BASKER_KOKKOS typedef Kokkos::TeamPolicy TeamPolicy; typedef 
typename TeamPolicy::member_type TeamMember; - Kokkos::parallel_for( - TeamPolicy(Exe_Space::thread_pool_size(),1), - KOKKOS_LAMBDA(const TeamMember& thread) + Kokkos::parallel_for(TeamPolicy(Exe_Space::thread_pool_size(),1), + KOKKOS_LAMBDA(const TeamMember& thread) #else #pragma omp parallel - #endif + #endif { #ifdef BASKER_KOKKOS if(kid == thread.league_rank()) @@ -328,7 +327,7 @@ namespace BaskerNS { for(Int b=chunk_start; b < chunk_end; b++) { - BASKER_MATRIX &L = LBTF(b-btf_tabs_offset); + BASKER_MATRIX &L = LBTF[b-btf_tabs_offset]; L.clear_pend(); L.nnz = L.mnnz; }//end-for over chunck @@ -343,7 +342,7 @@ namespace BaskerNS #if defined(BASKER_SPLIT_A) for(Int b=chunk_start; b < chunk_end; b++) { - BASKER_MATRIX &L = L_D(b); + BASKER_MATRIX &L = L_D[b]; L.clear_pend(); L.nnz = L.mnnz; }//end-for over chunck @@ -359,7 +358,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; for(Int row = 0; row < LL_size(b); row++) { @@ -368,8 +367,8 @@ namespace BaskerNS b, row, kid, LL[b][row].nnz); #endif - LL(b)(row).clear_pend(); - LL(b)(row).nnz = LL(b)(row).mnnz; + LL[b][row].clear_pend(); + LL[b][row].nnz = LL[b][row].mnnz; }//end over all row }//end select which thread @@ -379,7 +378,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", @@ -388,9 +387,9 @@ namespace BaskerNS #endif //LU(b)(LU_size(b)-1).nnz = 0; - for(Int kk = 0; kk < LU(b)(LU_size(b)-1).ncol+1; kk++) + for(Int kk = 0; kk < LU[b][LU_size(b)-1].ncol+1; kk++) { - LU(b)(LU_size(b)-1).col_ptr(kk) = 0; + LU[b][LU_size(b)-1].col_ptr(kk) = 0; } /* @@ -400,16 +399,16 @@ namespace BaskerNS LU(b)(LU_size(b)-1).mnnz); */ - LU(b)(LU_size(b)-1).nnz = LU(b)(LU_size(b)-1).mnnz; + LU[b][LU_size(b)-1].nnz = LU[b][LU_size(b)-1].mnnz; for(Int l = lvl+1; l < tree.nlvls+1; l++) { - Int U_col = S(l)(kid); + Int U_col = S[l][kid]; Int my_row_leader = 
find_leader(kid, l-1); Int my_new_row = - b - S(0)(my_row_leader); + b - S[0][my_row_leader]; - Int U_row = (l==1)?(kid%2):S(lvl)(kid)%LU_size(U_col); + Int U_row = (l==1)?(kid%2):S[lvl][kid]%LU_size(U_col); //JDB TEST PASS U_row = my_new_row; @@ -420,9 +419,9 @@ namespace BaskerNS LU[U_col][U_row].nnz); #endif - for(Int kk = 0; kk < LU(U_col)(U_row).ncol+1; kk++) + for(Int kk = 0; kk < LU[U_col][U_row].ncol+1; kk++) { - LU(U_col)(U_row).col_ptr(kk) = 0; + LU[U_col][U_row].col_ptr(kk) = 0; } /* printf("flipU (%d,%d) %d %d \n", @@ -431,7 +430,7 @@ namespace BaskerNS LU(U_col)(U_row).mnnz); */ - LU(U_col)(U_row).nnz = LU(U_col)(U_row).mnnz; + LU[U_col][U_row].nnz = LU[U_col][U_row].mnnz; //LU(U_col)(U_row).nnz = 0; }//over inner lvls @@ -455,13 +454,13 @@ namespace BaskerNS Kokkos::Timer timer_init_matrixL; Kokkos::Timer timer_fill_matrixL; timer_initL.reset(); + printf( " > t_init_factor( tid = %d, nlvls = %d ) <\n",kid,tree.nlvls+1 ); fflush(stdout); #endif - //printf( " > t_init_factor( tid = %d ) <\n",kid ); for(Int lvl = 0; lvl < tree.nlvls+1; lvl++) { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; for(Int row = 0; row < LL_size(b); row++) { @@ -472,34 +471,39 @@ namespace BaskerNS #ifdef BASKER_TIMER timer_init_matrixL.reset(); + printf( " ++ lvl=%d: LL(%d,%d): nnz=%d, mnnz=%d ++\n",(int)lvl, (int)b, (int)row, (int)LL[b][row].nnz, (int)LL[b][row].mnnz); fflush(stdout); #endif - //printf( " lvl=%d: LL(%d,%d): nnz=%d, mnnz=%d\n",(int)lvl, (int)b, (int)row, (int)LL(b)(row).nnz, (int)LL(b)(row).mnnz); - LL(b)(row).init_matrix("Loffdig", - LL(b)(row).srow, - LL(b)(row).nrow, - LL(b)(row).scol, - LL(b)(row).ncol, - LL(b)(row).nnz); + LL[b][row].init_matrix("Loffdig", + LL[b][row].srow, + LL[b][row].nrow, + LL[b][row].scol, + LL[b][row].ncol, + LL[b][row].nnz); #ifdef BASKER_TIMER + printf( " >> LL(%d,%d).init_matrix done <<\n",b,row ); fflush(stdout); init_matrixL_time += timer_init_matrixL.seconds(); #endif //Fix when this all happens 
in the future if(Options.incomplete == BASKER_TRUE) { - LL(b)(row).init_inc_lvl(); + LL[b][row].init_inc_lvl(); } #ifdef BASKER_TIMER timer_fill_matrixL.reset(); + printf( " ++ zero out (%d) ++\n",int(LL[b][row].col_ptr.extent(0)) ); fflush(stdout); #endif //LL(b)(row).fill(); - Kokkos::deep_copy(LL(b)(row).col_ptr, 0); + LL[b][row].init_ptr(); + //Kokkos::deep_copy(LL(b)(row).col_ptr, 0); #ifdef BASKER_TIMER + printf( " LL(%d)(%d).init_pend(ncol = %d)\n",b,row,LL[b][row].ncol ); fflush(stdout); fill_matrixL_time += timer_fill_matrixL.seconds(); #endif - //printf( " LL(%d)(%d).init_pend(ncol = %d)\n",b,row,LL(b)(row).ncol ); - LL(b)(row).init_pend(); - + LL[b][row].init_pend(); + #ifdef BASKER_TIMER + printf( " (b=%d: row=%d) done\n\n",b,row ); fflush(stdout); + #endif }//end over all row }//end select which thread }//end for over all lvl @@ -508,6 +512,7 @@ namespace BaskerNS std::cout << " > Basker t_init_factor::initL(" << kid << "): time: " << initL_time << std::endl; std::cout << " > + Basker t_init_factor::initL::initMatrix(" << kid << "): time: " << init_matrixL_time << std::endl; std::cout << " > + Basker t_init_factor::initL::fillMatrix(" << kid << "): time: " << fill_matrixL_time << std::endl; + fflush(stdout); #endif //U @@ -519,7 +524,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", @@ -528,25 +533,26 @@ namespace BaskerNS #endif //printf( " lvl=%d: LU(%d,%d): nnz=%d, mnnz=%d\n", (int)lvl, (int)b, (int)LU_size(b)-1, (int)LU(b)(LU_size(b)-1).nnz, (int)LU(b)(LU_size(b)-1).mnnz); - LU(b)(LU_size(b)-1).init_matrix("Udiag", - LU(b)(LU_size(b)-1).srow, - LU(b)(LU_size(b)-1).nrow, - LU(b)(LU_size(b)-1).scol, - LU(b)(LU_size(b)-1).ncol, - LU(b)(LU_size(b)-1).nnz); + LU[b][LU_size(b)-1].init_matrix("Udiag", + LU[b][LU_size(b)-1].srow, + LU[b][LU_size(b)-1].nrow, + LU[b][LU_size(b)-1].scol, + LU[b][LU_size(b)-1].ncol, + 
LU[b][LU_size(b)-1].nnz); //LU(b)(LU_size(b)-1).fill(); - Kokkos::deep_copy(LU(b)(LU_size(b)-1).col_ptr, 0); + LU[b][LU_size(b)-1].init_ptr(); + //Kokkos::deep_copy(LU(b)(LU_size(b)-1).col_ptr, 0); for(Int l = lvl+1; l < tree.nlvls+1; l++) { - Int U_col = S(l)(kid); + Int U_col = S[l][kid]; Int my_row_leader = find_leader(kid, l-1); Int my_new_row = - b - S(0)(my_row_leader); + b - S[0][my_row_leader]; - Int U_row = (l==1)?(kid%2):S(lvl)(kid)%LU_size(U_col); + Int U_row = (l==1)?(kid%2):S[lvl][kid]%LU_size(U_col); if( (b > 14) && // NDE: Why is 14 specifically used here? (b > LU_size(U_col)) && @@ -577,19 +583,20 @@ namespace BaskerNS #endif //printf( " > l=%d: LU(%d,%d): nnz=%d, mnnz=%d\n", (int)l, (int)U_col, (int)U_row, (int)LU(U_col)(U_row).nnz, (int)LU(U_col)(U_row).mnnz); - LU(U_col)(U_row).init_matrix("Uoffdiag", - LU(U_col)(U_row).srow, - LU(U_col)(U_row).nrow, - LU(U_col)(U_row).scol, - LU(U_col)(U_row).ncol, - LU(U_col)(U_row).nnz); + LU[U_col][U_row].init_matrix("Uoffdiag", + LU[U_col][U_row].srow, + LU[U_col][U_row].nrow, + LU[U_col][U_row].scol, + LU[U_col][U_row].ncol, + LU[U_col][U_row].nnz); //LU(U_col)(U_row).fill(); - Kokkos::deep_copy(LU(U_col)(U_row).col_ptr, 0); + LU[U_col][U_row].init_ptr(); + //Kokkos::deep_copy(LU(U_col)(U_row).col_ptr, 0); if(Options.incomplete == BASKER_TRUE) { - LU(U_col)(U_row).init_inc_lvl(); + LU[U_col][U_row].init_inc_lvl(); } }//over inner lvls @@ -624,15 +631,15 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; for(Int row = 0; row < LL_size(b); row++) { #ifdef BASKER_DEBUG_INIT printf("ALM Factor Init: %d %d , kid: %d, nnz: %d nrow: %d ncol: %d \n", - b, row, kid, ALM(b)(row).nnz, - ALM(b)(row).nrow, - ALM(b)(row).ncol); + b, row, kid, ALM[b][row].nnz, + ALM[b][row].nrow, + ALM[b][row].ncol); #endif /*if (kid == 1) @@ -647,7 +654,7 @@ namespace BaskerNS printf("ALM(%d,%d: %dx%d) alloc with A: kid=%d btf=%d\n", b, row, ALM(b)(row).nrow, ALM(b)(row).ncol, kid, 
Options.btf); #endif - ALM(b)(row).convert2D(A, alloc, kid); + ALM[b][row].convert2D(A, alloc, kid); } else { @@ -656,7 +663,7 @@ namespace BaskerNS printf("ALM(%d,%d, %dx%d) alloc (btf) with BTF_A: kid=%d \n", b, row, ALM(b)(row).nrow, ALM(b)(row).ncol, kid); #endif - ALM(b)(row).convert2D(BTF_A, alloc, kid); + ALM[b][row].convert2D(BTF_A, alloc, kid); } /*if (kid == 0) { for(Int j = 0; j < ALM(b)(row).ncol; j++) { @@ -676,14 +683,14 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; #ifdef BASKER_DEBUG_INTI printf("AUM Factor init: %d %d, kid: %d nnz: %d nrow: %d ncol: %d \n", b, LU_size(b)-1, kid, - AVM(b)(LU_size(b)-1).nnz, - AVM(b)(LU_size(b)-1).nrow, - AVM(b)(LU_size(b)-1).ncol); + AVM[b][LU_size(b)-1].nnz, + AVM[b][LU_size(b)-1].nrow, + AVM[b][LU_size(b)-1].ncol); #endif /*if (kid == 1) { @@ -692,13 +699,13 @@ namespace BaskerNS }*/ if(Options.btf == BASKER_FALSE) { - AVM(b)(LU_size(b)-1).convert2D(A, alloc, kid); + AVM[b][LU_size(b)-1].convert2D(A, alloc, kid); } else { //printf("Using BTF AU\n"); //printf(" > kid=%d: convert2D AVM(%d,%d)\n", kid, b, LU_size(b)-1); - AVM(b)(LU_size(b)-1).convert2D(BTF_A, alloc, kid); + AVM[b][LU_size(b)-1].convert2D(BTF_A, alloc, kid); } /*if (kid == 0) { for(Int j = 0; j < AVM(b)(LU_size(b)-1).ncol; j++) { @@ -715,10 +722,10 @@ namespace BaskerNS //TEST Int my_leader = find_leader(kid,l-1); - Int my_leader_row = S(0)(my_leader); + Int my_leader_row = S[0][my_leader]; //Int my_col_size = pow(2,l); Not used Int my_new_row = - (S(lvl)(kid) - my_leader_row); + (S[lvl][kid] - my_leader_row); //my_new_row = my_new_row%my_col_size; /* @@ -729,7 +736,7 @@ namespace BaskerNS my_col_size, my_new_row); */ - Int U_col = S(l)(kid); + Int U_col = S[l][kid]; Int U_row = my_new_row; //Int U_row = (l==1)?(kid%2):S(lvl)(kid)%LU_size(U_col); @@ -755,9 +762,9 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("Init AUM: %d %d lvl: %d l: %d kid: %d nnz: %d nrow: %d ncol: %d \n", U_col, 
U_row, lvl, l, kid, - AVM(U_col)(U_row).nnz, - AVM(U_col)(U_row).nrow, - AVM(U_col)(U_row).ncol); + AVM[U_col][U_row].nnz, + AVM[U_col][U_row].nrow, + AVM[U_col][U_row].ncol); #endif #if 0 @@ -775,9 +782,9 @@ namespace BaskerNS { //printf(" %d: Using BTF AVM(%d,%d), %dx%d\n",kid,U_col,U_row, AVM(U_col)(U_row).nrow,AVM(U_col)(U_row).ncol); //printf("2nd convert AVM: %d %d size:%d kid: %d\n", - // U_col, U_row, AVM(U_col)(U_row).nnz, + // U_col, U_row, AVM(U_col)(U_row).nnz, // kid); - AVM(U_col)(U_row).convert2D(BTF_A, alloc, kid); + AVM[U_col][U_row].convert2D(BTF_A, alloc, kid); //printf(" %d: Using BTF AU(%d,%d) done\n",kid,U_col,U_row); } @@ -807,22 +814,22 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; for(Int l = 0; l < LL_size(b); l++) { //defining here - LL(b)(l).iws_size = LL(b)(l).nrow; + LL[b][l].iws_size = LL[b][l].nrow; //This can be made smaller, see notes in Sfactor_old - LL(b)(l).iws_mult = 5; - LL(b)(l).ews_size = LL(b)(l).nrow; + LL[b][l].iws_mult = 5; + LL[b][l].ews_size = LL[b][l].nrow; //This can be made smaller, see notes in sfactor_old - LL(b)(l).ews_mult = 2; + LL[b][l].ews_mult = 2; - Int iws_size = LL(b)(l).iws_size; - Int iws_mult = LL(b)(l).iws_mult; - Int ews_size = LL(b)(l).ews_size; - Int ews_mult = LL(b)(l).ews_mult; + Int iws_size = LL[b][l].iws_size; + Int iws_mult = LL[b][l].iws_mult; + Int ews_size = LL[b][l].ews_size; + Int ews_mult = LL[b][l].ews_mult; if(iws_size > max_sep_size) { @@ -835,10 +842,10 @@ namespace BaskerNS } BASKER_ASSERT((iws_size*iws_mult)>0, "util iws"); - MALLOC_INT_1DARRAY(LL(b)(l).iws, iws_size*iws_mult); + MALLOC_INT_1DARRAY(LL[b][l].iws, iws_size*iws_mult); for(Int i=0; i 0) { BASKER_ASSERT((ews_size*ews_mult)>0, "util ews"); - MALLOC_ENTRY_1DARRAY(LL(b)(l).ews, ews_size*ews_mult); + MALLOC_ENTRY_1DARRAY(LL[b][l].ews, ews_size*ews_mult); for(Int i=0; i::find_leader(Int kid, Int l) { l = l+1; - Int my_token = S(l)(kid); + Int my_token = S[l][kid]; Int 
my_loc = kid; while((my_loc > 0)) { my_loc--; - if(S(l)(my_loc) != my_token) + if(S[l][my_loc] != my_token) { my_loc++; break; @@ -2477,4 +2485,5 @@ namespace BaskerNS }//end namespace basker +#undef BASKER_TIMER #endif //end basker_util_hpp From e484f62bbdade30959fa3541503bd993ea7faf64 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Thu, 24 Oct 2024 16:12:13 -0600 Subject: [PATCH 09/20] ShyLU - Basker : memory tune Signed-off-by: iyamazaki --- .../basker/src/shylubasker_decl.hpp | 7 -- .../basker/src/shylubasker_nfactor.hpp | 17 +--- .../basker/src/shylubasker_sfactor.hpp | 87 +++++++++++-------- .../basker/src/shylubasker_structs.hpp | 1 - .../basker/src/shylubasker_types.hpp | 33 +------ .../basker/src/shylubasker_util.hpp | 13 ++- 6 files changed, 69 insertions(+), 89 deletions(-) diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp index 94f4ba1df086..f9b33e325bd7 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp @@ -1494,13 +1494,6 @@ namespace BaskerNS //end NDE - - //RHS and solutions (These are not used anymore) - ENTRY_2DARRAY rhs; - ENTRY_2DARRAY sol; - Int nrhs; - - BASKER_TREE part_tree; BASKER_TREE tree; BASKER_SYMBOLIC_TREE stree; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp index d2c6a5690528..cef593230d5e 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp @@ -171,13 +171,9 @@ namespace BaskerNS }//end while if(Options.verbose == BASKER_TRUE) { - printf("Time DOMAIN: %lf \n", timer.seconds()); + printf("Time DOMAIN: %lf \n\n", timer.seconds()); timer.reset(); } - #ifdef BASKER_TIMER - printf("Time DOMAIN: %lf \n", timer.seconds()); - timer.reset(); - #endif #else// else basker_kokkos #pragma omp parallel @@ 
-282,13 +278,9 @@ namespace BaskerNS //printf( " End Sep: info = %d (%d, %d)\n",info,BASKER_SUCCESS,BASKER_ERROR ); if(Options.verbose == BASKER_TRUE) { - printf("Time SEP: %lf \n", timer.seconds()); + printf("Time SEP: %lf \n\n", timer.seconds()); timer.reset(); } - #ifdef BASKER_TIMER - printf("Time SEP: %lf \n", timer.seconds()); - timer.reset(); - #endif } // ---------------------------------------------------------------------------------------- // @@ -363,11 +355,8 @@ namespace BaskerNS if(Options.verbose == BASKER_TRUE) { - printf("Time BTF: %lf \n", timer.seconds()); + printf("Time BTF: %lf \n\n", timer.seconds()); } - #ifdef BASKER_TIMER - printf("Time BTF: %lf \n", timer.seconds()); - #endif }//end btf call Kokkos::Timer tzback; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp index fd11208ea309..d48f0e720a7e 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp @@ -381,7 +381,7 @@ int Basker::sfactor() if(Options.verbose == BASKER_TRUE) { printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,0); - printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,(int)LU_size(blk)-1); + printf( " >> leaf_assign_nnz(LU(%d)(%d))\n",(int)blk,(int)LU_size(blk)-1); } #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) timer1.reset(); @@ -403,7 +403,7 @@ int Basker::sfactor() #endif #ifdef BASKER_TIMER double dom_time = timer2.seconds(); - std::cout << " DOMAIN BLKs done : " << dom_time << std::endl; + std::cout << " DOMAIN BLKs done : " << dom_time << std::endl << std::endl; #endif for(Int p = 0; p < num_threads; ++p) @@ -462,16 +462,15 @@ int Basker::sfactor() //Assign nnz counts for leaf off-diag //U_assign_nnz(LU[U_col][U_row], stree, 0); //L_assign_nnz(LL[blk][l+1], stree, 0); - if(Options.verbose == BASKER_TRUE) - { - printf( " ++ leaf_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)U_row); - 
printf( " ++ leaf_assign_nnz(LL(%d, %d))\n",(int)blk,(int)l+1); - } #ifdef BASKER_TIMER timer1.reset(); #endif - //printf( " U_assign_nnz(LU(%d,%d))\n",U_col,U_row ); double fill_factor = BASKER_DOM_NNZ_OVER+Options.user_fill; + if(Options.verbose == BASKER_TRUE) + { + printf( " ++ U_assign_nnz(LU(%d, %d)) fill-factor x(%f+%f = %f)\n",(int)U_col,(int)U_row, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); + printf( " ++ L_assign_nnz(LL(%d, %d)) fill-factor x(%f+%f = %f)\n",(int)blk,(int)l+1, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); + } #ifdef SHYLU_BASKER_STREE_LIST U_assign_nnz(LU[U_col][U_row], stree_p, fill_factor, 0); L_assign_nnz(LL[blk][l+1], stree_p, fill_factor, 0); @@ -490,7 +489,7 @@ int Basker::sfactor() std::cout << " >> symmetric_sfactor::domain : " << timer.seconds() << " seconds" << std::endl; std::cout << " ++ symmetric_sfactor::domain::postorder : " << time1_2 << " + " << time1_3 << " + " << time1 << " seconds" << std::endl; std::cout << " ++ symmetric_sfactor::domain::init : " << time2 << " seconds" << std::endl; - std::cout << " ++ symmetric_sfactor::domain::sfactor : " << time3 << " seconds" << std::endl; + std::cout << " ++ symmetric_sfactor::domain::sfactor : " << time3 << " seconds" << std::endl << std::endl; timer.reset(); #endif @@ -631,13 +630,13 @@ int Basker::sfactor() //Assign nnz + double fill_factor = BASKER_SEP_NNZ_OVER+Options.user_fill; if(Options.verbose == BASKER_TRUE) { - printf( " ++ leaf_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)U_row); - printf( " ++ leaf_assign_nnz(LL(%d, %d))\n",(int)inner_blk,(int)(l-lvl)); + printf( " ++ leaf_assign_nnz(LU(%d, %d)) fill-factor x(%d+%f = %f)\n",(int)U_col,(int)U_row, (int)BASKER_SEP_NNZ_OVER,Options.user_fill,fill_factor); + printf( " ++ leaf_assign_nnz(LL(%d, %d)) fill-factor x(%d+%f = %f)\n",(int)inner_blk,(int)(l-lvl), (int)BASKER_SEP_NNZ_OVER,Options.user_fill,fill_factor); fflush(stdout); } - double fill_factor = BASKER_SEP_NNZ_OVER+Options.user_fill; #ifdef 
SHYLU_BASKER_STREE_LIST U_assign_nnz(LU[U_col][U_row], stree_p, fill_factor, 0); L_assign_nnz(LL[inner_blk][l-lvl], stree_p, fill_factor, 0); @@ -2260,6 +2259,9 @@ int Basker::sfactor() Int option ) { + #ifdef BASKER_TIMER + printf("leaf_assign_nnz:\n"); + #endif if(option == 0) { const Int Int_MAX = std::numeric_limits::max(); @@ -2268,19 +2270,23 @@ int Basker::sfactor() for(Int i = 0; i < M.ncol; i++) { if (t_nnz <= Int_MAX - ST.col_counts[i]) { + #ifdef BASKER_TIMER + //printf( " > %d: %d += %d\n",i,t_nnz, ST.col_counts[i] ); + #endif t_nnz += ST.col_counts[i]; } else { // let's just hope it is enough, if overflow break; } } - #ifdef BASKER_DEBUG_SFACTOR - printf("leaf nnz: %ld \n", (long)t_nnz); + #ifdef BASKER_TIMER + printf(" > leaf nnz: (%ld + %ld) / 2 = %ld\n", (long)t_nnz,(long)M.ncol,(long)(t_nnz+M.ncol)/2); #endif + t_nnz = long(t_nnz+M.ncol)/2; //double nnz_shoulder = 1.05; double fill_factor = BASKER_DOM_NNZ_OVER+Options.user_fill; // used to boost fill estimate - Int temp = fill_factor*t_nnz; + Int temp = fill_factor*t_nnz; // assuming (t_nnz/2) as triangular part if (temp > t_nnz) { M.nnz = temp; } else { @@ -2298,8 +2304,8 @@ int Basker::sfactor() } if(Options.verbose == BASKER_TRUE) { - printf("leaf with elbow-room global_nnz = %ld, t_nnz = %ld, M.nnz = %ld (%ld x %ld)\n", - (long)global_nnz,(long)t_nnz,(long)M.nnz,(long)M.nrow,(long)M.ncol); + printf("leaf with elbow-room global_nnz = %ld, t_nnz = %ld, M.nnz = %ld (%ld x %ld) with fill-factor x(%d+%f = %f)\n", + (long)global_nnz,(long)t_nnz,(long)M.nnz,(long)M.nrow,(long)M.ncol,(int)BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); } } }//end assign_leaf_nnz @@ -2330,12 +2336,12 @@ int Basker::sfactor() } } - #ifdef BASKER_DEBUG_SFACTOR + #ifdef BASKER_TIMER printf("U_assing_nnz: %ld \n", t_nnz); #endif //double fill_factor = 1.05; - Int temp = fill_factor*t_nnz; + Int temp = min(M.nrow*M.ncol, Int(fill_factor*t_nnz)); if (temp >= t_nnz) { M.nnz = temp; } else { @@ -2352,8 +2358,8 @@ int 
Basker::sfactor() #endif if(Options.verbose == BASKER_TRUE) { - printf("U_assing with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %f + %f = %f), M.nnz = %ld (%ld x %ld)\n", - (long)global_nnz,(long)t_nnz, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor, (long)M.nnz,(long)M.nrow,(long)M.ncol); + printf("U_assing with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %f), M.nnz = %ld (%ld x %ld)\n", + (long)global_nnz,(long)t_nnz, fill_factor, (long)M.nnz,(long)M.nrow,(long)M.ncol); } } }//end assign_upper_nnz @@ -2384,13 +2390,13 @@ int Basker::sfactor() } } - #ifdef BASKER_DEBUG_SFACTOR + #ifdef BASKER_TIMER printf("L_assign_nnz: %ld \n", t_nnz); #endif // double fill_factor = 2.05; double old_nnz = M.nnz; - Int temp = fill_factor*t_nnz; + Int temp = min(M.nrow*M.ncol, Int(fill_factor*t_nnz)); if (temp >= t_nnz) { M.nnz = temp; } else { @@ -2407,8 +2413,8 @@ int Basker::sfactor() } if(Options.verbose == BASKER_TRUE) { - printf("L_assign with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %e + %e = %e), M.nnz = %ld -> %ld (%ld x %ld)\n", - (long)global_nnz,(long)t_nnz, BASKER_DOM_NNZ_OVER,Options.user_fill, fill_factor, (long)old_nnz,(long)M.nnz, (long)M.nrow,(long)M.ncol); + printf("L_assign with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %f), M.nnz = %ld -> %ld (%ld x %ld)\n", + (long)global_nnz,(long)t_nnz, fill_factor, (long)old_nnz,(long)M.nnz, (long)M.nrow,(long)M.ncol); } } }//end assign_lower_nnz @@ -2482,7 +2488,9 @@ int Basker::sfactor() if ((double)nnz > ((double)lblk_size)*((double)lblk_size)) { nnz = lblk_size*lblk_size; } - //printf( " LBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); + #ifdef BASKER_TIMER + printf( " L_D[%d](%d, size = %d, nnz = %d)\n",i,(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif L_D[i].init_matrix("LBFT", btf_tabs(i), lblk_size, @@ -2493,6 +2501,9 @@ int Basker::sfactor() //For pruning L_D[i].init_pend(); + #ifdef BASKER_TIMER + printf( " U_D[%d](%d, size = %d, nnz = 
%d)\n",i,(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif U_D[i].init_matrix("UBFT", btf_tabs(i), lblk_size, @@ -2532,7 +2543,9 @@ int Basker::sfactor() if ((double)nnz > ((double)lblk_size)*((double)lblk_size)) { nnz = lblk_size*lblk_size; } - //printf( " LBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); + #ifdef BASKER_TIMER + printf( " LBTF(%d, size = %d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif LBTF[i-btf_tabs_offset].init_matrix("LBFT", btf_tabs(i), lblk_size, @@ -2544,7 +2557,9 @@ int Basker::sfactor() //printf( " LBTF(%d).init_pend()\n",(int)(i-btf_tabs_offset) ); LBTF[i-btf_tabs_offset].init_pend(); - //printf( " UBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); + #ifdef BASKER_TIMER + printf( " UBTF(%d, size = %d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif UBTF[i-btf_tabs_offset].init_matrix("UBFT", btf_tabs(i), lblk_size, @@ -2579,18 +2594,18 @@ int Basker::sfactor() //BASKER_ASSERT((thread_array(i).iws_size*thread_array(i).iws_mult) > 0, "Basker btf_last_dense assert: sfactor threads iws > 0 failed"); //BASKER_ASSERT((thread_array(i).ews_size*thread_array(i).ews_mult) > 0, "Basker btf_last_dense assert: sfactor threads ews > 0 failed"); + #ifdef BASKER_TIMER + printf("Malloc Thread: %d iws: %d \n", + i, (thread_array[i].iws_size* + thread_array[i].iws_mult)); + printf("Malloc Thread: %d ews: %d \n", + i, (thread_array[i].ews_size* + thread_array[i].ews_mult)); + #endif if (max_blk_size > 0) { MALLOC_INT_1DARRAY(thread_array[i].iws, thread_array[i].iws_size*thread_array[i].iws_mult); MALLOC_ENTRY_1DARRAY(thread_array[i].ews, thread_array[i].ews_size*thread_array[i].ews_mult); } - #ifdef BASKER_DEBUG_SFACTOR - printf("Malloc Thread: %d iws: %d \n", - i, (thread_array(i).iws_size* - thread_array(i).iws_mult)); - printf("Malloc Thread: %d ews: %d \n", - i, (thread_array(i).ews_size* - thread_array(i).ews_mult)); - #endif } } diff --git 
a/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp index bd5bc82efdbc..ef1e29d597e4 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp @@ -54,7 +54,6 @@ namespace BaskerNS #ifndef BASKER_KOKKOS FREE_INT_1DARRAY(iws); FREE_ENTRY_1DARRAY(ews); - //C.Finalize(); #endif } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp index 8ea5c54c8e89..c55c222ec7c7 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp @@ -90,7 +90,7 @@ enum BASKER_INCOMPLETE_CODE #define BASKER_INC_TOL_VALUE 0.0001 //MACRO INC FILL (this will become dynamic in the future) -#define BASKER_FILL_USER 1.00 +#define BASKER_FILL_USER 0.00 #define BASKER_FILL_LESTIMATE 1.50 #define BASKER_FILL_UESTIMATE 1.50 #define BASKER_FILL_LLOWERESTIMATE 2.00 @@ -149,13 +149,12 @@ enum BASKER_INCOMPLETE_CODE #define BOOL_2DARRAY Kokkos::View #define INT_2DARRAY std::vector -#define ENTRY_2DARRAY std::vector +#define ENTRY_2DARRAY std::vector #define MATRIX_1DARRAY std::vector #define MATRIX_2DARRAY std::vector #define MATRIX_VIEW_1DARRAY std::vector #define MATRIX_VIEW_2DARRAY std::vector #define THREAD_1DARRAY std::vector -#define THREAD_2DARRAY std::vector #define INT_1DARRAY_PAIRS Kokkos::View*, BASKER_EXE_SPACE> //Macro Memory Calls @@ -193,7 +192,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0,"BASKER ASSERT MALLOC int_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = INT_2DARRAY("int_2d",s);*/ \ + /*a = INT_2DARRAY(Kokkos::view_alloc("int_2d", Kokkos::SequentialHostInit),s);*/ \ a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -289,16 +288,7 @@ enum BASKER_INCOMPLETE_CODE throw std::bad_alloc(); \ } \ } -#define MALLOC_THREAD_2DARRAY(a,s) \ - { \ - 
BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_2d: size to alloc >= 0 fails"); \ - if (s > 0) { \ - /*a = THREAD_2DARRAY("thread_2d",s);*/ \ - a.resize(s); \ - if(a.data() == NULL) \ - throw std::bad_alloc(); \ - } \ - } + //RESIZE (with copy) #define RESIZE_1DARRAY(a,os,s) \ { \ @@ -427,12 +417,6 @@ enum BASKER_INCOMPLETE_CODE a.resize(0); \ } -#define FREE_THREAD_2DARRAY(a,n) \ - { \ - /*a = TRHEAD_2DARRAY();*/ \ - Kokkos::resize(a,0); \ - } - #else // not BASKER_KOKKOS //Execution Space @@ -458,7 +442,6 @@ enum BASKER_INCOMPLETE_CODE #define MATRIX_VIEW_1DARRAY BASKER_MATRIX_VIEW* #define MATRIX_VIEW_2DARRAY BASKER_MATRIX_VIEW** #define THREAD_1DARRAY BASKER_THREAD* -#define THREAD_2DARRAY BASKER_THREAD** //Macro Memory Calls //Malloc @@ -473,7 +456,6 @@ enum BASKER_INCOMPLETE_CODE #define MALLOC_MATRIX_VIEW_1DARRAY(a,s) a = new BASKER_MATRIX_VIEW [s] #define MALLOC_MATRIX_VIEW_2DARRAY(a,s) a = new MATRIX_VIEW_1DARRAY[s] #define MALLOC_THREAD_1DARRAY(a,s) a = new BASKER_THREAD [s] -#define MALLOC_THREAD_2DARRAY(a,s) a = new THREAD_1DARRAY [s] //Resize (copy old data) (come back and add) #define RESIZE_1DARRAY(a,os,s) BASKER_NO_OP #define RESIZE_2DARRAY(a,os1,os2,s1,s2) BASKER_NO_OP @@ -555,13 +537,6 @@ enum BASKER_INCOMPLETE_CODE FREE(a); \ } -#define FREE_THREAD_2DARRAY(a,n) \ - { \ - for(BASKER_INT MACRO_I = 0; MACRO_I < s; MACRO_I++) \ - FREE(a[MACRO_I]); \ - FREE(a); \ - } - #endif //end ifdef BASKER_KOKKOS //Inline command diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp index 9cf52f3db66d..3691d73c63ba 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp @@ -532,7 +532,11 @@ namespace BaskerNS LU[b][LU_size[b]-1].nnz); #endif - //printf( " lvl=%d: LU(%d,%d): nnz=%d, mnnz=%d\n", (int)lvl, (int)b, (int)LU_size(b)-1, (int)LU(b)(LU_size(b)-1).nnz, (int)LU(b)(LU_size(b)-1).mnnz); + #ifdef 
BASKER_TIMER + printf( " lvl=%d: LU(%d,%d): %dx%d, nnz=%d, mnnz=%d, at (%d,%d)\n", (int)lvl, (int)b, (int)LU_size(b)-1, + (int)LU[b][LU_size(b)-1].nrow,(int)LU[b][LU_size(b)-1].ncol,(int)LU[b][LU_size(b)-1].nnz, (int)LU[b][LU_size(b)-1].mnnz, + (int)LU[b][LU_size(b)-1].srow,(int)LU[b][LU_size(b)-1].scol); + #endif LU[b][LU_size(b)-1].init_matrix("Udiag", LU[b][LU_size(b)-1].srow, LU[b][LU_size(b)-1].nrow, @@ -582,7 +586,12 @@ namespace BaskerNS LU[U_col][U_row].nnz); #endif - //printf( " > l=%d: LU(%d,%d): nnz=%d, mnnz=%d\n", (int)l, (int)U_col, (int)U_row, (int)LU(U_col)(U_row).nnz, (int)LU(U_col)(U_row).mnnz); + #ifdef BASKER_TIMER + printf( " +++ l=%d: LU(%d,%d): %dx%d, nnz=%d, mnnz=%d at (%d,%d)\n", (int)l, (int)U_col, (int)U_row, + (int)LU[U_col][U_row].nrow,(int)LU[U_col][U_row].ncol, + (int)LU[U_col][U_row].nnz, (int)LU[U_col][U_row].mnnz, + (int)LU[U_col][U_row].srow,(int)LU[U_col][U_row].scol); + #endif LU[U_col][U_row].init_matrix("Uoffdiag", LU[U_col][U_row].srow, LU[U_col][U_row].nrow, From 03b1ab0362ca3aa4c620146b57229652570d9c15 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Fri, 25 Oct 2024 17:33:32 -0600 Subject: [PATCH 10/20] ShyLU - Basker : replace std::vector with SequentialHostInit Signed-off-by: iyamazaki --- .../basker/src/shylubasker_error_manager.hpp | 150 +++++++-------- .../basker/src/shylubasker_nfactor_blk.hpp | 74 ++++---- .../src/shylubasker_nfactor_blk_inc.hpp | 171 +++++++++--------- .../basker/src/shylubasker_nfactor_col.hpp | 148 +++++++-------- .../basker/src/shylubasker_nfactor_col2.hpp | 44 ++--- .../src/shylubasker_nfactor_col_inc.hpp | 158 ++++++++-------- .../basker/src/shylubasker_nfactor_diag.hpp | 58 +++--- .../basker/src/shylubasker_sfactor.hpp | 64 +++---- .../basker/src/shylubasker_sfactor_inc.hpp | 24 +-- .../basker/src/shylubasker_tree.hpp | 22 +-- .../basker/src/shylubasker_types.hpp | 85 +++------ .../basker/src/shylubasker_util.hpp | 68 +++---- 12 files changed, 513 insertions(+), 553 deletions(-) diff --git 
a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp index 84cbb8b801b7..d9695c6e5c78 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp @@ -46,66 +46,66 @@ namespace BaskerNS for(Int ti = 0; ti < num_threads; ti++) { //Note: jdb we can make this into a switch - if(thread_array[ti].error_type == BASKER_ERROR_NOERROR) + if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) { threads_start(ti) = BASKER_MAX_IDX; continue; - } else if(thread_array[ti].error_type == BASKER_ERROR_SINGULAR) + } else if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREAD: " << ti - << " DOMBLK SINGULAR: blk=" << thread_array[ti].error_blk + << " DOMBLK SINGULAR: blk=" << thread_array(ti).error_blk << std::endl; } return BASKER_ERROR; - } else if(thread_array[ti].error_type == BASKER_ERROR_NOMALLOC) + } else if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREAD: " << ti - << " DOMBLK NOMALLOC : blk=" << thread_array[ti].error_blk + << " DOMBLK NOMALLOC : blk=" << thread_array(ti).error_blk << std::endl; } return BASKER_ERROR; - } else if(thread_array[ti].error_type == BASKER_ERROR_REMALLOC) + } else if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) { - BASKER_ASSERT(thread_array[ti].error_blk >= 0, "nfactor_dom_error error_blk"); + BASKER_ASSERT(thread_array(ti).error_blk >= 0, "nfactor_dom_error error_blk"); if(Options.verbose == BASKER_TRUE) { std::cout << " > THREAD: " << ti - << " DOMBLK MALLOC : blk=" << thread_array[ti].error_blk - << " subblk=" << thread_array[ti].error_subblk - << " newsize=" << thread_array[ti].error_info + << " DOMBLK MALLOC : blk=" << thread_array(ti).error_blk + << " subblk=" << thread_array(ti).error_subblk + << " newsize=" << 
thread_array(ti).error_info << std::endl; } //If on diagonal, want to compare L and U Int resize_L = BASKER_MAX_IDX; Int resize_U = BASKER_MAX_IDX; - if(thread_array[ti].error_subblk != BASKER_MAX_IDX) + if(thread_array(ti).error_subblk != BASKER_MAX_IDX) { - BASKER_ASSERT(thread_array[ti].error_info > 0, "L) newsize not big enough"); - resize_L = thread_array[ti].error_info; + BASKER_ASSERT(thread_array(ti).error_info > 0, "L) newsize not big enough"); + resize_L = thread_array(ti).error_info; //if L is already bigger and U, //We will want re size U as, well - if(thread_array[ti].error_subblk == 0) + if(thread_array(ti).error_subblk == 0) { - Int blkcol = thread_array[ti].error_blk; + Int blkcol = thread_array(ti).error_blk; Int blkUrow = LU_size(blkcol)-1; if(LL[blkcol][0].nnz >= LU[blkcol][blkUrow].nnz) { - resize_U = thread_array[ti].error_info; + resize_U = thread_array(ti).error_info; } }//if - a domain } //We don't care about the other way since, //L is already checked before U. - if(thread_array[ti].error_subblk == -1) + if(thread_array(ti).error_subblk == -1) { - resize_U = thread_array[ti].error_info; + resize_U = thread_array(ti).error_info; } //Resize L, if resize_L != -1 (meaning realloc-L is requested) @@ -116,7 +116,7 @@ namespace BaskerNS std::cout << " ++ resize L( tid = " << ti << " ): new size = " << resize_L << std::endl; } BASKER_MATRIX &L = - LL[thread_array[ti].error_blk][thread_array[ti].error_subblk]; + LL[thread_array(ti).error_blk][thread_array(ti).error_subblk]; REALLOC_INT_1DARRAY(L.row_idx, L.nnz, resize_L); @@ -142,7 +142,7 @@ namespace BaskerNS std::cout << " ++ resize U( tid = " << ti << " ): new size = " << resize_U << std::endl; } BASKER_MATRIX &U = - LU[thread_array[ti].error_blk][0]; + LU[thread_array(ti).error_blk][0]; REALLOC_INT_1DARRAY(U.row_idx, U.nnz, resize_U); @@ -153,7 +153,7 @@ namespace BaskerNS U.nnz = resize_U; //Still need to clear pend BASKER_MATRIX &L = - LL[thread_array[ti].error_blk][0]; + 
LL[thread_array(ti).error_blk][0]; L.clear_pend(); } @@ -163,11 +163,11 @@ namespace BaskerNS { //Clear workspace, whole column for(Int sb = 0; - sb < LL_size(thread_array[ti].error_blk); + sb < LL_size(thread_array(ti).error_blk); sb++) { BASKER_MATRIX &SL = - LL[thread_array[ti].error_blk][sb]; + LL[thread_array(ti).error_blk][sb]; for(Int i = 0; i < SL.iws_size*SL.iws_mult; ++i) { SL.iws(i) = (Int) 0; @@ -198,13 +198,13 @@ namespace BaskerNS }//for - sb (subblks) }//if ws is filled - threads_start(ti) = thread_array[ti].error_blk; + threads_start(ti) = thread_array(ti).error_blk; //Reset - thread_array[ti].error_type = BASKER_ERROR_NOERROR; - thread_array[ti].error_blk = BASKER_MAX_IDX; - thread_array[ti].error_info = BASKER_MAX_IDX; + thread_array(ti).error_type = BASKER_ERROR_NOERROR; + thread_array(ti).error_blk = BASKER_MAX_IDX; + thread_array(ti).error_info = BASKER_MAX_IDX; nthread_remalloc++; }//if REMALLOC @@ -231,26 +231,26 @@ namespace BaskerNS for(Int ti = 0; ti < num_threads; ti++) { //Note: jdb we can make this into a switch - if(thread_array[ti].error_type == BASKER_ERROR_NOERROR) + if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) { thread_start(ti) = BASKER_MAX_IDX; continue; } - else if(thread_array[ti].error_type == BASKER_ERROR_SINGULAR) + else if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREAD: " << ti - << " SEPBLK SINGULAR: blk=" << thread_array[ti].error_blk + << " SEPBLK SINGULAR: blk=" << thread_array(ti).error_blk << std::endl; } return BASKER_ERROR; - } else if(thread_array[ti].error_type == BASKER_ERROR_NOMALLOC) + } else if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREADS: " << ti - << " SEPBLK NOMALLOC: blk=" << thread_array[ti].error_blk + << " SEPBLK NOMALLOC: blk=" << thread_array(ti).error_blk << std::endl; } return BASKER_ERROR; @@ -260,22 +260,22 @@ namespace BaskerNS Int 
error_sep_lvl = BASKER_MAX_IDX; for(Int l = 1; l < tree.nlvls+1; l++) { - if(thread_array[ti].error_blk == S[l][ti]) + if(thread_array(ti).error_blk == S(l)(ti)) { error_sep_lvl = l; break; } } - if(thread_array[ti].error_type == BASKER_ERROR_REMALLOC) + if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) { - BASKER_ASSERT(thread_array[ti].error_blk >= 0, "nfactor_SEP_error error_blk"); + BASKER_ASSERT(thread_array(ti).error_blk >= 0, "nfactor_SEP_error error_blk"); if(Options.verbose == BASKER_TRUE) { std::cout << " > THREADS: " << ti - << " SEPBLK MALLOC: blk=" << thread_array[ti].error_blk - << " subblk=" << thread_array[ti].error_subblk - << " newsize=" << thread_array[ti].error_info + << " SEPBLK MALLOC: blk=" << thread_array(ti).error_blk + << " subblk=" << thread_array(ti).error_subblk + << " newsize=" << thread_array(ti).error_info << std::endl; std::cout << " > SEPLVL: " << error_sep_lvl << std::endl; } @@ -283,9 +283,9 @@ namespace BaskerNS //If on diagonal, want to compare L and U Int resize_L = BASKER_MAX_IDX; Int resize_U = BASKER_MAX_IDX; - if(thread_array[ti].error_subblk <= -1) + if(thread_array(ti).error_subblk <= -1) { - resize_L = thread_array[ti].error_info; + resize_L = thread_array(ti).error_info; if(Options.verbose == BASKER_TRUE) { std::cout << " ++ L size: " << resize_L << std::endl; @@ -293,9 +293,9 @@ namespace BaskerNS } //We don't care about the other way since, //L is already checked before U. 
- if(thread_array[ti].error_subblk > -1) + if(thread_array(ti).error_subblk > -1) { - resize_U = thread_array[ti].error_info; + resize_U = thread_array(ti).error_info; if(Options.verbose == BASKER_TRUE) { std::cout << " ++ U size: " << resize_U << std::endl; @@ -305,9 +305,9 @@ namespace BaskerNS //Resize L, if resize_L != -1 (meaning realloc-L is requested) if(resize_L != BASKER_MAX_IDX) { - const Int tsb = (-1*thread_array[ti].error_subblk)-1; + const Int tsb = (-1*thread_array(ti).error_subblk)-1; BASKER_MATRIX &L = - LL[thread_array[ti].error_blk][tsb]; + LL[thread_array(ti).error_blk][tsb]; REALLOC_INT_1DARRAY(L.row_idx, L.nnz, resize_L); @@ -322,9 +322,9 @@ namespace BaskerNS //Resize U, if resize_U != -1 (meaning realloc-U is requested) if(resize_U != BASKER_MAX_IDX) { - const Int tsb = thread_array[ti].error_subblk; + const Int tsb = thread_array(ti).error_subblk; BASKER_MATRIX &U = - LU[thread_array[ti].error_blk][tsb]; + LU[thread_array(ti).error_blk][tsb]; REALLOC_INT_1DARRAY(U.row_idx, U.nnz, resize_U); @@ -346,7 +346,7 @@ namespace BaskerNS //Though this could be done in parallel in the future for(Int p = 0; p < num_threads; p++) { - Int blk = S[0][p]; + Int blk = S(0)(p); //if(LL(blk)(0).w_fill == BASKER_TRUE) { //Clear workspace, whole column @@ -369,7 +369,7 @@ namespace BaskerNS Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A for(Int p = 0; p < num_threads; p++) { - Int blk = S[error_sep_lvl][p]; + Int blk = S(error_sep_lvl)(p); //if(LL(blk)(0).w_fill == BASKER_TRUE) { BASKER_MATRIX &TM = LL[blk][0]; @@ -386,7 +386,7 @@ namespace BaskerNS //Note, will have to clear the perm in all sep blk in that level //Clear permuation BASKER_MATRIX &SL = - LL[thread_array[ti].error_blk][0]; + LL[thread_array(ti).error_blk][0]; //printf( " + scol_top = %d, srow = %d, nrowl = %d\n",scol_top,SL.srow,SL.nrow ); for(Int i = scol_top + SL.srow; i < scol_top + (SL.srow+SL.nrow); i++) { @@ -394,12 +394,12 @@ namespace BaskerNS gperm(i) = 
BASKER_MAX_IDX; }//for--to clear perm - thread_start(ti) = thread_array[ti].error_blk; + thread_start(ti) = thread_array(ti).error_blk; //Reset - thread_array[ti].error_type = BASKER_ERROR_NOERROR; - thread_array[ti].error_blk = BASKER_MAX_IDX; - thread_array[ti].error_info = BASKER_MAX_IDX; + thread_array(ti).error_type = BASKER_ERROR_NOERROR; + thread_array(ti).error_blk = BASKER_MAX_IDX; + thread_array(ti).error_info = BASKER_MAX_IDX; for(Int i = 0; i < num_threads; i++) { @@ -451,9 +451,9 @@ namespace BaskerNS Int btab = btf_tabs_offset; for(Int ti = 0; ti < num_threads; ti++) { - Int c = thread_array[ti].error_blk; + Int c = thread_array(ti).error_blk; //Note: jdb we can make this into a switch - if(thread_array[ti].error_type == BASKER_ERROR_NOERROR) + if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) { if (c >= btab) { thread_start(ti) = BASKER_MAX_IDX; @@ -463,7 +463,7 @@ namespace BaskerNS continue; }//end if NOERROR - if(thread_array[ti].error_type == BASKER_ERROR_SINGULAR) + if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) { if(Options.verbose == BASKER_TRUE) { @@ -474,7 +474,7 @@ namespace BaskerNS return BASKER_ERROR; }//end if SINGULAR - if(thread_array[ti].error_type == BASKER_ERROR_NOMALLOC) + if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) { std::cout << "ERROR_THREADS: " << ti << " DIAGBLK NOMALLOC blk=" << c @@ -482,16 +482,16 @@ namespace BaskerNS return BASKER_ERROR; }//end if NOMALLOC - if(thread_array[ti].error_type == BASKER_ERROR_REMALLOC) + if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) { - Int liwork = thread_array[ti].iws_size*thread_array[ti].iws_mult; - Int lework = thread_array[ti].ews_size*thread_array[ti].ews_mult; + Int liwork = thread_array(ti).iws_size*thread_array(ti).iws_mult; + Int lework = thread_array(ti).ews_size*thread_array(ti).ews_mult; BASKER_ASSERT(c >= 0, "nfactor_diag_error error_blk"); if(Options.verbose == BASKER_TRUE) { std::cout << " > THREADS: " << ti << " DIAGBLK MALLOC blk=" << 
c - << " newsize=" << thread_array[ti].error_info + << " newsize=" << thread_array(ti).error_info << " for both L( " << c << " ) and U( " << c << " )" << std::endl; @@ -504,11 +504,11 @@ namespace BaskerNS for(Int i = 0; i < liwork; i++) { - thread_array[ti].iws(i) = (Int) 0; + thread_array(ti).iws(i) = (Int) 0; } for(Int i = 0; i < lework; i++) { - thread_array[ti].ews(i) = zero; + thread_array(ti).ews(i) = zero; } //Resize L @@ -516,12 +516,12 @@ namespace BaskerNS L.clear_pend(); REALLOC_INT_1DARRAY(L.row_idx, L.nnz, - thread_array[ti].error_info); + thread_array(ti).error_info); REALLOC_ENTRY_1DARRAY(L.val, L.nnz, - thread_array[ti].error_info); - L.mnnz = thread_array[ti].error_info; - L.nnz = thread_array[ti].error_info; + thread_array(ti).error_info); + L.mnnz = thread_array(ti).error_info; + L.nnz = thread_array(ti).error_info; for(Int i = 0; i < L.ncol; i++) { L.col_ptr(i) = 0; @@ -536,12 +536,12 @@ namespace BaskerNS BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); REALLOC_INT_1DARRAY(U.row_idx, U.nnz, - thread_array[ti].error_info); + thread_array(ti).error_info); REALLOC_ENTRY_1DARRAY(U.val, U.nnz, - thread_array[ti].error_info); - U.mnnz = thread_array[ti].error_info; - U.nnz = thread_array[ti].error_info; + thread_array(ti).error_info); + U.mnnz = thread_array(ti).error_info; + U.nnz = thread_array(ti).error_info; for(Int i = 0; i < U.ncol; i++) { U.col_ptr(i) = 0; @@ -561,9 +561,9 @@ namespace BaskerNS } //Reset - thread_array[ti].error_type = BASKER_ERROR_NOERROR; - thread_array[ti].error_blk = BASKER_MAX_IDX; - thread_array[ti].error_info = BASKER_MAX_IDX; + thread_array(ti).error_type = BASKER_ERROR_NOERROR; + thread_array(ti).error_blk = BASKER_MAX_IDX; + thread_array(ti).error_info = BASKER_MAX_IDX; nthread_remalloc++; @@ -593,7 +593,7 @@ namespace BaskerNS { for(Int ti = 0; ti < num_threads; ti++) { - thread_array[ti].error_type = BASKER_ERROR_NOERROR; + thread_array(ti).error_type = BASKER_ERROR_NOERROR; } } diff --git 
a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp index 6613d992dbc2..030d526299a1 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp @@ -149,7 +149,7 @@ namespace BaskerNS const Mag normA = BTF_A.gnorm; const Mag normA_blk = BTF_A.anorm; - Int b = S[0][kid]; //Which blk from schedule + Int b = S(0)(kid); //Which blk from schedule BASKER_MATRIX &L = LL[b][0]; BASKER_MATRIX &U = LU[b][LU_size(b)-1]; BASKER_MATRIX &M = ALM[b][0]; //A->blk @@ -159,9 +159,9 @@ namespace BaskerNS ENTRY_1DARRAY X = LL[b][0].ews; Int ws_size = LL[b][0].iws_size; #else //else if BASKER_2DL - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; #endif //Int bcol = L.scol; //begining col //NOT UD Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A @@ -577,11 +577,11 @@ namespace BaskerNS } } if (!explicit_pivot) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_SINGULAR; - thread_array[kid].error_blk = b; - thread_array[kid].error_subblk = 0; - thread_array[kid].error_info = k; + thread_array(kid).error_blk = b; + thread_array(kid).error_subblk = 0; + thread_array(kid).error_info = k; return BASKER_ERROR; } } @@ -676,17 +676,17 @@ namespace BaskerNS (int)kid, (long)b, (long)llnnz, (long)lnnz, (long)lcnt, (int)lnnz, (int)M.nrow, (long)newsize); } - thread_array[kid].error_blk = b; - thread_array[kid].error_subblk = 0; + thread_array(kid).error_blk = b; + thread_array(kid).error_subblk = 0; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } 
else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } @@ -701,17 +701,17 @@ namespace BaskerNS (int)kid, (long)b, (long)uunnz, (long)unnz+ucnt, (long)k, (int)uunnz, (int)M.nrow, (int)newsize); } - thread_array[kid].error_blk = b; - thread_array[kid].error_subblk = -1; + thread_array(kid).error_blk = b; + thread_array(kid).error_subblk = -1; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } @@ -981,7 +981,7 @@ namespace BaskerNS ) { //Setup variables - const Int wsb = S[0][kid]; + const Int wsb = S(0)(kid); INT_1DARRAY ws = LL[wsb][l].iws; const Int ws_size = LL[wsb][l].iws_size; @@ -1011,11 +1011,11 @@ namespace BaskerNS ) { const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A - const Int b = S[lvl][kid]; + const Int b = S(lvl)(kid); //const Int wsb = S(0)(kid); BASKER_MATRIX &L = LL[b][0]; - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); Int U_row = LU_size(U_col)-1; if(lvl > 0) { @@ -1128,8 +1128,8 @@ namespace BaskerNS { //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); BASKER_MATRIX &L = LL[b][0]; const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_g = L.srow + scol_top; // global offset @@ -1279,15 +1279,15 @@ namespace BaskerNS { //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); BASKER_MATRIX 
&L = LL[b][0]; #ifdef BASKER_2DL INT_1DARRAY ws = LL[wsb][l].iws; const Int ws_size = LL[wsb][l].iws_size; #else - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; #endif const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A @@ -1452,17 +1452,17 @@ namespace BaskerNS Int k, Int top, Int xnnz) { - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); BASKER_MATRIX &L = LL[b][0]; #ifdef BASKER_2DL INT_1DARRAY ws = LL[wsb][l].iws; ENTRY_1DARRAY X = LL[wsb][l].ews; Int ws_size = LL[wsb][l].iws_size; #else - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; #endif const Entry zero (0.0); @@ -1658,17 +1658,17 @@ namespace BaskerNS (long)blkcol, (long)blkrow, (long)kid, (long)llnnz, (long)lnnz, (long)p_size ); } - thread_array[kid].error_blk = blkcol; - thread_array[kid].error_subblk = blkrow; + thread_array(kid).error_blk = blkcol; + thread_array(kid).error_subblk = blkrow; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } //BASKER_ASSERT(0==1, "REALLOC LOWER BLOCK\n"); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp index 48dae30f95c9..cf6fd8b3c0d9 100644 --- 
a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp @@ -130,14 +130,14 @@ namespace BaskerNS BASKER_INLINE int Basker::t_nfactor_blk_inc_lvl(Int kid) { - Int b = S[0][kid]; //Which blk from schedule - BASKER_MATRIX &L = LL[b][0]; - BASKER_MATRIX &U = LU[b][LU_size(b)-1]; - BASKER_MATRIX &M = ALM[b][0]; //A->blk + Int b = S(0)(kid); //Which blk from schedule + BASKER_MATRIX &L = LL(b)(0); + BASKER_MATRIX &U = LU(b)(LU_size(b)-1); + BASKER_MATRIX &M = ALM(b)(0); //A->blk - INT_1DARRAY ws = LL[b][0].iws; - ENTRY_1DARRAY X = LL[b][0].ews; - Int ws_size = LL[b][0].iws_size; + INT_1DARRAY ws = LL(b)(0).iws; + ENTRY_1DARRAY X = LL(b)(0).ews; + Int ws_size = LL(b)(0).iws_size; Int brow = L.srow; //begining row Int lval = 0; @@ -384,10 +384,10 @@ namespace BaskerNS << pivot << endl; cout << "lcnt: " << lcnt << endl; } - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_SINGULAR; - thread_array[kid].error_blk = b; - thread_array[kid].error_info = k; + thread_array(kid).error_blk = b; + thread_array(kid).error_info = k; return BASKER_ERROR; } @@ -410,17 +410,17 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_blk = b; - thread_array[kid].error_subblk = 0; - thread_array[kid].error_info = newsize; + thread_array(kid).error_blk = b; + thread_array(kid).error_subblk = 0; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } @@ -441,17 +441,17 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - 
thread_array[kid].error_blk = b; - thread_array[kid].error_subblk = -1; - thread_array[kid].error_info = newsize; + thread_array(kid).error_blk = b; + thread_array(kid).error_subblk = -1; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } @@ -665,9 +665,9 @@ namespace BaskerNS { //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); const Int brow = L.srow; INT_1DARRAY ws = LL[wsb][l].iws; @@ -936,12 +936,12 @@ namespace BaskerNS ) { //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); - INT_1DARRAY ws = LL[wsb][l].iws; - const Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + const Int ws_size = LL(wsb)(l).iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -985,9 +985,9 @@ namespace BaskerNS //Will want to make this backward in the future //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); const Int brow = L.srow; INT_1DARRAY ws = LL[wsb][l].iws; @@ -1353,12 +1353,12 @@ namespace BaskerNS //We note that this can be fixed to be faster - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; - INT_1DARRAY ws = LL[wsb][l].iws; - ENTRY_1DARRAY X = LL[wsb][l].ews; - const Int ws_size = LL[wsb][l].iws_size; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); + INT_1DARRAY ws = LL(wsb)(l).iws; + ENTRY_1DARRAY X = LL(wsb)(l).ews; + const Int ws_size = LL(wsb)(l).iws_size; Int brow = L.srow; @@ -1441,12 +1441,12 @@ namespace BaskerNS { //We note that this can be fixed to be faster - const Int b = S[lvl][kid]; - const Int wsb = 
S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; - INT_1DARRAY ws = LL[wsb][l].iws; - ENTRY_1DARRAY X = LL[wsb][l].ews; - const Int ws_size = LL[wsb][l].iws_size; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); + INT_1DARRAY ws = LL(wsb)(l).iws; + ENTRY_1DARRAY X = LL(wsb)(l).ews; + const Int ws_size = LL(wsb)(l).iws_size; Int brow = L.srow; Int *color = &(ws(0)); @@ -2105,18 +2105,18 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_blk = blkcol; - thread_array[kid].error_subblk = blkrow; - thread_array[kid].error_info = newsize; + thread_array(kid).error_blk = blkcol; + thread_array(kid).error_subblk = blkrow; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } @@ -2176,18 +2176,18 @@ namespace BaskerNS BASKER_INLINE int Basker::t_nfactor_blk_old(Int kid) { - Int b = S[0][kid]; //Which blk from schedule - BASKER_MATRIX &L = LL[b][0]; - BASKER_MATRIX &U = LU[b][LU_size[b]-1]; + Int b = S(0)(kid); //Which blk from schedule + BASKER_MATRIX &L = LL(b)(0); + BASKER_MATRIX &U = LU(b)(LU_size[b]-1); #ifdef BASKER_2DL printf("Accessing blk: %d \n", b); - INT_1DARRAY ws = LL[b][0].iws; - ENTRY_1DARRAY X = LL[b][0].ews; - Int ws_size = LL[b][0].iws_size; + INT_1DARRAY ws = LL(b)(0).iws; + ENTRY_1DARRAY X = LL(b)(0).ews; + Int ws_size = LL(b)(0).iws_size; #else //else if BASKER_2DL - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; #endif Int bcol = L.scol; //begining col @@ -2576,15 +2576,15 @@ namespace BaskerNS { //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = 
S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); #ifdef BASKER_2DL - INT_1DARRAY ws = LL[wsb][l].iws; - Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + Int ws_size = LL(wsb)(l).iws_size; #else - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; #endif const Int brow = L.srow; @@ -2729,8 +2729,8 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM[blkcol][blkrow]; /* @@ -2750,17 +2750,16 @@ namespace BaskerNS BASKER_MATRIX *LPP = &LU[LP_col][0]; if(LP_row != BASKER_MAX_IDX) { - LPP = &(LL[LP_col][LP_row]); + LPP = &(LL(LP_col)(LP_row)); } BASKER_MATRIX &LP = *(LPP); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; - - Int nnz = LL[X_col][X_row].p_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; + Int nnz = LL(X_col)(X_row).p_size; @@ -2948,7 +2947,7 @@ namespace BaskerNS }//over all nonzero in left - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; return; @@ -2969,7 +2968,7 @@ namespace BaskerNS Int x_size, Int x_offset, BASKER_BOOL A_option) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); BASKER_MATRIX &B = ALM[blkcol][blkrow]; INT_1DARRAY ws = LL[X_col][X_row].iws; @@ -3840,9 +3839,9 @@ namespace BaskerNS ) { - const Int my_idx = S[0][kid]; + const Int my_idx = S(0)(kid); const Int team_leader = find_leader(kid,sl); - const Int leader_idx = S[0][team_leader]; + const Int leader_idx = S(0)(team_leader); //Int loop_col_idx = S(l)(kid); //printf("Reduce col fill called, kid: %d leader: %d \n", @@ 
-3941,12 +3940,12 @@ namespace BaskerNS //printf("===========T ADD ORIG FILL CALLED\n"); const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S[lvl][leader_id]; + const Int L_col = S(lvl)(leader_id); Int L_row = 0; //const Int U_col = S(lvl)(leader_id); //Int U_row = LU_size(U_col)-1; //Int X_col = S(0)(leader_id); - Int X_col = S[0][kid]; + Int X_col = S(0)(kid); Int X_row = l+1; @@ -3977,7 +3976,7 @@ namespace BaskerNS //Int L_row = 0; //const Int U_col = S(lvl)(leader_id); //Int U_row = LU_size(U_col)-1; - Int X_col = S[0][leader_id]; + Int X_col = S(0)(leader_id); Int X_row = l+1; //printf("=***== fill MY ID: %d LEADER ID: %d ===** \n", @@ -3987,7 +3986,7 @@ namespace BaskerNS { Int bl = l+1; - Int A_col = S[lvl][kid]; + Int A_col = S(lvl)(kid); /* printf("leader_id: %d kid: %d lvl: %d l: %d blk: %d %d \n", @@ -3996,16 +3995,16 @@ namespace BaskerNS */ Int my_row_leader = find_leader(kid, lvl-1); Int my_new_row = - S[bl][kid] - S[0][my_row_leader]; + S(bl)(kid) - S(0)(my_row_leader); - Int A_row = (lvl==l)?(2):S[bl][kid]%(LU_size(A_col)); - if((S[bl](kid)>14) && - (S[bl](kid)>LU_size(A_col)) && + Int A_row = (lvl==l)?(2):S(bl)(kid)%(LU_size(A_col)); + if((S(bl)(kid)>14) && + (S(bl)(kid)>LU_size(A_col)) && (lvl != 1)) { - Int tm = (S[bl][kid]+1)/16; - A_row = ((S[bl][kid]+1)-(tm*16))%LU_size(A_col); + Int tm = (S(bl)(kid)+1)/16; + A_row = ((S(bl)(kid)+1)-(tm*16))%LU_size(A_col); } /* diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp index 9c77c1f38994..68246464f757 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp @@ -134,7 +134,7 @@ namespace BaskerNS double barrier_time = 0; #endif - Int U_col = S[lvl][kid]; + Int U_col = S(lvl)(kid); Int U_row = 0; const Int scol = LU[U_col][U_row].scol; @@ -435,8 +435,8 @@ namespace 
BaskerNS for(Int l = 0; l < lvl; l++) { printf("OPS. KID : %d LVL: %d OPS : %d \n", - kid, l, thread_array[kid].ops_counts[l][0]); - thread_array[kid].ops_count[1][0] = 0; + kid, l, thread_array(kid).ops_counts[l][0]); + thread_array(kid).ops_count[1][0] = 0; } #endif @@ -460,15 +460,15 @@ namespace BaskerNS const Entry zero (0.0); //Get needed variables - const Int L_col = S[l][kid]; - const Int U_col = S[lvl][kid]; + const Int L_col = S(l)(kid); + const Int U_col = S(lvl)(kid); - Int my_row_leader = S[0][find_leader(kid,lvl-1)]; + Int my_row_leader = S(0)(find_leader(kid,lvl-1)); //Int my_new_row = // L_col - my_row_leader; Int U_row = L_col - my_row_leader; - const Int X_col = S[0][kid]; + const Int X_col = S(0)(kid); const Int X_row = l; //X_row = lower(L) //const Int col_idx_offset = 0; //we might be able to remove @@ -493,7 +493,7 @@ namespace BaskerNS } else { - Bp = &(thread_array[kid].C); + Bp = &(thread_array(kid).C); //printf("Using temp matrix, kid: %d\n", kid); //Bp->print(); } @@ -613,7 +613,7 @@ namespace BaskerNS //Count ops to show imbalance #ifdef BASKER_COUNT_OPS - thread_array[kid].ops_counts[0][l] += xnnz; + thread_array(kid).ops_counts[0][l] += xnnz; #endif //WE SHOUD DO A UNNZ COUNT @@ -649,17 +649,17 @@ namespace BaskerNS Int newsize = (unnz+U.nrow) * 1.2 ; - thread_array[kid].error_blk = U_col; - thread_array[kid].error_subblk = U_row; + thread_array(kid).error_blk = U_col; + thread_array(kid).error_subblk = U_row; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; }//if/else realloc } @@ -741,10 +741,10 @@ namespace BaskerNS std::cout << "----Error--- kid = " << kid << ": extra L[" << j << "]=" << 
X[j] << " with gperm( " << brow_g << " + " << j << " ) = " << t << std::endl; - thread_array[kid].error_type = BASKER_ERROR_OTHER; - thread_array[kid].error_blk = lvl; - thread_array[kid].error_subblk = l; - thread_array[kid].error_info = k; + thread_array(kid).error_type = BASKER_ERROR_OTHER; + thread_array(kid).error_blk = lvl; + thread_array(kid).error_subblk = l; + thread_array(kid).error_info = k; info = BASKER_ERROR; //BASKER_ASSERT(t != BASKER_MAX_IDX, "lower entry in U"); #endif @@ -864,11 +864,11 @@ namespace BaskerNS int lteam_size = pow(2, l); #ifdef BASKER_2DL - Int L_col = S[l][my_leader]; + Int L_col = S(l)(my_leader); Int L_row = 0; - Int U_col = S[lvl][kid]; - Int U_row = (lvl==1)?(kid%2):S[l][kid]%LU_size(U_col); - Int X_col = S[0][my_leader]; + Int U_col = S(lvl)(kid); + Int U_row = (lvl==1)?(kid%2):S(l)(kid)%LU_size(U_col); + Int X_col = S(0)(my_leader); Int X_row = l; //this will change for us Int col_idx_offset = 0; BASKER_MATRIX &U = LU[U_col][U_row]; @@ -959,12 +959,12 @@ namespace BaskerNS ) { - Int b = S[l][kid]; + Int b = S(l)(kid); BASKER_MATRIX &L = LL[b][0]; - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[team_leader].ews; - Int ws_size = thread_array[kid].iws_size; - Int ews_size = thread_array[team_leader].ews_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(team_leader).ews; + Int ws_size = thread_array(kid).iws_size; + Int ews_size = thread_array(team_leader).ews_size; #ifdef BASKER_DEBUG_NFACTOR_COL if(kid>3) @@ -1066,11 +1066,11 @@ namespace BaskerNS const Mag normA_blk = BTF_A.anorm; //Get needed variables - const Int L_col = S[lvl][kid]; + const Int L_col = S(lvl)(kid); const Int L_row = 0; - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); const Int U_row = LU_size(U_col)-1; - const Int X_col = S[0][kid]; + const Int X_col = S(0)(kid); //Int col_idx_offset = 0; //can we get rid of now? 
#ifdef BASKER_DEBUG_NFACTOR_COL @@ -1083,7 +1083,7 @@ namespace BaskerNS BASKER_MATRIX &L = LL[L_col][L_row]; BASKER_MATRIX &U = LU[U_col][U_row]; - BASKER_MATRIX &B = thread_array[kid].C; + BASKER_MATRIX &B = thread_array(kid).C; #ifdef BASKER_DEBUG_NFACTOR_COL if(kid >= 0) @@ -1237,7 +1237,7 @@ namespace BaskerNS #endif #ifdef BASKER_OPS_COUNT - thread_array[kid].ops_counts[0][l] += xnnz; + thread_array(kid).ops_counts[0][l] += xnnz; #endif t_back_solve(kid, lvl,l+1, k, top, xnnz); // note: l not lvl given @@ -1327,10 +1327,10 @@ namespace BaskerNS X(maxindex) = pivot; } else { // replace-tiny-pivot not requested, or the current column is structurally empty after elimination - thread_array[kid].error_type = BASKER_ERROR_SINGULAR; - thread_array[kid].error_blk = L_col; - thread_array[kid].error_subblk = -1; - thread_array[kid].error_info = k; + thread_array(kid).error_type = BASKER_ERROR_SINGULAR; + thread_array(kid).error_blk = L_col; + thread_array(kid).error_subblk = -1; + thread_array(kid).error_info = k; return BASKER_ERROR; } } else if (Options.replace_tiny_pivot && normA_blk > abs(zero) && abs(pivot) < normA_blk * sqrt(eps)) { @@ -1374,17 +1374,17 @@ namespace BaskerNS //cout << " > L_col = " << L_col << " L_row = " << L_row << endl; } - thread_array[kid].error_blk = L_col; - thread_array[kid].error_subblk = -1; + thread_array(kid).error_blk = L_col; + thread_array(kid).error_subblk = -1; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } } @@ -1399,17 +1399,17 @@ namespace BaskerNS << endl; } - thread_array[kid].error_blk = U_col; - thread_array[kid].error_subblk = U_row; + thread_array(kid).error_blk = U_col; + 
thread_array(kid).error_subblk = U_row; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } } @@ -1640,11 +1640,11 @@ namespace BaskerNS const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S[lvl][leader_id]; + const Int L_col = S(lvl)(leader_id); Int L_row = 0; - const Int U_col = S[lvl][leader_id]; + const Int U_col = S(lvl)(leader_id); Int U_row = LU_size(U_col)-1; - Int X_col = S[0][leader_id]; + Int X_col = S(0)(leader_id); Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? @@ -1743,15 +1743,15 @@ namespace BaskerNS //Setup - Int A_col = S[lvl][kid]; - Int A_row = (lvl==1)?(2):S[l+1][kid]%(LU_size(A_col)); + Int A_col = S(lvl)(kid); + Int A_row = (lvl==1)?(2):S(l+1)(kid)%(LU_size(A_col)); BASKER_MATRIX &B = AVM[A_col][A_col]; - const Int my_idx = S[0][kid]; + const Int my_idx = S(0)(kid); team_leader = find_leader(kid, l); - const Int leader_idx = S[0][team_leader]; - Int loop_col_idx = S[l][kid]; + const Int leader_idx = S(0)(team_leader); + Int loop_col_idx = S(l)(kid); #ifdef BASKER_DEBUG_NFACTOR_COL printf("Called t_blk_col_copy_atomic kid: %d " , kid); @@ -1909,15 +1909,15 @@ namespace BaskerNS //Setup - Int A_col = S[lvl][kid]; - Int A_row = (lvl==1)?(2):S[l+1][kid]%(LU_size(A_col)); + Int A_col = S(lvl)(kid); + Int A_row = (lvl==1)?(2):S(l+1)(kid)%(LU_size(A_col)); BASKER_MATRIX &B = AVM[A_col][A_col]; - const Int my_idx = S[0][kid]; + const Int my_idx = S(0)(kid); team_leader = find_leader(kid, l); - const Int leader_idx = S[0][team_leader]; - Int loop_col_idx = S[l][kid]; + const Int leader_idx = S(0)(team_leader); + Int loop_col_idx = S(l)(kid); #ifdef 
BASKER_DEBUG_NFACTOR_COL printf("Called t_blk_col_copy_atomic kid: %d " , kid); @@ -2073,8 +2073,8 @@ namespace BaskerNS //printf("-----------------copy_update_matrx----------"); //printf("\n\n\n\n"); - Int leader_idx = S[0][kid]; - BASKER_MATRIX &C = thread_array[kid].C; + Int leader_idx = S(0)(kid); + BASKER_MATRIX &C = thread_array(kid).C; Int nnz = 0; //COME BACK HERE @@ -2089,8 +2089,8 @@ namespace BaskerNS // for(Int bl = l+1; bl < last_blk; bl++) { Int bl = l+1; - Int A_col = S[lvl][kid]; - Int A_row = (lvl==1)?(2):S[bl][kid]%(LU_size(A_col)); + Int A_col = S(lvl)(kid); + Int A_row = (lvl==1)?(2):S(bl)(kid)%(LU_size(A_col)); Int CM_idx = kid; BASKER_MATRIX *Bp; @@ -2173,8 +2173,8 @@ namespace BaskerNS // l, last_blk, kid); for(Int bl=l+1; bl BB; - BB.Barrier(thread_array[leader_kid].token[sublvl][function_n], - thread_array[leader_kid].token[sublvl][1], + BB.Barrier(thread_array(leader_kid).token[sublvl][function_n], + thread_array(leader_kid).token[sublvl][1], size); */ } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp index 342835bd640c..4389365a84d6 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp @@ -135,7 +135,7 @@ namespace BaskerNS // 3) t_lower_col_factor : factor A(7,7), sequential // 4) t_lower_col_factor_offdiag2 : compute L(8:end, 7) - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); const Int U_row = 0; Int ncol = LU[U_col][U_row].ncol; Int my_leader = find_leader(kid, 0); @@ -181,7 +181,7 @@ namespace BaskerNS t_basker_barrier(thread, kid, my_leader, b_size, 0, LU[U_col][U_row].scol, 0); for(Int tid = 0; tid < num_threads; tid++) { - if (thread_array[tid].error_type != BASKER_SUCCESS) { + if (thread_array(tid).error_type != BASKER_SUCCESS) { info = BASKER_ERROR; } } @@ -250,7 +250,7 @@ namespace BaskerNS t_basker_barrier(thread, 
kid, my_leader, b_size, 3, LU[U_col][U_row].scol, 0); for(Int ti = 0; ti < num_threads; ti++) { - if (thread_array[kid].error_type != BASKER_SUCCESS) { + if (thread_array(kid).error_type != BASKER_SUCCESS) { info = BASKER_ERROR; } } @@ -336,7 +336,7 @@ namespace BaskerNS t_basker_barrier(thread, kid, my_leader, b_size, 4, k, lvl-1); for(Int tid = 0; tid < num_threads; tid++) { - if (thread_array[tid].error_type != BASKER_SUCCESS) { + if (thread_array(tid).error_type != BASKER_SUCCESS) { info = BASKER_ERROR; } } @@ -395,7 +395,7 @@ namespace BaskerNS #ifdef BASKER_TIMER double time_factot = timer.seconds(); if((kid%(Int)(pow(2,lvl))) == 0) { - const Int L_col = S[lvl][kid]; + const Int L_col = S(lvl)(kid); const Int L_row = LU_size(U_col)-1; printf("Time Lower-Col(%d): %lf, n = %d, nnz(L) = %d, nnz(U) = %d \n", (int)kid, time_factot, @@ -446,7 +446,7 @@ namespace BaskerNS #endif //This will do the correct spmv - if(thread_array[kid].error_type == BASKER_ERROR_NOERROR) { + if(thread_array(kid).error_type == BASKER_ERROR_NOERROR) { t_upper_col_factor_offdiag2(kid, lvl, sl,l, k, lower); } //Barrier--Start @@ -461,7 +461,7 @@ namespace BaskerNS //Barrier--End if(kid%((Int)pow(2,sl)) == 0 && - thread_array[kid].error_type == BASKER_ERROR_NOERROR) { + thread_array(kid).error_type == BASKER_ERROR_NOERROR) { t_dense_blk_col_copy_atomic2(kid, my_leader, lvl, sl, l, k, lower); } @@ -477,7 +477,7 @@ namespace BaskerNS #endif }//over all sublevels - if(thread_array[kid].error_type == BASKER_ERROR_NOERROR) { + if(thread_array(kid).error_type == BASKER_ERROR_NOERROR) { t_dense_copy_update_matrix2(kid, my_leader, lvl, l, k); } }//end t_add_add @@ -507,10 +507,10 @@ namespace BaskerNS return; } - Int my_row_leader = S[0][find_leader(kid,lvl-1)]; - const Int L_col = S[sl][my_leader]; - const Int U_col = S[lvl][kid]; - const Int X_col = S[0][my_leader]; + Int my_row_leader = S(0)(find_leader(kid,lvl-1)); + const Int L_col = S(sl)(my_leader); + const Int U_col = S(lvl)(kid); + const 
Int X_col = S(0)(my_leader); Int L_row = l-sl+1; //Might have to think about th Int U_row = L_col-my_row_leader; Int X_row = l+1; //this will change for us @@ -588,10 +588,10 @@ namespace BaskerNS //Setup //printf("DEBUG, kid: %d k: %d A_col: %d A_row: %d \n", // kid, k, A_col, A_row); - const Int my_idx = S[0][kid]; + const Int my_idx = S(0)(kid); //should remove either as a paramter or here Int team_leader = find_leader(kid, sl); - const Int leader_idx = S[0][team_leader]; + const Int leader_idx = S(0)(team_leader); #ifdef BASKER_DEBUG_NFACTOR_COL2 if(lower == BASKER_TRUE) { @@ -709,8 +709,8 @@ namespace BaskerNS //printf("\n\n\n\n"); const Entry zero (0.0); - const Int leader_idx = S[0][kid]; - BASKER_MATRIX &C = thread_array[kid].C; + const Int leader_idx = S(0)(kid); + BASKER_MATRIX &C = thread_array(kid).C; Int nnz = 0; //Over each blk @@ -724,10 +724,10 @@ namespace BaskerNS // X += B(:, k) { Int bl = l+1; - Int A_col = S[lvl][kid]; + Int A_col = S(lvl)(kid); - Int my_row_leader = S[0][find_leader(kid,lvl-1)]; - Int A_row = S[bl][kid] - my_row_leader; + Int my_row_leader = S(0)(find_leader(kid,lvl-1)); + Int A_row = S(bl)(kid) - my_row_leader; BASKER_MATRIX *Bp; if(A_row != (LU_size(A_col)-1)) @@ -875,13 +875,13 @@ namespace BaskerNS const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S[lvl][leader_id]; - const Int U_col = S[lvl][leader_id]; + const Int L_col = S(lvl)(leader_id); + const Int U_col = S(lvl)(leader_id); Int L_row = 0; Int U_row = LU_size(U_col)-1; - Int X_col = S[0][leader_id]; + Int X_col = S(0)(leader_id); Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? 
diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp index ee72c5d32c7b..02fde7c7ccad 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp @@ -83,7 +83,7 @@ namespace BaskerNS ) { - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); Int U_row = 0; //const Int scol = LU(U_col)(U_row).scol; @@ -577,12 +577,12 @@ namespace BaskerNS ) { l = l+1; - Int my_token = S[l][kid]; + Int my_token = S(l)(kid); Int my_loc = kid; while((my_loc > 0)) { my_loc--; - if(S[l][my_loc] != my_token) + if(S(l)(my_loc) != my_token) { my_loc++; break; @@ -615,14 +615,14 @@ namespace BaskerNS //Get needed variables - const Int L_col = S[l][kid]; + const Int L_col = S(l)(kid); // const Int L_row = 0; //NDE - warning: unused - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); Int my_row_leader = find_leader(kid,lvl-1); //Int my_new_row = // L_col - S(0)(my_row_leader); - Int U_row = L_col - S[0][my_row_leader]; + Int U_row = L_col - S(0)(my_row_leader); /* Int U_row = (lvl==1)?(kid%2):S(l)(kid)%LU_size(U_col); @@ -642,7 +642,7 @@ namespace BaskerNS //U_row = my_new_row; - const Int X_col = S[0][kid]; + const Int X_col = S(0)(kid); const Int X_row = l; //X_row = lower(L) //const Int col_idx_offset = 0; //we might be able to remove @@ -664,7 +664,7 @@ namespace BaskerNS } else { - Bp = &(thread_array[kid].C); + Bp = &(thread_array(kid).C); } BASKER_MATRIX &B = *Bp; //if(kid ==0) @@ -920,18 +920,18 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { //printf("HERE\n"); - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_blk = U_col; - thread_array[kid].error_subblk = U_row; - 
thread_array[kid].error_info = newsize; + thread_array(kid).error_blk = U_col; + thread_array(kid).error_subblk = U_row; + thread_array(kid).error_info = newsize; return BASKER_ERROR; }//if/else realloc }//if need to realloc @@ -1086,26 +1086,26 @@ namespace BaskerNS // kid, lvl, sl, l); } - const Int L_col = S[sl][my_leader]; + const Int L_col = S(sl)(my_leader); Int L_row = l-sl+1; //Might have to think about th - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - L_col - S[0][my_row_leader]; + L_col - S(0)(my_row_leader); // Int U_row = my_new_row; Int U_row = - (lvl==1)?(kid%2):S[sl][kid]%LU_size(U_col); - if((S[sl][kid] > 14) && - (S[sl][kid] > LU_size(U_col)) && + (lvl==1)?(kid%2):S(sl)(kid)%LU_size(U_col); + if((S(sl)(kid) > 14) && + (S(sl)(kid) > LU_size(U_col)) && (lvl != 1)) { //printf("lower offdiag new num, %d %d \n", // S(sl)(kid), LU_size(U_col)); - Int tm = (S[sl][kid]+1)/16; - U_row = ((S[sl][kid]+1) - (tm*16))%LU_size(U_col); + Int tm = (S(sl)(kid)+1)/16; + U_row = ((S(sl)(kid)+1) - (tm*16))%LU_size(U_col); } //printf("UFF kid:%d U: %d %d new: %d leader: %d %d lvl: %d l: %d sl: %d \n", @@ -1116,7 +1116,7 @@ namespace BaskerNS //JDB PASS TEST U_row = my_new_row; - const Int X_col = S[0][my_leader]; + const Int X_col = S(0)(my_leader); Int X_row = l+1; //this will change for us //Int col_idx_offset = 0; @@ -1256,18 +1256,18 @@ namespace BaskerNS return; } - const Int L_col = S[sl][my_leader]; + const Int L_col = S(sl)(my_leader); Int L_row = l-sl+1; //Might have to think about th - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - L_col - S[0][my_row_leader]; + L_col - S(0)(my_row_leader); Int U_row = 0; U_row = my_new_row; - const Int X_col = S[0][my_leader]; + const Int X_col = S(0)(my_leader); Int X_row = l+1; //this will change for us Int col_idx_offset = 0; @@ -1276,11 +1276,11 @@ namespace 
BaskerNS //Need to give them the output pattern - Int U_pattern_col = S[lvl][kid]; + Int U_pattern_col = S(lvl)(kid); Int my_pattern_leader = find_leader_inc_lvl(kid,l); - Int U_pattern_row = S[l+1][my_pattern_leader] - - S[0][my_row_leader]; + Int U_pattern_row = S(l+1)(my_pattern_leader) - + S(0)(my_row_leader); /* printf("Test mypleader: %d myrowleader: %d kid: %d\n", @@ -1292,7 +1292,7 @@ namespace BaskerNS */ - Int L_pattern_col = S[lvl][kid]; + Int L_pattern_col = S(lvl)(kid); Int L_pattern_row = BASKER_MAX_IDX; if(lower == BASKER_TRUE) { @@ -1418,26 +1418,26 @@ namespace BaskerNS return; } - const Int L_col = S[sl][my_leader]; + const Int L_col = S(sl)(my_leader); Int L_row = l-sl+1; //Might have to think about th - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - L_col - S[0][my_row_leader]; + L_col - S(0)(my_row_leader); // Int U_row = my_new_row; Int U_row = - (lvl==1)?(kid%2):S[sl][kid]%LU_size(U_col); - if((S[sl][kid] > 14) && - (S[sl][kid] > LU_size(U_col)) && + (lvl==1)?(kid%2):S(sl)(kid)%LU_size(U_col); + if((S(sl)(kid) > 14) && + (S(sl)(kid) > LU_size(U_col)) && (lvl != 1)) { - Int tm = (S[sl][kid]+1)/16; - U_row = ((S[sl][kid]+1) - (tm*16))%LU_size(U_col); + Int tm = (S(sl)(kid)+1)/16; + U_row = ((S(sl)(kid)+1) - (tm*16))%LU_size(U_col); } // printf("lowerspmv kid: %d U: %d %d new %d leader: %d %d lvl: %d %d %d \n", @@ -1448,7 +1448,7 @@ namespace BaskerNS U_row = my_new_row; - const Int X_col = S[0][my_leader]; + const Int X_col = S(0)(my_leader); Int X_row = l+1; //this will change for us Int col_idx_offset = 0; @@ -1538,8 +1538,8 @@ namespace BaskerNS ) { - const Int leader_idx = S[0][kid]; - BASKER_MATRIX &C = thread_array[kid].C; + const Int leader_idx = S(0)(kid); + BASKER_MATRIX &C = thread_array(kid).C; Int nnz = 0; // Int gbrow = 0; //NDE - warning: unused @@ -1549,11 +1549,11 @@ namespace BaskerNS { //Copy B -> C Int bl = l+1; - Int A_col = S[lvl][kid]; + Int 
A_col = S(lvl)(kid); Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - S[bl][kid] - S[0][my_row_leader]; + S(bl)(kid) - S(0)(my_row_leader); Int A_row = 0; A_row = my_new_row; @@ -1653,8 +1653,8 @@ namespace BaskerNS ) { - const Int leader_idx = S[0][kid]; - BASKER_MATRIX &C = thread_array[kid].C; + const Int leader_idx = S(0)(kid); + BASKER_MATRIX &C = thread_array(kid).C; Int nnz = 0; Int gbrow = 0; @@ -1672,24 +1672,24 @@ namespace BaskerNS { //Copy B -> C Int bl = l+1; - Int A_col = S[lvl][kid]; + Int A_col = S(lvl)(kid); Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - S[bl][kid] - S[0][my_row_leader]; + S(bl)(kid) - S(0)(my_row_leader); //Int A_row = my_new_row; - Int A_row = (lvl==1)?(2):S[bl][kid]%(LU_size(A_col)); - if((S[bl][kid] > 14) && - (S[bl][kid] > LU_size(A_col)) && + Int A_row = (lvl==1)?(2):S(bl)(kid)%(LU_size(A_col)); + if((S(bl)(kid) > 14) && + (S(bl)(kid) > LU_size(A_col)) && (lvl != 1)) { //printf("test cm %d %d %d \n", // kid, S(bl)(kid), LU_size(A_col)); - Int tm = (S[bl][kid]+1)/16; - A_row = ((S[bl][kid]+1) - (tm*16))%LU_size(A_col); + Int tm = (S(bl)(kid)+1)/16; + A_row = ((S(bl)(kid)+1) - (tm*16))%LU_size(A_col); } @@ -1883,12 +1883,12 @@ namespace BaskerNS ) { //Get needed variables - const Int L_col = S[lvl][kid]; + const Int L_col = S(lvl)(kid); const Int L_row = 0; - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); const Int U_row = LU_size(U_col)-1; - const Int X_col = S[0][kid]; + const Int X_col = S(0)(kid); //Int col_idx_offset = 0; //can we get rid of now? 
@@ -1905,7 +1905,7 @@ namespace BaskerNS BASKER_MATRIX &L = LL[L_col][L_row]; BASKER_MATRIX &U = LU[U_col][U_row]; - BASKER_MATRIX &B = thread_array[kid].C; + BASKER_MATRIX &B = thread_array(kid).C; #ifdef BASKER_DEBUG_NFACTOR_COL if(kid >= 0) @@ -2201,17 +2201,17 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_blk = L_col; - thread_array[kid].error_subblk = -1; - thread_array[kid].error_info = newsize; + thread_array(kid).error_blk = L_col; + thread_array(kid).error_subblk = -1; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } } @@ -2229,16 +2229,16 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; } else { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_blk = U_col; - thread_array[kid].error_subblk = U_row; - thread_array[kid].error_info = newsize; + thread_array(kid).error_blk = U_col; + thread_array(kid).error_subblk = U_row; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } } @@ -2462,11 +2462,11 @@ namespace BaskerNS const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S[lvl][leader_id]; + const Int L_col = S(lvl)(leader_id); Int L_row = 0; - const Int U_col = S[lvl][leader_id]; + const Int U_col = S(lvl)(leader_id); Int U_row = LU_size(U_col)-1; - Int X_col = S[0][leader_id]; + Int X_col = S(0)(leader_id); Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? 
@@ -2585,9 +2585,9 @@ namespace BaskerNS //const Int lteam_size = pow(2,l+1); //NDE - warning: unused // const Int L_col = S(lvl)(leader_id); //NDE - warning: unused // Int L_row = 0; //NDE - warning: unused - const Int U_col = S[lvl][leader_id]; + const Int U_col = S(lvl)(leader_id); Int U_row = LU_size(U_col)-1; - Int X_col = S[0][leader_id]; + Int X_col = S(0)(leader_id); Int X_row = l+1; //Int col_idx_offset = 0; //can get rid of?//NDE - warning: unused @@ -2621,11 +2621,11 @@ namespace BaskerNS const BASKER_BOOL lower ) { - const Int my_idx = S[0][kid]; + const Int my_idx = S(0)(kid); //should remove either as a paramter or here Int team_leader = find_leader(kid, sl); - const Int leader_idx = S[0][team_leader]; + const Int leader_idx = S(0)(team_leader); //If I an not a leader, then need to copy over if(kid != team_leader) @@ -2682,7 +2682,7 @@ namespace BaskerNS - Int U_pattern_col = S[lvl][kid]; + Int U_pattern_col = S(lvl)(kid); Int U_pattern_row = BASKER_MAX_IDX; if(blk == l+1) @@ -2691,11 +2691,11 @@ namespace BaskerNS //S(0)(find_leader(kid,lvl)); //U_pattern_row = S(l+1)(kid) - //S(0)(my_pattern_leader); - U_pattern_row = S[l+1][kid] - - S[0][find_leader(kid,lvl-1)]; + U_pattern_row = S(l+1)(kid) - + S(0)(find_leader(kid,lvl-1)); } - Int L_pattern_col = S[lvl][kid]; + Int L_pattern_col = S(lvl)(kid); Int L_pattern_row = BASKER_MAX_IDX; if(lower == BASKER_TRUE) { @@ -2769,11 +2769,11 @@ namespace BaskerNS //BASKER_MATRIX &B = AVM(A_col)(A_col); - const Int my_idx = S[0][kid]; + const Int my_idx = S(0)(kid); //should remove either as a paramter or here Int team_leader = find_leader(kid, sl); - const Int leader_idx = S[0][team_leader]; + const Int leader_idx = S(0)(team_leader); //Int loop_col_idx = S(l)(kid); NU //#ifdef BASKER_DEBUG_NFACTOR_COL2 diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp index dc59708fe158..b87a0f48eadf 100644 --- 
a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp @@ -294,9 +294,9 @@ namespace BaskerNS printf("Error: NaN diag in single factor\n"); } } - thread_array[kid].error_type = BASKER_ERROR_SINGULAR; - thread_array[kid].error_blk = c; - thread_array[kid].error_info = k; + thread_array(kid).error_type = BASKER_ERROR_SINGULAR; + thread_array(kid).error_blk = c; + thread_array(kid).error_info = k; return BASKER_ERROR; } @@ -373,9 +373,9 @@ namespace BaskerNS Mag rmin_ (0.0); //workspace - Int ws_size = thread_array[kid].iws_size; - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; + Int ws_size = thread_array(kid).iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; Int *color = &(ws(0)); Int *pattern = &(color[ws_size]); @@ -580,9 +580,9 @@ namespace BaskerNS << " Column: " << k << std::endl; } - thread_array[kid].error_type = BASKER_ERROR_NAN; - thread_array[kid].error_blk = c; - thread_array[kid].error_info = k; + thread_array(kid).error_type = BASKER_ERROR_NAN; + thread_array(kid).error_blk = c; + thread_array(kid).error_info = k; return BASKER_ERROR; } absv = abs(value); @@ -714,9 +714,9 @@ namespace BaskerNS pivot = normA_blk * eps; X(maxindex) = pivot; } else { - thread_array[kid].error_type = BASKER_ERROR_SINGULAR; - thread_array[kid].error_blk = c; - thread_array[kid].error_info = k; + thread_array(kid).error_type = BASKER_ERROR_SINGULAR; + thread_array(kid).error_blk = c; + thread_array(kid).error_info = k; return BASKER_ERROR; } } @@ -780,16 +780,16 @@ namespace BaskerNS (long)btf_tabs(c), (long)btf_tabs(c+1), (long)(btf_tabs(c+1)-btf_tabs(c))); } - thread_array[kid].error_blk = c; + thread_array(kid).error_blk = c; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - 
thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } } @@ -804,16 +804,16 @@ namespace BaskerNS printf("blk: %ld column: %ld \n", (long)c, (long)k); } - thread_array[kid].error_blk = c; + thread_array(kid).error_blk = c; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } } @@ -991,8 +991,8 @@ namespace BaskerNS ) { //printf("=======LOCAL REACH BTF SHORT CALLED (pattern[top=%d - 1] = %d) =====\n",(int)top, (int)j); - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -1014,8 +1014,8 @@ namespace BaskerNS { //printf("=======LOCAL REACH BTF CALLED =====\n"); - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; /*{ printf("ws_size: %d \n", ws_size); @@ -1144,8 +1144,8 @@ namespace BaskerNS ) { - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; /* printf("ws_size: %d \n", ws_size); @@ -1289,9 +1289,9 @@ namespace BaskerNS { const Entry zero (0.0); - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + 
Int ws_size = thread_array(kid).iws_size; Int brow = L.srow; Int *color = &(ws(0)); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp index d48f0e720a7e..ef9bdb8084ef 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp @@ -285,8 +285,8 @@ int Basker::sfactor() for(Int ii=0; ii < split_num; ii++) { BASKER_ASSERT(A.ncol > 0, "Basker symmetric_sfactor assert: A.ncol malloc > 0 failed"); - MALLOC_INT_1DARRAY(gScol[ii], A.ncol); - init_value(gScol[ii], A.ncol, (Int)0); + MALLOC_INT_1DARRAY(gScol(ii), A.ncol); + init_value(gScol(ii), A.ncol, (Int)0); } @@ -298,8 +298,8 @@ int Basker::sfactor() for(Int ii=0; ii < split_num; ii++) { BASKER_ASSERT(A.nrow > 0, "sfactor A.nrow malloc"); - MALLOC_INT_1DARRAY(gSrow[ii], A.nrow); - init_value(gSrow[ii], A.nrow, (Int)0); + MALLOC_INT_1DARRAY(gSrow(ii), A.nrow); + init_value(gSrow(ii), A.nrow, (Int)0); } #ifdef BASKER_TIMER @@ -334,7 +334,7 @@ int Basker::sfactor() for(Int p = 0; p < num_threads; ++p) #endif { - Int blk = S[0][p]; + Int blk = S(0)(p); if(Options.verbose == BASKER_TRUE) { printf(" ============= DOMAIN BLK (p=%d) ============\n",(int)p); @@ -409,18 +409,18 @@ int Basker::sfactor() for(Int p = 0; p < num_threads; ++p) { //Do off diag - Int blk = S[0][p]; + Int blk = S(0)(p); #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[p]; #endif for(Int l =0; l < tree.nlvls; l++) { - Int U_col = S[l+1][p]; + Int U_col = S(l+1)(p); //Note: Need to think more about this flow //Should be subtracted by how many times in the //future - Int my_row_leader = S[0][find_leader(p,l)]; + Int my_row_leader = S(0)(find_leader(p,l)); //Int my_new_row = // blk - my_row_leader; Int U_row = blk-my_row_leader; @@ -436,16 +436,16 @@ int Basker::sfactor() Int off_diag = 1; //printf( " U_blk_sfactor(AVM(%d,%d))\n",U_col,U_row ); //U_blk_sfactor(AV[U_col][U_row], stree, 
- // gScol[l], gSrow[glvl],0); + // gScol(l), gSrow(glvl),0); #ifdef BASKER_TIMER timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST U_blk_sfactor(AVM[U_col][U_row], stree_p, - gScol[l], gSrow[glvl], off_diag); + gScol(l), gSrow(glvl), off_diag); #else U_blk_sfactor(AVM[U_col][U_row], stree, - gScol[l], gSrow[glvl], off_diag); + gScol(l), gSrow(glvl), off_diag); #endif #ifdef BASKER_TIMER time3 += timer1.seconds(); @@ -533,11 +533,11 @@ int Basker::sfactor() (long)U_col, (long)U_row, (long)lvl, (long)pp); #endif - Int U_col = S[lvl+1][ppp]; + Int U_col = S(lvl+1)(ppp); Int U_row = 0; //S_blk_sfactor(AL[U_col][U_row], stree, - //gScol[lvl], gSrow[pp]); + //gScol(lvl), gSrow(pp)); #ifdef BASKER_TIMER printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM[U_col][U_row].nrow,ALM[U_col][U_row].ncol,ALM[U_col][U_row].nnz ); fflush(stdout); @@ -545,10 +545,10 @@ int Basker::sfactor() #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[pp]; S_blk_sfactor(ALM[U_col][U_row], stree_p, - gScol[lvl], gSrow[pp]); + gScol(lvl), gSrow(pp)); #else S_blk_sfactor(ALM[U_col][U_row], stree, - gScol[lvl], gSrow[pp]); + gScol(lvl), gSrow(pp)); #endif #ifdef BASKER_TIMER printf( " >>> -> nnz = %d\n",ALM[U_col][U_row].nnz ); fflush(stdout); @@ -592,20 +592,20 @@ int Basker::sfactor() Int ppp; ppp = pp*pow(tree.nparts, lvl+1); - Int U_col = S[lvl+1][ppp]; + Int U_col = S(lvl+1)(ppp); Int U_row = 0; Int inner_blk = U_col; for(Int l = lvl+1; l < tree.nlvls; l++) { //printf( " --- pp = %d/%d, l = %d/%d ---\n",pp,p, l,tree.nlvls ); fflush(stdout); - U_col = S[l+1][ppp]; - U_row = S[lvl+1][ppp]%LU_size(U_col); + U_col = S(l+1)(ppp); + U_row = S(lvl+1)(ppp)%LU_size(U_col); - Int my_row_leader = S[0][find_leader(ppp,l)]; + Int my_row_leader = S(0)(find_leader(ppp,l)); //Int my_new_row = // S(lvl+1)(ppp) - my_row_leader; - U_row = S[lvl+1][ppp] - my_row_leader; + U_row = S(lvl+1)(ppp) - my_row_leader; #ifdef BASKER_DEBUG_SFACTOR printf("offida sep, lvl: %d 
l: %d U_col: %d U_row: %d \n", lvl, l, U_col, U_row); @@ -615,10 +615,10 @@ int Basker::sfactor() Int off_diag = 1; #ifdef SHYLU_BASKER_STREE_LIST U_blk_sfactor(AVM[U_col][U_row], stree_p, - gScol[l], gSrow[pp], off_diag); + gScol(l), gSrow(pp), off_diag); #else U_blk_sfactor(AVM[U_col][U_row], stree, - gScol[l], gSrow[pp], off_diag); + gScol(l), gSrow(pp), off_diag); #endif //In symmetric will not need @@ -656,8 +656,8 @@ int Basker::sfactor() for(Int ii = 0 ; ii < split_num; ++ii) { //printf("split\n"); - FREE(gScol[ii]); - FREE(gSrow[ii]); + FREE(gScol(ii)); + FREE(gSrow(ii)); } FREE(gScol); FREE(gSrow); @@ -2589,22 +2589,22 @@ int Basker::sfactor() for(Int i = 0 ; i < num_threads; i++) { - thread_array[i].iws_size = max_blk_size; - thread_array[i].ews_size = max_blk_size; + thread_array(i).iws_size = max_blk_size; + thread_array(i).ews_size = max_blk_size; //BASKER_ASSERT((thread_array(i).iws_size*thread_array(i).iws_mult) > 0, "Basker btf_last_dense assert: sfactor threads iws > 0 failed"); //BASKER_ASSERT((thread_array(i).ews_size*thread_array(i).ews_mult) > 0, "Basker btf_last_dense assert: sfactor threads ews > 0 failed"); #ifdef BASKER_TIMER printf("Malloc Thread: %d iws: %d \n", - i, (thread_array[i].iws_size* - thread_array[i].iws_mult)); + i, (thread_array(i).iws_size* + thread_array(i).iws_mult)); printf("Malloc Thread: %d ews: %d \n", - i, (thread_array[i].ews_size* - thread_array[i].ews_mult)); + i, (thread_array(i).ews_size* + thread_array(i).ews_mult)); #endif if (max_blk_size > 0) { - MALLOC_INT_1DARRAY(thread_array[i].iws, thread_array[i].iws_size*thread_array[i].iws_mult); - MALLOC_ENTRY_1DARRAY(thread_array[i].ews, thread_array[i].ews_size*thread_array[i].ews_mult); + MALLOC_INT_1DARRAY(thread_array(i).iws, thread_array(i).iws_size*thread_array(i).iws_mult); + MALLOC_ENTRY_1DARRAY(thread_array(i).ews, thread_array(i).ews_size*thread_array(i).ews_mult); } } } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp 
b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp index ec7774a43f13..890bc8a17fca 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp @@ -99,20 +99,20 @@ namespace BaskerNS for(Int p=0; p < num_threads; ++p) { - Int blk = S[0][p]; + Int blk = S(0)(p); sfactor_nd_dom_estimate(ALM[blk][0], LL[blk][0], LU[blk][LU_size(blk)-1]); for(Int l=0; l < tree.nlvls; l++) { - Int U_col = S[l+1][p]; + Int U_col = S(l+1)(p); Int my_row_leader = find_leader(p,l); Int my_new_row = - blk - S[0][my_row_leader]; + blk - S(0)(my_row_leader); - Int U_row = (l==0)?(p%2):S[0][p]%LU_size(U_col); + Int U_row = (l==0)?(p%2):S(0)(p)%LU_size(U_col); if((blk > 14) && (blk > LU_size(U_col)) && (l!=0)) @@ -138,7 +138,7 @@ namespace BaskerNS for(Int pp=0; pp < pow(tree.nparts, tree.nlvls-lvl-1); pp++) { Int ppp = pp*pow(tree.nparts, lvl+1); - Int U_col = S[lvl+1][ppp]; + Int U_col = S(lvl+1)(ppp); Int U_row = 0; sfactor_nd_sep_estimate(ALM[U_col][U_row], @@ -148,19 +148,19 @@ namespace BaskerNS Int innerblk = U_col; for(Int l = lvl+1; l < tree.nlvls; l++) { - U_col = S[l+1][ppp]; + U_col = S(l+1)(ppp); Int my_row_leader = find_leader(ppp,l); Int my_new_row = - S[lvl+1][ppp] - S[0][my_row_leader]; + S(lvl+1)(ppp) - S(0)(my_row_leader); - U_row = S[lvl+1][ppp]%LU_size(U_col); - if((S[lvl+1][ppp] > 14) && - (S[lvl+1][ppp] > LU_size(U_col)) + U_row = S(lvl+1)(ppp)%LU_size(U_col); + if((S(lvl+1)(ppp) > 14) && + (S(lvl+1)(ppp) > LU_size(U_col)) ) { - Int tm = (S[lvl+1][ppp]+1)/16; - U_row = ((S[lvl+1][ppp]+1) - + Int tm = (S(lvl+1)(ppp)+1)/16; + U_row = ((S(lvl+1)(ppp)+1) - (tm*16))%LU_size(U_col); } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp index 81e3c78c7f9c..74a478591636 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp +++ 
b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp @@ -118,7 +118,7 @@ namespace BaskerNS for(Int i =0; i < tree.nblks+1; i++) { BASKER_ASSERT(num_threads > 0, "tree num_threads"); - MALLOC_INT_1DARRAY(S[i], num_threads); + MALLOC_INT_1DARRAY(S(i), num_threads); } //this will want to be across all threads @@ -335,7 +335,7 @@ namespace BaskerNS l, t, lvl_counter ,lvl_idx, tree.nblks); #endif - S[l][t] = tree.lvlset[lvl_idx]; + S(l)(t) = tree.lvlset[lvl_idx]; if(lvl_counter >= (pow(tree.nparts,l)-1)) { lvl_idx++; @@ -356,7 +356,7 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - cout << S[l][t] << " , " ; + cout << S(l)(t) << " , " ; }//end over nhreads cout << endl; }//end over nlvls @@ -368,11 +368,11 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - Int s_element = S[l][t]; + Int s_element = S(l)(t); Int row_size = (tree.row_tabs[s_element+1] - tree.row_tabs[s_element]); - thread_array[t].iws_size += row_size; - thread_array[t].ews_size += row_size; + thread_array(t).iws_size += row_size; + thread_array(t).ews_size += row_size; }//end over threads }//end over lvls @@ -592,7 +592,7 @@ namespace BaskerNS l, t, lvl_counter ,lvl_idx, tree.nblks); #endif - S[l][t] = tree.lvlset[lvl_idx]; + S(l)(t) = tree.lvlset[lvl_idx]; if(lvl_counter >= (pow(tree.nparts,l)-1)) { lvl_idx++; @@ -611,7 +611,7 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - cout << S[l][t] << " , " ; + cout << S(l)(t) << " , " ; }//end over nhreads cout << endl; }//end over nlvls @@ -624,10 +624,10 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - Int s_element = S[l][t]; + Int s_element = S(l)(t); Int row_size = (tree.row_tabs[s_element+1] - tree.row_tabs[s_element]); - thread_array[t].iws_size += row_size; - thread_array[t].ews_size += row_size; + thread_array(t).iws_size += row_size; + thread_array(t).ews_size += row_size; }//end over threads }//end over lvls diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp 
b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp index c55c222ec7c7..193ecb11e24a 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp @@ -148,13 +148,13 @@ enum BASKER_INCOMPLETE_CODE #define BOOL_1DARRAY Kokkos::View #define BOOL_2DARRAY Kokkos::View -#define INT_2DARRAY std::vector -#define ENTRY_2DARRAY std::vector -#define MATRIX_1DARRAY std::vector -#define MATRIX_2DARRAY std::vector -#define MATRIX_VIEW_1DARRAY std::vector -#define MATRIX_VIEW_2DARRAY std::vector -#define THREAD_1DARRAY std::vector +#define INT_2DARRAY Kokkos::View +#define ENTRY_2DARRAY Kokkos::View +#define MATRIX_1DARRAY Kokkos::View +#define MATRIX_2DARRAY Kokkos::View +#define MATRIX_VIEW_1DARRAY Kokkos::View +#define MATRIX_VIEW_2DARRAY Kokkos::View +#define THREAD_1DARRAY Kokkos::View #define INT_1DARRAY_PAIRS Kokkos::View*, BASKER_EXE_SPACE> //Macro Memory Calls @@ -163,7 +163,6 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC malloc_pairs_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = INT_1DARRAY_PAIRS(BASKER_KOKKOS_NOINIT("pairs_1d"),s);*/ \ Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -192,8 +191,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0,"BASKER ASSERT MALLOC int_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = INT_2DARRAY(Kokkos::view_alloc("int_2d", Kokkos::SequentialHostInit),s);*/ \ - a.resize(s); \ + a = INT_2DARRAY(Kokkos::view_alloc("int_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -202,7 +200,6 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = ENTRY_1DARRAY(BASKER_KOKKOS_NOINIT("entry_1d"),s);*/ \ Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -212,8 +209,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, 
"BASKER ASSERT MALLOC entry_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = ENTRY_2DARRAY("entry_2d",s);*/ \ - a.resize(s); \ + a = ENTRY_2DARRAY(Kokkos::view_alloc("matrix_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -222,7 +218,6 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = BOOL_1DARRAY(BASKER_KOKKOS_NOINIT("bool_1d"), s);*/ \ Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -232,7 +227,6 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = BOOL_2DARRAY("bool_2d", s);*/ \ Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -242,8 +236,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = MATRIX_1DARRAY("matrix_1d",s)*/ \ - a.resize(s); \ + a = MATRIX_1DARRAY(Kokkos::view_alloc("matrix_1d", Kokkos::SequentialHostInit),s); \ + Kokkos::resize(a,s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -252,8 +246,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = MATRIX_2DARRAY("matrix_2d",s);*/ \ - a.resize(s); \ + a = MATRIX_2DARRAY(Kokkos::view_alloc("matrix_2d", Kokkos::SequentialHostInit),s); \ + Kokkos::resize(a,s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -262,8 +256,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = MATRIX_VIEW_1DARRAY("matrix_view_1d",s);*/ \ - a.resize(s); \ + a = MATRIX_VIEW_1DARRAY(Kokkos::view_alloc("matrix_view_1d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -272,8 +265,7 @@ enum BASKER_INCOMPLETE_CODE 
{ \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = MATRIX_VIEW_2DARRAY("matrix_view_2d",s);*/ \ - a.resize(s); \ + a = MATRIX_VIEW_2DARRAY(Kokkos::view_alloc("matrix_view_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -282,26 +274,12 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = THREAD_1DARRAY("thread_1d",s);*/ \ - a.resize(s); \ + a = THREAD_1DARRAY(Kokkos::view_alloc("thread_1d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ } -//RESIZE (with copy) -#define RESIZE_1DARRAY(a,os,s) \ - { \ - BASKER_ASSERT(s >= 0, "BASKER ASSERT RESIZE 1D ARRAY: size to alloc >= 0 fails"); \ - Kokkos::resize(a,s); \ - } -#define RESIZE_2DARRAY(a,os1,os2,s1,s2) \ - { \ - BASKER_ASSERT(s1 >= 0 && s2 >= 0, "BASKER ASSERT RESIZE 2D ARRAY: size to alloc >= 0 fails"); \ - Kokkos::resize(a,s1,s2); \ - } -#define RESIZE_INT_1DARRAY(a,os,s) RESIZE_1DARRAY(a,os,s) -#define RESIZE_ENTRY_1DARRAY(a,os,s) RESIZE_1DARRAY(a,os,s) //REALLOC (no copy) #define REALLOC_1DARRAY(a,os,s) \ { \ @@ -315,6 +293,7 @@ enum BASKER_INCOMPLETE_CODE } #define REALLOC_INT_1DARRAY(a,os,s) REALLOC_1DARRAY(a,os,s) #define REALLOC_ENTRY_1DARRAY(a,os,s) REALLOC_1DARRAY(a,os,s) + //Set values #define SET_INT_1DARRAY(a, b, s) \ { \ @@ -341,80 +320,67 @@ enum BASKER_INCOMPLETE_CODE #define FREE_INT_1DARRAY_PAIRS(a) \ { \ - /*a = INT_1DARRAY_PAIRS();*/ \ Kokkos::resize(a,0); \ } #define FREE_INT_1DARRAY(a) \ { \ - /*a = INT_1DARRAY();*/ \ Kokkos::resize(a,0); \ } #define FREE_INT_RANK2DARRAY(a) \ { \ - /*a = INT_RANK2DARRAY();*/ \ Kokkos::resize(a,0); \ } #define FREE_INT_2DARRAY(a,n) \ { \ - /*a = INT_2DARRAY();*/ \ - a.resize(0); \ + Kokkos::resize(a,0); \ } #define FREE_ENTRY_1DARRAY(a) \ { \ - /*a = ENTRY_1DARRAY();*/ \ Kokkos::resize(a,0); \ } #define 
FREE_ENTRY_2DARRAY(a,n) \ { \ - /*a = ENTRY_2DARRAY();*/ \ - a.resize(0); \ + Kokkos::resize(a,0); \ } #define FREE_BOOL_1DARRAY(a) \ { \ - /*a = BOOL_1DARRAY();*/ \ Kokkos::resize(a,0); \ } #define FREE_BOOL_2DARRAY(a,n) \ { \ - /*a = BOOL_2DARRAY();*/ \ Kokkos::resize(a,0); \ } #define FREE_MATRIX_1DARRAY(a) \ { \ - /*a = MATRIX_1DARRAY();*/ \ - a.resize(0); \ + Kokkos::resize(a,0); \ } #define FREE_MATRIX_2DARRAY(a,n) \ { \ - /*a = MATRIX_2DARRAY();*/ \ - a.resize(0); \ + Kokkos::resize(a,0); \ } #define FREE_MATRIX_VIEW_1DARRAY(a) \ { \ - /*a = MATRIX_VIEW_1DARRAY();*/ \ Kokkos::resize(a,0); \ } #define FREE_MATRIX_VIEW_2DARRAY(a,n) \ { \ - /*a = MATRIX_VIEW_2DARRAY();*/ \ - a.resize(0); \ + Kokkos::resize(a,0); \ } #define FREE_THREAD_1DARRAY(a) \ { \ - /*a = THREAD_1DARRAY();*/ \ - a.resize(0); \ + Kokkos::resize(a,0); \ } #else // not BASKER_KOKKOS @@ -456,11 +422,6 @@ enum BASKER_INCOMPLETE_CODE #define MALLOC_MATRIX_VIEW_1DARRAY(a,s) a = new BASKER_MATRIX_VIEW [s] #define MALLOC_MATRIX_VIEW_2DARRAY(a,s) a = new MATRIX_VIEW_1DARRAY[s] #define MALLOC_THREAD_1DARRAY(a,s) a = new BASKER_THREAD [s] -//Resize (copy old data) (come back and add) -#define RESIZE_1DARRAY(a,os,s) BASKER_NO_OP -#define RESIZE_2DARRAY(a,os1,os2,s1,s2) BASKER_NO_OP -#define RESIZE_INT_1DARRAY(a,os,s) BASKER_NO_OP -#define RESIZE_ENTRY_1DARRAY(a,os,s) BASKER_NO_OP //Realloc (dont copy old data) #define REALLOC_1DARRAY(a,os,s) BASKER_NO_OP #define REALLOC_2DARRAY(a,os1,os2,s1,s2) BASKER_NO_OP diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp index 3691d73c63ba..715ac1c13f5f 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp @@ -358,7 +358,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); for(Int row = 0; row < LL_size(b); row++) { @@ -378,7 +378,7 @@ namespace BaskerNS { 
if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", @@ -402,13 +402,13 @@ namespace BaskerNS LU[b][LU_size(b)-1].nnz = LU[b][LU_size(b)-1].mnnz; for(Int l = lvl+1; l < tree.nlvls+1; l++) { - Int U_col = S[l][kid]; + Int U_col = S(l)(kid); Int my_row_leader = find_leader(kid, l-1); Int my_new_row = - b - S[0][my_row_leader]; + b - S(0)(my_row_leader); - Int U_row = (l==1)?(kid%2):S[lvl][kid]%LU_size(U_col); + Int U_row = (l==1)?(kid%2):S(lvl)(kid)%LU_size(U_col); //JDB TEST PASS U_row = my_new_row; @@ -460,7 +460,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); for(Int row = 0; row < LL_size(b); row++) { @@ -524,7 +524,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", @@ -550,13 +550,13 @@ namespace BaskerNS for(Int l = lvl+1; l < tree.nlvls+1; l++) { - Int U_col = S[l][kid]; + Int U_col = S(l)(kid); Int my_row_leader = find_leader(kid, l-1); Int my_new_row = - b - S[0][my_row_leader]; + b - S(0)(my_row_leader); - Int U_row = (l==1)?(kid%2):S[lvl][kid]%LU_size(U_col); + Int U_row = (l==1)?(kid%2):S(lvl)(kid)%LU_size(U_col); if( (b > 14) && // NDE: Why is 14 specifically used here? 
(b > LU_size(U_col)) && @@ -640,7 +640,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); for(Int row = 0; row < LL_size(b); row++) { @@ -692,7 +692,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); #ifdef BASKER_DEBUG_INTI printf("AUM Factor init: %d %d, kid: %d nnz: %d nrow: %d ncol: %d \n", @@ -731,10 +731,10 @@ namespace BaskerNS //TEST Int my_leader = find_leader(kid,l-1); - Int my_leader_row = S[0][my_leader]; + Int my_leader_row = S(0)(my_leader); //Int my_col_size = pow(2,l); Not used Int my_new_row = - (S[lvl][kid] - my_leader_row); + (S(lvl)(kid) - my_leader_row); //my_new_row = my_new_row%my_col_size; /* @@ -745,7 +745,7 @@ namespace BaskerNS my_col_size, my_new_row); */ - Int U_col = S[l][kid]; + Int U_col = S(l)(kid); Int U_row = my_new_row; //Int U_row = (l==1)?(kid%2):S(lvl)(kid)%LU_size(U_col); @@ -823,7 +823,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); for(Int l = 0; l < LL_size(b); l++) { @@ -885,7 +885,7 @@ namespace BaskerNS //printf("C: size: %d kid: %d \n", // iws_size, kid); - //thread_array[kid].C.init_matrix("cwork", + //thread_array(kid).C.init_matrix("cwork", // 0, iws_size, // 0, 2, // iws_size*2); @@ -895,7 +895,7 @@ namespace BaskerNS } //Also workspace matrix //This could be made smaller - thread_array[kid].C.init_matrix("cwork", 0, max_sep_size, + thread_array(kid).C.init_matrix("cwork", 0, max_sep_size, 0, 2, max_sep_size*2); } //end if btf_tabs_offset != 0 @@ -905,19 +905,19 @@ namespace BaskerNS { // if any left over for BLK factorization if(Options.btf == BASKER_TRUE) { - Int iws_mult = thread_array[kid].iws_mult; - Int iws_size = thread_array[kid].iws_size; - Int ews_mult = thread_array[kid].ews_mult; - Int ews_size = thread_array[kid].ews_size; + Int iws_mult = thread_array(kid).iws_mult; + Int iws_size = thread_array(kid).iws_size; + Int ews_mult = 
thread_array(kid).ews_mult; + Int ews_size = thread_array(kid).ews_size; for(Int i=0; i < iws_mult*iws_size; i++) { - thread_array[kid].iws[i] = 0; + thread_array(kid).iws[i] = 0; } for(Int i = 0; i < ews_mult*ews_size; i++) { - thread_array[kid].ews[i] = 0.0; + thread_array(kid).ews[i] = 0.0; } } } @@ -928,23 +928,23 @@ namespace BaskerNS { if(btf_tabs_offset != 0) { - INT_1DARRAY &ws = thread_array[kid].iws; - ENTRY_1DARRAY &X = thread_array[kid].ews; - Int iws_size = thread_array[kid].iws_size; - Int iws_mult = thread_array[kid].iws_mult; - Int ews_size = thread_array[kid].ews_size; - Int ews_mult = thread_array[kid].ews_mult; + INT_1DARRAY &ws = thread_array(kid).iws; + ENTRY_1DARRAY &X = thread_array(kid).ews; + Int iws_size = thread_array(kid).iws_size; + Int iws_mult = thread_array(kid).iws_mult; + Int ews_size = thread_array(kid).ews_size; + Int ews_mult = thread_array(kid).ews_mult; } } printf("init_workspace 1d, kid: %d size: %d %d %d %d \n", kid, iws_mult, iws_size, ews_mult, ews_size); for(Int i=0; i< iws_mult*iws_size; i++) { - thread_array[kid].iws[i] = 0; + thread_array(kid).iws[i] = 0; } for(Int i = 0; i < ews_mult*ews_size; i++) { - thread_array[kid].ews[i] = 0; + thread_array(kid).ews[i] = 0; } #endif //endif def basker_2dl //return 0; @@ -2467,12 +2467,12 @@ namespace BaskerNS Int Basker::find_leader(Int kid, Int l) { l = l+1; - Int my_token = S[l][kid]; + Int my_token = S(l)(kid); Int my_loc = kid; while((my_loc > 0)) { my_loc--; - if(S[l][my_loc] != my_token) + if(S(l)(my_loc) != my_token) { my_loc++; break; From da3a195e7b4c9753da8628a830fcd57617b975f9 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Fri, 25 Oct 2024 20:27:04 -0600 Subject: [PATCH 11/20] ShyLU - Basker : replace brackes back to parenthesis Signed-off-by: iyamazaki --- .../basker/src/shylubasker_error_manager.hpp | 26 +-- .../basker/src/shylubasker_nfactor_blk.hpp | 94 ++++---- .../src/shylubasker_nfactor_blk_inc.hpp | 144 ++++++------ .../basker/src/shylubasker_nfactor_col.hpp | 
180 +++++++-------- .../basker/src/shylubasker_nfactor_col2.hpp | 40 ++-- .../src/shylubasker_nfactor_col_inc.hpp | 110 ++++----- .../basker/src/shylubasker_nfactor_diag.hpp | 8 +- .../basker/src/shylubasker_sfactor.hpp | 86 +++---- .../basker/src/shylubasker_sfactor_inc.hpp | 28 +-- .../basker/src/shylubasker_solve_rhs.hpp | 16 +- .../basker/src/shylubasker_solve_rhs_tr.hpp | 16 +- .../basker/src/shylubasker_stats.hpp | 10 +- .../basker/src/shylubasker_tree.hpp | 28 +-- .../basker/src/shylubasker_util.hpp | 214 +++++++++--------- 14 files changed, 490 insertions(+), 510 deletions(-) diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp index d9695c6e5c78..cd2c9f57bf0a 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp @@ -94,8 +94,8 @@ namespace BaskerNS { Int blkcol = thread_array(ti).error_blk; Int blkUrow = LU_size(blkcol)-1; - if(LL[blkcol][0].nnz >= - LU[blkcol][blkUrow].nnz) + if(LL(blkcol)(0).nnz >= + LU(blkcol)(blkUrow).nnz) { resize_U = thread_array(ti).error_info; } @@ -116,7 +116,7 @@ namespace BaskerNS std::cout << " ++ resize L( tid = " << ti << " ): new size = " << resize_L << std::endl; } BASKER_MATRIX &L = - LL[thread_array(ti).error_blk][thread_array(ti).error_subblk]; + LL(thread_array(ti).error_blk)(thread_array(ti).error_subblk); REALLOC_INT_1DARRAY(L.row_idx, L.nnz, resize_L); @@ -142,7 +142,7 @@ namespace BaskerNS std::cout << " ++ resize U( tid = " << ti << " ): new size = " << resize_U << std::endl; } BASKER_MATRIX &U = - LU[thread_array(ti).error_blk][0]; + LU(thread_array(ti).error_blk)(0); REALLOC_INT_1DARRAY(U.row_idx, U.nnz, resize_U); @@ -153,7 +153,7 @@ namespace BaskerNS U.nnz = resize_U; //Still need to clear pend BASKER_MATRIX &L = - LL[thread_array(ti).error_blk][0]; + LL(thread_array(ti).error_blk)(0); L.clear_pend(); } @@ -167,7 
+167,7 @@ namespace BaskerNS sb++) { BASKER_MATRIX &SL = - LL[thread_array(ti).error_blk][sb]; + LL(thread_array(ti).error_blk)(sb); for(Int i = 0; i < SL.iws_size*SL.iws_mult; ++i) { SL.iws(i) = (Int) 0; @@ -307,7 +307,7 @@ namespace BaskerNS { const Int tsb = (-1*thread_array(ti).error_subblk)-1; BASKER_MATRIX &L = - LL[thread_array(ti).error_blk][tsb]; + LL(thread_array(ti).error_blk)(tsb); REALLOC_INT_1DARRAY(L.row_idx, L.nnz, resize_L); @@ -324,7 +324,7 @@ namespace BaskerNS { const Int tsb = thread_array(ti).error_subblk; BASKER_MATRIX &U = - LU[thread_array(ti).error_blk][tsb]; + LU(thread_array(ti).error_blk)(tsb); REALLOC_INT_1DARRAY(U.row_idx, U.nnz, resize_U); @@ -352,7 +352,7 @@ namespace BaskerNS //Clear workspace, whole column for(Int sb = 0; sb < LL_size(blk); sb++) { - BASKER_MATRIX &SL = LL[blk][sb]; + BASKER_MATRIX &SL = LL(blk)(sb); for(Int i = 0; i < SL.iws_size*SL.iws_mult; ++i) { SL.iws(i) = (Int) 0; @@ -372,7 +372,7 @@ namespace BaskerNS Int blk = S(error_sep_lvl)(p); //if(LL(blk)(0).w_fill == BASKER_TRUE) { - BASKER_MATRIX &TM = LL[blk][0]; + BASKER_MATRIX &TM = LL(blk)(0); //printf( " > p=%d: scol_top = %d, scol = %d, ncol = %d\n",p,scol_top,TM.scol,TM.ncol ); for(Int i = scol_top + TM.scol; i < scol_top + (TM.scol+TM.ncol); i++) { @@ -386,7 +386,7 @@ namespace BaskerNS //Note, will have to clear the perm in all sep blk in that level //Clear permuation BASKER_MATRIX &SL = - LL[thread_array(ti).error_blk][0]; + LL(thread_array(ti).error_blk)(0); //printf( " + scol_top = %d, srow = %d, nrowl = %d\n",scol_top,SL.srow,SL.nrow ); for(Int i = scol_top + SL.srow; i < scol_top + (SL.srow+SL.nrow); i++) { @@ -512,7 +512,7 @@ namespace BaskerNS } //Resize L - BASKER_MATRIX &L = (c >= btab ? LBTF[c-btab] : L_D[c]); + BASKER_MATRIX &L = (c >= btab ? LBTF(c-btab) : L_D(c)); L.clear_pend(); REALLOC_INT_1DARRAY(L.row_idx, L.nnz, @@ -533,7 +533,7 @@ namespace BaskerNS } //Resize U - BASKER_MATRIX &U = (c >= btab ? 
UBTF[c-btab] : U_D[c]); + BASKER_MATRIX &U = (c >= btab ? UBTF(c-btab) : U_D(c)); REALLOC_INT_1DARRAY(U.row_idx, U.nnz, thread_array(ti).error_info); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp index 030d526299a1..2e0434796e33 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp @@ -150,14 +150,14 @@ namespace BaskerNS const Mag normA_blk = BTF_A.anorm; Int b = S(0)(kid); //Which blk from schedule - BASKER_MATRIX &L = LL[b][0]; - BASKER_MATRIX &U = LU[b][LU_size(b)-1]; - BASKER_MATRIX &M = ALM[b][0]; //A->blk + BASKER_MATRIX &L = LL(b)(0); + BASKER_MATRIX &U = LU(b)(LU_size(b)-1); + BASKER_MATRIX &M = ALM(b)(0); //A->blk #ifdef BASKER_2DL //printf("Accessing blk: %d kid: %d \n", b, kid); - INT_1DARRAY ws = LL[b][0].iws; - ENTRY_1DARRAY X = LL[b][0].ews; - Int ws_size = LL[b][0].iws_size; + INT_1DARRAY ws = LL(b)(0).iws; + ENTRY_1DARRAY X = LL(b)(0).ews; + Int ws_size = LL(b)(0).iws_size; #else //else if BASKER_2DL INT_1DARRAY ws = thread_array(kid).iws; ENTRY_1DARRAY X = thread_array(kid).ews; @@ -983,8 +983,8 @@ namespace BaskerNS //Setup variables const Int wsb = S(0)(kid); - INT_1DARRAY ws = LL[wsb][l].iws; - const Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + const Int ws_size = LL(wsb)(l).iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -1014,7 +1014,7 @@ namespace BaskerNS const Int b = S(lvl)(kid); //const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); const Int U_col = S(lvl)(kid); Int U_row = LU_size(U_col)-1; if(lvl > 0) @@ -1022,7 +1022,7 @@ namespace BaskerNS //U_row = (lvl==1)?(kid%2):S(l)(kid)%LU_size(U_col); } - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); //const Int brow = L.srow; @@ -1130,12 +1130,12 @@ namespace BaskerNS //Setup variables const 
Int b = S(lvl)(kid); const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_g = L.srow + scol_top; // global offset - INT_1DARRAY ws = LL[wsb][l].iws; - const Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + const Int ws_size = LL(wsb)(l).iws_size; //Int *color = &(ws[0]); Int *pattern = &(ws(ws_size)); @@ -1281,10 +1281,10 @@ namespace BaskerNS //Setup variables const Int b = S(lvl)(kid); const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); #ifdef BASKER_2DL - INT_1DARRAY ws = LL[wsb][l].iws; - const Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + const Int ws_size = LL(wsb)(l).iws_size; #else INT_1DARRAY ws = thread_array(kid).iws; Int ws_size = thread_array(kid).iws_size; @@ -1454,11 +1454,11 @@ namespace BaskerNS { const Int b = S(lvl)(kid); const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); #ifdef BASKER_2DL - INT_1DARRAY ws = LL[wsb][l].iws; - ENTRY_1DARRAY X = LL[wsb][l].ews; - Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + ENTRY_1DARRAY X = LL(wsb)(l).ews; + Int ws_size = LL(wsb)(l).iws_size; #else INT_1DARRAY ws = thread_array(kid).iws; ENTRY_1DARRAY X = thread_array(kid).ews; @@ -1534,10 +1534,10 @@ namespace BaskerNS Int X_col, Int X_row, Int k, Entry pivot) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; //const Int ws_size = LL(X_col)(X_row).iws_size; //const Int p_size = LL(X_col)(X_row).p_size; @@ -1607,8 +1607,8 @@ namespace BaskerNS if (blkcol == 2 && blkrow == 1) printf( " L.colptr(%d) = %d\n",k+1,lnnz ); #endif - //LL[X_col][X_row].p_size = 0; - LL[X_col][X_row].p_size = 0; + 
//LL(X_col)(X_row).p_size = 0; + LL(X_col)(X_row).p_size = 0; return 0; }//end t_dense_offdiag_mov_L() @@ -1623,12 +1623,12 @@ namespace BaskerNS Int X_col, Int X_row, Int k, Entry pivot) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - const Int ws_size = LL[X_col][X_row].iws_size; - const Int p_size = LL[X_col][X_row].p_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + const Int ws_size = LL(X_col)(X_row).iws_size; + const Int p_size = LL(X_col)(X_row).p_size; #ifdef BASKER_DEBUG_NFACTOR_BLK @@ -1714,7 +1714,7 @@ namespace BaskerNS } #endif - LL[X_col][X_row].p_size = 0; + LL(X_col)(X_row).p_size = 0; return 0; }//end t_offdiag_mov_L() @@ -1733,17 +1733,17 @@ namespace BaskerNS BASKER_BOOL A_option) { //Note: need to add support for offdiag permuation - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int nnz = LL[X_col][X_row].p_size; + Int nnz = LL(X_col)(X_row).p_size; //printf( " t_dense_back_solve_offdiag( LL(%d,%d) and ALM(%d,%d)\n", blkcol,blkrow,blkcol,blkrow ); #ifdef BASKER_DEBUG_NFACTOR_BLK - Int ws_size = LL[X_col][X_row].iws_size; + Int ws_size = LL(X_col)(X_row).iws_size; const Int brow = L.srow; const Int bcol = L.scol; printf("\n\n"); @@ -1831,8 +1831,8 @@ namespace BaskerNS }//over all nonzero in left #ifdef BASKER_2DL - //LL[X_col][X_row].p_size = nnz; - LL[X_col][X_row].p_size = nnz; + //LL(X_col)(X_row).p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif //Debug @@ -1878,14 +1878,14 @@ namespace BaskerNS { //Note: need to add support for offdiag permuation - BASKER_MATRIX &L = LL[blkcol][blkrow]; - 
BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL[X_col][X_row].iws_size; - Int nnz = LL[X_col][X_row].p_size; + Int ws_size = LL(X_col)(X_row).iws_size; + Int nnz = LL(X_col)(X_row).p_size; //const Int brow = L.srow; //const Int bcol = L.scol; @@ -2056,8 +2056,8 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - //LL[X_col][X_row].p_size = nnz; - LL[X_col][X_row].p_size = nnz; + //LL(X_col)(X_row).p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif //Debug diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp index cf6fd8b3c0d9..c9e696f50786 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp @@ -670,8 +670,8 @@ namespace BaskerNS BASKER_MATRIX &L = LL(b)(0); const Int brow = L.srow; - INT_1DARRAY ws = LL[wsb][l].iws; - const Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + const Int ws_size = LL(wsb)(l).iws_size; //Int *color = &(ws[0]); Int *pattern = &(ws(ws_size)); @@ -990,8 +990,8 @@ namespace BaskerNS BASKER_MATRIX &L = LL(b)(0); const Int brow = L.srow; - INT_1DARRAY ws = LL[wsb][l].iws; - const Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + const Int ws_size = LL(wsb)(l).iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -1555,14 +1555,14 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - 
ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; - Int nnz = LL[X_col][X_row].p_size; + Int nnz = LL(X_col)(X_row).p_size; #ifdef BASKER_DEBUG_NFACTOR_BLK printf("t_back_solve_diag, kid: %d blkcol: %d blkrow: %d \n", @@ -1696,7 +1696,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif return; @@ -1717,14 +1717,14 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; - Int nnz = LL[X_col][X_row].p_size; + Int nnz = LL(X_col)(X_row).p_size; //Int brow = L.srow; //Int bcol = L.scol; @@ -1846,7 +1846,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif return 0; @@ -1869,14 +1869,14 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; - Int nnz = LL[X_col][X_row].p_size; + Int nnz = LL(X_col)(X_row).p_size; Int brow = L.srow; Int bcol = L.scol; @@ -2046,7 +2046,7 @@ 
namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif return 0; @@ -2065,12 +2065,12 @@ namespace BaskerNS Int k, Entry pivot ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - const Int ws_size = LL[X_col][X_row].iws_size; - const Int p_size = LL[X_col][X_row].p_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + const Int ws_size = LL(X_col)(X_row).iws_size; + const Int p_size = LL(X_col)(X_row).p_size; #ifdef BASKER_DEBUG_NFACTOR_BLK @@ -2155,14 +2155,14 @@ namespace BaskerNS //Fix later if(Options.same_pattern == BASKER_FALSE) { - for(Int i = 0; i < LL[X_col][X_row].nrow; i++) + for(Int i = 0; i < LL(X_col)(X_row).nrow; i++) { stack[i] = BASKER_MAX_IDX; } } L.col_ptr(k+1) = lnnz; - LL[X_col][X_row].p_size = 0; + LL(X_col)(X_row).p_size = 0; return 0; }//end t_offdiag_mov_L_inc_lvl() @@ -2729,8 +2729,8 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); /* @@ -2740,14 +2740,14 @@ namespace BaskerNS LP_col, LP_row, kid); */ - BASKER_MATRIX *UPP = &LU[UP_col][0]; + BASKER_MATRIX *UPP = &LU(UP_col)(0); if(UP_row != BASKER_MAX_IDX) { - UPP = &(LU[UP_col][UP_row]); + UPP = &(LU(UP_col)(UP_row)); } BASKER_MATRIX &UP = *(UPP); - BASKER_MATRIX *LPP = &LU[LP_col][0]; + BASKER_MATRIX *LPP = &LU(LP_col)(0); if(LP_row != BASKER_MAX_IDX) { LPP = &(LL(LP_col)(LP_row)); @@ -2968,14 +2968,14 @@ namespace BaskerNS Int x_size, Int x_offset, BASKER_BOOL A_option) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = 
LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; - Int nnz = LL[X_col][X_row].p_size; + Int nnz = LL(X_col)(X_row).p_size; //const Int brow = L.srow; //const Int bcol = L.scol; @@ -3105,7 +3105,7 @@ namespace BaskerNS */ - Int temp = INC_LVL_TEMP(k_i+LL[blkcol][0].srow) + L.inc_lvl(j) + 1; + Int temp = INC_LVL_TEMP(k_i+LL(blkcol)(0).srow) + L.inc_lvl(j) + 1; /* printf("lower row: %d kid: %d inc: %d %d %d j: %d \n", @@ -3182,7 +3182,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif //Debug @@ -3218,11 +3218,11 @@ namespace BaskerNS Int k, Entry pivot ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - const Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + const Int ws_size = LL(X_col)(X_row).iws_size; //const Int p_size = LL(X_col)(X_row).p_size; //NDE - warning: unused @@ -3295,7 +3295,7 @@ namespace BaskerNS } L.col_ptr(k+1) = lnnz; - LL[X_col][X_row].p_size = 0; + LL(X_col)(X_row).p_size = 0; return 0; }//end t_dense_offdiag_mov_L_inv_lvl() @@ -3314,12 +3314,12 @@ namespace BaskerNS const BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; //Int nnz = LL(X_col)(X_row).p_size; //Int 
brow = L.srow; @@ -3438,11 +3438,11 @@ namespace BaskerNS Int x_size, Int x_offset ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; //Int nnz = LL(X_col)(X_row).p_size; //const Int brow = L.srow; //Not used @@ -3575,11 +3575,11 @@ namespace BaskerNS Int x_size, Int x_offset ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; //Int nnz = LL(X_col)(X_row).p_size; //const Int brow = L.srow; //Not used @@ -3757,16 +3757,16 @@ namespace BaskerNS BASKER_MATRIX *B; if(lower == BASKER_TRUE) { - B = &(ALM[blkcol][blkrow]); + B = &(ALM(blkcol)(blkrow)); } else { - B = &(AVM[blkcol][blkrow]); + B = &(AVM(blkcol)(blkrow)); } BASKER_MATRIX &M = *B; //BASKER_MATRIX &M = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - const Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + const Int ws_size = LL(X_col)(X_row).iws_size; Int *color = &(ws(0)); Int *pattern = &(color[ws_size]); @@ -3856,12 +3856,12 @@ namespace BaskerNS for(Int blk = l+1; blk < endblk; ++blk) { // ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; //NDE - warning: unused - INT_1DARRAY &wsL = LL[leader_idx][blk].iws; + INT_1DARRAY &wsL = LL(leader_idx)(blk).iws; //Int p_sizeL = LL(leader_idx)(blk).p_size; - Int ws_sizeL = LL[leader_idx][blk].iws_size; + Int ws_sizeL = LL(leader_idx)(blk).iws_size; // ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; //NDE - warning: unused - INT_1DARRAY &ws = LL[my_idx][blk].iws; - 
const Int ws_size = LL[my_idx][blk].iws_size; + INT_1DARRAY &ws = LL(my_idx)(blk).iws; + const Int ws_size = LL(my_idx)(blk).iws_size; //Int p_size = LL(my_idx)(blk).p_size; Int *color = &(ws[0]); Int *pattern = &(color[ws_size]); @@ -3874,7 +3874,7 @@ namespace BaskerNS Int *stackL = &(patternL[ws_sizeL]); //over all nnnz found - for(Int jj = 0; jj < LL[my_idx][blk].nrow; ++jj) + for(Int jj = 0; jj < LL(my_idx)(blk).nrow; ++jj) { //if(kid==3) // { diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp index 68246464f757..289ee65f7ccd 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp @@ -137,9 +137,9 @@ namespace BaskerNS Int U_col = S(lvl)(kid); Int U_row = 0; - const Int scol = LU[U_col][U_row].scol; - const Int ecol = LU[U_col][U_row].ecol; - const Int ncol = LU[U_col][U_row].ncol; + const Int scol = LU(U_col)(U_row).scol; + const Int ecol = LU(U_col)(U_row).ecol; + const Int ncol = LU(U_col)(U_row).ncol; //for(Int k = scol; k < ecol; k++) //might have to use k+scol for barrier @@ -480,7 +480,7 @@ namespace BaskerNS #endif //end get needed variables// - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); //Ask C++ guru if this is ok BASKER_MATRIX *Bp; @@ -488,7 +488,7 @@ namespace BaskerNS //if(sep_flg == BASKER_FALSE) if(l == 0) { - Bp = &(AVM[U_col][U_row]); + Bp = &(AVM(U_col)(U_row)); //bbcol = Bp->scol; } else @@ -503,9 +503,9 @@ namespace BaskerNS // kid, X_col, X_row); - INT_1DARRAY ws = LL[X_col][X_row].iws; - const Int ws_size = LL[X_col][X_row].iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + const Int ws_size = LL(X_col)(X_row).iws_size; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_a = U.srow; // offset 
within A @@ -871,16 +871,16 @@ namespace BaskerNS Int X_col = S(0)(my_leader); Int X_row = l; //this will change for us Int col_idx_offset = 0; - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); const Int bcol = U.scol; #else BASKER_ASSERT(0==1, "t_upper_col_factor_offdiag, only work with with 2D layout"); #endif #ifdef BASKER_2DL - INT_1DARRAY ws = LL[X_col][X_row].iws; - const Int ws_size = LL[X_col][X_row].iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + const Int ws_size = LL(X_col)(X_row).iws_size; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; #else BASKER_ASSERT(0==1, "t_upper_col_factor_offdiag, only works with 2D layout"); #endif @@ -960,7 +960,7 @@ namespace BaskerNS { Int b = S(l)(kid); - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); INT_1DARRAY ws = thread_array(kid).iws; ENTRY_1DARRAY X = thread_array(team_leader).ews; Int ws_size = thread_array(kid).iws_size; @@ -1080,8 +1080,8 @@ namespace BaskerNS #endif //end get needed variables - BASKER_MATRIX &L = LL[L_col][L_row]; - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &L = LL(L_col)(L_row); + BASKER_MATRIX &U = LU(U_col)(U_row); BASKER_MATRIX &B = thread_array(kid).C; @@ -1098,9 +1098,9 @@ namespace BaskerNS //B.print(); - INT_1DARRAY ws = LL[X_col][l+1].iws; - const Int ws_size = LL[X_col][l+1].iws_size; - ENTRY_1DARRAY X = LL[X_col][l+1].ews; + INT_1DARRAY ws = LL(X_col)(l+1).iws; + const Int ws_size = LL(X_col)(l+1).iws_size; + ENTRY_1DARRAY X = LL(X_col)(l+1).ews; Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_a = U.srow; // offset within A @@ -1648,12 +1648,12 @@ namespace BaskerNS Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? 
- BASKER_MATRIX &L = LL[L_col][L_row]; - BASKER_MATRIX &U = LU[U_col][U_row]; //U.fill(); + BASKER_MATRIX &L = LL(L_col)(L_row); + BASKER_MATRIX &U = LU(U_col)(U_row); //U.fill(); - INT_1DARRAY ws = LL[X_col][X_row].iws; - const Int ws_size = LL[X_col][X_row].iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + const Int ws_size = LL(X_col)(X_row).iws_size; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; const Int bcol = U.scol; @@ -1746,7 +1746,7 @@ namespace BaskerNS Int A_col = S(lvl)(kid); Int A_row = (lvl==1)?(2):S(l+1)(kid)%(LU_size(A_col)); - BASKER_MATRIX &B = AVM[A_col][A_col]; + BASKER_MATRIX &B = AVM(A_col)(A_col); const Int my_idx = S(0)(kid); team_leader = find_leader(kid, l); @@ -1769,17 +1769,17 @@ namespace BaskerNS //Split over threads (leader and nonleader) for(Int blk=l+1; blk Accumulate the update from (l-1)th level: // LU(U_col)(U_row) -= L(U_col)(l-1) * U(l-1)(U_row) t_add_extend(thread, kid, lvl, l-1, k, - LU[U_col][U_row].scol, + LU(U_col)(U_row).scol, BASKER_FALSE); if(kid%((Int)pow(2, l)) == 0) @@ -248,7 +248,7 @@ namespace BaskerNS // printf("[3] barrier test, kid: %d leader: %d b_size: %d lvl: %d \n", // kid, my_leader, b_size, lvl); t_basker_barrier(thread, kid, my_leader, - b_size, 3, LU[U_col][U_row].scol, 0); + b_size, 3, LU(U_col)(U_row).scol, 0); for(Int ti = 0; ti < num_threads; ti++) { if (thread_array(kid).error_type != BASKER_SUCCESS) { info = BASKER_ERROR; @@ -287,7 +287,7 @@ namespace BaskerNS printf( " kid=%d: calling t_add_extend(k=%d/%d)\n",kid,k,ncol ); fflush(stdout); #endif t_add_extend(thread, kid,lvl,lvl-1, k, - LU[U_col][U_row].scol, + LU(U_col)(U_row).scol, BASKER_TRUE); } #ifdef BASKER_TIMER @@ -515,7 +515,7 @@ namespace BaskerNS Int U_row = L_col-my_row_leader; Int X_row = l+1; //this will change for us - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); #ifdef BASKER_DEBUG_NFACTOR_COL2 if(L_row >= LL_size(L_col)) { @@ -609,10 +609,10 @@ namespace 
BaskerNS Int endblk = (lower)?(LL_size(my_idx)):(l+2); for(Int blk = l+1; blk < endblk; ++blk) { - ENTRY_1DARRAY &XL = LL[leader_idx][blk].ews; - Int p_sizeL = LL[leader_idx][blk].p_size; - ENTRY_1DARRAY &X = LL[my_idx][blk].ews; - INT_1DARRAY &ws = LL[my_idx][blk].iws; + ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; + Int p_sizeL = LL(leader_idx)(blk).p_size; + ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; + INT_1DARRAY &ws = LL(my_idx)(blk).iws; Int *color = &(ws[0]); //printf( " + t_dense_blk_col_copy_atomic2(kid=%d: LL(%d)(%d) += LL(%d)(%d)\n",kid,leader_idx, blk,my_idx,blk); @@ -629,7 +629,7 @@ namespace BaskerNS #endif //over all nnnz found - for(Int jj = 0; jj < LL[my_idx][blk].nrow; ++jj) + for(Int jj = 0; jj < LL(my_idx)(blk).nrow; ++jj) { color[jj] = 0; #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -677,7 +677,7 @@ namespace BaskerNS //This can be removed in the future if(kid != team_leader) { - LL[my_idx][blk].p_size = 0; + LL(my_idx)(blk).p_size = 0; } else { @@ -685,7 +685,7 @@ namespace BaskerNS printf("SETTING PS: %d L:%d %d kid: %d\n", p_sizeL, leader_idx, blk, kid); #endif - LL[leader_idx][blk].p_size = p_sizeL; + LL(leader_idx)(blk).p_size = p_sizeL; //p_size = 0; //not needed }//over all blks } @@ -735,12 +735,12 @@ namespace BaskerNS //printf("upper picked, kid: %d \n", kid); //printf("up: %d %d kid: %d \n", // A_col, A_row, kid); - Bp = &(AVM[A_col][A_row]); + Bp = &(AVM(A_col)(A_row)); } else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM[A_col][0]); + Bp = &(ALM(A_col)(0)); } #ifdef BASKER_DEBUG_NFACTOR_COL2 printf("copy, kid: %d bl: %d A: %d %d \n", @@ -749,7 +749,7 @@ namespace BaskerNS // X += B(:, k) BASKER_MATRIX &B = *Bp; - ENTRY_1DARRAY X = LL[leader_idx][bl].ews; + ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; //printf( " -- t_dense_copy_update_matrix2(kid=%d: LL(%d)(%d) += B)\n",kid,leader_idx,bl ); //printf("ADDING UPDATES TO B\n"); //B.info(); @@ -800,9 +800,9 @@ namespace BaskerNS //For recounting patterns in dense blk //Need better sparse 
update - ENTRY_1DARRAY X = LL[leader_idx][bl].ews; - INT_1DARRAY ws = LL[leader_idx][bl].iws; - const Int nrow = LL[leader_idx][bl].nrow; + ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; + INT_1DARRAY ws = LL(leader_idx)(bl).iws; + const Int nrow = LL(leader_idx)(bl).nrow; Int *color = &(ws(0)); #ifdef BASKER_DEBUG_NFACTOR_COL2 printf("moving, kid: %d A: %d %d %d %d p_size: %d \n", @@ -886,7 +886,7 @@ namespace BaskerNS Int col_idx_offset = 0; //can get rid of? - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); pivot = U.tpivot; //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp index 02fde7c7ccad..c6ddadf55092 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp @@ -101,7 +101,7 @@ namespace BaskerNS //for(Int k = 0; k < 1; ++k) - for(Int k = 0; k < LU[U_col][U_row].ncol; ++k) + for(Int k = 0; k < LU(U_col)(U_row).ncol; ++k) { #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -148,7 +148,7 @@ namespace BaskerNS //barrier k = 0 usedl1 t_basker_barrier_inc_lvl(thread,kid,my_leader, - b_size, 0, LU[U_col][U_row].scol, 0); + b_size, 0, LU(U_col)(U_row).scol, 0); //printf("1 kid: %d error_leader: %d lvl: %d \n", kid, error_leader, lvl); BASKER_BOOL error_flag = BASKER_FALSE; basker_barrier.ExitGet(error_leader, error_flag); @@ -172,7 +172,7 @@ namespace BaskerNS { //for(Int k = 2; k < 3; ++k) - for(Int k = 0; k < LU[U_col][U_row].ncol; ++k) + for(Int k = 0; k < LU(U_col)(U_row).ncol; ++k) { #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -181,7 +181,7 @@ namespace BaskerNS #endif t_add_extend_inc_lvl(thread, kid,lvl,l-1, k, - LU[U_col][U_row].scol, + LU(U_col)(U_row).scol, BASKER_FALSE); //where to start again @@ -234,7 +234,7 @@ namespace BaskerNS // printf("[3] barrier test, kid: %d leader: %d b_size: %d 
lvl: %d \n", // kid, my_leader, b_size, lvl); t_basker_barrier_inc_lvl(thread, kid, my_leader, - b_size, 7, LU[U_col][U_row].scol, 0); + b_size, 7, LU(U_col)(U_row).scol, 0); #ifdef BASKER_DEBUG_NFACTOR_COL_INC if(kid == 0) @@ -248,7 +248,7 @@ namespace BaskerNS //if(lvl < 2) { //for(Int k=0; k < 1; ++k) - for(Int k = 0; k < LU[U_col][U_row].ncol; ++k) + for(Int k = 0; k < LU(U_col)(U_row).ncol; ++k) { #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -259,7 +259,7 @@ namespace BaskerNS //printf("test: %d \n", LU(U_col)(U_row).scol); t_add_extend_inc_lvl(thread, kid,lvl,lvl-1, k, - LU[U_col][U_row].scol, + LU(U_col)(U_row).scol, BASKER_TRUE); Entry pivot = 0; if((kid%(Int)(pow(2,lvl))) == 0) @@ -654,13 +654,13 @@ namespace BaskerNS //end get needed variables// //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); //Ask C++ guru if this is ok BASKER_MATRIX *Bp; if(l == 0) { - Bp = &(AVM[U_col][U_row]); + Bp = &(AVM(U_col)(U_row)); } else { @@ -674,9 +674,9 @@ namespace BaskerNS // } //B.print(); - INT_1DARRAY ws = LL[X_col][X_row].iws; - const Int ws_size = LL[X_col][X_row].iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + const Int ws_size = LL(X_col)(X_row).iws_size; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; const Int brow = U.srow; //const Int bcol = U.scol; @@ -1121,7 +1121,7 @@ namespace BaskerNS //Int col_idx_offset = 0; - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); //const Int bcol = U.scol; #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -1272,7 +1272,7 @@ namespace BaskerNS Int col_idx_offset = 0; - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); //Need to give them the output pattern @@ -1453,7 +1453,7 @@ namespace BaskerNS Int col_idx_offset = 0; - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); //const Int bcol = U.scol; #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ 
-1564,12 +1564,12 @@ namespace BaskerNS //printf("upper picked, kid: %d \n", kid); //printf("up: %d %d kid: %d \n", // A_col, A_row, kid); - Bp = &(AVM[A_col][A_row]); + Bp = &(AVM(A_col)(A_row)); } else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM[A_col][0]); + Bp = &(ALM(A_col)(0)); } BASKER_MATRIX &B = *Bp; //printf("ADDING UPDATES TO B\n"); @@ -1580,10 +1580,10 @@ namespace BaskerNS //return; //Int team_leader = find_leader(kid, l); //Not used - ENTRY_1DARRAY X = LL[leader_idx][bl].ews; - INT_1DARRAY ws = LL[leader_idx][bl].iws; + ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; + INT_1DARRAY ws = LL(leader_idx)(bl).iws; Int *color = &(ws(0)); - LL[leader_idx][bl].p_size = 0; + LL(leader_idx)(bl).p_size = 0; //Get the columns pattern Int U_pattern_col = A_col; @@ -1606,7 +1606,7 @@ namespace BaskerNS //Copy into C - BASKER_MATRIX &Up = LU[U_pattern_col][U_pattern_row]; + BASKER_MATRIX &Up = LU(U_pattern_col)(U_pattern_row); for(Int i = Up.col_ptr(k); i < Up.col_ptr(k+1); i++) { const Int j = Up.row_idx(i); @@ -1620,7 +1620,7 @@ namespace BaskerNS //if there is a L if(L_pattern_row != BASKER_MAX_IDX) { - BASKER_MATRIX &Lp = LL[L_pattern_col][L_pattern_row]; + BASKER_MATRIX &Lp = LL(L_pattern_col)(L_pattern_row); for(Int i = Lp.col_ptr(k)+1; i < Lp.col_ptr(k+1);i++) { const Int j = Lp.row_idx(i); @@ -1708,12 +1708,12 @@ namespace BaskerNS //printf("upper picked, kid: %d \n", kid); //printf("up: %d %d kid: %d \n", // A_col, A_row, kid); - Bp = &(AVM[A_col][A_row]); + Bp = &(AVM(A_col)(A_row)); } else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM[A_col][0]); + Bp = &(ALM(A_col)(0)); } BASKER_MATRIX &B = *Bp; //printf("ADDING UPDATES TO B\n"); @@ -1724,8 +1724,8 @@ namespace BaskerNS //return; //Int team_leader = find_leader(kid, l); //Not used - ENTRY_1DARRAY X = LL[leader_idx][bl].ews; - INT_1DARRAY ws = LL[leader_idx][bl].iws; + ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; + INT_1DARRAY ws = LL(leader_idx)(bl).iws; //const Int brow = 
LL(leader_idx)(bl).srow; //const Int nrow = LL(leader_idx)(bl).nrow; //Int p_size = LL(leader_idx)(bl).p_size; @@ -1789,11 +1789,11 @@ namespace BaskerNS //Int CM_idx = kid; - ENTRY_1DARRAY X = LL[leader_idx][bl].ews; - INT_1DARRAY ws = LL[leader_idx][bl].iws; - const Int ws_size = LL[leader_idx][bl].ews_size; + ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; + INT_1DARRAY ws = LL(leader_idx)(bl).iws; + const Int ws_size = LL(leader_idx)(bl).ews_size; // const Int brow = LL(leader_idx)(bl).srow; //NU //NDE - warning: unused - const Int nrow = LL[leader_idx][bl].nrow; + const Int nrow = LL(leader_idx)(bl).nrow; //Int p_size = LL(leader_idx)(bl).p_size; //For recounting patterns in dense blk @@ -1902,8 +1902,8 @@ namespace BaskerNS #endif //end get needed variables - BASKER_MATRIX &L = LL[L_col][L_row]; - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &L = LL(L_col)(L_row); + BASKER_MATRIX &U = LU(U_col)(U_row); BASKER_MATRIX &B = thread_array(kid).C; @@ -1926,9 +1926,9 @@ namespace BaskerNS } */ - INT_1DARRAY ws = LL[X_col][l+1].iws; - const Int ws_size = LL[X_col][l+1].iws_size; - ENTRY_1DARRAY X = LL[X_col][l+1].ews; + INT_1DARRAY ws = LL(X_col)(l+1).iws; + const Int ws_size = LL(X_col)(l+1).iws_size; + ENTRY_1DARRAY X = LL(X_col)(l+1).ews; const Int brow = U.srow; //const Int bcol = U.scol; @@ -2471,11 +2471,11 @@ namespace BaskerNS Int col_idx_offset = 0; //can get rid of? 
//BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); - INT_1DARRAY ws = LL[X_col][X_row].iws; + INT_1DARRAY ws = LL(X_col)(X_row).iws; //const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; //const Int brow = U.srow; //const Int bcol = U.scol; @@ -2592,11 +2592,11 @@ namespace BaskerNS //Int col_idx_offset = 0; //can get rid of?//NDE - warning: unused //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); - INT_1DARRAY ws = LL[X_col][X_row].iws; + INT_1DARRAY ws = LL(X_col)(X_row).iws; //const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; if(kid == leader_id) { @@ -2636,15 +2636,15 @@ namespace BaskerNS { //const Int blk = l+1; - ENTRY_1DARRAY &XL = LL[leader_idx][blk].ews; + ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; // INT_1DARRAY &wsL = LL(leader_idx)(blk).iws; //NDE - warning: unused // Int p_sizeL = LL(leader_idx)(blk).p_size; //NDE - warning: unused // Int ws_sizeL = LL(leader_idx)(blk).iws_size; //NDE - warning: unused - ENTRY_1DARRAY &X = LL[my_idx][blk].ews; - INT_1DARRAY &ws = LL[my_idx][blk].iws; + ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; + INT_1DARRAY &ws = LL(my_idx)(blk).iws; // const Int ws_size = LL(my_idx)(blk).iws_size; //NDE - warning: unused //Int p_size = LL(my_idx)(blk).p_size; - LL[my_idx][blk].p_size = 0; + LL(my_idx)(blk).p_size = 0; Int *color = &(ws[0]); // Int *pattern = &(color[ws_size]); //NDE - warning: unused // Int *stack = &(pattern[ws_size]); //NDE - warning: unused @@ -2716,7 +2716,7 @@ namespace BaskerNS if(U_pattern_row != BASKER_MAX_IDX) { - BASKER_MATRIX &UP = LU[U_pattern_col][U_pattern_row]; + BASKER_MATRIX &UP = LU(U_pattern_col)(U_pattern_row); for(Int jj = UP.col_ptr(k); jj < UP.col_ptr(k+1); 
@@ -2730,7 +2730,7 @@ namespace BaskerNS }//if UPattern if(L_pattern_row != BASKER_MAX_IDX) { - BASKER_MATRIX &LP = LL[L_pattern_col][L_pattern_row]; + BASKER_MATRIX &LP = LL(L_pattern_col)(L_pattern_row); for(Int jj = LP.col_ptr(k); jj < LP.col_ptr(k+1); jj++) @@ -2807,13 +2807,13 @@ namespace BaskerNS { //const Int blk = l+1; - ENTRY_1DARRAY &XL = LL[leader_idx][blk].ews; + ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; // INT_1DARRAY &wsL = LL(leader_idx)(blk).iws; //NDE - warning: unused - Int p_sizeL = LL[leader_idx][blk].p_size; + Int p_sizeL = LL(leader_idx)(blk).p_size; // Int ws_sizeL = LL(leader_idx)(blk).iws_size; //NDE - warning: unused - ENTRY_1DARRAY &X = LL[my_idx][blk].ews; - INT_1DARRAY &ws = LL[my_idx][blk].iws; - const Int ws_size = LL[my_idx][blk].iws_size; + ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; + INT_1DARRAY &ws = LL(my_idx)(blk).iws; + const Int ws_size = LL(my_idx)(blk).iws_size; //Int p_size = LL(my_idx)(blk).p_size; Int *color = &(ws[0]); Int *pattern = &(color[ws_size]); @@ -2845,7 +2845,7 @@ namespace BaskerNS #endif //over all nnnz found - for(Int jj = 0; jj < LL[my_idx][blk].nrow; ++jj) + for(Int jj = 0; jj < LL(my_idx)(blk).nrow; ++jj) { color[jj] = 0; @@ -2910,7 +2910,7 @@ namespace BaskerNS //This can be removed in the future if(kid != team_leader) { - LL[my_idx][blk].p_size = 0; + LL(my_idx)(blk).p_size = 0; } else { @@ -2918,7 +2918,7 @@ namespace BaskerNS printf("SETTING PS: %d L:%d %d kid: %d\n", p_sizeL, leader_idx, blk, kid); #endif - LL[leader_idx][blk].p_size = p_sizeL; + LL(leader_idx)(blk).p_size = p_sizeL; //p_size = 0; NOT USED }//over all blks } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp index b87a0f48eadf..ccbd5a33b827 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp @@ -258,8 +258,8 @@ namespace BaskerNS Int btab = 
btf_tabs_offset; BASKER_MATRIX &M = (c >= btab ? BTF_C : BTF_D); - BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); - BASKER_MATRIX &L = (c >= btab ? LBTF[c-btab] : L_D[c]); + BASKER_MATRIX &U = (c >= btab ? UBTF(c-btab) : U_D(c)); + BASKER_MATRIX &L = (c >= btab ? LBTF(c-btab) : L_D(c)); Int k = btf_tabs(c); Int bcol = M.scol; @@ -336,8 +336,8 @@ namespace BaskerNS Int btab = btf_tabs_offset; BASKER_MATRIX &M = (c >= btab ? BTF_C : BTF_D); - BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); - BASKER_MATRIX &L = (c >= btab ? LBTF[c-btab] : L_D[c]); + BASKER_MATRIX &U = (c >= btab ? UBTF(c-btab) : U_D(c)); + BASKER_MATRIX &L = (c >= btab ? LBTF(c-btab) : L_D(c)); Int bcol = M.scol; //JDB: brow hack: fix. diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp index ef9bdb8084ef..c955ff952551 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp @@ -349,35 +349,35 @@ int Basker::sfactor() #endif #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[p]; - e_tree (ALM[blk][0], stree_p, 1); + e_tree (ALM(blk)(0), stree_p, 1); #else - e_tree (ALM[blk][0], stree, 1); + e_tree (ALM(blk)(0), stree, 1); #endif #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1_2 += timer1.seconds(); timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - post_order(ALM[blk][0], stree_p); + post_order(ALM(blk)(0), stree_p); #else - post_order(ALM[blk][0], stree); + post_order(ALM(blk)(0), stree); #endif #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1_3 += timer1.seconds(); timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - col_count (ALM[blk][0], stree_p); + col_count (ALM(blk)(0), stree_p); #else - col_count (ALM[blk][0], stree); + col_count (ALM(blk)(0), stree); #endif #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1 += timer1.seconds(); #endif 
//Assign nnz here - //leaf_assign_nnz(LL[blk][0], stree, 0); - //leaf_assign_nnz(LU[blk][LU_size[blk]-1], stree, 0); + //leaf_assign_nnz(LL(blk)(0), stree, 0); + //leaf_assign_nnz(LU(blk)(LU_size[blk]-1), stree, 0); if(Options.verbose == BASKER_TRUE) { printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,0); @@ -387,11 +387,11 @@ int Basker::sfactor() timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - leaf_assign_nnz(LL[blk][0], stree_p, 0); - leaf_assign_nnz(LU[blk][LU_size(blk)-1], stree_p, 0); + leaf_assign_nnz(LL(blk)(0), stree_p, 0); + leaf_assign_nnz(LU(blk)(LU_size(blk)-1), stree_p, 0); #else - leaf_assign_nnz(LL[blk][0], stree, 0); - leaf_assign_nnz(LU[blk][LU_size(blk)-1], stree, 0); + leaf_assign_nnz(LL(blk)(0), stree, 0); + leaf_assign_nnz(LU(blk)(LU_size(blk)-1), stree, 0); #endif #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time2 += timer1.seconds(); @@ -441,10 +441,10 @@ int Basker::sfactor() timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - U_blk_sfactor(AVM[U_col][U_row], stree_p, + U_blk_sfactor(AVM(U_col)(U_row), stree_p, gScol(l), gSrow(glvl), off_diag); #else - U_blk_sfactor(AVM[U_col][U_row], stree, + U_blk_sfactor(AVM(U_col)(U_row), stree, gScol(l), gSrow(glvl), off_diag); #endif #ifdef BASKER_TIMER @@ -460,8 +460,8 @@ int Basker::sfactor() // stree, gScol, gSrow); //Assign nnz counts for leaf off-diag - //U_assign_nnz(LU[U_col][U_row], stree, 0); - //L_assign_nnz(LL[blk][l+1], stree, 0); + //U_assign_nnz(LU(U_col)(U_row), stree, 0); + //L_assign_nnz(LL(blk)(l+1), stree, 0); #ifdef BASKER_TIMER timer1.reset(); #endif @@ -472,11 +472,11 @@ int Basker::sfactor() printf( " ++ L_assign_nnz(LL(%d, %d)) fill-factor x(%f+%f = %f)\n",(int)blk,(int)l+1, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); } #ifdef SHYLU_BASKER_STREE_LIST - U_assign_nnz(LU[U_col][U_row], stree_p, fill_factor, 0); - L_assign_nnz(LL[blk][l+1], stree_p, fill_factor, 0); + U_assign_nnz(LU(U_col)(U_row), stree_p, fill_factor, 0); + 
L_assign_nnz(LL(blk)(l+1), stree_p, fill_factor, 0); #else - U_assign_nnz(LU[U_col][U_row], stree, fill_factor, 0); - L_assign_nnz(LL[blk][l+1], stree, fill_factor, 0); + U_assign_nnz(LU(U_col)(U_row), stree, fill_factor, 0); + L_assign_nnz(LL(blk)(l+1), stree, fill_factor, 0); #endif #ifdef BASKER_TIMER time2 += timer1.seconds(); @@ -540,43 +540,43 @@ int Basker::sfactor() //gScol(lvl), gSrow(pp)); #ifdef BASKER_TIMER - printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM[U_col][U_row].nrow,ALM[U_col][U_row].ncol,ALM[U_col][U_row].nnz ); fflush(stdout); + printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM(U_col)(U_row).nrow,ALM(U_col)(U_row).ncol,ALM(U_col)(U_row).nnz ); fflush(stdout); #endif #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[pp]; - S_blk_sfactor(ALM[U_col][U_row], stree_p, + S_blk_sfactor(ALM(U_col)(U_row), stree_p, gScol(lvl), gSrow(pp)); #else - S_blk_sfactor(ALM[U_col][U_row], stree, + S_blk_sfactor(ALM(U_col)(U_row), stree, gScol(lvl), gSrow(pp)); #endif #ifdef BASKER_TIMER - printf( " >>> -> nnz = %d\n",ALM[U_col][U_row].nnz ); fflush(stdout); + printf( " >>> -> nnz = %d\n",ALM(U_col)(U_row).nnz ); fflush(stdout); #endif - //S_assign_nnz(LL[U_col][U_row], stree, 0); + //S_assign_nnz(LL(U_col)(U_row), stree, 0); if(Options.verbose == BASKER_TRUE) { printf( " >> S_assign_nnz( LL(%d,%d) )\n",(int)U_col,(int)U_row ); fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST - S_assign_nnz(LL[U_col][U_row], stree_p, 0); + S_assign_nnz(LL(U_col)(U_row), stree_p, 0); #else - S_assign_nnz(LL[U_col][U_row], stree, 0); + S_assign_nnz(LL(U_col)(U_row), stree, 0); #endif - //S_assign_nnz(LU[U_col][LU_size[U_col]-1], stree,0); + //S_assign_nnz(LU(U_col)(LU_size[U_col]-1), stree,0); //printf( " >>> S_assign_nnz( LU(%d,%d) )\n",U_col,LU_size(U_col)-1 ); if(Options.verbose == BASKER_TRUE) { printf( " ++ S_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)LU_size(U_col)-1); fflush(stdout); } 
#ifdef SHYLU_BASKER_STREE_LIST - S_assign_nnz(LU[U_col][LU_size(U_col)-1], stree_p, 0); + S_assign_nnz(LU(U_col)(LU_size(U_col)-1), stree_p, 0); #else - S_assign_nnz(LU[U_col][LU_size(U_col)-1], stree, 0); + S_assign_nnz(LU(U_col)(LU_size(U_col)-1), stree, 0); #endif #ifdef BASKER_TIMER - printf( " >>> -> nnz = %d\n",LU[U_col][LU_size(U_col)-1].nnz); fflush(stdout); + printf( " >>> -> nnz = %d\n",LU(U_col)(LU_size(U_col)-1).nnz); fflush(stdout); #endif } #ifdef SHYLU_BASKER_STREE_LIST @@ -614,10 +614,10 @@ int Basker::sfactor() Int off_diag = 1; #ifdef SHYLU_BASKER_STREE_LIST - U_blk_sfactor(AVM[U_col][U_row], stree_p, + U_blk_sfactor(AVM(U_col)(U_row), stree_p, gScol(l), gSrow(pp), off_diag); #else - U_blk_sfactor(AVM[U_col][U_row], stree, + U_blk_sfactor(AVM(U_col)(U_row), stree, gScol(l), gSrow(pp), off_diag); #endif @@ -638,11 +638,11 @@ int Basker::sfactor() fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST - U_assign_nnz(LU[U_col][U_row], stree_p, fill_factor, 0); - L_assign_nnz(LL[inner_blk][l-lvl], stree_p, fill_factor, 0); + U_assign_nnz(LU(U_col)(U_row), stree_p, fill_factor, 0); + L_assign_nnz(LL(inner_blk)(l-lvl), stree_p, fill_factor, 0); #else - U_assign_nnz(LU[U_col][U_row], stree, fill_factor, 0); - L_assign_nnz(LL[inner_blk][l-lvl], stree, fill_factor, 0); + U_assign_nnz(LU(U_col)(U_row), stree, fill_factor, 0); + L_assign_nnz(LL(inner_blk)(l-lvl), stree, fill_factor, 0); #endif //printf("Here 1 \n"); } @@ -2491,7 +2491,7 @@ int Basker::sfactor() #ifdef BASKER_TIMER printf( " L_D[%d](%d, size = %d, nnz = %d)\n",i,(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); #endif - L_D[i].init_matrix("LBFT", + L_D(i).init_matrix("LBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2499,12 +2499,12 @@ int Basker::sfactor() nnz); //For pruning - L_D[i].init_pend(); + L_D(i).init_pend(); #ifdef BASKER_TIMER printf( " U_D[%d](%d, size = %d, nnz = %d)\n",i,(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); #endif - U_D[i].init_matrix("UBFT", + 
U_D(i).init_matrix("UBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2546,7 +2546,7 @@ int Basker::sfactor() #ifdef BASKER_TIMER printf( " LBTF(%d, size = %d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); #endif - LBTF[i-btf_tabs_offset].init_matrix("LBFT", + LBTF(i-btf_tabs_offset).init_matrix("LBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2555,12 +2555,12 @@ int Basker::sfactor() //For pruning //printf( " LBTF(%d).init_pend()\n",(int)(i-btf_tabs_offset) ); - LBTF[i-btf_tabs_offset].init_pend(); + LBTF(i-btf_tabs_offset).init_pend(); #ifdef BASKER_TIMER printf( " UBTF(%d, size = %d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); #endif - UBTF[i-btf_tabs_offset].init_matrix("UBFT", + UBTF(i-btf_tabs_offset).init_matrix("UBFT", btf_tabs(i), lblk_size, btf_tabs(i), diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp index 890bc8a17fca..622bdf39a0fd 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp @@ -100,9 +100,9 @@ namespace BaskerNS for(Int p=0; p < num_threads; ++p) { Int blk = S(0)(p); - sfactor_nd_dom_estimate(ALM[blk][0], - LL[blk][0], - LU[blk][LU_size(blk)-1]); + sfactor_nd_dom_estimate(ALM(blk)(0), + LL(blk)(0), + LU(blk)(LU_size(blk)-1)); for(Int l=0; l < tree.nlvls; l++) { @@ -124,11 +124,11 @@ namespace BaskerNS //JDB TEST PASSED U_row = my_new_row; - sfactor_nd_upper_estimate(AVM[U_col][U_row], - LU[U_col][U_row]); + sfactor_nd_upper_estimate(AVM(U_col)(U_row), + LU(U_col)(U_row)); - sfactor_nd_lower_estimate(ALM[blk][l+1], - LL[blk][l+1]); + sfactor_nd_lower_estimate(ALM(blk)(l+1), + LL(blk)(l+1)); } // end for l @@ -141,9 +141,9 @@ namespace BaskerNS Int U_col = S(lvl+1)(ppp); Int U_row = 0; - sfactor_nd_sep_estimate(ALM[U_col][U_row], - LL[U_col][U_row], - LU[U_col][LU_size(U_col)-1]); + 
sfactor_nd_sep_estimate(ALM(U_col)(U_row), + LL(U_col)(U_row), + LU(U_col)(LU_size(U_col)-1)); Int innerblk = U_col; for(Int l = lvl+1; l < tree.nlvls; l++) @@ -167,12 +167,12 @@ namespace BaskerNS //JDB TEST PASS U_row = my_new_row; - sfactor_nd_sep_upper_estimate(AVM[U_col][U_row], - LU[U_col][U_row]); + sfactor_nd_sep_upper_estimate(AVM(U_col)(U_row), + LU(U_col)(U_row)); sfactor_nd_sep_lower_estimate( - ALM[innerblk][l-lvl], - LL[innerblk][l-lvl]); + ALM(innerblk)(l-lvl), + LL(innerblk)(l-lvl)); }//for - l }//for -p diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp index b2fa1204cd86..b01d3ec72632 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp @@ -293,7 +293,7 @@ namespace BaskerNS for(Int b = nblks_c-1; b>= 0; b--) { //---Lower solve - BASKER_MATRIX &LC = LBTF[b]; + BASKER_MATRIX &LC = LBTF(b); #ifdef BASKER_DEBUG_SOLVE_RHS printf("\n\n btf b=%ld (%d x %d), LBTF(%d)\n", (long)b, (int)LC.nrow, (int)LC.ncol, (int)b); #endif @@ -303,7 +303,7 @@ namespace BaskerNS //printVec(y,gn); - BASKER_MATRIX &UC = UBTF[b]; + BASKER_MATRIX &UC = UBTF(b); //U(C)\x -> y upper_tri_solve(UC,x,y); @@ -420,7 +420,7 @@ namespace BaskerNS for(Int b = btf_top_tabs_offset-1; b>= 0; b--) { //L(C)\x -> y - BASKER_MATRIX &LC = L_D[b]; + BASKER_MATRIX &LC = L_D(b); lower_tri_solve(LC, x, y); #ifdef BASKER_DEBUG_SOLVE_RHS printf( "\n after L solve (b=%d)\n",b ); fflush(stdout); @@ -429,7 +429,7 @@ namespace BaskerNS #endif //U(C)\y -> x - BASKER_MATRIX &UC = U_D[b]; + BASKER_MATRIX &UC = U_D(b); upper_tri_solve(UC, y, x); #ifdef BASKER_DEBUG_SOLVE_RHS printf( "\n after U solve\n" ); fflush(stdout); @@ -476,7 +476,7 @@ namespace BaskerNS //Forward solve on A for(Int b = 0; b < tree.nblks; ++b) { - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); //L\x -> y lower_tri_solve(L, x, y, scol_top); @@ 
-500,7 +500,7 @@ namespace BaskerNS //Update offdiag for(Int bb = 1; bb < LL_size(b); ++bb) { - BASKER_MATRIX &LD = LL[b][bb]; + BASKER_MATRIX &LD = LL(b)(bb); //x = LD*y; #ifdef BASKER_DEBUG_SOLVE_RHS char filename[200]; @@ -549,7 +549,7 @@ namespace BaskerNS #endif //U\y -> x - BASKER_MATRIX &U = LU[b][LU_size(b)-1]; + BASKER_MATRIX &U = LU(b)(LU_size(b)-1); upper_tri_solve(U, y, x, scol_top); // NDE: y , x positions swapped... // seems role of x and y changed... #ifdef BASKER_DEBUG_SOLVE_RHS @@ -568,7 +568,7 @@ namespace BaskerNS #endif //y = UB*x; - BASKER_MATRIX &UB = LU[b][bb]; + BASKER_MATRIX &UB = LU(b)(bb); neg_spmv(UB, x, y, scol_top); #ifdef BASKER_DEBUG_SOLVE_RHS diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp index bfd6e2460062..f950e9bd6132 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp @@ -346,10 +346,10 @@ namespace BaskerNS // Update off-diag in the block-row before the diag solve for(int bb = LL_size(b)-1; bb > 0; bb--) { - BASKER_MATRIX &LD = LL[b][bb]; + BASKER_MATRIX &LD = LL(b)(bb); neg_spmv_perm_tr(LD, x, y, scol_top); // update y as mod. rhs, x as solution } - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); if (L.nrow != 0 && L.ncol != 0) // Avoid degenerate case e.g. empty block following nd-partitioning lower_tri_solve_tr(L, y, x, scol_top); // x and y should be equal after in M range... 
} @@ -373,10 +373,10 @@ namespace BaskerNS for(Int bb = 0; bb < LU_size(b)-1; bb++) { // update offdiag corresponding to the block-row - BASKER_MATRIX &UB = LU[b][bb]; + BASKER_MATRIX &UB = LU(b)(bb); neg_spmv_tr(UB, x, y, scol_top); } - BASKER_MATRIX &U = LU[b][LU_size(b)-1]; + BASKER_MATRIX &U = LU(b)(LU_size(b)-1); if (U.nrow != 0 && U.ncol != 0) // Avoid degenerate case upper_tri_solve_tr(U, x, y, scol_top); } @@ -410,7 +410,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of { for(Int b = 0; b < btf_top_tabs_offset; b++) { - BASKER_MATRIX &UC = U_D[b]; + BASKER_MATRIX &UC = U_D(b); if ( b > 0 ) spmv_BTF_tr(b, BTF_D, x, y, false); @@ -418,7 +418,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of if (UC.nrow != 0 && UC.ncol != 0) // Avoid degenerate case upper_tri_solve_tr(UC, x, y); - BASKER_MATRIX &LC = L_D[b]; + BASKER_MATRIX &LC = L_D(b); if (LC.nrow != 0 && LC.ncol != 0) // Avoid degenerate case lower_tri_solve_tr(LC, x, y); @@ -462,7 +462,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of if (nblks_c > 0) { Int offset = 0; for(Int b = 0; b < nblks_c; b++) { - BASKER_MATRIX &UC = UBTF[b]; + BASKER_MATRIX &UC = UBTF(b); // Update off-diag // Update X with Y @@ -472,7 +472,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of if (UC.nrow != 0 && UC.ncol != 0) // Avoid degenerate case upper_tri_solve_tr(UC,x,y); - BASKER_MATRIX &LC = LBTF[b]; + BASKER_MATRIX &LC = LBTF(b); if (LC.nrow != 0 && LC.ncol != 0) // Avoid degenerate case lower_tri_solve_tr(LC,x,y); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp index 995bad188542..c7f804794f67 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp @@ -148,8 +148,8 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; 
l++) { - MATRIX &myL = LL[l][0]; - stats.Lnnz += LL[l][0].nnz; + MATRIX &myL = LL(l)(0); + stats.Lnnz += LL(l)(0).nnz; }//over all Ls return stats.Lnnz; @@ -166,10 +166,10 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { - for(Int r=0; r 0 ? U_view_count(i) : 1); if (U_view_size > 0) { - MALLOC_MATRIX_1DARRAY(AVM[i], U_view_size); - MALLOC_MATRIX_1DARRAY(LU[i], U_view_size); + MALLOC_MATRIX_1DARRAY(AVM(i), U_view_size); + MALLOC_MATRIX_1DARRAY(LU(i), U_view_size); } //Malloc AL subarray // NOTE: size at least one to allow empty block Int L_view_size = (L_view_count(i) > 0 ? L_view_count(i): 1); if (L_view_size > 0) { - MALLOC_MATRIX_1DARRAY(ALM[i], L_view_size); - MALLOC_MATRIX_1DARRAY(LL[i], L_view_size); + MALLOC_MATRIX_1DARRAY(ALM(i), L_view_size); + MALLOC_MATRIX_1DARRAY(LL(i), L_view_size); } LU_size(i) = U_view_count(i); @@ -855,11 +855,11 @@ namespace BaskerNS #endif for(Int j=i; j != -flat.ncol; j=tree.treetab[j]) { - MATRIX_1DARRAY &UMtemp = AVM[j]; - MATRIX_1DARRAY &LMtemp = ALM[i]; + MATRIX_1DARRAY &UMtemp = AVM(j); + MATRIX_1DARRAY &LMtemp = ALM(i); - MATRIX_1DARRAY &LUtemp = LU[j]; - MATRIX_1DARRAY &LLtemp = LL[i]; + MATRIX_1DARRAY &LUtemp = LU(j); + MATRIX_1DARRAY &LLtemp = LL(i); #ifdef MY_DEBUG printf( " AVM(%d)(%d).set_shape(%dx%d)\n",j,U_view_count[j], tree.col_tabs[i+1]-tree.col_tabs[i],tree.col_tabs[j+1]-tree.col_tabs[j] ); @@ -1056,7 +1056,7 @@ namespace BaskerNS (r_idx < tree.nblks && tree.row_tabs(r_idx+1) == tree.row_tabs(r_idx))) // skip empty blocks { if((L_row+1 < LL_size(L_col)) && - (tree.row_tabs(r_idx+1) == ALM[L_col][L_row+1].srow)) + (tree.row_tabs(r_idx+1) == ALM(L_col)(L_row+1).srow)) { //printf( " > ALM(%d)(%d).srow = %d, row_tab(%d) = %d\n",L_col,L_row+1,ALM(L_col)(L_row+1).srow, r_idx+1,tree.row_tabs(r_idx+1) ); L_row++; @@ -1071,7 +1071,7 @@ namespace BaskerNS (r_idx < tree.nblks && tree.row_tabs(r_idx+1) == tree.row_tabs(r_idx))) // skip empty blocks { if((U_row+1 < LU_size(U_col)) && - (tree.row_tabs(r_idx+1) == 
AVM[U_col][U_row+1].srow)) + (tree.row_tabs(r_idx+1) == AVM(U_col)(U_row+1).srow)) { //printf( " + AVM(%d)(%d).srow = %d, row_tab(%d) = %d\n",U_col,U_row+1,AVM(U_col)(U_row+1).srow, r_idx+1,tree.row_tabs(r_idx+1) ); U_row++; @@ -1095,8 +1095,8 @@ namespace BaskerNS //Get Matrix Ref - BASKER_MATRIX &Ltemp = ALM[L_col][L_row]; - BASKER_MATRIX &Utemp = AVM[U_col][U_row]; + BASKER_MATRIX &Ltemp = ALM(L_col)(L_row); + BASKER_MATRIX &Utemp = AVM(U_col)(U_row); Int bcol = Ltemp.scol; //diag blk @@ -1162,11 +1162,11 @@ namespace BaskerNS for(Int sb = 0; sb < LL_size(b); ++sb) { //printf( " ALM(%d)(%d).clean_col()\n",b,sb ); - ALM[b][sb].clean_col(); + ALM(b)(sb).clean_col(); } for(Int sb = 0; sb < LU_size(b); ++sb) { - AVM[b][sb].clean_col(); + AVM(b)(sb).clean_col(); } }//for - over all blks diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp index 715ac1c13f5f..2d8322c05de2 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp @@ -327,7 +327,7 @@ namespace BaskerNS { for(Int b=chunk_start; b < chunk_end; b++) { - BASKER_MATRIX &L = LBTF[b-btf_tabs_offset]; + BASKER_MATRIX &L = LBTF(b-btf_tabs_offset); L.clear_pend(); L.nnz = L.mnnz; }//end-for over chunck @@ -342,7 +342,7 @@ namespace BaskerNS #if defined(BASKER_SPLIT_A) for(Int b=chunk_start; b < chunk_end; b++) { - BASKER_MATRIX &L = L_D[b]; + BASKER_MATRIX &L = L_D(b); L.clear_pend(); L.nnz = L.mnnz; }//end-for over chunck @@ -364,11 +364,11 @@ namespace BaskerNS { #ifdef BASKER_DEBUG_INIT printf("L Factor Init: %d %d , kid: %d, nnz: %ld \n", - b, row, kid, LL[b][row].nnz); + b, row, kid, LL(b)(row).nnz); #endif - LL[b][row].clear_pend(); - LL[b][row].nnz = LL[b][row].mnnz; + LL(b)(row).clear_pend(); + LL(b)(row).nnz = LL(b)(row).mnnz; }//end over all row }//end select which thread @@ -383,13 +383,13 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("U 
Factor init: %d %d, nnz: %ld \n", b, LU_size[b]-1, - LU[b][LU_size[b]-1].nnz); + LU(b)(LU_size[b]-1).nnz); #endif //LU(b)(LU_size(b)-1).nnz = 0; - for(Int kk = 0; kk < LU[b][LU_size(b)-1].ncol+1; kk++) + for(Int kk = 0; kk < LU(b)(LU_size(b)-1).ncol+1; kk++) { - LU[b][LU_size(b)-1].col_ptr(kk) = 0; + LU(b)(LU_size(b)-1).col_ptr(kk) = 0; } /* @@ -399,7 +399,7 @@ namespace BaskerNS LU(b)(LU_size(b)-1).mnnz); */ - LU[b][LU_size(b)-1].nnz = LU[b][LU_size(b)-1].mnnz; + LU(b)(LU_size(b)-1).nnz = LU(b)(LU_size(b)-1).mnnz; for(Int l = lvl+1; l < tree.nlvls+1; l++) { Int U_col = S(l)(kid); @@ -416,12 +416,12 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("Init U: %d %d lvl: %d l: %d kid: %d nnz: %ld \n", U_col, U_row, lvl, l, kid, - LU[U_col][U_row].nnz); + LU(U_col)(U_row).nnz); #endif - for(Int kk = 0; kk < LU[U_col][U_row].ncol+1; kk++) + for(Int kk = 0; kk < LU(U_col)(U_row).ncol+1; kk++) { - LU[U_col][U_row].col_ptr(kk) = 0; + LU(U_col)(U_row).col_ptr(kk) = 0; } /* printf("flipU (%d,%d) %d %d \n", @@ -430,7 +430,7 @@ namespace BaskerNS LU(U_col)(U_row).mnnz); */ - LU[U_col][U_row].nnz = LU[U_col][U_row].mnnz; + LU(U_col)(U_row).nnz = LU(U_col)(U_row).mnnz; //LU(U_col)(U_row).nnz = 0; }//over inner lvls @@ -466,19 +466,19 @@ namespace BaskerNS { #ifdef BASKER_DEBUG_INIT printf("L Factor Init: %d %d , kid: %d, nnz: %ld \n", - b, row, kid, LL[b][row].nnz); + b, row, kid, LL(b)(row).nnz); #endif #ifdef BASKER_TIMER timer_init_matrixL.reset(); - printf( " ++ lvl=%d: LL(%d,%d): nnz=%d, mnnz=%d ++\n",(int)lvl, (int)b, (int)row, (int)LL[b][row].nnz, (int)LL[b][row].mnnz); fflush(stdout); + printf( " ++ lvl=%d: LL(%d,%d): nnz=%d, mnnz=%d ++\n",(int)lvl, (int)b, (int)row, (int)LL(b)(row).nnz, (int)LL(b)(row).mnnz); fflush(stdout); #endif - LL[b][row].init_matrix("Loffdig", - LL[b][row].srow, - LL[b][row].nrow, - LL[b][row].scol, - LL[b][row].ncol, - LL[b][row].nnz); + LL(b)(row).init_matrix("Loffdig", + LL(b)(row).srow, + LL(b)(row).nrow, + LL(b)(row).scol, + 
LL(b)(row).ncol, + LL(b)(row).nnz); #ifdef BASKER_TIMER printf( " >> LL(%d,%d).init_matrix done <<\n",b,row ); fflush(stdout); init_matrixL_time += timer_init_matrixL.seconds(); @@ -487,20 +487,20 @@ namespace BaskerNS //Fix when this all happens in the future if(Options.incomplete == BASKER_TRUE) { - LL[b][row].init_inc_lvl(); + LL(b)(row).init_inc_lvl(); } #ifdef BASKER_TIMER timer_fill_matrixL.reset(); - printf( " ++ zero out (%d) ++\n",int(LL[b][row].col_ptr.extent(0)) ); fflush(stdout); + printf( " ++ zero out (%d) ++\n",int(LL(b)(row).col_ptr.extent(0)) ); fflush(stdout); #endif //LL(b)(row).fill(); - LL[b][row].init_ptr(); + LL(b)(row).init_ptr(); //Kokkos::deep_copy(LL(b)(row).col_ptr, 0); #ifdef BASKER_TIMER - printf( " LL(%d)(%d).init_pend(ncol = %d)\n",b,row,LL[b][row].ncol ); fflush(stdout); + printf( " LL(%d)(%d).init_pend(ncol = %d)\n",b,row,LL(b)(row).ncol ); fflush(stdout); fill_matrixL_time += timer_fill_matrixL.seconds(); #endif - LL[b][row].init_pend(); + LL(b)(row).init_pend(); #ifdef BASKER_TIMER printf( " (b=%d: row=%d) done\n\n",b,row ); fflush(stdout); #endif @@ -529,23 +529,23 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", b, LU_size[b]-1, - LU[b][LU_size[b]-1].nnz); + LU(b)(LU_size[b]-1).nnz); #endif #ifdef BASKER_TIMER printf( " lvl=%d: LU(%d,%d): %dx%d, nnz=%d, mnnz=%d, at (%d,%d)\n", (int)lvl, (int)b, (int)LU_size(b)-1, - (int)LU[b][LU_size(b)-1].nrow,(int)LU[b][LU_size(b)-1].ncol,(int)LU[b][LU_size(b)-1].nnz, (int)LU[b][LU_size(b)-1].mnnz, - (int)LU[b][LU_size(b)-1].srow,(int)LU[b][LU_size(b)-1].scol); + (int)LU(b)(LU_size(b)-1).nrow,(int)LU(b)(LU_size(b)-1).ncol,(int)LU(b)(LU_size(b)-1).nnz, (int)LU(b)(LU_size(b)-1).mnnz, + (int)LU(b)(LU_size(b)-1).srow,(int)LU(b)(LU_size(b)-1).scol); #endif - LU[b][LU_size(b)-1].init_matrix("Udiag", - LU[b][LU_size(b)-1].srow, - LU[b][LU_size(b)-1].nrow, - LU[b][LU_size(b)-1].scol, - LU[b][LU_size(b)-1].ncol, - LU[b][LU_size(b)-1].nnz); + 
LU(b)(LU_size(b)-1).init_matrix("Udiag", + LU(b)(LU_size(b)-1).srow, + LU(b)(LU_size(b)-1).nrow, + LU(b)(LU_size(b)-1).scol, + LU(b)(LU_size(b)-1).ncol, + LU(b)(LU_size(b)-1).nnz); //LU(b)(LU_size(b)-1).fill(); - LU[b][LU_size(b)-1].init_ptr(); + LU(b)(LU_size(b)-1).init_ptr(); //Kokkos::deep_copy(LU(b)(LU_size(b)-1).col_ptr, 0); for(Int l = lvl+1; l < tree.nlvls+1; l++) @@ -583,29 +583,29 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("Init U: %d %d lvl: %d l: %d kid: %d nnz: %ld \n", U_col, U_row, lvl, l, kid, - LU[U_col][U_row].nnz); + LU(U_col)(U_row).nnz); #endif #ifdef BASKER_TIMER printf( " +++ l=%d: LU(%d,%d): %dx%d, nnz=%d, mnnz=%d at (%d,%d)\n", (int)l, (int)U_col, (int)U_row, - (int)LU[U_col][U_row].nrow,(int)LU[U_col][U_row].ncol, - (int)LU[U_col][U_row].nnz, (int)LU[U_col][U_row].mnnz, - (int)LU[U_col][U_row].srow,(int)LU[U_col][U_row].scol); + (int)LU(U_col)(U_row).nrow,(int)LU(U_col)(U_row).ncol, + (int)LU(U_col)(U_row).nnz, (int)LU(U_col)(U_row).mnnz, + (int)LU(U_col)(U_row).srow,(int)LU(U_col)(U_row).scol); #endif - LU[U_col][U_row].init_matrix("Uoffdiag", - LU[U_col][U_row].srow, - LU[U_col][U_row].nrow, - LU[U_col][U_row].scol, - LU[U_col][U_row].ncol, - LU[U_col][U_row].nnz); + LU(U_col)(U_row).init_matrix("Uoffdiag", + LU(U_col)(U_row).srow, + LU(U_col)(U_row).nrow, + LU(U_col)(U_row).scol, + LU(U_col)(U_row).ncol, + LU(U_col)(U_row).nnz); //LU(U_col)(U_row).fill(); - LU[U_col][U_row].init_ptr(); + LU(U_col)(U_row).init_ptr(); //Kokkos::deep_copy(LU(U_col)(U_row).col_ptr, 0); if(Options.incomplete == BASKER_TRUE) { - LU[U_col][U_row].init_inc_lvl(); + LU(U_col)(U_row).init_inc_lvl(); } }//over inner lvls @@ -646,9 +646,9 @@ namespace BaskerNS { #ifdef BASKER_DEBUG_INIT printf("ALM Factor Init: %d %d , kid: %d, nnz: %d nrow: %d ncol: %d \n", - b, row, kid, ALM[b][row].nnz, - ALM[b][row].nrow, - ALM[b][row].ncol); + b, row, kid, ALM(b)(row).nnz, + ALM(b)(row).nrow, + ALM(b)(row).ncol); #endif /*if (kid == 1) @@ -663,7 +663,7 @@ namespace 
BaskerNS printf("ALM(%d,%d: %dx%d) alloc with A: kid=%d btf=%d\n", b, row, ALM(b)(row).nrow, ALM(b)(row).ncol, kid, Options.btf); #endif - ALM[b][row].convert2D(A, alloc, kid); + ALM(b)(row).convert2D(A, alloc, kid); } else { @@ -672,7 +672,7 @@ namespace BaskerNS printf("ALM(%d,%d, %dx%d) alloc (btf) with BTF_A: kid=%d \n", b, row, ALM(b)(row).nrow, ALM(b)(row).ncol, kid); #endif - ALM[b][row].convert2D(BTF_A, alloc, kid); + ALM(b)(row).convert2D(BTF_A, alloc, kid); } /*if (kid == 0) { for(Int j = 0; j < ALM(b)(row).ncol; j++) { @@ -697,9 +697,9 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INTI printf("AUM Factor init: %d %d, kid: %d nnz: %d nrow: %d ncol: %d \n", b, LU_size(b)-1, kid, - AVM[b][LU_size(b)-1].nnz, - AVM[b][LU_size(b)-1].nrow, - AVM[b][LU_size(b)-1].ncol); + AVM(b)(LU_size(b)-1).nnz, + AVM(b)(LU_size(b)-1).nrow, + AVM(b)(LU_size(b)-1).ncol); #endif /*if (kid == 1) { @@ -708,13 +708,13 @@ namespace BaskerNS }*/ if(Options.btf == BASKER_FALSE) { - AVM[b][LU_size(b)-1].convert2D(A, alloc, kid); + AVM(b)(LU_size(b)-1).convert2D(A, alloc, kid); } else { //printf("Using BTF AU\n"); //printf(" > kid=%d: convert2D AVM(%d,%d)\n", kid, b, LU_size(b)-1); - AVM[b][LU_size(b)-1].convert2D(BTF_A, alloc, kid); + AVM(b)(LU_size(b)-1).convert2D(BTF_A, alloc, kid); } /*if (kid == 0) { for(Int j = 0; j < AVM(b)(LU_size(b)-1).ncol; j++) { @@ -771,9 +771,9 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("Init AUM: %d %d lvl: %d l: %d kid: %d nnz: %d nrow: %d ncol: %d \n", U_col, U_row, lvl, l, kid, - AVM[U_col][U_row].nnz, - AVM[U_col][U_row].nrow, - AVM[U_col][U_row].ncol); + AVM(U_col)(U_row).nnz, + AVM(U_col)(U_row).nrow, + AVM(U_col)(U_row).ncol); #endif #if 0 @@ -793,7 +793,7 @@ namespace BaskerNS //printf("2nd convert AVM: %d %d size:%d kid: %d\n", // U_col, U_row, AVM(U_col)(U_row).nnz, // kid); - AVM[U_col][U_row].convert2D(BTF_A, alloc, kid); + AVM(U_col)(U_row).convert2D(BTF_A, alloc, kid); //printf(" %d: Using BTF AU(%d,%d) done\n",kid,U_col,U_row); } @@ 
-828,17 +828,17 @@ namespace BaskerNS for(Int l = 0; l < LL_size(b); l++) { //defining here - LL[b][l].iws_size = LL[b][l].nrow; + LL(b)(l).iws_size = LL(b)(l).nrow; //This can be made smaller, see notes in Sfactor_old - LL[b][l].iws_mult = 5; - LL[b][l].ews_size = LL[b][l].nrow; + LL(b)(l).iws_mult = 5; + LL(b)(l).ews_size = LL(b)(l).nrow; //This can be made smaller, see notes in sfactor_old - LL[b][l].ews_mult = 2; + LL(b)(l).ews_mult = 2; - Int iws_size = LL[b][l].iws_size; - Int iws_mult = LL[b][l].iws_mult; - Int ews_size = LL[b][l].ews_size; - Int ews_mult = LL[b][l].ews_mult; + Int iws_size = LL(b)(l).iws_size; + Int iws_mult = LL(b)(l).iws_mult; + Int ews_size = LL(b)(l).ews_size; + Int ews_mult = LL(b)(l).ews_mult; if(iws_size > max_sep_size) { @@ -851,10 +851,10 @@ namespace BaskerNS } BASKER_ASSERT((iws_size*iws_mult)>0, "util iws"); - MALLOC_INT_1DARRAY(LL[b][l].iws, iws_size*iws_mult); + MALLOC_INT_1DARRAY(LL(b)(l).iws, iws_size*iws_mult); for(Int i=0; i 0) { BASKER_ASSERT((ews_size*ews_mult)>0, "util ews"); - MALLOC_ENTRY_1DARRAY(LL[b][l].ews, ews_size*ews_mult); + MALLOC_ENTRY_1DARRAY(LL(b)(l).ews, ews_size*ews_mult); for(Int i=0; i Date: Fri, 25 Oct 2024 20:42:26 -0600 Subject: [PATCH 12/20] ShyLU - Basker : cleanups Signed-off-by: iyamazaki --- packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp | 1 - packages/shylu/shylu_node/basker/src/shylubasker_types.hpp | 4 ---- 2 files changed, 5 deletions(-) diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp index 5b6ae49e5e14..784df704eb59 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp @@ -1178,7 +1178,6 @@ namespace BaskerNS BASKER_INLINE int Basker::sfactor_copy() { - printf( " .. 
sfactor_copy ..\n" ); fflush(stdout); //Reorder A; //Match order if(match_flag == BASKER_TRUE) diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp index 193ecb11e24a..f57447b10906 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp @@ -172,7 +172,6 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC int_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = INT_1DARRAY(BASKER_KOKKOS_NOINIT("int_1d"),s);*/ \ Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -182,7 +181,6 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s0>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \ BASKER_ASSERT(s1>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \ - /*a = INT_RANK2DARRAY(BASKER_KOKKOS_NOINIT("int_rank2d"),s0,s1);*/ \ Kokkos::resize(a, s0,s1); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -237,7 +235,6 @@ enum BASKER_INCOMPLETE_CODE BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ a = MATRIX_1DARRAY(Kokkos::view_alloc("matrix_1d", Kokkos::SequentialHostInit),s); \ - Kokkos::resize(a,s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -247,7 +244,6 @@ enum BASKER_INCOMPLETE_CODE BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ a = MATRIX_2DARRAY(Kokkos::view_alloc("matrix_2d", Kokkos::SequentialHostInit),s); \ - Kokkos::resize(a,s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ From 1c01d54ee5d624f132f386fee433d7f8b61aa969 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 29 Oct 2024 16:02:57 -0600 Subject: [PATCH 13/20] Tpetra: Refactor FEWhichActive and FillState Signed-off-by: Christian Glusa --- .../tpetra/core/src/Tpetra_ConfigDefs.hpp | 18 ++++++ .../core/src/Tpetra_FECrsGraph_decl.hpp | 17 
+---- .../tpetra/core/src/Tpetra_FECrsGraph_def.hpp | 38 +++++------ .../core/src/Tpetra_FECrsMatrix_decl.hpp | 19 ++---- .../core/src/Tpetra_FECrsMatrix_def.hpp | 64 +++++++++---------- .../core/src/Tpetra_FEMultiVector_decl.hpp | 17 +---- .../core/src/Tpetra_FEMultiVector_def.hpp | 34 +++++----- 7 files changed, 95 insertions(+), 112 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_ConfigDefs.hpp b/packages/tpetra/core/src/Tpetra_ConfigDefs.hpp index 64269d97d7f9..ec909ed0aef9 100644 --- a/packages/tpetra/core/src/Tpetra_ConfigDefs.hpp +++ b/packages/tpetra/core/src/Tpetra_ConfigDefs.hpp @@ -198,6 +198,24 @@ namespace Tpetra { Backward, Symmetric }; + + // FE* enums + namespace FE { + + // Enum for activity + enum WhichActive + { + ACTIVE_OWNED, + ACTIVE_OWNED_PLUS_SHARED + }; + + enum class FillState + { + open, // matrix is "open". Values can freely summed in to and replaced + modify, // matrix is open for modification. *local* values can be replaced + closed + }; + } } // For backwards compatibility diff --git a/packages/tpetra/core/src/Tpetra_FECrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_FECrsGraph_decl.hpp index d14f6b3da408..adb8c325d2f8 100644 --- a/packages/tpetra/core/src/Tpetra_FECrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_FECrsGraph_decl.hpp @@ -13,6 +13,7 @@ /// \file Tpetra_FECrsGraph_decl.hpp /// \brief Declaration of the Tpetra::FECrsGraph class +#include "Tpetra_ConfigDefs.hpp" #include "Tpetra_FECrsGraph_fwd.hpp" #include "Tpetra_CrsGraph_decl.hpp" @@ -548,25 +549,13 @@ namespace Tpetra { // template // Teuchos::RCP makeOwnedColMap (ViewType ownedGraphIndices); - // Enum for activity - enum FEWhichActive - { - FE_ACTIVE_OWNED, - FE_ACTIVE_OWNED_PLUS_SHARED - }; - - enum class FillState - { - open, // matrix is "open". 
Values can freely inserted - closed - }; - Teuchos::RCP fillState_; + Teuchos::RCP fillState_; // This is whichever graph isn't currently active Teuchos::RCP > inactiveCrsGraph_; // This is in RCP to make shallow copies of the FECrsGraph work correctly - Teuchos::RCP activeCrsGraph_; + Teuchos::RCP activeCrsGraph_; // The importer between the rowmaps of the two graphs Teuchos::RCP ownedRowsImporter_; diff --git a/packages/tpetra/core/src/Tpetra_FECrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_FECrsGraph_def.hpp index c8eb4ab9dfb9..6ef09873bea7 100644 --- a/packages/tpetra/core/src/Tpetra_FECrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_FECrsGraph_def.hpp @@ -182,8 +182,8 @@ setup(const Teuchos::RCP & ownedRowMap, if(ownedPlusSharedColMap.is_null()) this->allocateIndices(GlobalIndices); else this->allocateIndices(LocalIndices); - activeCrsGraph_ = Teuchos::rcp(new FEWhichActive(FE_ACTIVE_OWNED_PLUS_SHARED)); - fillState_ = Teuchos::rcp(new FillState(FillState::closed)); + activeCrsGraph_ = Teuchos::rcp(new FE::WhichActive(FE::ACTIVE_OWNED_PLUS_SHARED)); + fillState_ = Teuchos::rcp(new FE::FillState(FE::FillState::closed)); // Use a very strong map equivalence check bool maps_are_the_same = ownedRowMap->isSameAs(*ownedPlusSharedRowMap); @@ -221,7 +221,7 @@ setup(const Teuchos::RCP & ownedRowMap, template void FECrsGraph::doOwnedPlusSharedToOwned(const CombineMode CM) { const char tfecfFuncName[] = "FECrsGraph::doOwnedPlusSharedToOwned(CombineMode): "; - if(!ownedRowsImporter_.is_null() && *activeCrsGraph_ == FE_ACTIVE_OWNED_PLUS_SHARED) { + if(!ownedRowsImporter_.is_null() && *activeCrsGraph_ == FE::ACTIVE_OWNED_PLUS_SHARED) { Teuchos::RCP ownedRowMap = ownedRowsImporter_->getSourceMap(); // Do a self-export in "restricted mode" @@ -296,10 +296,10 @@ void FECrsGraph::doOwnedToOwnedPlusShared(con template void FECrsGraph::switchActiveCrsGraph() { - if(*activeCrsGraph_ == FE_ACTIVE_OWNED_PLUS_SHARED) - *activeCrsGraph_ = FE_ACTIVE_OWNED; + 
if(*activeCrsGraph_ == FE::ACTIVE_OWNED_PLUS_SHARED) + *activeCrsGraph_ = FE::ACTIVE_OWNED; else - *activeCrsGraph_ = FE_ACTIVE_OWNED_PLUS_SHARED; + *activeCrsGraph_ = FE::ACTIVE_OWNED_PLUS_SHARED; if(inactiveCrsGraph_.is_null()) return; @@ -318,10 +318,10 @@ void FECrsGraph::endFill( doing finite differences, things are easy --- just call fillComplete(). If, we are in the parallel FE case, then: - Precondition: FE_ACTIVE_OWNED_PLUS_SHARED mode + Precondition: FE::ACTIVE_OWNED_PLUS_SHARED mode Postconditions: - 1) FE_ACTIVE_OWNED mode + 1) FE::ACTIVE_OWNED mode 2) The OWNED graph has been fillCompleted with an Aztec-compatible column map 3) rowptr & (local) colinds are aliased between the two graphs 4) The OWNED_PLUS_SHARED graph has been fillCompleted with a column map whose first chunk @@ -333,7 +333,7 @@ void FECrsGraph::endFill( */ // Precondition const char tfecfFuncName[] = "FECrsGraph::endFill(domainMap, rangeMap): "; - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(*activeCrsGraph_ != FE_ACTIVE_OWNED_PLUS_SHARED,std::runtime_error, "must be in owned+shared mode."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(*activeCrsGraph_ != FE::ACTIVE_OWNED_PLUS_SHARED,std::runtime_error, "must be in owned+shared mode."); if(ownedRowsImporter_.is_null()) { // The easy case: One graph switchActiveCrsGraph(); @@ -365,7 +365,7 @@ void FECrsGraph::beginFill() { // Unlike FECrsMatrix and FEMultiVector, we do not allow you to call beginFill() after calling endFill() // So we throw an exception if you're in owned mode - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(*activeCrsGraph_ == FE_ACTIVE_OWNED,std::runtime_error, "can only be called once."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(*activeCrsGraph_ == FE::ACTIVE_OWNED,std::runtime_error, "can only be called once."); } @@ -373,11 +373,11 @@ template void FECrsGraph::beginAssembly() { const char tfecfFuncName[] = "FECrsGraph::beginAssembly: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::closed, + *fillState_ != 
FE::FillState::closed, std::runtime_error, "Cannot beginAssembly, matrix is not in a closed state" ); - *fillState_ = FillState::open; + *fillState_ = FE::FillState::open; this->beginFill(); } @@ -385,11 +385,11 @@ template void FECrsGraph::endAssembly() { const char tfecfFuncName[] = "FECrsGraph::endAssembly: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::open, + *fillState_ != FE::FillState::open, std::logic_error, "Cannot endAssembly, matrix is not open to fill but is closed." ); - *fillState_ = FillState::closed; + *fillState_ = FE::FillState::closed; this->endFill(); } @@ -400,11 +400,11 @@ void FECrsGraph::endAssembly( { const char tfecfFuncName[] = "FECrsGraph::endAssembly: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::open, + *fillState_ != FE::FillState::open, std::logic_error, "Cannot endAssembly, matrix is not open to fill but is closed." ); - *fillState_ = FillState::closed; + *fillState_ = FE::FillState::closed; this->endFill(domainMap, rangeMap); } @@ -428,7 +428,7 @@ FECrsGraph::insertGlobalIndicesImpl ( ){ const char tfecfFuncName[] = "FECrsGraph::insertGlobalIndices: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::open, + *fillState_ != FE::FillState::open, std::logic_error, "Cannot replace global values, matrix is not open to fill but is closed." ); @@ -445,7 +445,7 @@ FECrsGraph::insertGlobalIndicesImpl ( ){ const char tfecfFuncName[] = "FECrsGraph::insertGlobalIndices: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::open, + *fillState_ != FE::FillState::open, std::logic_error, "Cannot replace global values, matrix is not open to fill but is closed." 
); @@ -461,7 +461,7 @@ FECrsGraph::insertLocalIndicesImpl ( ){ const char tfecfFuncName[] = "FECrsGraph::insertLocalIndices: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::open, + *fillState_ != FE::FillState::open, std::logic_error, "Cannot replace global values, matrix is not open to fill but is closed." ); diff --git a/packages/tpetra/core/src/Tpetra_FECrsMatrix_decl.hpp b/packages/tpetra/core/src/Tpetra_FECrsMatrix_decl.hpp index 73d9db1d1b1c..2bf93ae03896 100644 --- a/packages/tpetra/core/src/Tpetra_FECrsMatrix_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_FECrsMatrix_decl.hpp @@ -14,6 +14,7 @@ /// \file Tpetra_FECrsMatrix_decl.hpp /// \brief Declaration of the Tpetra::FECrsMatrix class +#include "Tpetra_ConfigDefs.hpp" #include "Tpetra_CrsMatrix_decl.hpp" #include "Tpetra_FECrsGraph.hpp" @@ -292,12 +293,6 @@ class FECrsMatrix : //@} private: - // Enum for activity - enum FEWhichActive - { - FE_ACTIVE_OWNED, - FE_ACTIVE_OWNED_PLUS_SHARED - }; // The FECrsGraph from construction time Teuchos::RCP > feGraph_; @@ -305,15 +300,9 @@ class FECrsMatrix : // This is whichever multivector isn't currently active Teuchos::RCP > inactiveCrsMatrix_; // This is in RCP to make shallow copies of the FECrsMatrix work correctly - Teuchos::RCP activeCrsMatrix_; - - enum class FillState - { - open, // matrix is "open". Values can freely summed in to and replaced - modify, // matrix is open for modification. *local* values can be replaced - closed - }; - Teuchos::RCP fillState_; + Teuchos::RCP activeCrsMatrix_; + + Teuchos::RCP fillState_; }; // end class FECrsMatrix diff --git a/packages/tpetra/core/src/Tpetra_FECrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_FECrsMatrix_def.hpp index 012c8ec6b6a5..892db94b2fa7 100644 --- a/packages/tpetra/core/src/Tpetra_FECrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_FECrsMatrix_def.hpp @@ -36,8 +36,8 @@ FECrsMatrix(const Teuchos::RCP& graph, "fillComplete. 
In that case, you must call fillComplete on the graph " "again."); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - ( *graph->activeCrsGraph_!= fe_crs_graph_type::FE_ACTIVE_OWNED,std::runtime_error, - "Input graph must be in FE_ACTIVE_OWNED mode when this constructor is called."); + ( *graph->activeCrsGraph_!= FE::ACTIVE_OWNED,std::runtime_error, + "Input graph must be in FE::ACTIVE_OWNED mode when this constructor is called."); bool start_owned = false; if (! params.is_null ()) { @@ -46,9 +46,9 @@ FECrsMatrix(const Teuchos::RCP& graph, } } if(start_owned) { - activeCrsMatrix_ = Teuchos::rcp(new FEWhichActive(FE_ACTIVE_OWNED)); + activeCrsMatrix_ = Teuchos::rcp(new FE::WhichActive(FE::ACTIVE_OWNED)); } else { - activeCrsMatrix_ = Teuchos::rcp(new FEWhichActive(FE_ACTIVE_OWNED_PLUS_SHARED)); + activeCrsMatrix_ = Teuchos::rcp(new FE::WhichActive(FE::ACTIVE_OWNED_PLUS_SHARED)); } // Make an "inactive" matrix, if we need to @@ -58,14 +58,14 @@ FECrsMatrix(const Teuchos::RCP& graph, inactiveCrsMatrix_ = Teuchos::rcp(new crs_matrix_type(*this,graph)); } - fillState_ = Teuchos::rcp(new FillState(FillState::closed)); + fillState_ = Teuchos::rcp(new FE::FillState(FE::FillState::closed)); } template void FECrsMatrix::doOwnedPlusSharedToOwned(const CombineMode CM) { - if(!inactiveCrsMatrix_.is_null() && *activeCrsMatrix_ == FE_ACTIVE_OWNED_PLUS_SHARED) { + if(!inactiveCrsMatrix_.is_null() && *activeCrsMatrix_ == FE::ACTIVE_OWNED_PLUS_SHARED) { // Do a self-export in "restricted mode" this->doExport(*this,*feGraph_->ownedRowsImporter_,CM,true); inactiveCrsMatrix_->fillComplete(); @@ -81,10 +81,10 @@ void FECrsMatrix::doOwnedToOwnedPlusS template void FECrsMatrix::switchActiveCrsMatrix() { - if(*activeCrsMatrix_ == FE_ACTIVE_OWNED_PLUS_SHARED) - *activeCrsMatrix_ = FE_ACTIVE_OWNED; + if(*activeCrsMatrix_ == FE::ACTIVE_OWNED_PLUS_SHARED) + *activeCrsMatrix_ = FE::ACTIVE_OWNED; else - *activeCrsMatrix_ = FE_ACTIVE_OWNED_PLUS_SHARED; + *activeCrsMatrix_ = FE::ACTIVE_OWNED_PLUS_SHARED; 
if(inactiveCrsMatrix_.is_null()) return; @@ -95,7 +95,7 @@ void FECrsMatrix::switchActiveCrsMatr template void FECrsMatrix::endFill() { - if(*activeCrsMatrix_ == FE_ACTIVE_OWNED_PLUS_SHARED) { + if(*activeCrsMatrix_ == FE::ACTIVE_OWNED_PLUS_SHARED) { doOwnedPlusSharedToOwned(Tpetra::ADD); switchActiveCrsMatrix(); } @@ -107,7 +107,7 @@ template void FECrsMatrix::beginFill() { // Note: This does not throw an error since the on construction, the FECRS is in overlap mode. Ergo, calling beginFill(), // like one should expect to do in a rational universe, should not cause an error. - if(*activeCrsMatrix_ == FE_ACTIVE_OWNED) { + if(*activeCrsMatrix_ == FE::ACTIVE_OWNED) { this->resumeFill(); switchActiveCrsMatrix(); } @@ -117,59 +117,59 @@ void FECrsMatrix::beginFill() { template void FECrsMatrix::beginAssembly() { const char tfecfFuncName[] = "FECrsMatrix::beginAssembly: "; - if (*fillState_ != FillState::closed) + if (*fillState_ != FE::FillState::closed) { std::ostringstream errmsg; errmsg << "Cannot begin assembly, matrix is not in a closed state " << "but is currently open for " - << (*fillState_ == FillState::open ? "assembly" : "modification"); + << (*fillState_ == FE::FillState::open ? "assembly" : "modification"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } - *fillState_ = FillState::open; + *fillState_ = FE::FillState::open; this->beginFill(); } template void FECrsMatrix::endAssembly() { const char tfecfFuncName[] = "FECrsMatrix::endAssembly: "; - if (*fillState_ != FillState::open) + if (*fillState_ != FE::FillState::open) { std::ostringstream errmsg; errmsg << "Cannot end assembly, matrix is not open for assembly " << "but is currently " - << (*fillState_ == FillState::closed ? "closed" : "open for modification"); + << (*fillState_ == FE::FillState::closed ? 
"closed" : "open for modification"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } - *fillState_ = FillState::closed; + *fillState_ = FE::FillState::closed; this->endFill(); } template void FECrsMatrix::beginModify() { const char tfecfFuncName[] = "FECrsMatrix::beginModify: "; - if (*fillState_ != FillState::closed) + if (*fillState_ != FE::FillState::closed) { std::ostringstream errmsg; errmsg << "Cannot begin modifying, matrix is not in a closed state " << "but is currently open for " - << (*fillState_ == FillState::open ? "assembly" : "modification"); + << (*fillState_ == FE::FillState::open ? "assembly" : "modification"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } - *fillState_ = FillState::modify; + *fillState_ = FE::FillState::modify; this->resumeFill(); } template void FECrsMatrix::endModify() { const char tfecfFuncName[] = "FECrsMatrix::endModify: "; - if (*fillState_ != FillState::modify) + if (*fillState_ != FE::FillState::modify) { std::ostringstream errmsg; errmsg << "Cannot end modifying, matrix is not open to modify but is currently " - << (*fillState_ == FillState::open ? "open for assembly" : "closed"); + << (*fillState_ == FE::FillState::open ? "open for assembly" : "closed"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } - *fillState_ = FillState::closed; + *fillState_ = FE::FillState::closed; this->fillComplete(); } @@ -184,12 +184,12 @@ FECrsMatrix::replaceGlobalValuesImpl( const LocalOrdinal numElts) { const char tfecfFuncName[] = "FECrsMatrix::replaceGlobalValues: "; - if (*fillState_ != FillState::open) + if (*fillState_ != FE::FillState::open) { std::ostringstream errmsg; errmsg << "Cannot replace global values, matrix is not open for assembly " << "but is currently " - << (*fillState_ == FillState::modify ? "open for modification" : "closed"); + << (*fillState_ == FE::FillState::modify ? 
"open for modification" : "closed"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } return crs_matrix_type::replaceGlobalValuesImpl(rowVals, graph, rowInfo, inds, newVals, numElts); @@ -206,7 +206,7 @@ FECrsMatrix::replaceLocalValuesImpl( const LocalOrdinal numElts) { const char tfecfFuncName[] = "FECrsMatrix::replaceLocalValues: "; - if (*fillState_ != FillState::open && *fillState_ != FillState::modify) + if (*fillState_ != FE::FillState::open && *fillState_ != FE::FillState::modify) { std::ostringstream errmsg; errmsg << "Cannot replace local values, matrix is not open to fill/modify. " @@ -228,12 +228,12 @@ FECrsMatrix::sumIntoGlobalValuesImpl( const bool atomic) { const char tfecfFuncName[] = "FECrsMatrix::sumIntoGlobalValues: "; - if (*fillState_ != FillState::open) + if (*fillState_ != FE::FillState::open) { std::ostringstream errmsg; errmsg << "Cannot sum in to global values, matrix is not open for assembly. " << "The matrix is currently " - << (*fillState_ == FillState::modify ? "open for modification" : "closed"); + << (*fillState_ == FE::FillState::modify ? "open for modification" : "closed"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } return crs_matrix_type::sumIntoGlobalValuesImpl( @@ -253,12 +253,12 @@ FECrsMatrix::sumIntoLocalValuesImpl( const bool atomic) { const char tfecfFuncName[] = "FECrsMatrix::sumIntoLocalValues: "; - if (*fillState_ != FillState::open) + if (*fillState_ != FE::FillState::open) { std::ostringstream errmsg; errmsg << "Cannot sum in to local values, matrix is not open for assembly. " << "The matrix is currently " - << (*fillState_ == FillState::modify ? "open for modification" : "closed"); + << (*fillState_ == FE::FillState::modify ? 
"open for modification" : "closed"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } return crs_matrix_type::sumIntoLocalValuesImpl( @@ -276,12 +276,12 @@ FECrsMatrix::insertGlobalValuesImpl( const size_t numInputEnt) { const char tfecfFuncName[] = "FECrsMatrix::insertGlobalValues: "; - if (*fillState_ != FillState::open) + if (*fillState_ != FE::FillState::open) { std::ostringstream errmsg; errmsg << "Cannot insert global values, matrix is not open for assembly. " << "The matrix is currently " - << (*fillState_ == FillState::modify ? "open for modification" : "closed"); + << (*fillState_ == FE::FillState::modify ? "open for modification" : "closed"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } return crs_matrix_type::insertGlobalValuesImpl(graph, rowInfo, gblColInds, vals, numInputEnt); diff --git a/packages/tpetra/core/src/Tpetra_FEMultiVector_decl.hpp b/packages/tpetra/core/src/Tpetra_FEMultiVector_decl.hpp index 74ff5cbaadb2..c505c54d7f52 100644 --- a/packages/tpetra/core/src/Tpetra_FEMultiVector_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_FEMultiVector_decl.hpp @@ -174,20 +174,7 @@ namespace Tpetra { /// you call this method. void replaceMap (const Teuchos::RCP& map); - //! Enum for activity - enum FEWhichActive - { - FE_ACTIVE_OWNED_PLUS_SHARED, - FE_ACTIVE_OWNED - }; - - enum class FillState - { - open, // matrix is "open". Values can freely summed in to and replaced - modify, // matrix is open for modification. *local* values can be replaced - closed - }; - Teuchos::RCP fillState_; + Teuchos::RCP fillState_; //! Whichever MultiVector is not currently active. Teuchos::RCP inactiveMultiVector_; @@ -197,7 +184,7 @@ namespace Tpetra { /// /// This is an RCP in order to make shallow copies of the /// FEMultiVector work correctly. - Teuchos::RCP activeMultiVector_; + Teuchos::RCP activeMultiVector_; //! Import object used for communication between the two MultiVectors. 
Teuchos::RCP> importer_; diff --git a/packages/tpetra/core/src/Tpetra_FEMultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_FEMultiVector_def.hpp index 68e20b0517a6..a217f64711d5 100644 --- a/packages/tpetra/core/src/Tpetra_FEMultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_FEMultiVector_def.hpp @@ -29,7 +29,7 @@ FEMultiVector (const Teuchos::RCP& map, const bool zeroOut) : base_type (importer.is_null () ? map : importer->getTargetMap (), numVecs, zeroOut), - activeMultiVector_ (Teuchos::rcp (new FEWhichActive (FE_ACTIVE_OWNED_PLUS_SHARED))), + activeMultiVector_ (Teuchos::rcp (new FE::WhichActive (FE::ACTIVE_OWNED_PLUS_SHARED))), importer_ (importer) { const char tfecfFuncName[] = "FEMultiVector constructor: "; @@ -60,7 +60,7 @@ FEMultiVector (const Teuchos::RCP& map, inactiveMultiVector_ = Teuchos::rcp (new base_type (*this, importer_->getSourceMap(), 0)); } - fillState_ = Teuchos::rcp(new FillState(FillState::closed)); + fillState_ = Teuchos::rcp(new FE::FillState(FE::FillState::closed)); } template @@ -70,7 +70,7 @@ beginFill () { // The FEMultiVector is in owned+shared mode on construction, so we // do not throw in that case. 
- if (*activeMultiVector_ == FE_ACTIVE_OWNED) { + if (*activeMultiVector_ == FE::ACTIVE_OWNED) { switchActiveMultiVector (); } } @@ -82,7 +82,7 @@ endFill () { const char tfecfFuncName[] = "endFill: "; - if (*activeMultiVector_ == FE_ACTIVE_OWNED_PLUS_SHARED) { + if (*activeMultiVector_ == FE::ACTIVE_OWNED_PLUS_SHARED) { doOwnedPlusSharedToOwned (Tpetra::ADD); switchActiveMultiVector (); } @@ -97,11 +97,11 @@ template void FEMultiVector::beginAssembly() { const char tfecfFuncName[] = "FEMultiVector::beginAssembly: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::closed, + *fillState_ != FE::FillState::closed, std::runtime_error, "Cannot beginAssembly, matrix is not in a closed state" ); - *fillState_ = FillState::open; + *fillState_ = FE::FillState::open; this->beginFill(); } @@ -109,11 +109,11 @@ template void FEMultiVector::endAssembly() { const char tfecfFuncName[] = "FEMultiVector::endAssembly: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::open, + *fillState_ != FE::FillState::open, std::runtime_error, "Cannot endAssembly, matrix is not open to fill." ); - *fillState_ = FillState::closed; + *fillState_ = FE::FillState::closed; this->endFill(); } @@ -121,22 +121,22 @@ template void FEMultiVector::beginModify() { const char tfecfFuncName[] = "FEMultiVector::beginModify: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::closed, + *fillState_ != FE::FillState::closed, std::runtime_error, "Cannot beginModify, matrix is not in a closed state" ); - *fillState_ = FillState::modify; + *fillState_ = FE::FillState::modify; } template void FEMultiVector::endModify() { const char tfecfFuncName[] = "FEMultiVector::endModify: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::modify, + *fillState_ != FE::FillState::modify, std::runtime_error, "Cannot endModify, matrix is not open to modify." 
); - *fillState_ = FillState::closed; + *fillState_ = FE::FillState::closed; } template @@ -164,7 +164,7 @@ FEMultiVector:: doOwnedPlusSharedToOwned (const CombineMode CM) { if (! importer_.is_null () && - *activeMultiVector_ == FE_ACTIVE_OWNED_PLUS_SHARED) { + *activeMultiVector_ == FE::ACTIVE_OWNED_PLUS_SHARED) { inactiveMultiVector_->doExport (*this, *importer_, CM); } } @@ -175,7 +175,7 @@ FEMultiVector:: doOwnedToOwnedPlusShared (const CombineMode CM) { if (! importer_.is_null () && - *activeMultiVector_ == FE_ACTIVE_OWNED) { + *activeMultiVector_ == FE::ACTIVE_OWNED) { inactiveMultiVector_->doImport (*this, *importer_, CM); } } @@ -185,11 +185,11 @@ void FEMultiVector:: switchActiveMultiVector () { - if (*activeMultiVector_ == FE_ACTIVE_OWNED_PLUS_SHARED) { - *activeMultiVector_ = FE_ACTIVE_OWNED; + if (*activeMultiVector_ == FE::ACTIVE_OWNED_PLUS_SHARED) { + *activeMultiVector_ = FE::ACTIVE_OWNED; } else { - *activeMultiVector_ = FE_ACTIVE_OWNED_PLUS_SHARED; + *activeMultiVector_ = FE::ACTIVE_OWNED_PLUS_SHARED; } if (importer_.is_null ()) { From 8432626143178985ad85bbc7111cbe8c985ffc8c Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 29 Oct 2024 16:03:33 -0600 Subject: [PATCH 14/20] PyTrilinos2: Expose Tpetra::FE* Signed-off-by: Christian Glusa --- packages/PyTrilinos2/CMakeLists.txt | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/packages/PyTrilinos2/CMakeLists.txt b/packages/PyTrilinos2/CMakeLists.txt index d912b7efe078..3813a95d5cf0 100644 --- a/packages/PyTrilinos2/CMakeLists.txt +++ b/packages/PyTrilinos2/CMakeLists.txt @@ -29,7 +29,7 @@ PYTRILINOS2_CMAKE_ERROR TRIBITS_ADD_OPTION_AND_DEFINE(PyTrilinos2_BINDER_VERBOSE PYTRILINOS2_B_VERBOSE "Increase the verbosity of binder." 
- OFF ) + OFF ) SET(PyTrilinos2_BINDER_NUM_FILES "100" CACHE STRING "Maxinum number of generated files by binder.") @@ -184,7 +184,7 @@ FOREACH(line IN LISTS eti_files_without_dir) ENDFOREACH(line) file(WRITE ${all_ETI_files_list} ${CONTENTS}) -SET(ETI_classes "Tpetra_CrsMatrix;Tpetra_Vector;Tpetra_MultiVector") +SET(ETI_classes "Tpetra_CrsMatrix;Tpetra_Vector;Tpetra_MultiVector;Tpetra_FEMultiVector;Tpetra_FECrsMatrix") SET(CONTENTS "") FOREACH(line IN LISTS ETI_classes) SET(CONTENTS "${CONTENTS}${line}\n") @@ -229,7 +229,7 @@ IF(PYTRILINOS2_B_VERBOSE) ENDIF() IF(PYTRILINOS2_SUPPRESS_ERRORS) list(APPEND BINDER_OPTIONS --suppress-errors) -ENDIF() +ENDIF() list(APPEND BINDER_OPTIONS --config ${CMAKE_CURRENT_SOURCE_DIR}/scripts/PyTrilinos2_config.cfg) list(APPEND BINDER_OPTIONS --) IF(TPL_ENABLE_CUDA) @@ -241,6 +241,14 @@ if (NOT(MPI_BASE_DIR STREQUAL "")) list(APPEND BINDER_OPTIONS -I${MPI_BASE_DIR}/include) ENDIF() list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/mdspan) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/View/MDSpan) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p0009_bits) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p1684_bits) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p2389_bits) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p2630_bits) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p2642_bits) list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/src) list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_SOURCE_DIR}/src) IF(NOT DEFINED PyTrilinos2_BINDER_GCC_TOOLCHAIN) From f07a54fbacbf35d6d7954fba0d7611c47c112761 Mon Sep 17 00:00:00 2001 From: Anderson Chauphan 
Date: Tue, 29 Oct 2024 17:18:12 -0600 Subject: [PATCH 15/20] Fix spack.yml syntax Signed-off-by: Anderson Chauphan --- .github/workflows/spack.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/spack.yml b/.github/workflows/spack.yml index c722c1287ed1..59976c1d9b3e 100644 --- a/.github/workflows/spack.yml +++ b/.github/workflows/spack.yml @@ -4,9 +4,9 @@ on: types: - opened - synchronize - branches: - - master - - develop + branches: + - master + - develop workflow_dispatch: # Cancels any in progress 'workflow' associated with this PR From 6ed29e6bdb7d3854d8175c3773e7f69f1360952e Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Tue, 29 Oct 2024 17:18:36 -0600 Subject: [PATCH 16/20] Fix AT2.yml syntax Signed-off-by: Anderson Chauphan --- .github/workflows/AT2.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index 01937667d612..b232051eddf2 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -6,8 +6,8 @@ on: - opened - synchronize branches: - - master - - develop + - master + - develop workflow_dispatch: # Cancels any in progress 'workflows' associated with this PR From 16ed3aba8b676f0eb73c7f3f4761ecf67c7145d7 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 29 Oct 2024 19:37:06 -0600 Subject: [PATCH 17/20] Teuchos: Delete ConstNonconstObjectContainer::count method Signed-off-by: Christian Glusa --- .../teuchos/core/src/Teuchos_ConstNonconstObjectContainer.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/teuchos/core/src/Teuchos_ConstNonconstObjectContainer.hpp b/packages/teuchos/core/src/Teuchos_ConstNonconstObjectContainer.hpp index fae32b32a9f2..a882494097e3 100644 --- a/packages/teuchos/core/src/Teuchos_ConstNonconstObjectContainer.hpp +++ b/packages/teuchos/core/src/Teuchos_ConstNonconstObjectContainer.hpp @@ -328,9 +328,6 @@ class ConstNonconstObjectContainer { /** \brief Perform an 
implicit conversion to an RCP. */ operator RCP() const { return getConstObj(); } - /** \brief Return the internal count. */ - int count() const - { return constObj_.count(); } private: RCP constObj_; From 0acffe8caab7916cf1e688bfe636db0283a43af7 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Wed, 30 Oct 2024 09:48:01 -0600 Subject: [PATCH 18/20] tpetra: replace use of impl_dualview_is_single_device replace use of Kokkos impl_* routine, preemptive change in case internal impl routines become private members Signed-off-by: Nathan Ellingwood --- packages/tpetra/core/src/Tpetra_MultiVector_def.hpp | 2 +- .../core/test/ImportExport2/ImportExport2_UnitTests.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index 068927e39ed2..9fafc1502c56 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -1795,7 +1795,7 @@ void MultiVector::copyAndPermute( // - CombineMode needs to be INSERT. // - The number of vectors needs to be 1, otherwise we need to // reorder the received data. - if ((dual_view_type::impl_dualview_is_single_device::value || + if ((std::is_same_v || (Details::Behavior::assumeMpiIsGPUAware () && !this->need_sync_device()) || (!Details::Behavior::assumeMpiIsGPUAware () && !this->need_sync_host())) && areRemoteLIDsContiguous && diff --git a/packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp b/packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp index 1482f3132e3e..728df1dcd5b0 100644 --- a/packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp +++ b/packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp @@ -730,7 +730,7 @@ namespace { // MV::imports_ and MV::view_ have the same memory space, the // imports_ view is aliased to the data view of the target MV. 
if ((myImageID == collectRank) && (myImageID == 0)) { - if (mv_type::dual_view_type::impl_dualview_is_single_device::value) + if (std::is_same_v) TEUCHOS_ASSERT(tgt_mv->importsAreAliased()); // else { // We do not know if copyAndPermute was run on host or device. @@ -800,7 +800,7 @@ namespace { // MV::imports_ and MV::view_ have the same memory space, the // imports_ view is aliased to the data view of the target MV. if ((myImageID == collectRank) && (myImageID == 0)) { - if (mv_type::dual_view_type::impl_dualview_is_single_device::value) + if (std::is_same_v) TEUCHOS_ASSERT(tgt_mv->importsAreAliased()); // else { // We do not know if copyAndPermute was run on host or device. From 6555e68d6d82188baaa9cd37fa9751fa831b9332 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Wed, 30 Oct 2024 15:16:39 -0600 Subject: [PATCH 19/20] shylubasker: remove unused code resolve compilation errors with printRHS and printSOL Signed-off-by: Nathan Ellingwood --- .../basker/src/shylubasker_decl.hpp | 2 - .../shylu_node/basker/src/shylubasker_def.hpp | 4 -- .../basker/src/shylubasker_thread.hpp | 2 +- .../basker/src/shylubasker_util.hpp | 55 ------------------- 4 files changed, 1 insertion(+), 62 deletions(-) diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp index f9b33e325bd7..09e3f6f98382 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp @@ -1160,8 +1160,6 @@ namespace BaskerNS void printMTX(std::string fname, BASKER_MATRIX &M); void printMTX(std::string fname, BASKER_MATRIX &M, BASKER_BOOL off); void readMTX(std::string fname, BASKER_MATRIX &M); - int printRHS(); - int printSOL(); void printTree(); BASKER_INLINE diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp index c7b9d66311ab..35d8588b0bd9 100644 --- 
a/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp @@ -2307,10 +2307,6 @@ namespace BaskerNS printU(); printUMTX(); std::cout << "U printed" << std::endl; - //printRHS(); - std::cout << "RHS printed" << std::endl; - //printSOL(); - std::cout << "SOL printed" << std::endl; //printTree(); std::cout << "Tree printed" << std::endl; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_thread.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_thread.hpp index ebce20c9875f..6e4d1554c754 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_thread.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_thread.hpp @@ -272,7 +272,7 @@ namespace BaskerNS BASKER_INLINE void atomic_barrier_fanout(volatile Int &value, const Int l_size) { - Kokkos::atomic_inc(&(value)) + Kokkos::atomic_inc(&(value)); while(value < l_size) { BASKER_NO_OP; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp index 2d8322c05de2..455b76004a98 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp @@ -1472,61 +1472,6 @@ namespace BaskerNS }//end readMTX() - //Print out RHS RHS.txt - template - int Basker::printRHS() - { - if(solve_flag == false) - {return -1;} - - FILE *fp; - fp = fopen("RHS.txt", "w"); - - //over each row - for(Int r = 0; r < A.nrow; r++) - { - //over each column NOTE: come back to - //for(Int k = 0; k < rhs.size(); k++) - for(Int k = 0; k < 1; k++) - { - //fprintf(fp, "%ld %ld %f, ", (long)r, (long)gperm[r], rhs[k][r]); - fprintf(fp, "%ld %ld %.16e, ", (long)r, (long)gperm[r], rhs[k][r]); - }//end over each column - fprintf(fp, "\n"); - }//end over each row - - fclose(fp); - - return 0; - }//end printRHS() - - //Print solution SOL.txt - template - int Basker::printSOL() - { - if(solve_flag == false) - {return -1;} - - FILE *fp; - fp = 
fopen("SOL.txt", "w"); - - //over each row - for(Int r = 0; r < A.nrow; r++) - { - //over each column Note: come back to - //for(Int k = 0; k < rhs.size(); k++) - for(Int k = 0 ; k < 1; k++) - { - fprintf(fp, "%ld %ld %f, ", (long)r, (long)gperm[r], sol[k][r]); - }//end over each column - fprintf(fp, "\n"); - }//end over each row - - fclose(fp); - - return 0; - }//end printSOL() - //Prints the given tree into a file to analyze template void Basker::printTree() From e4752f04f5c31c4382601ec9796f242335bcb74c Mon Sep 17 00:00:00 2001 From: mperego Date: Fri, 1 Nov 2024 08:33:16 -0600 Subject: [PATCH 20/20] Intrepid2: Implementation of team-level Basis::getValues (#13437) - Implemented team-level getValues for classic Lagrangian basis functions. - Modified/added tests to compare the team-level getValues with host getValues - Modified impelementation of JacobiPolynomial to reduce FAD temporaries Signed-off-by: Mauro Perego --- .../Discretization/Basis/Intrepid2_Basis.hpp | 55 ++++ .../Basis/Intrepid2_HCURL_HEX_I1_FEM.hpp | 17 ++ .../Basis/Intrepid2_HCURL_HEX_I1_FEMDef.hpp | 51 ++++ .../Basis/Intrepid2_HCURL_HEX_In_FEM.hpp | 32 ++- .../Basis/Intrepid2_HCURL_HEX_In_FEMDef.hpp | 112 ++++++-- .../Basis/Intrepid2_HCURL_QUAD_I1_FEM.hpp | 32 ++- .../Basis/Intrepid2_HCURL_QUAD_I1_FEMDef.hpp | 53 +++- .../Basis/Intrepid2_HCURL_QUAD_In_FEM.hpp | 32 ++- .../Basis/Intrepid2_HCURL_QUAD_In_FEMDef.hpp | 103 ++++++-- .../Basis/Intrepid2_HCURL_TET_I1_FEM.hpp | 17 ++ .../Basis/Intrepid2_HCURL_TET_I1_FEMDef.hpp | 51 ++++ .../Basis/Intrepid2_HCURL_TET_In_FEM.hpp | 23 +- .../Basis/Intrepid2_HCURL_TET_In_FEMDef.hpp | 95 +++++-- .../Basis/Intrepid2_HCURL_TRI_I1_FEM.hpp | 17 ++ .../Basis/Intrepid2_HCURL_TRI_I1_FEMDef.hpp | 51 +++- .../Basis/Intrepid2_HCURL_TRI_In_FEM.hpp | 23 +- .../Basis/Intrepid2_HCURL_TRI_In_FEMDef.hpp | 94 +++++-- .../Basis/Intrepid2_HCURL_WEDGE_I1_FEM.hpp | 17 ++ .../Basis/Intrepid2_HCURL_WEDGE_I1_FEMDef.hpp | 52 ++++ .../Basis/Intrepid2_HDIV_HEX_I1_FEM.hpp | 17 ++ 
.../Basis/Intrepid2_HDIV_HEX_I1_FEMDef.hpp | 52 ++++ .../Basis/Intrepid2_HDIV_HEX_In_FEM.hpp | 34 ++- .../Basis/Intrepid2_HDIV_HEX_In_FEMDef.hpp | 102 ++++++-- .../Basis/Intrepid2_HDIV_QUAD_I1_FEM.hpp | 32 ++- .../Basis/Intrepid2_HDIV_QUAD_I1_FEMDef.hpp | 51 +++- .../Basis/Intrepid2_HDIV_QUAD_In_FEM.hpp | 33 ++- .../Basis/Intrepid2_HDIV_QUAD_In_FEMDef.hpp | 104 ++++++-- .../Basis/Intrepid2_HDIV_TET_I1_FEM.hpp | 35 ++- .../Basis/Intrepid2_HDIV_TET_I1_FEMDef.hpp | 52 ++++ .../Basis/Intrepid2_HDIV_TET_In_FEM.hpp | 227 ++++++++-------- .../Basis/Intrepid2_HDIV_TET_In_FEMDef.hpp | 101 ++++++-- .../Basis/Intrepid2_HDIV_TRI_I1_FEM.hpp | 35 ++- .../Basis/Intrepid2_HDIV_TRI_I1_FEMDef.hpp | 52 ++++ .../Basis/Intrepid2_HDIV_TRI_In_FEM.hpp | 60 +++-- .../Basis/Intrepid2_HDIV_TRI_In_FEMDef.hpp | 94 +++++-- .../Basis/Intrepid2_HDIV_WEDGE_I1_FEM.hpp | 17 ++ .../Basis/Intrepid2_HDIV_WEDGE_I1_FEMDef.hpp | 53 ++++ .../Basis/Intrepid2_HGRAD_HEX_C1_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_HEX_C1_FEMDef.hpp | 49 ++++ .../Basis/Intrepid2_HGRAD_HEX_C2_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_HEX_C2_FEMDef.hpp | 49 ++++ .../Basis/Intrepid2_HGRAD_HEX_Cn_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_HEX_Cn_FEMDef.hpp | 65 ++++- .../Basis/Intrepid2_HGRAD_LINE_C1_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_LINE_C1_FEMDef.hpp | 51 +++- .../Basis/Intrepid2_HGRAD_LINE_C2_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_LINE_C2_FEMDef.hpp | 51 +++- .../Basis/Intrepid2_HGRAD_LINE_Cn_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_LINE_Cn_FEMDef.hpp | 92 +++++-- .../Basis/Intrepid2_HGRAD_PYR_C1_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_PYR_C1_FEMDef.hpp | 50 +++- .../Basis/Intrepid2_HGRAD_PYR_I2_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_PYR_I2_FEMDef.hpp | 50 +++- .../Basis/Intrepid2_HGRAD_QUAD_C1_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_QUAD_C1_FEMDef.hpp | 58 +++++ .../Basis/Intrepid2_HGRAD_QUAD_C2_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_QUAD_C2_FEMDef.hpp | 61 +++++ .../Basis/Intrepid2_HGRAD_QUAD_Cn_FEM.hpp | 
17 ++ .../Basis/Intrepid2_HGRAD_QUAD_Cn_FEMDef.hpp | 127 ++++++--- .../Basis/Intrepid2_HGRAD_TET_C1_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_TET_C1_FEMDef.hpp | 49 ++++ .../Basis/Intrepid2_HGRAD_TET_C2_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_TET_C2_FEMDef.hpp | 49 ++++ .../Basis/Intrepid2_HGRAD_TET_COMP12_FEM.hpp | 21 +- .../Intrepid2_HGRAD_TET_COMP12_FEMDef.hpp | 51 +++- .../Basis/Intrepid2_HGRAD_TET_Cn_FEM.hpp | 92 ++++--- .../Basis/Intrepid2_HGRAD_TET_Cn_FEMDef.hpp | 128 ++++++--- .../Basis/Intrepid2_HGRAD_TRI_C1_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_TRI_C1_FEMDef.hpp | 58 +++++ .../Basis/Intrepid2_HGRAD_TRI_C2_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_TRI_C2_FEMDef.hpp | 58 +++++ .../Basis/Intrepid2_HGRAD_TRI_Cn_FEM.hpp | 96 ++++--- .../Basis/Intrepid2_HGRAD_TRI_Cn_FEMDef.hpp | 147 +++++++---- .../Intrepid2_HGRAD_TRI_Cn_FEM_ORTHDef.hpp | 61 ----- .../Basis/Intrepid2_HGRAD_WEDGE_C1_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_WEDGE_C1_FEMDef.hpp | 49 ++++ .../Basis/Intrepid2_HGRAD_WEDGE_C2_FEM.hpp | 17 ++ .../Basis/Intrepid2_HGRAD_WEDGE_C2_FEMDef.hpp | 82 ++++-- .../Basis/Intrepid2_HVOL_HEX_Cn_FEM.hpp | 35 ++- .../Basis/Intrepid2_HVOL_HEX_Cn_FEMDef.hpp | 102 ++++++-- .../Basis/Intrepid2_HVOL_LINE_Cn_FEM.hpp | 34 ++- .../Basis/Intrepid2_HVOL_LINE_Cn_FEMDef.hpp | 86 ++++-- .../Basis/Intrepid2_HVOL_QUAD_Cn_FEM.hpp | 36 ++- .../Basis/Intrepid2_HVOL_QUAD_Cn_FEMDef.hpp | 102 ++++++-- .../Basis/Intrepid2_HVOL_TET_Cn_FEM.hpp | 42 ++- .../Basis/Intrepid2_HVOL_TET_Cn_FEMDef.hpp | 94 +++++-- .../Basis/Intrepid2_HVOL_TRI_Cn_FEM.hpp | 43 ++- .../Basis/Intrepid2_HVOL_TRI_Cn_FEMDef.hpp | 101 ++++++-- ...Intrepid2_CubatureControlVolumeSideDef.hpp | 2 +- .../src/Shared/Intrepid2_PolylibDef.hpp | 217 ++++++++-------- .../intrepid2/src/Shared/Intrepid2_Utils.hpp | 26 ++ .../Basis/HCURL_HEX_I1_FEM/CMakeLists.txt | 82 +++++- .../Basis/HCURL_HEX_I1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HCURL_HEX_I1_FEM/test_02.hpp | 187 ++++++++++++++ 
.../Basis/HCURL_HEX_In_FEM/CMakeLists.txt | 81 +++++- .../Basis/HCURL_HEX_In_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HCURL_HEX_In_FEM/test_02.hpp | 203 +++++++++++++++ .../Basis/HCURL_QUAD_I1_FEM/CMakeLists.txt | 81 +++++- .../HCURL_QUAD_I1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HCURL_QUAD_I1_FEM/test_02.hpp | 185 +++++++++++++ .../Basis/HCURL_QUAD_In_FEM/CMakeLists.txt | 79 +++++- .../HCURL_QUAD_In_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HCURL_QUAD_In_FEM/test_02.hpp | 189 ++++++++++++++ .../Basis/HCURL_TET_I1_FEM/CMakeLists.txt | 82 +++++- .../Basis/HCURL_TET_I1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HCURL_TET_I1_FEM/test_02.hpp | 187 ++++++++++++++ .../Basis/HCURL_TET_In_FEM/CMakeLists.txt | 79 +++++- .../Basis/HCURL_TET_In_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HCURL_TET_In_FEM/test_02.hpp | 205 +++++++++++++++ .../Basis/HCURL_TRI_I1_FEM/CMakeLists.txt | 81 +++++- .../Basis/HCURL_TRI_I1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HCURL_TRI_I1_FEM/test_02.hpp | 185 +++++++++++++ .../Basis/HCURL_TRI_In_FEM/CMakeLists.txt | 79 +++++- .../Basis/HCURL_TRI_In_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HCURL_TRI_In_FEM/test_02.hpp | 189 ++++++++++++++ .../Basis/HCURL_WEDGE_I1_FEM/CMakeLists.txt | 82 +++++- .../HCURL_WEDGE_I1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HCURL_WEDGE_I1_FEM/test_02.hpp | 188 ++++++++++++++ .../Basis/HDIV_HEX_I1_FEM/CMakeLists.txt | 84 +++++- .../Basis/HDIV_HEX_I1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HDIV_HEX_I1_FEM/test_02.hpp | 186 +++++++++++++ .../Basis/HDIV_HEX_In_FEM/CMakeLists.txt | 79 +++++- .../Basis/HDIV_HEX_In_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HDIV_HEX_In_FEM/test_02.hpp | 190 ++++++++++++++ .../Basis/HDIV_QUAD_I1_FEM/CMakeLists.txt | 84 +++++- .../Basis/HDIV_QUAD_I1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HDIV_QUAD_I1_FEM/test_02.hpp | 185 +++++++++++++ .../Basis/HDIV_QUAD_In_FEM/CMakeLists.txt | 79 +++++- .../Basis/HDIV_QUAD_In_FEM/eti/test_02_ETI.in | 52 ++++ 
.../Basis/HDIV_QUAD_In_FEM/test_02.hpp | 190 ++++++++++++++ .../Basis/HDIV_TET_I1_FEM/CMakeLists.txt | 84 +++++- .../Basis/HDIV_TET_I1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HDIV_TET_I1_FEM/test_02.hpp | 185 +++++++++++++ .../Basis/HDIV_TET_In_FEM/CMakeLists.txt | 79 +++++- .../Basis/HDIV_TET_In_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HDIV_TET_In_FEM/test_02.hpp | 190 ++++++++++++++ .../Basis/HDIV_TRI_I1_FEM/CMakeLists.txt | 84 +++++- .../Basis/HDIV_TRI_I1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HDIV_TRI_I1_FEM/test_02.hpp | 185 +++++++++++++ .../Basis/HDIV_TRI_In_FEM/CMakeLists.txt | 79 +++++- .../Basis/HDIV_TRI_In_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HDIV_TRI_In_FEM/test_02.hpp | 189 ++++++++++++++ .../Basis/HDIV_WEDGE_I1_FEM/CMakeLists.txt | 81 +++++- .../HDIV_WEDGE_I1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HDIV_WEDGE_I1_FEM/test_02.hpp | 185 +++++++++++++ .../Basis/HGRAD_HEX_C1_FEM/CMakeLists.txt | 74 ++++++ .../Basis/HGRAD_HEX_C1_FEM/eti/test_03_ETI.in | 52 ++++ .../Basis/HGRAD_HEX_C1_FEM/test_03.hpp | 184 +++++++++++++ .../Basis/HGRAD_HEX_C2_FEM/CMakeLists.txt | 86 +++++- .../Basis/HGRAD_HEX_C2_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_HEX_C2_FEM/test_02.hpp | 184 +++++++++++++ .../Basis/HGRAD_HEX_Cn_FEM/CMakeLists.txt | 8 + .../Basis/HGRAD_HEX_Cn_FEM/eti/test_01_ETI.in | 5 +- .../Basis/HGRAD_HEX_Cn_FEM/eti/test_02_ETI.in | 35 ++- .../Basis/HGRAD_HEX_Cn_FEM/test_02.hpp | 201 +++++++++------ .../Basis/HGRAD_LINE_C1_FEM/CMakeLists.txt | 80 +++++- .../HGRAD_LINE_C1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_LINE_C1_FEM/test_02.hpp | 185 +++++++++++++ .../Basis/HGRAD_LINE_C2_FEM/CMakeLists.txt | 80 +++++- .../HGRAD_LINE_C2_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_LINE_C2_FEM/test_02.hpp | 184 +++++++++++++ .../Basis/HGRAD_LINE_Cn_FEM/CMakeLists.txt | 78 +++++- .../HGRAD_LINE_Cn_FEM/eti/test_01_ETI.in | 6 +- .../HGRAD_LINE_Cn_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_LINE_Cn_FEM/test_02.hpp | 188 ++++++++++++++ 
.../Basis/HGRAD_PYR_C1_FEM/CMakeLists.txt | 80 +++++- .../Basis/HGRAD_PYR_C1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_PYR_C1_FEM/test_02.hpp | 184 +++++++++++++ .../Basis/HGRAD_PYR_I2_FEM/CMakeLists.txt | 82 +++++- .../Basis/HGRAD_PYR_I2_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_PYR_I2_FEM/test_02.hpp | 184 +++++++++++++ .../Basis/HGRAD_QUAD_C1_FEM/CMakeLists.txt | 80 +++++- .../HGRAD_QUAD_C1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_QUAD_C1_FEM/test_02.hpp | 228 ++++++++++++++++ .../Basis/HGRAD_QUAD_C2_FEM/CMakeLists.txt | 86 +++++- .../HGRAD_QUAD_C2_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_QUAD_C2_FEM/test_02.hpp | 228 ++++++++++++++++ .../Basis/HGRAD_QUAD_Cn_FEM/CMakeLists.txt | 8 + .../HGRAD_QUAD_Cn_FEM/eti/test_01_ETI.in | 5 +- .../HGRAD_QUAD_Cn_FEM/eti/test_02_ETI.in | 33 ++- .../Basis/HGRAD_QUAD_Cn_FEM/test_02.hpp | 244 ++++++++++++------ .../Basis/HGRAD_TET_C1_FEM/CMakeLists.txt | 80 +++++- .../Basis/HGRAD_TET_C1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_TET_C1_FEM/test_02.hpp | 184 +++++++++++++ .../Basis/HGRAD_TET_C2_FEM/CMakeLists.txt | 80 +++++- .../Basis/HGRAD_TET_C2_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_TET_C2_FEM/test_02.hpp | 184 +++++++++++++ .../Basis/HGRAD_TET_COMP12_FEM/CMakeLists.txt | 80 +++++- .../HGRAD_TET_COMP12_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_TET_COMP12_FEM/test_02.hpp | 184 +++++++++++++ .../Basis/HGRAD_TET_Cn_FEM/CMakeLists.txt | 8 + .../Basis/HGRAD_TET_Cn_FEM/eti/test_01_ETI.in | 6 +- .../Basis/HGRAD_TET_Cn_FEM/eti/test_02_ETI.in | 33 ++- .../Basis/HGRAD_TET_Cn_FEM/test_02.hpp | 202 +++++++++------ .../Basis/HGRAD_TRI_C1_FEM/CMakeLists.txt | 80 +++++- .../Basis/HGRAD_TRI_C1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_TRI_C1_FEM/test_02.hpp | 228 ++++++++++++++++ .../Basis/HGRAD_TRI_C2_FEM/CMakeLists.txt | 80 +++++- .../Basis/HGRAD_TRI_C2_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_TRI_C2_FEM/test_02.hpp | 228 ++++++++++++++++ 
.../Basis/HGRAD_TRI_Cn_FEM/CMakeLists.txt | 8 + .../Basis/HGRAD_TRI_Cn_FEM/eti/test_01_ETI.in | 8 +- .../Basis/HGRAD_TRI_Cn_FEM/eti/test_02_ETI.in | 31 ++- .../Basis/HGRAD_TRI_Cn_FEM/test_02.hpp | 243 +++++++++++------ .../Basis/HGRAD_WEDGE_C1_FEM/CMakeLists.txt | 80 +++++- .../HGRAD_WEDGE_C1_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_WEDGE_C1_FEM/test_02.hpp | 184 +++++++++++++ .../Basis/HGRAD_WEDGE_C2_FEM/CMakeLists.txt | 87 ++++++- .../HGRAD_WEDGE_C2_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HGRAD_WEDGE_C2_FEM/test_02.hpp | 182 +++++++++++++ .../Basis/HVOL_HEX_Cn_FEM/CMakeLists.txt | 79 +++++- .../Basis/HVOL_HEX_Cn_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HVOL_HEX_Cn_FEM/test_02.hpp | 144 +++++++++++ .../Basis/HVOL_LINE_Cn_FEM/CMakeLists.txt | 79 +++++- .../Basis/HVOL_LINE_Cn_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HVOL_LINE_Cn_FEM/test_02.hpp | 144 +++++++++++ .../Basis/HVOL_QUAD_Cn_FEM/CMakeLists.txt | 79 +++++- .../Basis/HVOL_QUAD_Cn_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HVOL_QUAD_Cn_FEM/test_02.hpp | 144 +++++++++++ .../Basis/HVOL_TET_Cn_FEM/CMakeLists.txt | 79 +++++- .../Basis/HVOL_TET_Cn_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HVOL_TET_Cn_FEM/test_02.hpp | 144 +++++++++++ .../Basis/HVOL_TRI_Cn_FEM/CMakeLists.txt | 79 +++++- .../Basis/HVOL_TRI_Cn_FEM/eti/test_02_ETI.in | 52 ++++ .../Basis/HVOL_TRI_Cn_FEM/test_02.hpp | 145 +++++++++++ .../unit-test/Shared/Polylib/test_01.hpp | 16 +- 226 files changed, 17059 insertions(+), 1394 deletions(-) create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/test_02.hpp create mode 100644 
packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/eti/test_02_ETI.in create mode 100644 
packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/eti/test_03_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/test_03.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/test_02.hpp create mode 100644 
packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/eti/test_02_ETI.in create mode 100644 
packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/test_02.hpp create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/eti/test_02_ETI.in create mode 100644 packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/test_02.hpp diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_Basis.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_Basis.hpp index 74b34efb6681..5779d95741e8 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_Basis.hpp +++ 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_Basis.hpp @@ -379,6 +379,61 @@ using HostBasisPtr = BasisPtrinputPoints is only used to deduce the type of the points where to evaluate basis functions. + The rank of inputPoints and its size are not relevant, however, + when using DFAD types, inputPoints cannot be empty, + otherwise the size of the scratch space needed won't be deduced correctly. + + \param inputPoints [in] - view used only to deduce the type of the evaluation points + \param perTeamSpaceSize [out] - size of the scratch space needed per team + \param perThreadSpaceSize [out] - size of the scratch space needed per thread + */ + virtual + void getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const { + INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE( true, std::logic_error, + ">>> ERROR (Basis::getValuesScratchSpace): this method is not supported or should be overridden accordingly by derived classes."); + } + + + /** \brief Team-level evaluation of basis functions on a reference cell. + + Returns values of operatorType acting on basis functions for a set of + points in the reference cell for which the basis is defined. + + The interface also allows selecting basis functions associated to a particular entity. + As an example, if subcellDim==1 (edges) and subcellOrdinal==0, outputValues will contain all the basis functions associated with the first edge.
+ outputValues will contain all the cell basis functions when the default value (-1) is used for subcellDim and subcellOrdinal + + \param outputValues [out] - variable rank array with the basis values + \param inputPoints [in] - rank-2 array (P,D) with the evaluation points + \param operatorType [in] - the operator acting on the basis functions + \param teamMember [in] - team member of the Kokkos::TeamPolicy + \param scratchStorage [in] - scratch space to use by each team + \param subcellDim [in] - the dimension of the subcells, the default value of -1 returns basis functions associated to subcells of all dimensions + \param subcellOrdinal [in] - the ordinal of the subcell, the default value of -1 returns basis functions associated to subcells of all ordinals + + \remark This function is supposed to be called within a TeamPolicy kernel. + The size of the required scratch space is determined by the getScratchSpaceSize function. + */ + KOKKOS_INLINE_FUNCTION + virtual + void getValues( OutputViewType /* outputValues */, + const PointViewType /* inputPoints */, + const EOperator /* operatorType */, + const typename Kokkos::TeamPolicy::member_type& teamMember, + const typename ExecutionSpace::scratch_memory_space &scratchStorage, + const ordinal_type subcellDim=-1, + const ordinal_type subcellOrdinal=-1) const { + INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE( true, std::logic_error, + ">>> ERROR (Basis::getValues): this method is not supported or should be overridden accordingly by derived classes."); + } + /** \brief Evaluation of a FEM basis on a reference cell.
Returns values of operatorType acting on FEM basis functions for a set of diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEM.hpp index 299054557fca..72d0e9112c01 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEM.hpp @@ -185,6 +185,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEMDef.hpp index 7eff91667e1b..71ea78656fc1 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEMDef.hpp @@ -330,6 +330,57 @@ namespace Intrepid2 { } + template + void + Basis_HCURL_HEX_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_HEX_I1_FEM::getValues( + OutputViewType outputValues, + const 
PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_HEX_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_HEX_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_HEX_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HCURL_HEX_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEM.hpp index 1af120be9949..64327bb29c08 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEM.hpp @@ -148,20 +148,21 @@ namespace Intrepid2 { class Basis_HCURL_HEX_In_FEM : public Basis { 
public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; /** \brief Constructor. */ Basis_HCURL_HEX_In_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -184,6 +185,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEMDef.hpp index 1d18b7887096..182c05d721b0 100644 --- 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEMDef.hpp @@ -21,19 +21,19 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HCURL_HEX_In_FEM::Serial:: + Basis_HCURL_HEX_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinvLine, - const vinvViewType vinvBubble) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinvLine, + const VinvViewType vinvBubble) { const ordinal_type cardLine = vinvLine.extent(0); const ordinal_type cardBubble = vinvBubble.extent(0); @@ -44,22 +44,22 @@ namespace Intrepid2 { const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); const auto input_z = Kokkos::subview(input, Kokkos::ALL(), range_type(2,3)); - const ordinal_type dim_s = get_dimension_scalar(work); + const ordinal_type dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data() + cardLine*npts*dim_s; auto ptr2 = work.data() + 2*cardLine*npts*dim_s; auto ptr3 = work.data() + 3*cardLine*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType outputLine_A(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType outputLine_B(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); - viewType outputBubble(Kokkos::view_wrap(ptr3, vcprop), cardBubble, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, 
npts); + ViewType outputLine_A(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType outputLine_B(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + ViewType outputBubble(Kokkos::view_wrap(ptr3, vcprop), cardBubble, npts); // tensor product ordinal_type idx = 0; @@ -142,12 +142,12 @@ namespace Intrepid2 { auto ptr4 = work.data() + 4*cardLine*npts*dim_s; auto ptr5 = work.data() + 5*cardLine*npts*dim_s; - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType outputLine_A(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType outputLine_B(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); - viewType outputLine_DA(Kokkos::view_wrap(ptr3, vcprop), cardLine, npts, 1); - viewType outputLine_DB(Kokkos::view_wrap(ptr4, vcprop), cardLine, npts, 1); - viewType outputBubble(Kokkos::view_wrap(ptr5, vcprop), cardBubble, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType outputLine_A(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType outputLine_B(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + ViewType outputLine_DA(Kokkos::view_wrap(ptr3, vcprop), cardLine, npts, 1); + ViewType outputLine_DB(Kokkos::view_wrap(ptr4, vcprop), cardLine, npts, 1); + ViewType outputBubble(Kokkos::view_wrap(ptr5, vcprop), cardBubble, npts); // tensor product ordinal_type idx = 0; @@ -588,6 +588,70 @@ namespace Intrepid2 { this->dofCoeffs_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoeffsHost); Kokkos::deep_copy(this->dofCoeffs_, dofCoeffsHost); } -} + + template + void + Basis_HCURL_HEX_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + ordinal_type scalarWorkViewExtent = (operatorType == OPERATOR_VALUE) ? 
+ 3*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0): + 5*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0); + perThreadSpaceSize = scalarWorkViewExtent*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_HEX_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_HEX_In_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type scalarSizePerPoint = (operatorType == OPERATOR_VALUE) ? 
+ 3*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0): + 5*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0); + ordinal_type sizePerPoint = scalarSizePerPoint*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_HEX_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_HEX_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HCURL_HEX_In_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEM.hpp index 15f266e2db91..24c4b26bf746 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEM.hpp @@ -144,20 +144,21 @@ namespace Intrepid2 { 
typename pointValueType = double> class Basis_HCURL_QUAD_I1_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; /** \brief Constructor. */ Basis_HCURL_QUAD_I1_FEM(); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -178,6 +179,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEMDef.hpp index 548929fb74cc..8380a4665a05 100644 --- 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEMDef.hpp @@ -60,7 +60,7 @@ namespace Intrepid2 { default: { INTREPID2_TEST_FOR_ABORT( opType != OPERATOR_VALUE && opType != OPERATOR_CURL, - ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_C1_FEM::Serial::getValues) operator is not supported"); + ">>> ERROR: (Intrepid2::Basis_HCURL_QUAD_I1_FEM::Serial::getValues) operator is not supported"); } } //end switch } @@ -219,7 +219,56 @@ namespace Intrepid2 { } + template + void + Basis_HCURL_QUAD_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } -}// namespace Intrepid2 + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_QUAD_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_QUAD_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_QUAD_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + 
Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_QUAD_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HCURL_QUAD_I1_FEM::getValues), Operator Type not supported."); + } + } + } + +}// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEM.hpp index 077f6de07afb..13d0c227d421 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEM.hpp @@ -136,20 +136,21 @@ namespace Intrepid2 { class Basis_HCURL_QUAD_In_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; /** \brief Constructor. 
*/ Basis_HCURL_QUAD_In_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -172,6 +173,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEMDef.hpp index 13a732abb88d..b00248a51fc8 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEMDef.hpp @@ -21,19 +21,19 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HCURL_QUAD_In_FEM::Serial:: + 
Basis_HCURL_QUAD_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinvLine, - const vinvViewType vinvBubble) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinvLine, + const VinvViewType vinvBubble) { const ordinal_type cardLine = vinvLine.extent(0); const ordinal_type cardBubble = vinvBubble.extent(0); @@ -43,19 +43,19 @@ namespace Intrepid2 { const auto input_x = Kokkos::subview(input, Kokkos::ALL(), range_type(0,1)); const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); - const int dim_s = get_dimension_scalar(work); + const int dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType outputBubble(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType outputBubble(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); // tensor product ordinal_type idx = 0; @@ -101,11 +101,11 @@ namespace Intrepid2 { case OPERATOR_CURL: { ordinal_type idx = 0; { // x - component - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); // x bubble value - viewType output_x(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType output_x(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); // y line grad - viewType 
output_y(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); + ViewType output_y(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, workLine, vinvBubble); @@ -120,11 +120,11 @@ namespace Intrepid2 { output.access(idx,k) = -output_x.access(i,k)*output_y.access(j,k,0); } { // y - component - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); // x line grad - viewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); + ViewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); // y bubble value - viewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, workLine, vinvBubble); @@ -386,6 +386,63 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoeffs_, dofCoeffsHost); } -} + template + void + Basis_HCURL_QUAD_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = (2*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0))*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_QUAD_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_QUAD_In_FEM::getValues), The capability of selecting subsets of basis 
functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = (2*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0))*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_QUAD_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_QUAD_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HCURL_QUAD_In_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEM.hpp index 6d90318a4961..d293da0e38c0 100644 --- 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEM.hpp @@ -184,6 +184,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEMDef.hpp index 9c3d2b2d1c23..4d38583f8e49 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEMDef.hpp @@ -255,6 +255,57 @@ namespace Intrepid2 { } + template + void + Basis_HCURL_TET_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_TET_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + 
INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_TET_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_TET_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_TET_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HCURL_TET_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEM.hpp index cae49e5b09a5..ed253d57ec13 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEM.hpp @@ -217,9 +217,26 @@ class Basis_HCURL_TET_In_FEM operatorType); } - virtual - void - getDofCoords( ScalarViewType dofCoords ) const override { + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + 
KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + virtual + void + getDofCoords( ScalarViewType dofCoords ) const override { #ifdef HAVE_INTREPID2_DEBUG // Verify rank of output array. INTREPID2_TEST_FOR_EXCEPTION( rank(dofCoords) != 2, std::invalid_argument, diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEMDef.hpp index 8bb82254291f..56149a4a1820 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEMDef.hpp @@ -26,18 +26,18 @@ namespace Intrepid2 { namespace Impl { -template +template template +typename InputViewType, +typename WorkViewType, +typename VinvViewType> KOKKOS_INLINE_FUNCTION void -Basis_HCURL_TET_In_FEM::Serial:: +Basis_HCURL_TET_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType coeffs ) { + const InputViewType input, + WorkViewType work, + const VinvViewType coeffs ) { constexpr ordinal_type spaceDim = 3; const ordinal_type @@ -54,17 +54,17 @@ getValues( OutputViewType output, } } - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - workViewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), 
card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TET_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); @@ -282,7 +282,7 @@ Basis_HCURL_TET_In_FEM( const ordinal_type order, #ifdef HAVE_INTREPID2_DEBUG ordinal_type num_nonzero_sv = 0; for (int i=0;i tolerence()); + num_nonzero_sv += (S(i,0) > 10*tolerence()); INTREPID2_TEST_FOR_EXCEPTION( num_nonzero_sv != card, std::invalid_argument, ">>> ERROR: (Intrepid2::Basis_HCURL_TET_In_FEM( order, pointType), Matrix V1 should have rank equal to the cardinality of HCURL space"); @@ -562,5 +562,64 @@ Basis_HCURL_TET_In_FEM( const ordinal_type order, posDfOrd); } } + +template +void +Basis_HCURL_TET_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + ordinal_type scalarWorkViewExtent = (operatorType == OPERATOR_VALUE) ? 
this->basisCardinality_ : 7*this->basisCardinality_; + perThreadSpaceSize = scalarWorkViewExtent*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); +} + +template +KOKKOS_INLINE_FUNCTION +void +Basis_HCURL_TET_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_TET_In_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type scalarSizePerPoint = (operatorType == OPERATOR_VALUE) ? 
this->basisCardinality_ : 7*this->basisCardinality_; + ordinal_type sizePerPoint = scalarSizePerPoint*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_TET_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_TET_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HCURL_TET_In_FEM): getValues not implemented for this operator"); + } + } +} } // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEM.hpp index 816b999560a6..109c96988649 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEM.hpp @@ -187,6 +187,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, 
+ const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEMDef.hpp index 85e639ea8f10..813b764608db 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEMDef.hpp @@ -208,7 +208,56 @@ namespace Intrepid2 { } + template + void + Basis_HCURL_TRI_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_TRI_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_TRI_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + 
switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_TRI_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_TRI_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HCURL_TRI_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif - diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEM.hpp index 3c34d125847a..a030f292fb50 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEM.hpp @@ -209,9 +209,26 @@ class Basis_HCURL_TRI_In_FEM operatorType); } - virtual - void - getDofCoords( ScalarViewType dofCoords ) const override { + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const 
ordinal_type subcellOrdinal = -1) const override; + + virtual + void + getDofCoords( ScalarViewType dofCoords ) const override { #ifdef HAVE_INTREPID2_DEBUG // Verify rank of output array. INTREPID2_TEST_FOR_EXCEPTION( rank(dofCoords) != 2, std::invalid_argument, diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEMDef.hpp index 7d10682a5e45..6cb65ab386de 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEMDef.hpp @@ -25,18 +25,18 @@ namespace Intrepid2 { namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HCURL_TRI_In_FEM::Serial:: + Basis_HCURL_TRI_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType coeffs ) { + const InputViewType input, + WorkViewType work, + const VinvViewType coeffs ) { constexpr ordinal_type spaceDim = 2; const ordinal_type @@ -53,17 +53,16 @@ namespace Intrepid2 { } } - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - workViewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts), dummyView; Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); @@ -452,5 +451,66 @@ namespace Intrepid2 { posDfOrd); } } + + template + void + 
Basis_HCURL_TRI_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + ordinal_type scalarWorkViewExtent = (operatorType == OPERATOR_VALUE) ? this->basisCardinality_ : 5*this->basisCardinality_; + perThreadSpaceSize = scalarWorkViewExtent*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_TRI_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_TRI_In_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type scalarSizePerPoint = (operatorType == OPERATOR_VALUE) ? 
this->basisCardinality_ : 5*this->basisCardinality_; + ordinal_type sizePerPoint = scalarSizePerPoint*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_TRI_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_TRI_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HCURL_TRI_In_FEM): getValues not implemented for this operator"); + } + } + } + } // namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEM.hpp index c7587cf3eec1..d2831d0ac47a 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEM.hpp @@ -185,6 +185,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& 
perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEMDef.hpp index 59ad4da436e8..754355ffbd7d 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEMDef.hpp @@ -276,5 +276,57 @@ namespace Intrepid2 { } + template + void + Basis_HCURL_WEDGE_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_WEDGE_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_WEDGE_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int 
numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_WEDGE_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_WEDGE_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HCURL_WEDGE_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEM.hpp index 1de1d7c654c7..66ab525b3aec 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEM.hpp @@ -190,6 +190,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const 
override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEMDef.hpp index 79e9aaef60f8..b7e865178e64 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEMDef.hpp @@ -236,5 +236,57 @@ namespace Intrepid2 { } + template + void + Basis_HDIV_HEX_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_HEX_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_HEX_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_HEX_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& 
pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_HEX_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HDIV_HEX_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEM.hpp index 4ed98a89967f..f563bd998237 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEM.hpp @@ -138,20 +138,21 @@ namespace Intrepid2 { class Basis_HDIV_HEX_In_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; /** \brief Constructor. 
*/ Basis_HDIV_HEX_In_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -174,6 +175,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { @@ -254,8 +272,6 @@ namespace Intrepid2 { }// namespace Intrepid2 - - #include "Intrepid2_HDIV_HEX_In_FEMDef.hpp" #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEMDef.hpp index 0bae2c8b1b3d..0d5d25113bdb 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEMDef.hpp @@ -21,19 +21,19 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + 
typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HDIV_HEX_In_FEM::Serial:: + Basis_HDIV_HEX_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinvLine, - const vinvViewType vinvBubble) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinvLine, + const VinvViewType vinvBubble) { const ordinal_type cardLine = vinvLine.extent(0); const ordinal_type cardBubble = vinvBubble.extent(0); @@ -44,21 +44,21 @@ namespace Intrepid2 { const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); const auto input_z = Kokkos::subview(input, Kokkos::ALL(), range_type(2,3)); - const ordinal_type dim_s = get_dimension_scalar(work); + const ordinal_type dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; auto ptr3 = work.data()+(2*cardLine+cardBubble)*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType outputBubble_A(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); - viewType outputBubble_B(Kokkos::view_wrap(ptr3, vcprop), cardBubble, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType outputBubble_A(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType outputBubble_B(Kokkos::view_wrap(ptr3, vcprop), cardBubble, npts); // tensor product ordinal_type idx = 0; @@ -138,13 +138,13 @@ namespace Intrepid2 { break; } case OPERATOR_DIV: { - viewType 
workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); // A line value - viewType outputBubble_A(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType outputBubble_A(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); // B line value - viewType outputBubble_B(Kokkos::view_wrap(ptr3, vcprop), cardBubble, npts); + ViewType outputBubble_B(Kokkos::view_wrap(ptr3, vcprop), cardBubble, npts); // Line grad - viewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts, 1); + ViewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts, 1); // tensor product ordinal_type idx = 0; @@ -508,6 +508,64 @@ namespace Intrepid2 { this->dofCoeffs_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoeffsHost); Kokkos::deep_copy(this->dofCoeffs_, dofCoeffsHost); } -} + + template + void + Basis_HDIV_HEX_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = (2*this->vinvLine_.extent(0)+2*this->vinvBubble_.extent(0))*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_HEX_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_HEX_In_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + 
using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = (2*this->vinvLine_.extent(0)+2*this->vinvBubble_.extent(0))*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_HEX_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_HEX_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HDIV_HEX_In_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEM.hpp index fab13618142c..66c5843d4da0 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEM.hpp @@ 
-145,20 +145,21 @@ namespace Intrepid2 { typename pointValueType = double> class Basis_HDIV_QUAD_I1_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; /** \brief Constructor. */ Basis_HDIV_QUAD_I1_FEM(); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -179,6 +180,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEMDef.hpp index 
1924df01978b..22eeef61929a 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEMDef.hpp @@ -213,10 +213,59 @@ namespace Intrepid2 { this->dofCoeffs_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoeffs); Kokkos::deep_copy(this->dofCoeffs_, dofCoeffs); - } + template + void + Basis_HDIV_QUAD_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_QUAD_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_QUAD_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_QUAD_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = 
Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_QUAD_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HDIV_QUAD_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEM.hpp index 3db4472991f2..60da55f64220 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEM.hpp @@ -135,20 +135,21 @@ namespace Intrepid2 { class Basis_HDIV_QUAD_In_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; /** \brief Constructor. 
*/ Basis_HDIV_QUAD_In_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -170,6 +171,24 @@ namespace Intrepid2 { this->vinvBubble_, operatorType ); } + + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEMDef.hpp index f6958e8152c2..ee5bdc9458c4 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEMDef.hpp @@ -21,19 +21,19 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HDIV_QUAD_In_FEM::Serial:: + 
Basis_HDIV_QUAD_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinvLine, - const vinvViewType vinvBubble) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinvLine, + const VinvViewType vinvBubble) { const ordinal_type cardLine = vinvLine.extent(0); const ordinal_type cardBubble = vinvBubble.extent(0); @@ -43,20 +43,19 @@ namespace Intrepid2 { const auto input_x = Kokkos::subview(input, Kokkos::ALL(), range_type(0,1)); const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); - const int dim_s = get_dimension_scalar(work); + const ordinal_type dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; - - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType outputBubble(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType outputBubble(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); // tensor product ordinal_type idx = 0; @@ -100,11 +99,11 @@ namespace Intrepid2 { case OPERATOR_DIV: { ordinal_type idx = 0; { // x - component - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); // x bubble value - viewType output_x(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType output_x(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); // y line grad - 
viewType output_y(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); + ViewType output_y(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, workLine, vinvBubble); @@ -119,11 +118,11 @@ namespace Intrepid2 { output.access(idx,k) = output_x.access(i,k)*output_y.access(j,k,0); } { // y - component - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); // x line grad - viewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); + ViewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); // y bubble value - viewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, workLine, vinvBubble); @@ -381,6 +380,63 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoeffs_, dofCoeffsHost); } -} + template + void + Basis_HDIV_QUAD_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = (2*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0))*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_QUAD_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_QUAD_In_FEM::getValues), The capability of selecting subsets of basis 
functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = (2*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0))*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_QUAD_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_QUAD_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HDIV_QUAD_In_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEM.hpp index a5b72c79d540..40b58117d658 100644 --- 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEM.hpp @@ -144,19 +144,21 @@ namespace Intrepid2 { typename pointValueType = double> class Basis_HDIV_TET_I1_FEM: public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. */ Basis_HDIV_TET_I1_FEM(); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -177,6 +179,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEMDef.hpp 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEMDef.hpp index febd2f87333c..7aac987e9439 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEMDef.hpp @@ -230,6 +230,58 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoeffs_, dofCoeffs); } + template + void + Basis_HDIV_TET_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_TET_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_TET_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_TET_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( 
inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_TET_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HDIV_TET_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEM.hpp index c2f17ac83aef..9224322f9539 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEM.hpp @@ -165,131 +165,144 @@ template class Basis_HDIV_TET_In_FEM : public Basis { - public: - typedef typename Basis::OrdinalTypeArray1DHost OrdinalTypeArray1DHost; - typedef typename Basis::OrdinalTypeArray2DHost OrdinalTypeArray2DHost; - typedef typename Basis::OrdinalTypeArray3DHost OrdinalTypeArray3DHost; - - /** \brief Constructor. - */ - Basis_HDIV_TET_In_FEM(const ordinal_type order, - const EPointType pointType = POINTTYPE_EQUISPACED); - - - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - typedef typename Basis::scalarType scalarType; - - using Basis::getValues; - - virtual - void - getValues( /* */ OutputViewType outputValues, - const PointViewType inputPoints, - const EOperator operatorType = OPERATOR_VALUE) const override { + public: + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; + + /** \brief Constructor. 
+ */ + Basis_HDIV_TET_In_FEM(const ordinal_type order, + const EPointType pointType = POINTTYPE_EQUISPACED); + + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; + using scalarType = typename BasisBase::scalarType; + using BasisBase::getValues; + + virtual + void + getValues( /* */ OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override { #ifdef HAVE_INTREPID2_DEBUG - Intrepid2::getValues_HDIV_Args(outputValues, + Intrepid2::getValues_HDIV_Args(outputValues, inputPoints, operatorType, this->getBaseCellTopology(), this->getCardinality() ); #endif -constexpr ordinal_type numPtsPerEval = Parameters::MaxNumPtsPerBasisEval; -Impl::Basis_HDIV_TET_In_FEM:: -getValues( outputValues, - inputPoints, - this->coeffs_, - operatorType); - } - - virtual - void - getDofCoords( ScalarViewType dofCoords ) const override { + constexpr ordinal_type numPtsPerEval = Parameters::MaxNumPtsPerBasisEval; + Impl::Basis_HDIV_TET_In_FEM:: + getValues( outputValues, + inputPoints, + this->coeffs_, + operatorType); + } + + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + virtual + void + getDofCoords( ScalarViewType dofCoords ) const override { #ifdef HAVE_INTREPID2_DEBUG - // Verify rank of output array. 
- INTREPID2_TEST_FOR_EXCEPTION( dofCoords.rank() != 2, std::invalid_argument, - ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoords) rank = 2 required for dofCoords array"); - // Verify 0th dimension of output array. - INTREPID2_TEST_FOR_EXCEPTION( static_cast(dofCoords.extent(0)) != this->getCardinality(), std::invalid_argument, - ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoords) mismatch in number of dof and 0th dimension of dofCoords array"); - // Verify 1st dimension of output array. - INTREPID2_TEST_FOR_EXCEPTION( dofCoords.extent(1) != this->getBaseCellTopology().getDimension(), std::invalid_argument, - ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoords) incorrect reference cell (1st) dimension in dofCoords array"); + // Verify rank of output array. + INTREPID2_TEST_FOR_EXCEPTION( dofCoords.rank() != 2, std::invalid_argument, + ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoords) rank = 2 required for dofCoords array"); + // Verify 0th dimension of output array. + INTREPID2_TEST_FOR_EXCEPTION( static_cast(dofCoords.extent(0)) != this->getCardinality(), std::invalid_argument, + ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoords) mismatch in number of dof and 0th dimension of dofCoords array"); + // Verify 1st dimension of output array. + INTREPID2_TEST_FOR_EXCEPTION( dofCoords.extent(1) != this->getBaseCellTopology().getDimension(), std::invalid_argument, + ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoords) incorrect reference cell (1st) dimension in dofCoords array"); #endif - Kokkos::deep_copy(dofCoords, this->dofCoords_); - } + Kokkos::deep_copy(dofCoords, this->dofCoords_); + } - virtual - void - getDofCoeffs( ScalarViewType dofCoeffs ) const override { -#ifdef HAVE_INTREPID2_DEBUG - // Verify rank of output array. 
- INTREPID2_TEST_FOR_EXCEPTION( dofCoeffs.rank() != 2, std::invalid_argument, - ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoeffs) rank = 2 required for dofCoeffs array"); - // Verify 0th dimension of output array. - INTREPID2_TEST_FOR_EXCEPTION( static_cast(dofCoeffs.extent(0)) != this->getCardinality(), std::invalid_argument, + virtual + void + getDofCoeffs( ScalarViewType dofCoeffs ) const override { + #ifdef HAVE_INTREPID2_DEBUG + // Verify rank of output array. + INTREPID2_TEST_FOR_EXCEPTION( dofCoeffs.rank() != 2, std::invalid_argument, + ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoeffs) rank = 2 required for dofCoeffs array"); + // Verify 0th dimension of output array. + INTREPID2_TEST_FOR_EXCEPTION( static_cast(dofCoeffs.extent(0)) != this->getCardinality(), std::invalid_argument, ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoeffs) mismatch in number of dof and 0th dimension of dofCoeffs array"); - // Verify 1st dimension of output array. - INTREPID2_TEST_FOR_EXCEPTION( dofCoeffs.extent(1) != this->getBaseCellTopology().getDimension(), std::invalid_argument, + // Verify 1st dimension of output array. + INTREPID2_TEST_FOR_EXCEPTION( dofCoeffs.extent(1) != this->getBaseCellTopology().getDimension(), std::invalid_argument, ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoeffs) incorrect reference cell (1st) dimension in dofCoeffs array"); #endif - Kokkos::deep_copy(dofCoeffs, this->dofCoeffs_); - } - - void - getExpansionCoeffs( ScalarViewType coeffs ) const { - // has to be same rank and dimensions - Kokkos::deep_copy(coeffs, this->coeffs_); - } - - virtual - const char* - getName() const override { - return "Intrepid2_HDIV_TET_In_FEM"; - } - - virtual - bool - requireOrientation() const override { - return true; - } - - /** \brief returns the basis associated to a subCell. - - The bases of the subCell are the restriction to the subCell of the bases of the parent cell, - projected along normal to the subCell. 
- - \param [in] subCellDim - dimension of subCell - \param [in] subCellOrd - position of the subCell among of the subCells having the same dimension - \return pointer to the subCell basis of dimension subCellDim and position subCellOrd - */ - BasisPtr - getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override{ + Kokkos::deep_copy(dofCoeffs, this->dofCoeffs_); + } - if(subCellDim == 2) { - return Teuchos::rcp(new - Basis_HVOL_TRI_Cn_FEM - (this->basisDegree_-1, pointType_)); + void + getExpansionCoeffs( ScalarViewType coeffs ) const { + // has to be same rank and dimensions + Kokkos::deep_copy(coeffs, this->coeffs_); } - INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Input parameters out of bounds"); - } - BasisPtr - getHostBasis() const override{ - return Teuchos::rcp(new Basis_HDIV_TET_In_FEM(this->basisDegree_, pointType_)); - } - private: + virtual + const char* + getName() const override { + return "Intrepid2_HDIV_TET_In_FEM"; + } - /** \brief expansion coefficients of the nodal basis in terms of the - orthgonal one */ - Kokkos::DynRankView coeffs_; + virtual + bool + requireOrientation() const override { + return true; + } - /** \brief type of lattice used for creating the DoF coordinates */ - EPointType pointType_; + /** \brief returns the basis associated to a subCell. -}; + The bases of the subCell are the restriction to the subCell of the bases of the parent cell, + projected along normal to the subCell. 
+ + \param [in] subCellDim - dimension of subCell + \param [in] subCellOrd - position of the subCell among of the subCells having the same dimension + \return pointer to the subCell basis of dimension subCellDim and position subCellOrd + */ + BasisPtr + getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override{ + + if(subCellDim == 2) { + return Teuchos::rcp(new + Basis_HVOL_TRI_Cn_FEM + (this->basisDegree_-1, pointType_)); + } + INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Input parameters out of bounds"); + } + + BasisPtr + getHostBasis() const override{ + return Teuchos::rcp(new Basis_HDIV_TET_In_FEM(this->basisDegree_, pointType_)); + } + private: + + /** \brief expansion coefficients of the nodal basis in terms of the orthgonal one */ + Kokkos::DynRankView coeffs_; + + /** \brief type of lattice used for creating the DoF coordinates */ + EPointType pointType_; + }; }// namespace Intrepid2 diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEMDef.hpp index e8c97199198f..96e0e7cf2267 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEMDef.hpp @@ -25,18 +25,18 @@ namespace Intrepid2 { namespace Impl { -template +template template +typename InputViewType, +typename WorkViewType, +typename VinvViewType> KOKKOS_INLINE_FUNCTION void -Basis_HDIV_TET_In_FEM::Serial:: +Basis_HDIV_TET_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType coeffs ) { + const InputViewType input, + WorkViewType work, + const VinvViewType coeffs ) { constexpr ordinal_type spaceDim = 3; const ordinal_type @@ -53,17 +53,17 @@ getValues( OutputViewType output, } } - typedef typename Kokkos::DynRankView viewType; - auto vcprop = 
Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - workViewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TET_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); @@ -104,10 +104,10 @@ typename inputPointValueType, class ...inputPointProperties, typename vinvValueType, class ...vinvProperties> void Basis_HDIV_TET_In_FEM:: -getValues( /* */ Kokkos::DynRankView outputValues, - const Kokkos::DynRankView inputPoints, - const Kokkos::DynRankView coeffs, - const EOperator operatorType) { +getValues( Kokkos::DynRankView outputValues, + const Kokkos::DynRankView inputPoints, + const Kokkos::DynRankView coeffs, + const EOperator operatorType) { typedef Kokkos::DynRankView outputValueViewType; typedef Kokkos::DynRankView inputPointViewType; typedef Kokkos::DynRankView vinvViewType; @@ -450,5 +450,64 @@ Basis_HDIV_TET_In_FEM( const ordinal_type order, posDfOrd); } } + +template +void +Basis_HDIV_TET_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + ordinal_type scalarWorkViewExtent = (operatorType == OPERATOR_VALUE) ? 
this->basisCardinality_ : 7*this->basisCardinality_; + perThreadSpaceSize = scalarWorkViewExtent*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); +} + +template +KOKKOS_INLINE_FUNCTION +void +Basis_HDIV_TET_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type scalarSizePerPoint = (operatorType == OPERATOR_VALUE) ? 
this->basisCardinality_ : 7*this->basisCardinality_; + ordinal_type sizePerPoint = scalarSizePerPoint*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_TET_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_TET_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HDIV_TET_In_FEM): getValues not implemented for this operator"); + } + } +} } // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEM.hpp index 6f6596778567..8315ac027c92 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEM.hpp @@ -142,19 +142,21 @@ namespace Intrepid2 { typename pointValueType = double> class Basis_HDIV_TRI_I1_FEM: public Basis { public: - using OrdinalTypeArray1DHost = typename 
Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. */ Basis_HDIV_TRI_I1_FEM(); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -175,6 +177,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEMDef.hpp index a7c13864b3a2..ef00a009fdc7 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEMDef.hpp @@ -72,6 +72,7 @@ namespace Intrepid2 { typedef Kokkos::DynRankView inputPointViewType; 
typedef typename ExecSpace::ExecSpaceType ExecSpaceType; + // Number of evaluation points = dim 0 of inputPoints const auto loopSize = inputPoints.extent(0); Kokkos::RangePolicy > policy(0, loopSize); @@ -215,5 +216,56 @@ namespace Intrepid2 { } + template + void + Basis_HDIV_TRI_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_TRI_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_TRI_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_TRI_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_TRI_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + 
INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HDIV_TRI_!1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEM.hpp index 71fd4f8ae99d..1ca324c938ef 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEM.hpp @@ -169,31 +169,30 @@ template class Basis_HDIV_TRI_In_FEM : public Basis { - public: - typedef typename Basis::OrdinalTypeArray1DHost OrdinalTypeArray1DHost; - typedef typename Basis::OrdinalTypeArray2DHost OrdinalTypeArray2DHost; - typedef typename Basis::OrdinalTypeArray3DHost OrdinalTypeArray3DHost; - - /** \brief Constructor. - */ - Basis_HDIV_TRI_In_FEM(const ordinal_type order, + public: + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; + + /** \brief Constructor. 
+ */ + Basis_HDIV_TRI_In_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using HostBasis = Basis_HDIV_TRI_In_FEM; + using HostBasis = Basis_HDIV_TRI_In_FEM; - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - typedef typename Basis::scalarType scalarType; - - using Basis::getValues; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; + using scalarType = typename BasisBase::scalarType; + using BasisBase::getValues; virtual void - getValues( /* */ OutputViewType outputValues, - const PointViewType inputPoints, - const EOperator operatorType = OPERATOR_VALUE) const override { + getValues( OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override { #ifdef HAVE_INTREPID2_DEBUG Intrepid2::getValues_HDIV_Args(outputValues, inputPoints, @@ -209,9 +208,26 @@ class Basis_HDIV_TRI_In_FEM operatorType); } - virtual - void - getDofCoords( ScalarViewType dofCoords ) const override { + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + virtual + void + getDofCoords( ScalarViewType dofCoords ) const override { #ifdef HAVE_INTREPID2_DEBUG // Verify rank of output array. 
INTREPID2_TEST_FOR_EXCEPTION( dofCoords.rank() != 2, std::invalid_argument, diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEMDef.hpp index e412989caa56..89c86f5274e9 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEMDef.hpp @@ -24,18 +24,18 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { -template +template template +typename InputViewType, +typename WorkViewType, +typename VinvViewType> KOKKOS_INLINE_FUNCTION void -Basis_HDIV_TRI_In_FEM::Serial:: -getValues( /* */ OutputViewType output, - const inputViewType input, - /* */ workViewType work, - const vinvViewType coeffs ) { +Basis_HDIV_TRI_In_FEM::Serial:: +getValues( OutputViewType output, + const InputViewType input, + WorkViewType work, + const VinvViewType coeffs ) { constexpr ordinal_type spaceDim = 2; const ordinal_type @@ -52,17 +52,17 @@ getValues( /* */ OutputViewType output, } } - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - workViewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); @@ -447,5 +447,65 @@ Basis_HDIV_TRI_In_FEM( const ordinal_type order, posDfOrd); } } + + template + void + Basis_HDIV_TRI_In_FEM::getScratchSpaceSize( + 
ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + ordinal_type scalarWorkViewExtent = (operatorType == OPERATOR_VALUE) ? this->basisCardinality_ : 5*this->basisCardinality_; + perThreadSpaceSize = scalarWorkViewExtent*get_dimension_scalar(inputPoints)*sizeof(scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_TRI_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_TRI_In_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using WorkViewType = Kokkos::DynRankView< scalarType, typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type scalarSizePerPoint = (operatorType == OPERATOR_VALUE) ? 
this->basisCardinality_ : 5*this->basisCardinality_; + ordinal_type sizePerPoint = scalarSizePerPoint*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_TRI_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_TRI_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HDIV_TRI_In_FEM): getValues not implemented for this operator"); + } + } + } + } // namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEM.hpp index d30c7d1ee4a3..56a257b9ce54 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEM.hpp @@ -159,6 +159,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& 
perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEMDef.hpp index 79dabbc6bfa6..ceefba8ac59e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEMDef.hpp @@ -183,7 +183,60 @@ namespace Intrepid2 { this->dofCoeffs_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoeffs); Kokkos::deep_copy(this->dofCoeffs_, dofCoeffs); + + } + + template + void + Basis_HDIV_WEDGE_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; } + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_WEDGE_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_WEDGE_I1_FEM::getValues), The capability of selecting 
subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_WEDGE_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_WEDGE_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HDIV_WEDGE_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEM.hpp index 2ea99d81a411..59dfdfff0451 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEM.hpp @@ -177,6 +177,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename 
DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEMDef.hpp index 96a5fcf39429..3c2813faa81e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEMDef.hpp @@ -413,6 +413,55 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_HEX_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_HEX_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_HEX_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, 
Kokkos::ALL() ); + Impl::Basis_HGRAD_HEX_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_HEX_C1_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEM.hpp index 9823b1fe23bd..1af419c4300e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEM.hpp @@ -245,6 +245,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEMDef.hpp index 22e1a54b74b4..c5a5ec102087 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEMDef.hpp 
@@ -1626,8 +1626,57 @@ namespace Intrepid2 { this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoords); Kokkos::deep_copy(this->dofCoords_, dofCoords); + } + template + void + Basis_HGRAD_HEX_DEG2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; } + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_HEX_DEG2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_HEX_DEG2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + using SerialValue = typename Impl::Basis_HGRAD_HEX_DEG2_FEM::template Serial; + SerialValue::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + using SerialGrad = typename Impl::Basis_HGRAD_HEX_DEG2_FEM::template Serial; + 
SerialGrad::getValues( output, input); + }); + break; + default: {} + } + } }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEM.hpp index 4bcefb52d286..987206b0a1e0 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEM.hpp @@ -184,6 +184,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEMDef.hpp index ac50cde72fb3..36139dfb95f4 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEMDef.hpp @@ -44,14 +44,14 @@ namespace Intrepid2 { const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); const auto input_z = Kokkos::subview(input, Kokkos::ALL(), range_type(2,3)); - const ordinal_type dim_s = get_dimension_scalar(work); + const ordinal_type dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; auto ptr3 = 
work.data()+3*cardLine*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView viewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); switch (opType) { case OPERATOR_VALUE: { @@ -382,7 +382,64 @@ namespace Intrepid2 { this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoordsHost); Kokkos::deep_copy(this->dofCoords_, dofCoordsHost); } - + + template + void + Basis_HGRAD_HEX_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + (void) operatorType; //avoid warning for unused variable + perTeamSpaceSize = 0; + perThreadSpaceSize = 4*this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_HEX_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_HEX_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = 4*this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case 
OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_HEX_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_HEX_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HGRAD_TET_Cn_FEM): getValues not implemented for this operator"); + } + } + } }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEM.hpp index c07fdfd71f04..71ee77e7d55a 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEM.hpp @@ -161,6 +161,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename 
Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEMDef.hpp index 4572b5b5c899..05291e830a3d 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEMDef.hpp @@ -177,6 +177,55 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } -} + template + void + Basis_HGRAD_LINE_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_LINE_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, 
Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_LINE_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_LINE_C1_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + +}// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEM.hpp index 595cc815e9be..4e141fc5ad41 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEM.hpp @@ -161,6 +161,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEMDef.hpp index c08bd3692a27..cb3157028b0e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEMDef.hpp 
+++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEMDef.hpp @@ -173,6 +173,55 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } -} + template + void + Basis_HGRAD_LINE_C2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_LINE_C2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_C2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_LINE_C2_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_LINE_C2_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + +}// namespace Intrepid2 #endif diff --git 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEM.hpp index c5e0e2308545..ac2bfdbcae0b 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEM.hpp @@ -192,6 +192,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEMDef.hpp index fc116eb32ebe..6140c13821cb 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEMDef.hpp @@ -23,16 +23,16 @@ namespace Intrepid2 { template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv, + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, const ordinal_type operatorDn ) { ordinal_type opDn = operatorDn; @@ -42,12 +42,12 @@ namespace Intrepid2 { const ordinal_type 
order = card - 1; const double alpha = 0.0, beta = 0.0; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); switch (opType) { case OPERATOR_VALUE: { - viewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts); + ViewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts); Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI:: Serial::getValues(phis, input, order, alpha, beta); @@ -75,7 +75,7 @@ namespace Intrepid2 { case OPERATOR_Dn: { // dkcard is always 1 for 1D element const ordinal_type dkcard = 1; - viewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts, dkcard); + ViewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts, dkcard); Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI:: Serial::getValues(phis, input, order, alpha, beta, opDn); @@ -326,22 +326,64 @@ namespace Intrepid2 { posDfOrd); } } + + template + void + Basis_HGRAD_LINE_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_LINE_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = 
inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_LINE_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_LINE_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HGRAD_LINE_Cn_FEM): getValues not implemented for this operator"); + } + } + } }// namespace Intrepid2 #endif - - - - - - - - - - - - - - - diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEM.hpp index 36463fc8baec..7f00c2a90a2d 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEM.hpp +++ 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEM.hpp @@ -163,6 +163,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEMDef.hpp index e25212499fbd..078ad0e97178 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEMDef.hpp @@ -256,6 +256,54 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } -} + template + void + Basis_HGRAD_PYR_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_PYR_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim 
<= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_PYR_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_PYR_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_PYR_C1_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + +}// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEM.hpp index aad5bae97d00..81a09cc56c91 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEM.hpp @@ -180,6 +180,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename 
DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEMDef.hpp index 3247c5aaa0b7..a6ce124d1c2b 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEMDef.hpp @@ -371,6 +371,54 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } -} + template + void + Basis_HGRAD_PYR_I2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_PYR_I2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_PYR_I2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, 
Kokkos::ALL() ); + Impl::Basis_HGRAD_PYR_I2_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_PYR_I2_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + +}// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEM.hpp index 17af8c1c8685..e6b9d7b10c44 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEM.hpp @@ -167,6 +167,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEMDef.hpp index 39504493f063..246a92ac44d5 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEMDef.hpp +++ 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEMDef.hpp @@ -245,5 +245,63 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_QUAD_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_QUAD_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_QUAD_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_QUAD_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange 
(team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_QUAD_C1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_C1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEM.hpp index d82127eb1ead..038214825966 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEM.hpp @@ -194,6 +194,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEMDef.hpp index 4ec411a0ffc1..36ae4378091c 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEMDef.hpp @@ -557,5 +557,66 @@ namespace 
Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_QUAD_DEG2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_QUAD_DEG2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_DEG2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + using SerialValue = typename Impl::Basis_HGRAD_QUAD_DEG2_FEM::template Serial; + SerialValue::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + using SerialGrad = typename Impl::Basis_HGRAD_QUAD_DEG2_FEM::template Serial; + SerialGrad::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange 
(team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + using SerialCurl = typename Impl::Basis_HGRAD_QUAD_DEG2_FEM::template Serial; + SerialCurl::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_DEG2_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEM.hpp index d07b46cafb28..00a206c12ec8 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEM.hpp @@ -181,6 +181,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEMDef.hpp index 582b465a6c57..9e232352285e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEMDef.hpp +++ 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEMDef.hpp @@ -21,18 +21,18 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HGRAD_QUAD_Cn_FEM::Serial:: + Basis_HGRAD_QUAD_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv, + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, const ordinal_type operatorDn ) { ordinal_type opDn = operatorDn; @@ -43,19 +43,19 @@ namespace Intrepid2 { const auto input_x = Kokkos::subview(input, Kokkos::ALL(), range_type(0,1)); const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); - const int dim_s = get_dimension_scalar(work); + const int dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - viewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + ViewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); @@ -73,29 +73,29 @@ namespace Intrepid2 { } case OPERATOR_CURL: { for (auto l=0;l<2;++l) { - viewType 
work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType output_x, output_y; + ViewType output_x, output_y; - typename workViewType::value_type s = 0.0; + typename WorkViewType::value_type s = 0.0; if (l) { // l = 1 - output_x = viewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts, 1); + output_x = ViewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts, 1); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv, 1); - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv); s = -1.0; } else { // l = 0 - output_x = viewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + output_x = ViewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv, 1); @@ -122,33 +122,33 @@ namespace Intrepid2 { case OPERATOR_D8: case OPERATOR_D9: case OPERATOR_D10: - opDn = getOperatorOrder(opType); + opDn = getOperatorOrder(OpType); case OPERATOR_Dn: { const auto dkcard = opDn + 1; for (auto l=0;l:: getValues(output_x, input_x, work_line, vinv, mult_x); } else { - output_x = viewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + output_x = ViewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); } if (mult_y) { - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); 
Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv, mult_y); } else { - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv); } @@ -357,7 +357,72 @@ namespace Intrepid2 { this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoordsHost); Kokkos::deep_copy(this->dofCoords_, dofCoordsHost); } - -}// namespace Intrepid2 + + template + void + Basis_HGRAD_QUAD_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 3*this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_QUAD_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = 3*this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + 
switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_QUAD_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_QUAD_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_QUAD_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HGRAD_QUAD_Cn_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEM.hpp index a2bd5cce665e..d8ed43bad7c7 100644 --- 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEM.hpp @@ -164,6 +164,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEMDef.hpp index 65d632b1b578..c8e0cc996c65 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEMDef.hpp @@ -202,5 +202,54 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_TET_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TET_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const 
ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TET_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TET_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TET_C1_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEM.hpp index 3b544a29b8b9..703eef86b224 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEM.hpp @@ -183,6 +183,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename 
Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEMDef.hpp index 36aa3d7df7b8..7f4f39634cd0 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEMDef.hpp @@ -323,5 +323,54 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_TET_C2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TET_C2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TET_C2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() 
); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TET_C2_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TET_C2_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEM.hpp index 2bd2814a2d6e..6cb9802be376 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEM.hpp @@ -203,12 +203,23 @@ namespace Intrepid2 { operatorType); } - /** \brief Returns spatial locations (coordinates) of degrees of freedom on a - reference Tetrahedron. 
+ virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; - \param DofCoords [out] - array with the coordinates of degrees of freedom, - dimensioned (F,D) - */ virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEMDef.hpp index ca7c10d67005..09888eddc924 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEMDef.hpp @@ -408,6 +408,55 @@ namespace Intrepid2 { this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoords); Kokkos::deep_copy(this->dofCoords_, dofCoords); } -} + template + void + Basis_HGRAD_TET_COMP12_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TET_COMP12_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const 
ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TET_COMP12_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TET_COMP12_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TET_COMP12_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + +}// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEM.hpp index 39b8900291f4..d23d95594bdb 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEM.hpp @@ -62,53 +62,57 @@ namespace Intrepid2 { /** \brief See Intrepid2::Basis_HGRAD_TET_Cn_FEM */ - template + template struct Serial { - template + template KOKKOS_INLINE_FUNCTION static void - getValues( outputValueViewType outputValues, - const inputPointViewType inputPoints, - workViewType work, - const vinvViewType vinv ); + getValues( OutputValueViewType outputValues, + const InputPointViewType inputPoints, + WorkViewType work, + const 
VinvViewType vinv, + const ordinal_type order); }; template + typename OutputValueValueType, class ...OutputValueProperties, + typename InputPointValueType, class ...InputPointProperties, + typename VinvValueType, class ...VinvProperties> static void getValues( const typename DeviceType::execution_space& space, - Kokkos::DynRankView outputValues, - const Kokkos::DynRankView inputPoints, - const Kokkos::DynRankView vinv, + Kokkos::DynRankView outputValues, + const Kokkos::DynRankView inputPoints, + const Kokkos::DynRankView vinv, + const ordinal_type order, const EOperator operatorType); /** \brief See Intrepid2::Basis_HGRAD_TET_Cn_FEM */ - template struct Functor { - outputValueViewType _outputValues; - const inputPointViewType _inputPoints; - const vinvViewType _vinv; - workViewType _work; + OutputValueViewType _outputValues; + const InputPointViewType _inputPoints; + const VinvViewType _vinv; + WorkViewType _work; + const ordinal_type _order; KOKKOS_INLINE_FUNCTION - Functor( outputValueViewType outputValues_, - inputPointViewType inputPoints_, - vinvViewType vinv_, - workViewType work_) + Functor( OutputValueViewType outputValues_, + InputPointViewType inputPoints_, + VinvViewType vinv_, + WorkViewType work_, + ordinal_type order_) : _outputValues(outputValues_), _inputPoints(inputPoints_), - _vinv(vinv_), _work(work_) {} + _vinv(vinv_), _work(work_), _order(order_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_type iter) const { @@ -118,15 +122,15 @@ namespace Intrepid2 { const auto ptRange = Kokkos::pair(ptBegin, ptEnd); const auto input = Kokkos::subview( _inputPoints, ptRange, Kokkos::ALL() ); - typename workViewType::pointer_type ptr = _work.data() + _work.extent(0)*ptBegin*get_dimension_scalar(_work); + typename WorkViewType::pointer_type ptr = _work.data() + _work.extent(0)*ptBegin*get_dimension_scalar(_work); auto vcprop = Kokkos::common_view_alloc_prop(_work); - workViewType work(Kokkos::view_wrap(ptr,vcprop), (ptEnd-ptBegin)*_work.extent(0)); + 
WorkViewType work(Kokkos::view_wrap(ptr,vcprop), (ptEnd-ptBegin)*_work.extent(0)); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE : { auto output = Kokkos::subview( _outputValues, Kokkos::ALL(), ptRange ); - Serial::getValues( output, input, work, _vinv ); + Serial::getValues( output, input, work, _vinv, _order ); break; } case OPERATOR_GRAD : @@ -135,7 +139,7 @@ namespace Intrepid2 { //case OPERATOR_D3 : { auto output = Kokkos::subview( _outputValues, Kokkos::ALL(), ptRange, Kokkos::ALL() ); - Serial::getValues( output, input, work, _vinv ); + Serial::getValues( output, input, work, _vinv, _order ); break; } default: { @@ -204,9 +208,29 @@ namespace Intrepid2 { outputValues, inputPoints, this->vinv_, + this->basisDegree_, operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEMDef.hpp index c2c5aaf6cbe3..1c8715525bc0 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEMDef.hpp @@ -24,44 +24,36 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { -template +template template 
+typename InputViewType, +typename WorkViewType, +typename VinvViewType> KOKKOS_INLINE_FUNCTION void -Basis_HGRAD_TET_Cn_FEM::Serial:: +Basis_HGRAD_TET_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv ) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, + const ordinal_type order ) { constexpr ordinal_type spaceDim = 3; const ordinal_type card = vinv.extent(0), npts = input.extent(0); - // compute order - ordinal_type order = 0; - for (ordinal_type p=0;p<=Parameters::MaxOrder;++p) { - if (card == Intrepid2::getPnCardinality(p)) { - order = p; - break; - } - } - - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - viewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TET_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); + Serial::getValues(phis, input, workView, order); for (ordinal_type i=0;i(); //(orDn + 1); - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); - viewType dummyView; + const ordinal_type dkcard = getDkCardinality(); //(orDn + 1); + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); + ViewType dummyView; Impl::Basis_HGRAD_TET_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i outputValues, const Kokkos::DynRankView inputPoints, const Kokkos::DynRankView vinv, + const ordinal_type order, const EOperator 
operatorType) { typedef Kokkos::DynRankView outputValueViewType; typedef Kokkos::DynRankView inputPointViewType; @@ -156,7 +149,7 @@ getValues( workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TET_Cn_FEM::getValues::work", vcprop), cardinality, inputPoints.extent(0)); typedef Functor FunctorType; - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } case OPERATOR_GRAD: @@ -164,23 +157,16 @@ getValues( workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TET_Cn_FEM::getValues::work", vcprop), cardinality*(2*spaceDim+1), inputPoints.extent(0)); typedef Functor FunctorType; - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } case OPERATOR_D2: { typedef Functor FunctorType; workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TET_Cn_FEM::getValues::work", vcprop), cardinality*outputValues.extent(2), inputPoints.extent(0)); - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } - /* case OPERATOR_D3: { - typedef Functor FunctorType; - workViewType work(Kokkos::view_alloc("Basis_HGRAD_TET_Cn_FEM::getValues::work", vcprop), cardinality, inputPoints.extent(0), outputValues.extent(2)); - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); - break; - }*/ default: { INTREPID2_TEST_FOR_EXCEPTION( true , std::invalid_argument, ">>> ERROR (Basis_HGRAD_TET_Cn_FEM): Operator type not implemented" ); @@ -431,5 +417,65 @@ Basis_HGRAD_TET_Cn_FEM( const ordinal_type order, posDfOrd); } } + + template + void + Basis_HGRAD_TET_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const 
PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = getWorkSizePerPoint(operatorType)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TET_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TET_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + constexpr ordinal_type spaceDim = 3; + auto sizePerPoint = (operatorType==OPERATOR_VALUE) ? 
+ this->vinv_.extent(0)*get_dimension_scalar(inputPoints) : + (2*spaceDim+1)*this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_TET_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_TET_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HGRAD_TET_Cn_FEM): getValues not implemented for this operator"); + } + } + } + } // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEM.hpp index 46349310b210..81439bdf3f50 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEM.hpp @@ -162,6 +162,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + 
ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEMDef.hpp index e771ae7a3ee0..bc926788c290 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEMDef.hpp @@ -204,5 +204,63 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_TRI_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TRI_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; 
//avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TRI_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TRI_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TRI_C1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_C1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEM.hpp index 9eb45f9c2716..627fa113720e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEM.hpp @@ -173,6 +173,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const 
override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEMDef.hpp index 87bb96e2bbe0..86df77f41a27 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEMDef.hpp @@ -263,5 +263,63 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_TRI_C2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TRI_C2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_C2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: 
+ Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TRI_C2_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TRI_C2_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TRI_C2_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_C2_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM.hpp index c8bc97c3fb76..17ada895efe0 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM.hpp @@ -60,53 +60,57 @@ namespace Intrepid2 { work is a rank 1 view having the same value_type of inputPoints and having size equal to getWorkSizePerPoint()*inputPoints.extent(0); */ - template + template struct Serial { - template + template KOKKOS_INLINE_FUNCTION static void - getValues( outputValueViewType outputValues, - const inputPointViewType inputPoints, - workViewType work, - const 
vinvViewType vinv ); + getValues( OutputValueViewType outputValues, + const InputPointViewType inputPoints, + WorkViewType work, + const VinvViewType vinv, + const ordinal_type order); }; template + typename OutputValueValueType, class ...OutputValueProperties, + typename InputPointValueType, class ...InputPointProperties, + typename VinvValueType, class ...VinvProperties> static void - getValues(const typename DeviceType::execution_space& space, - Kokkos::DynRankView outputValues, - const Kokkos::DynRankView inputPoints, - const Kokkos::DynRankView vinv, - const EOperator operatorType); + getValues( const typename DeviceType::execution_space& space, + Kokkos::DynRankView outputValues, + const Kokkos::DynRankView inputPoints, + const Kokkos::DynRankView vinv, + const ordinal_type order, + const EOperator operatorType); /** \brief See Intrepid2::Basis_HGRAD_TRI_Cn_FEM */ - template struct Functor { - outputValueViewType _outputValues; - const inputPointViewType _inputPoints; - const vinvViewType _vinv; - workViewType _work; + OutputValueViewType _outputValues; + const InputPointViewType _inputPoints; + const VinvViewType _vinv; + WorkViewType _work; + const ordinal_type _order; KOKKOS_INLINE_FUNCTION - Functor( outputValueViewType outputValues_, - inputPointViewType inputPoints_, - vinvViewType vinv_, - workViewType work_) + Functor( OutputValueViewType outputValues_, + InputPointViewType inputPoints_, + VinvViewType vinv_, + WorkViewType work_, + ordinal_type order_) : _outputValues(outputValues_), _inputPoints(inputPoints_), - _vinv(vinv_), _work(work_) {} + _vinv(vinv_), _work(work_), _order(order_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_type iter) const { @@ -116,22 +120,22 @@ namespace Intrepid2 { const auto ptRange = Kokkos::pair(ptBegin, ptEnd); const auto input = Kokkos::subview( _inputPoints, ptRange, Kokkos::ALL() ); - typename workViewType::pointer_type ptr = _work.data() + _work.extent(0)*ptBegin*get_dimension_scalar(_work); + typename 
WorkViewType::pointer_type ptr = _work.data() + _work.extent(0)*ptBegin*get_dimension_scalar(_work); auto vcprop = Kokkos::common_view_alloc_prop(_work); - workViewType work(Kokkos::view_wrap(ptr,vcprop), (ptEnd-ptBegin)*_work.extent(0)); + WorkViewType work(Kokkos::view_wrap(ptr,vcprop), (ptEnd-ptBegin)*_work.extent(0)); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE : { auto output = Kokkos::subview( _outputValues, Kokkos::ALL(), ptRange ); - Serial::getValues( output, input, work, _vinv ); + Serial::getValues( output, input, work, _vinv, _order ); break; } case OPERATOR_CURL: case OPERATOR_D1: case OPERATOR_D2: { auto output = Kokkos::subview( _outputValues, Kokkos::ALL(), ptRange, Kokkos::ALL() ); - Serial::getValues( output, input, work, _vinv ); + Serial::getValues( output, input, work, _vinv, _order ); break; } default: { @@ -200,9 +204,29 @@ namespace Intrepid2 { outputValues, inputPoints, this->vinv_, + this->basisDegree_, operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEMDef.hpp index 681148713a06..c7b7a40cfa7b 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEMDef.hpp +++ 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEMDef.hpp @@ -23,44 +23,36 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { -template +template template +typename InputViewType, +typename WorkViewType, +typename VinvViewType> KOKKOS_INLINE_FUNCTION void -Basis_HGRAD_TRI_Cn_FEM::Serial:: +Basis_HGRAD_TRI_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv ) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, + const ordinal_type order ) { constexpr ordinal_type spaceDim = 2; const ordinal_type card = vinv.extent(0), npts = input.extent(0); - // compute order - ordinal_type order = 0; - for (ordinal_type p=0;p<=Parameters::MaxOrder;++p) { - if (card == Intrepid2::getPnCardinality(p) ) { - order = p; - break; - } - } - - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - viewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); + Serial::getValues(phis, input, workView, order); for (ordinal_type i=0;i(); //(orDn + 1); - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); - viewType dummyView; + const ordinal_type dkcard = getDkCardinality(); //(orDn + 1); + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); + ViewType dummyView; Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH:: - 
Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i outputValues, const Kokkos::DynRankView inputPoints, const Kokkos::DynRankView vinv, + const ordinal_type order, const EOperator operatorType) { typedef Kokkos::DynRankView outputValueViewType; typedef Kokkos::DynRankView inputPointViewType; @@ -175,7 +168,7 @@ getValues( workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TRI_Cn_FEM::getValues::work", vcprop), cardinality, inputPoints.extent(0)); typedef Functor FunctorType; - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } case OPERATOR_GRAD: @@ -183,30 +176,23 @@ getValues( workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TRI_Cn_FEM::getValues::work", vcprop), cardinality*(2*spaceDim+1), inputPoints.extent(0)); typedef Functor FunctorType; - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } case OPERATOR_CURL: { workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TRI_Cn_FEM::getValues::work", vcprop), cardinality*(2*spaceDim+1), inputPoints.extent(0)); typedef Functor FunctorType; - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } case OPERATOR_D2: { typedef Functor FunctorType; workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TRI_Cn_FEM::getValues::work", vcprop), cardinality*outputValues.extent(2), inputPoints.extent(0)); - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } - /* case OPERATOR_D3: { - typedef Functor 
FunctorType; - workViewType work(Kokkos::view_alloc("Basis_HGRAD_TRI_Cn_FEM::getValues::work", vcprop), cardinality, inputPoints.extent(0), outputValues.extent(2)); - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); - break; - }*/ default: { INTREPID2_TEST_FOR_EXCEPTION( true , std::invalid_argument, ">>> ERROR (Basis_HGRAD_TRI_Cn_FEM): Operator type not implemented" ); @@ -242,7 +228,7 @@ Basis_HGRAD_TRI_Cn_FEM( const ordinal_type order, PointTools::getLattice( dofCoords, cellTopo, order, offset, - pointType_ ); + this->pointType_ ); this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoords); Kokkos::deep_copy(this->dofCoords_, dofCoords); @@ -384,5 +370,74 @@ Basis_HGRAD_TRI_Cn_FEM( const ordinal_type order, posDfOrd); } } + + template + void + Basis_HGRAD_TRI_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = getWorkSizePerPoint(operatorType)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TRI_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename 
DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + constexpr ordinal_type spaceDim = 2; + auto sizePerPoint = (operatorType==OPERATOR_VALUE) ? + this->vinv_.extent(0)*get_dimension_scalar(inputPoints) : + (2*spaceDim+1)*this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_TRI_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_TRI_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_TRI_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + 
break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HGRAD_TRI_Cn_FEM): getValues not implemented for this operator"); + } + } + } + } // namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM_ORTHDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM_ORTHDef.hpp index 6f5e5abb1dd0..a2b4271518ba 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM_ORTHDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM_ORTHDef.hpp @@ -198,69 +198,8 @@ void OrthPolynomialTri::ge const inputViewType /* input */, workViewType /* work */, const ordinal_type /* order */ ) { -#if 0 //#ifdef HAVE_INTREPID2_SACADO - -constexpr ordinal_type spaceDim = 2; -constexpr ordinal_type maxCard = Intrepid2::getPnCardinality(); - -typedef typename OutputViewType::value_type value_type; -typedef Sacado::Fad::SFad fad_type; - -const ordinal_type -npts = input.extent(0), -card = output.extent(0); - -// use stack buffer -fad_type inBuf[Parameters::MaxNumPtsPerBasisEval][spaceDim], -outBuf[maxCard][Parameters::MaxNumPtsPerBasisEval][n]; - -typedef typename inputViewType::memory_space memory_space; -typedef typename Kokkos::View outViewType; -typedef typename Kokkos::View inViewType; -auto vcprop = Kokkos::common_view_alloc_prop(input); - -inViewType in(Kokkos::view_wrap((value_type*)&inBuf[0][0], vcprop), npts, spaceDim); -outViewType out(Kokkos::view_wrap((value_type*)&outBuf[0][0][0], vcprop), card, npts, n); - -for (ordinal_type i=0;i outViewType_; -outViewType_ workView; -if (n==2) { - //char outBuf[bufSize*sizeof(typename inViewType::value_type)]; - fad_type outBuf[maxCard][Parameters::MaxNumPtsPerBasisEval][spaceDim+1]; - auto vcprop = Kokkos::common_view_alloc_prop(in); - workView = outViewType_( Kokkos::view_wrap((value_type*)&outBuf[0][0][0], vcprop), card, npts, spaceDim+1); -} -OrthPolynomialTri::generate(out, in, workView, 
order); - -for (ordinal_type i=0;i 0) { - //n=2: (f_x)_x, (f_y)_x - //n=3: (f_xx)_x, (f_xy)_x, (f_yy)_x - ordinal_type i_Dnm1 = i_dy; - output.access(i,j,i_Dn) = out(i,j,i_Dnm1).dx(0); - } - else { - //n=2: (f_y)_y, (f_z)_y - //n=3: (f_yy)_y - ordinal_type i_Dnm1 = i_dy-1; - output.access(i,j,i_Dn) = out(i,j,i_Dnm1).dx(1); - } - } - } -#else INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_Cn_FEM_ORTH::OrthPolynomialTri) Computing of second and higher-order derivatives is not currently supported"); -#endif } diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEM.hpp index 15daedfbfe49..5b8b73634bb4 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEM.hpp @@ -166,6 +166,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEMDef.hpp index 9d2c461edca2..8d76318a49e0 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEMDef.hpp +++ 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEMDef.hpp @@ -245,5 +245,54 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_WEDGE_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_WEDGE_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_WEDGE_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_WEDGE_C1_FEM::template Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_WEDGE_C1_FEM::template Serial::getValues( output, input); + }); + break; + default: {} + } + } + }// namespace Intrepid2 #endif 
diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEM.hpp index d4cb38e7ca55..c952afcf0e6e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEM.hpp @@ -215,6 +215,23 @@ namespace Intrepid2 { operatorType );; } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEMDef.hpp index 363d21ad19ea..9f5327d94187 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEMDef.hpp @@ -30,12 +30,13 @@ namespace Intrepid2 { Basis_HGRAD_WEDGE_DEG2_FEM::Serial:: getValues( OutputViewType output, const inputViewType input ) { + typedef typename inputViewType::value_type value_type; switch (opType) { case OPERATOR_VALUE: { - const auto x = input(0); - const auto y = input(1); - const auto z = input(2); - const auto w = 1.0 - x - y; + const value_type x = input(0); + const value_type y = input(1); + const value_type z = input(2); + const value_type w = 1.0 - x - y; // 
output is a rank-1 array with dimensions (basisCardinality_) if constexpr (!serendipity) { @@ -80,9 +81,9 @@ namespace Intrepid2 { break; } case OPERATOR_GRAD: { - const auto x = input(0); - const auto y = input(1); - const auto z = input(2); + const value_type x = input(0); + const value_type y = input(1); + const value_type z = input(2); if constexpr (!serendipity) { output.access(0, 0) = ((-3 + 4*x + 4*y)*(-1 + z)*z)/2.; @@ -158,7 +159,7 @@ namespace Intrepid2 { output.access(17, 1) = 4*(-1 + x + 2*y)*(-1 + z*z); output.access(17, 2) = 8*y*(-1 + x + y)*z; } else { - const auto w = 1.0 - x - y; + const value_type w = 1.0 - x - y; output.access(0, 0) = -(2.0*w - 1.0 - 0.5*z)*(1.0 - z); output.access(0, 1) = -(2.0*w - 1.0 - 0.5*z)*(1.0 - z); @@ -223,9 +224,9 @@ namespace Intrepid2 { break; } case OPERATOR_D2: { - const auto x = input(0); - const auto y = input(1); - const auto z = input(2); + const value_type x = input(0); + const value_type y = input(1); + const value_type z = input(2); if constexpr (!serendipity) { output.access(0, 0) = 2.*(-1. 
+ z)*z; @@ -356,7 +357,7 @@ namespace Intrepid2 { } else { //serendipity element - const auto w = 1.0 - x - y; + const value_type w = 1.0 - x - y; output.access(0, 0) = 2.0*(1.0 - z); output.access(0, 1) = 2.0*(1.0 - z); output.access(0, 2) = 2.0*w - 0.5 - z; @@ -466,9 +467,9 @@ namespace Intrepid2 { } case OPERATOR_D3: { if constexpr (!serendipity) { - const auto x = input(0); - const auto y = input(1); - const auto z = input(2); + const value_type x = input(0); + const value_type y = input(1); + const value_type z = input(2); output.access(0, 0) = 0.; output.access(0, 1) = 0.; @@ -1082,5 +1083,56 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_WEDGE_DEG2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_WEDGE_DEG2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_WEDGE_DEG2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + 
using SerialValue = typename Impl::Basis_HGRAD_WEDGE_DEG2_FEM::template Serial; + SerialValue::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + using SerialGrad = typename Impl::Basis_HGRAD_WEDGE_DEG2_FEM::template Serial; + SerialGrad::getValues( output, input); + }); + break; + default: {} + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEM.hpp index 96cf0a64405b..388eb9ccdd1b 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEM.hpp @@ -137,20 +137,22 @@ namespace Intrepid2 { class Basis_HVOL_HEX_Cn_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. 
*/ Basis_HVOL_HEX_Cn_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -172,6 +174,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEMDef.hpp index 652df8ee2689..617eeb9cad84 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEMDef.hpp @@ -9,7 +9,7 @@ /** \file Intrepid2_HVOL_HEX_Cn_FEMDef.hpp \brief Definition file for FEM basis functions of degree n for H(vol) functions on HEX cells - \author Created by M. Perego, based on the Intrepid2::HGRAD_HEX_Cn_FEM class + \author Created by M. 
Perego, based on the Intrepid2::HVOL_HEX_Cn_FEM class */ #ifndef __INTREPID2_HVOL_HEX_CN_FEMDEF_HPP__ @@ -20,18 +20,18 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HVOL_HEX_Cn_FEM::Serial:: + Basis_HVOL_HEX_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv, + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, const ordinal_type operatorDn ) { ordinal_type opDn = operatorDn; @@ -43,21 +43,21 @@ namespace Intrepid2 { const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); const auto input_z = Kokkos::subview(input, Kokkos::ALL(), range_type(2,3)); - const ordinal_type dim_s = get_dimension_scalar(work); + const ordinal_type dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; auto ptr3 = work.data()+3*cardLine*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - viewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); - viewType output_z(Kokkos::view_wrap(ptr3, vcprop), cardLine, npts); + ViewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + ViewType output_z(Kokkos::view_wrap(ptr3, vcprop), 
cardLine, npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); @@ -88,7 +88,7 @@ namespace Intrepid2 { case OPERATOR_D8: case OPERATOR_D9: case OPERATOR_D10: - opDn = getOperatorOrder(opType); + opDn = getOperatorOrder(OpType); case OPERATOR_Dn: { const ordinal_type dkcard = opDn + 1; @@ -105,35 +105,35 @@ namespace Intrepid2 { if (mult_x < 0) { // pass } else { - viewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); decltype(work_line) output_x, output_y, output_z; if (mult_x) { - output_x = viewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts, 1); + output_x = ViewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts, 1); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv, mult_x); } else { - output_x = viewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + output_x = ViewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); } if (mult_y) { - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv, mult_y); } else { - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv); } if (mult_z) { - output_z = viewType(Kokkos::view_wrap(ptr3, vcprop), cardLine, npts, 1); + output_z = ViewType(Kokkos::view_wrap(ptr3, vcprop), cardLine, npts, 1); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_z, input_z, work_line, vinv, mult_z); } else { - output_z = viewType(Kokkos::view_wrap(ptr3, vcprop), cardLine, npts); + output_z = ViewType(Kokkos::view_wrap(ptr3, vcprop), cardLine, 
npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_z, input_z, work_line, vinv); } @@ -316,7 +316,55 @@ namespace Intrepid2 { this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoordsHost); Kokkos::deep_copy(this->dofCoords_, dofCoordsHost); } - -}// namespace Intrepid2 + + template + void + Basis_HVOL_HEX_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 4*this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HVOL_HEX_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HVOL_HEX_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + auto sizePerPoint = 4*this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = 
Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HVOL_HEX_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HVOL_HEX_Cn_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEM.hpp index 380438f33bb4..0be4ce27fba8 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEM.hpp @@ -145,15 +145,16 @@ namespace Intrepid2 { : public Basis { public: using BasisBase = Basis; + using HostBasis = Basis_HVOL_LINE_Cn_FEM; - - using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; - - using OutputViewType = typename BasisBase::OutputViewType; - using PointViewType = typename BasisBase::PointViewType ; - using ScalarViewType = typename BasisBase::ScalarViewType; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. 
*/ @@ -182,6 +183,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEMDef.hpp index 3d742d4a30a4..dc8f25d3cd7e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEMDef.hpp @@ -9,7 +9,7 @@ /** \file Intrepid2_HVOL_LINE_Cn_FEMDef.hpp \brief Definition file for FEM basis functions of degree n for H(vol) functions on LINE. - \author Created by M. Perego, based on the Intrepid2::HGRAD_LINE_Cn_FEM class + \author Created by M. 
Perego, based on the Intrepid2::HVOL_LINE_Cn_FEM class */ #ifndef __INTREPID2_HVOL_LINE_CN_FEM_DEF_HPP__ @@ -22,16 +22,16 @@ namespace Intrepid2 { template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void Basis_HVOL_LINE_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv, + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, const ordinal_type operatorDn ) { ordinal_type opDn = operatorDn; @@ -41,12 +41,12 @@ namespace Intrepid2 { const ordinal_type order = card - 1; const double alpha = 0.0, beta = 0.0; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); switch (opType) { case OPERATOR_VALUE: { - viewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts); + ViewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts); Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI:: Serial::getValues(phis, input, order, alpha, beta); @@ -74,7 +74,7 @@ namespace Intrepid2 { case OPERATOR_Dn: { // dkcard is always 1 for 1D element const ordinal_type dkcard = 1; - viewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts, dkcard); + ViewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts, dkcard); Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI:: Serial::getValues(phis, input, order, alpha, beta, opDn); @@ -289,22 +289,56 @@ namespace Intrepid2 { posDfOrd); } } + + template + void + Basis_HVOL_LINE_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + 
Basis_HVOL_LINE_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HVOL_LINE_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HVOL_LINE_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HVOL_LINE_Cn_FEM): getValues not implemented for this operator"); + } + } + } }// namespace Intrepid2 #endif - - - - - - - - - - - - - - - diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEM.hpp index 496522a6278c..6329c3cb30ce 100644 --- 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEM.hpp @@ -132,20 +132,22 @@ namespace Intrepid2 { class Basis_HVOL_QUAD_Cn_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. */ Basis_HVOL_QUAD_Cn_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -167,6 +169,24 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEMDef.hpp 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEMDef.hpp index 2a9e2678b771..f492b6a65f7c 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEMDef.hpp @@ -19,22 +19,22 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - - template + + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HVOL_QUAD_Cn_FEM::Serial:: + Basis_HVOL_QUAD_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv, + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, const ordinal_type operatorDn ) { ordinal_type opDn = operatorDn; - + const ordinal_type cardLine = vinv.extent(0); const ordinal_type npts = input.extent(0); @@ -42,19 +42,19 @@ namespace Intrepid2 { const auto input_x = Kokkos::subview(input, Kokkos::ALL(), range_type(0,1)); const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); - const int dim_s = get_dimension_scalar(work); + const ordinal_type dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); - - switch (opType) { + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); + + switch (OpType) { case OPERATOR_VALUE: { - viewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + ViewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType 
output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); @@ -81,33 +81,33 @@ namespace Intrepid2 { case OPERATOR_D8: case OPERATOR_D9: case OPERATOR_D10: - opDn = getOperatorOrder(opType); + opDn = getOperatorOrder(OpType); case OPERATOR_Dn: { const auto dkcard = opDn + 1; for (auto l=0;l:: getValues(output_x, input_x, work_line, vinv, mult_x); } else { - output_x = viewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + output_x = ViewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); } if (mult_y) { - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv, mult_y); } else { - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv); } @@ -282,7 +282,55 @@ namespace Intrepid2 { this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoordsHost); Kokkos::deep_copy(this->dofCoords_, dofCoordsHost); } - -} + + template + void + Basis_HVOL_QUAD_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 3*this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HVOL_QUAD_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename 
Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HVOL_QUAD_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + auto sizePerPoint = 3*this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HVOL_QUAD_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HVOL_QUAD_Cn_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEM.hpp index d47afbf7724f..8f9010f619b8 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEM.hpp @@ -156,23 +156,23 @@ namespace Intrepid2 { 
class Basis_HVOL_TET_Cn_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. */ Basis_HVOL_TET_Cn_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - - - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - typedef typename Basis::scalarType scalarType; - - using Basis::getValues; + + using scalarType = typename BasisBase::scalarType; + using BasisBase::getValues; virtual void @@ -194,6 +194,24 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEMDef.hpp index 
a0945a008159..7927a1e124f6 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEMDef.hpp @@ -23,18 +23,18 @@ namespace Intrepid2 { namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HVOL_TET_Cn_FEM::Serial:: + Basis_HVOL_TET_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv ) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinv ) { constexpr ordinal_type spaceDim = 3; const ordinal_type @@ -50,17 +50,17 @@ namespace Intrepid2 { } } - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - workViewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TET_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); + Serial::getValues(phis, input, workView, order); for (ordinal_type i=0;i(); //(orDn + 1); - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); - workViewType dummyView; + const ordinal_type dkcard = getDkCardinality(); //(orDn + 1); + const + ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); + ViewType dummyView; Impl::Basis_HGRAD_TET_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i + void + 
Basis_HVOL_TET_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HVOL_TET_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HVOL_TET_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + auto sizePerPoint = this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HVOL_TET_Cn_FEM::Serial::getValues( output, input, work, this->vinv_); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR 
(Basis_HVOL_TET_Cn_FEM): getValues not implemented for this operator"); + } + } + } + } // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEM.hpp index 43a2161c9050..ff20e7426957 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEM.hpp @@ -151,25 +151,24 @@ namespace Intrepid2 { class Basis_HVOL_TRI_Cn_FEM : public Basis { public: + using BasisBase = Basis; using HostBasis = Basis_HVOL_TRI_Cn_FEM; - - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. 
*/ Basis_HVOL_TRI_Cn_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - - - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - typedef typename Basis::scalarType scalarType; - - using Basis::getValues; + + using scalarType = typename BasisBase::scalarType; + using BasisBase::getValues; virtual void @@ -191,6 +190,24 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEMDef.hpp index f870940f506b..aa6f54065ff6 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEMDef.hpp @@ -22,18 +22,18 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { -template -template -KOKKOS_INLINE_FUNCTION -void -Basis_HVOL_TRI_Cn_FEM::Serial:: -getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv ) { + template + template + KOKKOS_INLINE_FUNCTION + void + Basis_HVOL_TRI_Cn_FEM::Serial:: 
+ getValues( OutputViewType output, + const InputViewType input, + WorkViewType work, + const VinvViewType vinv ) { constexpr ordinal_type spaceDim = 2; const ordinal_type @@ -49,17 +49,17 @@ getValues( OutputViewType output, } } - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - workViewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); + Serial::getValues(phis, input, workView, order); for (ordinal_type i=0;i(); //(orDn + 1); - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); - workViewType dummyView; + const ordinal_type dkcard = getDkCardinality(); //(orDn + 1); + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); + ViewType dummyView; Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i + void + Basis_HVOL_TRI_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HVOL_TRI_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename 
Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HVOL_TRI_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + auto sizePerPoint = this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HVOL_TRI_Cn_FEM::Serial::getValues( output, input, work, this->vinv_); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HVOL_TRI_Cn_FEM): getValues not implemented for this operator"); + } + } + } + } // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureControlVolumeSideDef.hpp b/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureControlVolumeSideDef.hpp index ad2308807bf7..cb90d54c2c08 100644 --- a/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureControlVolumeSideDef.hpp +++ 
b/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureControlVolumeSideDef.hpp @@ -113,7 +113,7 @@ namespace Intrepid2 { const auto numSideNodeMaps = (spaceDim == 2 ? 1 : 2); const ordinal_type sideOrd[2] = { 1, 5 }; - Kokkos::pair nodeRangePerSide[2]; + Kokkos::pair nodeRangePerSide[2]={}; // the second rage is cell specific to handle remained sides switch (primaryCellTopo_.getKey()) { diff --git a/packages/intrepid2/src/Shared/Intrepid2_PolylibDef.hpp b/packages/intrepid2/src/Shared/Intrepid2_PolylibDef.hpp index 26d71b652d17..7a69b74f9fa7 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_PolylibDef.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_PolylibDef.hpp @@ -226,21 +226,22 @@ namespace Intrepid2 { } else { const double one = 1.0, two = 2.0; - typename zViewType::value_type pd_buf[MaxPolylibPoint]; - Kokkos::View - pd((typename zViewType::pointer_type)&pd_buf[0], MaxPolylibPoint); - + auto pd = Kokkos::subview(D, np-1, Kokkos::pair(0,np)); JacobiPolynomialDerivative(np, z, pd, np, alpha, beta); - for (ordinal_type i = 0; i < np; ++i) - for (ordinal_type j = 0; j < np; ++j) - if (i != j) - //D(i*np+j) = pd(j)/(pd(i)*(z(j)-z(i))); <--- This is either a bug, or the derivative matrix is not defined consistently. 
- D(i,j) = pd(i)/(pd(j)*(z(i)-z(j))); - else - D(i,j) = (alpha - beta + (alpha + beta + two)*z(j))/ - (two*(one - z(j)*z(j))); + // The temporary view pd is stored in the last row of the matrix D + // This loop is designed so that we do not overwrite pd entries before we read them + for (ordinal_type i = 0; i < np; ++i) { + const auto & pd_i = pd(i); + const auto & z_i = z(i); + for (ordinal_type j = 0; j < i; ++j) { + const auto & pd_j = pd(j); + const auto & z_j = z(j); + D(j,i) = pd_j/(pd_i*(z_j-z_i)); + D(i,j) = pd_i/(pd_j*(z_i-z_j)); + } + D(i,i) = (alpha - beta + (alpha + beta + two)*z_i) / (two*(one - z_i*z_i)); + } } } @@ -260,13 +261,8 @@ namespace Intrepid2 { } else { const double one = 1.0, two = 2.0; - typename zViewType::value_type pd_buf[MaxPolylibPoint]; - Kokkos::View - pd((typename zViewType::pointer_type)&pd_buf[0], MaxPolylibPoint); - - pd(0) = pow(-one,np-1)*GammaFunction(np+beta+one); - pd(0) /= GammaFunction(np)*GammaFunction(beta+two); + auto pd = Kokkos::subview(D, np-1, Kokkos::pair(0,np)); + pd(0) = pow(-one,np-1)*GammaFunction(np+beta+one) / (GammaFunction(np)*GammaFunction(beta+two)); auto pd_plus_1 = Kokkos::subview(pd, Kokkos::pair(1, pd.extent(0))); auto z_plus_1 = Kokkos::subview( z, Kokkos::pair(1, z.extent(0))); @@ -275,17 +271,22 @@ namespace Intrepid2 { for(ordinal_type i = 1; i < np; ++i) pd(i) *= (1+z(i)); - for (ordinal_type i = 0; i < np; ++i) - for (ordinal_type j = 0; j < np; ++j) - if (i != j) - D(i,j) = pd(i)/(pd(j)*(z(i)-z(j))); - else - if (j == 0) - D(i,j) = -(np + alpha + beta + one)*(np - one)/ - (two*(beta + two)); - else - D(i,j) = (alpha - beta + one + (alpha + beta + one)*z(j))/ - (two*(one - z(j)*z(j))); + // The temporary view pd is stored in the last row of the matrix D + // This loop is designed so that we do not overwrite pd entries before we read them + for (ordinal_type i = 0; i < np; ++i) { + const auto & pd_i = pd(i); + const auto & z_i = z(i); + for (ordinal_type j = 0; j < i; ++j) { + const auto & pd_j 
= pd(j); + const auto & z_j = z(j); + D(j,i) = pd_j/(pd_i*(z_j-z_i)); + D(i,j) = pd_i/(pd_j*(z_i-z_j)); + } + if (i == 0) + D(i,i) = -(np + alpha + beta + one)*(np - one) / (two*(beta + two)); + else + D(i,i) = (alpha - beta + one + (alpha + beta + one)*z_i) / (two*(one - z_i*z_i)); + } } } @@ -305,29 +306,30 @@ namespace Intrepid2 { } else { const double one = 1.0, two = 2.0; - typename zViewType::value_type pd_buf[MaxPolylibPoint]; - Kokkos::View - pd((typename zViewType::pointer_type)&pd_buf[0], MaxPolylibPoint); + auto pd = Kokkos::subview(D, np-1, Kokkos::pair(0,np)); JacobiPolynomialDerivative(np-1, z, pd, np-1, alpha+1, beta); for (ordinal_type i = 0; i < np-1; ++i) pd(i) *= (1-z(i)); - pd(np-1) = -GammaFunction(np+alpha+one); - pd(np-1) /= GammaFunction(np)*GammaFunction(alpha+two); - - for (ordinal_type i = 0; i < np; ++i) - for (ordinal_type j = 0; j < np; ++j) - if (i != j) - D(i,j) = pd(i)/(pd(j)*(z(i)-z(j))); - else - if (j == np-1) - D(i,j) = (np + alpha + beta + one)*(np - one)/ - (two*(alpha + two)); - else - D(i,j) = (alpha - beta - one + (alpha + beta + one)*z(j))/ - (two*(one - z(j)*z(j))); + pd(np-1) = -GammaFunction(np+alpha+one) / (GammaFunction(np)*GammaFunction(alpha+two)); + + // The temporary view pd is stored in the last row of the matrix D + // This loop is designed so that we do not overwrite pd entries before we read them + for (ordinal_type i = 0; i < np; ++i) { + const auto & pd_i = pd(i); + const auto & z_i = z(i); + for (ordinal_type j = 0; j < i; ++j) { + const auto & pd_j = pd(j); + const auto & z_j = z(j); + D(j,i) = pd_j/(pd_i*(z_j-z_i)); + D(i,j) = pd_i/(pd_j*(z_i-z_j)); + } + if (i == np-1) + D(i,i) = (np + alpha + beta + one)*(np - one) / (two*(alpha + two)); + else + D(i,i) = (alpha - beta - one + (alpha + beta + one)*z_i) / (two*(one - z_i*z_i)); + } } } @@ -347,10 +349,7 @@ namespace Intrepid2 { } else { const double one = 1.0, two = 2.0; - typename zViewType::value_type pd_buf[MaxPolylibPoint]; - Kokkos::View - 
pd((typename zViewType::pointer_type)&pd_buf[0], MaxPolylibPoint); + auto pd = Kokkos::subview(D, np-1, Kokkos::pair(0,np)); pd(0) = two*pow(-one,np)*GammaFunction(np + beta); pd(0) /= GammaFunction(np - one)*GammaFunction(beta + two); @@ -359,24 +358,32 @@ namespace Intrepid2 { auto z_plus_1 = Kokkos::subview( z, Kokkos::pair(1, z.extent(0))); JacobiPolynomialDerivative(np-2, z_plus_1, pd_plus_1, np-2, alpha+1, beta+1); - for (ordinal_type i = 1; i < np-1; ++i) - pd(i) *= (one-z(i)*z(i)); + for (ordinal_type i = 1; i < np-1; ++i) { + const auto & z_i = z(i); + pd(i) *= (one-z_i*z_i); + } pd(np-1) = -two*GammaFunction(np + alpha); pd(np-1) /= GammaFunction(np - one)*GammaFunction(alpha + two); - for (ordinal_type i = 0; i < np; ++i) - for (ordinal_type j = 0; j < np; ++j) - if (i != j) - D(i,j) = pd(i)/(pd(j)*(z(i)-z(j))); - else - if (j == 0) - D(i,j) = (alpha - (np-1)*(np + alpha + beta))/(two*(beta+ two)); - else if (j == np-1) - D(i,j) =-(beta - (np-1)*(np + alpha + beta))/(two*(alpha+ two)); - else - D(i,j) = (alpha - beta + (alpha + beta)*z(j))/ - (two*(one - z(j)*z(j))); + // The temporary view pd is stored in the last row of the matrix D + // This loop is designed so that we do not overwrite pd entries before we read them + for (ordinal_type i = 0; i < np; ++i) { + const auto & pd_i = pd(i); + const auto & z_i = z(i); + for (ordinal_type j = 0; j < i; ++j) { + const auto & pd_j = pd(j); + const auto & z_j = z(j); + D(j,i) = pd_j/(pd_i*(z_j-z_i)); + D(i,j) = pd_i/(pd_j*(z_i-z_j)); + } + if (i == 0) + D(i,i) = (alpha - (np-1)*(np + alpha + beta))/(two*(beta+ two)); + else if (i == np-1) + D(i,i) =-(beta - (np-1)*(np + alpha + beta))/(two*(alpha+ two)); + else + D(i,i) = (alpha - beta + (alpha + beta)*z_i)/(two*(one - z_i*z_i)); + } } } @@ -591,57 +598,51 @@ namespace Intrepid2 { for (ordinal_type i = 0; i < np; ++i) polyd(i) = 0.5*(alpha + beta + two); } else { - double a1, a2, a3, a4; - const double apb = alpha + beta; + INTREPID2_TEST_FOR_ABORT(polyd.data() 
&& !polyd.data() , + ">>> ERROR (Polylib::Serial::JacobiPolynomial): polyi view needed to compute polyd view."); + if(!polyi.data()) return; - typename polyiViewType::value_type - poly[MaxPolylibPoint]={}, polyn1[MaxPolylibPoint]={}, polyn2[MaxPolylibPoint]={}; + constexpr ordinal_type maxOrder = 2*MaxPolylibPoint-1; - if (polyi.data()) - for (ordinal_type i=0;i>> ERROR (Polylib::Serial::JacobiPolynomial): Requested order exceeds maxOrder ."); + + double a2[maxOrder-1]={}, a3[maxOrder-1]={}, a4[maxOrder-1]={}; + double ad1(0.0), ad2(0.0), ad3(0.0); + const double apb = alpha + beta; + const double amb = alpha - beta; - for (ordinal_type i = 0; i < np; ++i) { - polyn2[i] = one; - polyn1[i] = 0.5*(alpha - beta + (alpha + beta + two)*z(i)); - } for (auto k = 2; k <= n; ++k) { - a1 = two*k*(k + apb)*(two*k + apb - two); - a2 = (two*k + apb - one)*(alpha*alpha - beta*beta); - a3 = (two*k + apb - two)*(two*k + apb - one)*(two*k + apb); - a4 = two*(k + alpha - one)*(k + beta - one)*(two*k + apb); - - a2 /= a1; - a3 /= a1; - a4 /= a1; - - for (ordinal_type i = 0; i < np; ++i) { - poly [i] = (a2 + a3*z(i))*polyn1[i] - a4*polyn2[i]; - polyn2[i] = polyn1[i]; - polyn1[i] = poly [i]; - } + double a1 = two*k*(k + apb)*(two*k + apb - two); + a2[k-2] = (two*k + apb - one)*(apb*amb)/a1; + a3[k-2] = (two*k + apb - two)*(two*k + apb - one)*(two*k + apb)/a1; + a4[k-2] = two*(k + alpha - one)*(k + beta - one)*(two*k + apb)/a1; } if (polyd.data()) { - a1 = n*(alpha - beta); - a2 = n*(two*n + alpha + beta); - a3 = two*(n + alpha)*(n + beta); - a4 = (two*n + alpha + beta); - a1 /= a4; - a2 /= a4; - a3 /= a4; - - // note polyn2 points to polyn1 at end of poly iterations - for (ordinal_type i = 0; i < np; ++i) { - polyd(i) = (a1- a2*z(i))*poly[i] + a3*polyn2[i]; - polyd(i) /= (one - z(i)*z(i)); - } + double ad4 = (two*n + alpha + beta); + ad1 = n*(alpha - beta)/ad4; + ad2 = n*(two*n + alpha + beta)/ad4; + ad3 = two*(n + alpha)*(n + beta)/ad4; } - if (polyi.data()) - for (ordinal_type 
i=0;i::value && std::is_trivial::value) ? 0 : get_dimension_scalar(view); } + + /// Struct for deleting device instantiation + template + struct DeviceDeleter { + template + void operator()(T* ptr) { + Kokkos::parallel_for(Kokkos::RangePolicy(0,1), + KOKKOS_LAMBDA (const int i) { ptr->~T(); }); + typename Device::execution_space().fence(); + Kokkos::kokkos_free(ptr); + } + }; + + /// Function for creating a vtable on device (requires copy ctor for + /// derived object). Allocates device memory and must be called from + /// host. + template + std::unique_ptr> + copy_virtual_class_to_device(const Derived& host_source) + { + auto* p = static_cast(Kokkos::kokkos_malloc(sizeof(Derived))); + Kokkos::parallel_for(Kokkos::RangePolicy(0,1), + KOKKOS_LAMBDA (const int i) {new (p) Derived(host_source); }); + typename Device::execution_space().fence(); + return std::unique_ptr>(p); + } } // end namespace Intrepid2 #endif diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/CMakeLists.txt index f080139e2292..f0c9e31cd911 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ 
-68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS 
Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_HEX_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..cd4d6dabdf43 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HCURL_HEX_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_HEX_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..7f4e2807360d --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/test_02.hpp @@ -0,0 +1,187 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_HEX_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_HEX_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HCURL_HEX_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_HEX_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HCURL_HEX_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = 
getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0)<< ", " << outputCurlsA_Host(ic,i,j,1) << ", " << outputCurlsA_Host(ic,i,j,2) << "]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1)<< ", " << outputCurlsB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/CMakeLists.txt index b682181c9d5b..2e5f6844ed27 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -83,3 +86,75 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + 
LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_HEX_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..41f15e65574e --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HCURL_HEX_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_HEX_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/test_02.hpp new file mode 100644 index 000000000000..e9ecf8ca65a4 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/test_02.hpp @@ -0,0 +1,203 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_HEX_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_HEX_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HCURL_HEX_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_HEX_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HCURL_HEX_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = 
getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + // avoid using a team size larger than needed, to reduce allocated scratch space memory + ordinal_type team_size = teamPolicy.team_size_recommended(functor, Kokkos::ParallelForTag()); + *outStream << "Max Recommended team size: " << team_size << ", Requested team size: " << npts <(ncells, team_size,vectorSize); + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + // avoid using a team size larger than needed, to reduce allocated scratch space memory + ordinal_type team_size = teamPolicy.team_size_recommended(functor, Kokkos::ParallelForTag()); + *outStream << "Max Recommended team size: " << team_size << ", Requested team size: " << npts <(ncells, team_size,vectorSize); + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Order: " << order << ": Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0)<< ", " << outputCurlsA_Host(ic,i,j,1) << ", " << outputCurlsA_Host(ic,i,j,2) << "]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1)<< ", " << outputCurlsB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/CMakeLists.txt index 716000daf9b3..89117d0742fb 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,75 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + 
LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_QUAD_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..01d5359f6b02 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HCURL_QUAD_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_QUAD_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..ecef9a26d9d4 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_QUAD_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_QUAD_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HCURL_QUAD_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_QUAD_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HCURL_QUAD_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto 
functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j)<<"]" + << ", curls B: [" << outputCurlsB_Host(i,j) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/CMakeLists.txt index c831e83ec896..2f44c158238f 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HCURL_QUAD_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_QUAD_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HCURL_QUAD_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..daa3176be226 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HCURL_QUAD_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_QUAD_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/test_02.hpp new file mode 100644 index 000000000000..2ae8438a11ac --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/test_02.hpp @@ -0,0 +1,189 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_QUAD_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_QUAD_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HCURL_QUAD_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_QUAD_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HCURL_QUAD_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Order: " << order << ": Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j)<<"]" + << ", curls B: [" << outputCurlsB_Host(i,j) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/CMakeLists.txt index 46e84774b70d..234b8e2d6fed 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND 
Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_TET_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/eti/test_02_ETI.in 
b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..8fadc4a2c865 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HCURL_TET_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_TET_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..9c112664aff0 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/test_02.hpp @@ -0,0 +1,187 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_TET_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_TET_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HCURL_TET_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_TET_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HCURL_TET_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute 
values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << ", " << outputValuesA_Host(ic,i,j,2) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << ", " << outputValuesB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0)<< ", " << outputCurlsA_Host(ic,i,j,1) << ", " << outputCurlsA_Host(ic,i,j,2) << "]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1)<< ", " << outputCurlsB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/CMakeLists.txt index 46e4453c0d57..c40f3503ccf9 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HCURL_TET_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_TET_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HCURL_TET_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..278f9326b54c --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HCURL_TET_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_TET_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/test_02.hpp new file mode 100644 index 000000000000..d51a4ed29ae1 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/test_02.hpp @@ -0,0 +1,205 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_TET_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_TET_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HCURL_TET_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_TET_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 7; + + try { + for (int order=1;order <= maxOrder;++order) { + using BasisType = Basis_HCURL_TET_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + // avoid using a team size larger than needed, to reduce allocated scratch space memory + ordinal_type team_size = teamPolicy.team_size_recommended(functor, Kokkos::ParallelForTag()); + *outStream << "Max Recommended team size: " << team_size << ", Requested team size: " << npts <(ncells, team_size,vectorSize); + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + // avoid using a team size larger than needed, to reduce allocated scratch space memory + ordinal_type team_size = teamPolicy.team_size_recommended(functor, Kokkos::ParallelForTag()); + *outStream << "Max Recommended team size: " << team_size << ", Requested team size: " << npts <(ncells, team_size,vectorSize); + + //Get the required size of the 
scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Order: " << order << ": Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + //Note, the PR intel 2021 serial build shows substantially higher errors (possibly due to operation rearrangements). 
+ auto tol = 1.0e6*epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0)<< ", " << outputCurlsA_Host(ic,i,j,1) << ", " << outputCurlsA_Host(ic,i,j,2) << "]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1)<< ", " << outputCurlsB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/CMakeLists.txt index 2bf7bfdee691..b87adda0a338 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE 
"") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,75 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + 
FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_TRI_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..2fe8396db2d1 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HCURL_TRI_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_TRI_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..3b255303dd1f --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_TRI_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_TRI_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HCURL_TRI_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_TRI_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HCURL_TRI_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); 
+ Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j)<<"]" + << ", curls B: [" << outputCurlsB_Host(i,j) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/CMakeLists.txt index 49f5b786efc9..ec30d2154004 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HCURL_TRI_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_TRI_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HCURL_TRI_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..509fff60809f --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HCURL_TRI_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_TRI_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/test_02.hpp new file mode 100644 index 000000000000..aa19ce2114e0 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/test_02.hpp @@ -0,0 +1,189 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_TRI_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_TRI_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HCURL_TRI_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_TRI_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HCURL_TRI_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Order: " << order << ": Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j)<<"]" + << ", curls B: [" << outputCurlsB_Host(i,j) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/CMakeLists.txt index ba0496748a48..cb1ebf7b3de1 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + 
LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_WEDGE_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..1b963155651c --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HCURL_WEDGE_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_WEDGE_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..de75f4cf2d72 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/test_02.hpp @@ -0,0 +1,188 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_WEDGE_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_WEDGE_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HCURL_WEDGE_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_WEDGE_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HCURL_WEDGE_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { 
//compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0)<< ", " << outputCurlsA_Host(ic,i,j,1) << ", " << outputCurlsA_Host(ic,i,j,2) << "]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1)<< ", " << outputCurlsB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/CMakeLists.txt index fd4d688a591d..3fb8fc747f9c 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/CMakeLists.txt @@ -1,13 +1,18 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") - + IF (HAVE_INTREPID2_SACADO) # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DOUBLE") # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND 
Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_HEX_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + 
PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..ab24cfec247d --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HDIV_HEX_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_HEX_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..fb05ad186945 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/test_02.hpp @@ -0,0 +1,186 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_HEX_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_HEX_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HDIV_HEX_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_HEX_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HDIV_HEX_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { 
//compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << ", " << outputValuesA_Host(ic,i,j,2) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << ", " << outputValuesB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/CMakeLists.txt index a0e677500751..7a81181c8403 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HDIV_HEX_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_HEX_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HDIV_HEX_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..71b715c78833 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HDIV_HEX_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_HEX_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/test_02.hpp new file mode 100644 index 000000000000..61c3d844f5dd --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/test_02.hpp @@ -0,0 +1,190 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_HEX_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_HEX_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ + template + int HDIV_HEX_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_HEX_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HDIV_HEX_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Order: " << order << ": Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << ", " << outputValuesA_Host(ic,i,j,2) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << ", " << outputValuesB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/CMakeLists.txt index 5900fa72e32a..b21760f88ec4 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/CMakeLists.txt @@ -1,13 +1,18 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") - + IF (HAVE_INTREPID2_SACADO) # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DOUBLE") # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") 
+LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_QUAD_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + 
PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..5e1eb4c2dc79 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HDIV_QUAD_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...)
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_QUAD_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..a811df7230c7 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_QUAD_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_QUAD_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HDIV_QUAD_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_QUAD_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HDIV_QUAD_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { 
//compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/CMakeLists.txt index 59d15e7c716c..cdc2989d6036 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HDIV_QUAD_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_QUAD_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HDIV_QUAD_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..328d40fda920 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HDIV_QUAD_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...)
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_QUAD_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/test_02.hpp new file mode 100644 index 000000000000..529007a97787 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/test_02.hpp @@ -0,0 +1,190 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_QUAD_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_QUAD_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HDIV_QUAD_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_QUAD_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HDIV_QUAD_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Order: " << order << ": Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/CMakeLists.txt index ea49b4cde715..6b8c89a459f9 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/CMakeLists.txt @@ -1,13 +1,18 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") - + IF (HAVE_INTREPID2_SACADO) # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DOUBLE") # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND 
Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_TET_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + 
PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..72f12aed7a9b --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HDIV_TET_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...)
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_TET_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..2da333826892 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_TET_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_TET_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HDIV_TET_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_TET_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HDIV_TET_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute 
values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << ", " << outputValuesA_Host(ic,i,j,2) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << ", " << outputValuesB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/CMakeLists.txt index 264ce7c056ff..f4a2093e0e4f 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HDIV_TET_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_TET_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HDIV_TET_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..c08e06044acf --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HDIV_TET_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_TET_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/test_02.hpp new file mode 100644 index 000000000000..1d5f9059327c --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/test_02.hpp @@ -0,0 +1,190 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_TET_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_TET_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HDIV_TET_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_TET_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 7; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HDIV_TET_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Order: " << order << ": Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << ", " << outputValuesA_Host(ic,i,j,2) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << ", " << outputValuesB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + //Note, the PR intel 2021 serial build shows substantially higher errors (possibly due to operation rearrangements). 
+ auto tol = 1e6*epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/CMakeLists.txt index 4f47ee20c141..581f594311e0 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/CMakeLists.txt @@ -1,13 +1,18 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") - + IF (HAVE_INTREPID2_SACADO) # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DOUBLE") # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device 
SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} 
ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_TRI_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..99b3fb273163 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HDIV_TRI_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_TRI_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..15b4152a781a --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_TRI_I1_FEM class. 
+ \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_TRI_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HDIV_TRI_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_TRI_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HDIV_TRI_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View 
inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto 
outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/CMakeLists.txt index 4f11a0b1e70c..f06b5f1bb859 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND 
Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HDIV_TRI_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_TRI_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO 
"0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HDIV_TRI_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..060c322dc641 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HDIV_TRI_In_FEM team-level getValues. 
+ \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_TRI_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/test_02.hpp new file mode 100644 index 000000000000..1d3c940090d7 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/test_02.hpp @@ -0,0 +1,189 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_TRI_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_TRI_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HDIV_TRI_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_TRI_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HDIV_TRI_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, 
basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Order: " << order << ": Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); 
Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/CMakeLists.txt index 67fcf71311f7..d801c634869e 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE 
"test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,75 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET 
Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_WEDGE_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..16906a746c00 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HDIV_WEDGE_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_WEDGE_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..c991769a4852 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_WEDGE_I1_FEM class. 
+ \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_WEDGE_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HDIV_WEDGE_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_WEDGE_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HDIV_WEDGE_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View 
inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << ", " << outputValuesA_Host(ic,i,j,2) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << ", " << outputValuesB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); 
Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/CMakeLists.txt index 88da0999c2ab..4982bd4f8dff 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/CMakeLists.txt @@ -3,6 +3,7 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -67,6 +68,7 @@ SET(Intrepid2_TEST_ETI_FILE "test_01") SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") + IF(Kokkos_ENABLE_CUDA) LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") @@ -106,3 
+108,75 @@ IF (${ETI_DEVICE_COUNT} GREATER_EQUAL 0) ENDFOREACH() ENDFOREACH() ENDIF() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_03") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_HEX_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} 
ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_HEX_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/eti/test_03_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/eti/test_03_ETI.in new file mode 100644 index 000000000000..a88bf31183c7 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/eti/test_03_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_03.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_HEX_C1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_03.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_HEX_C1_FEM_Test03<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/test_03.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/test_03.hpp new file mode 100644 index 000000000000..9d326a80cf33 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/test_03.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_03.hpp + \brief Unit tests for the Intrepid2::HGRAD_HEX_C1_FEM class. 
+ \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_HEX_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_HEX_C1_FEM_Test03(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_HEX_C1_FEM, Test 3", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_HEX_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", 
npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/CMakeLists.txt index a9bfc7f38abb..29e3244386c9 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/CMakeLists.txt @@ -1,8 +1,16 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "") +LIST(APPEND Intrepid2_TEST_ETI_FILE + "test_01" + "test_01_Serendipity") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,12 +25,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "") -LIST(APPEND Intrepid2_TEST_ETI_FILE - "test_01" - "test_01_Serendipity") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") 
+ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_HEX_C2_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_HEX_C2_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") 
SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..f10b05aa223e --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_HEX_C2_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_HEX_C2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/test_02.hpp new file mode 100644 index 000000000000..a29875462280 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_HEX_C2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_HEX_C2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_HEX_C2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_HEX_C2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_HEX_C2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto 
functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/CMakeLists.txt index 793d773f707f..ba86fece89df 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/CMakeLists.txt @@ -7,6 +7,7 @@ SET(Intrepid2_TEST_ETI_FILE "test_01") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -94,11 +95,18 @@ SET(Intrepid2_TEST_ETI_FILE "test_02") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_01_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_01_ETI.in index 
487708632660..25426631d6de 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_01_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_01_ETI.in @@ -28,9 +28,8 @@ constexpr int num_deriv = 10; #define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 23) -constexpr int num_deriv = 9; -constexpr int max_deriv = 10; -#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, max_deriv+1) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 20) constexpr int num_deriv = 2; diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_02_ETI.in index 6a200d58b21d..d314677fd1db 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_02_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_02_ETI.in @@ -7,22 +7,45 @@ // ***************************************************************************** // @HEADER -/** \file test_01.cpp - \brief Unit test of serial interface Intrepid2::Basis_HGRAD_QUAD_Cn_FEM. +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_HEX_Cn_FEM team-level getValues. \author Kyungjoo Kim */ #include "Kokkos_Core.hpp" +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + #include "test_02.hpp" int main(int argc, char *argv[]) { + const bool verbose = (argc-1) > 0; Kokkos::initialize(); - { - const bool verbose = (argc-1) > 0; - Intrepid2::Test::HGRAD_HEX_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); - } + + Intrepid2::Test::HGRAD_HEX_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + Kokkos::finalize(); return 0; } diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/test_02.hpp index b98955113bde..e392f1540447 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/test_02.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/test_02.hpp @@ -7,9 +7,9 @@ // ***************************************************************************** // @HEADER -/** \file test_01.hpp +/** \file test_02.hpp \brief Unit tests for the Intrepid2::HGRAD_HEX_Cn_FEM class. - \author Created by P. Bochev, D. Ridzal, K. 
Peterson, Kyungjoo Kim + \author Created by Kyungjoo Kim, Mauro Perego */ @@ -23,100 +23,151 @@ #include "Intrepid2_Utils.hpp" #include "Intrepid2_HGRAD_HEX_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" namespace Intrepid2 { namespace Test { - // This code provides an example to use serial interface of high order elements + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. template int HGRAD_HEX_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_HEX_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + using DeviceSpaceType = typename DeviceType::execution_space; Kokkos::print_configuration(std::cout, false); int errorFlag = 0; - + constexpr int maxOrder = 9; try { - // for higher orders in certain environments, this test can take a while to run in ctest. So we reduce the number of points as we go beyond 10th order. Also, @mperego is replacing this test, so for now we just restrict to the 10 orders we supported until recently. - for (int order=1;order<10;++order) { - Basis_HGRAD_HEX_Cn_FEM basis(order); + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HGRAD_HEX_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); - // problem setup - // let's say we want to evaluate 1000 points in parallel. output values are stored in outputValuesA and B. - // A is compuated via serial interface and B is computed with top-level interface. 
- const int npts = 1000, ndim = 3; - Kokkos::DynRankView outputValuesA("outputValuesA", basis.getCardinality(), npts); - Kokkos::DynRankView outputValuesB("outputValuesB", basis.getCardinality(), npts); + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; - Kokkos::View inputPointsViewToUseRandom("inputPoints", npts, ndim); - Kokkos::DynRankView inputPoints (inputPointsViewToUseRandom.data(), npts, ndim); + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); - // random values between (-1,1) x (-1,1) + // random values between (0,1) Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); - // compute setup - // we need vinv and workspace - const auto vinv = basis.getVandermondeInverse(); - - // worksize - // workspace per thread is required for serial interface. - // parallel_for with range policy would be good to use stack workspace - // as team policy only can create shared memory - // this part would be tricky as the max size should be determined at compile time - // let's think about this and find out the best practice. for now I use the following. 
- constexpr int worksize = (Parameters::MaxOrder+1)*4; - - // if you use team policy, worksize can be gathered from the basis object and use - // kokkos shmem_size APIs to create workspace per team or per thread. - //const auto worksize_for_teampolicy = basis.getWorksizePerPoint(OPERATOR_VALUE); - - // extract point range to be evaluated in each thread - typedef Kokkos::pair range_type; - - // parallel execution with serial interface - Kokkos::RangePolicy policy(0, npts); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(int i) { - // we evaluate a single point - const range_type pointRange = range_type(i,i+1); - - // out (# dofs, # pts), input (# pts, # dims) - auto output = Kokkos::subview(outputValuesA, Kokkos::ALL(), pointRange); - auto input = Kokkos::subview(inputPoints, pointRange, Kokkos::ALL()); + + *outStream << "Order: " << order << ": Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; - // wrap static workspace with a view; serial interface has a template view interface. - // either view or dynrankview with a right size is okay. - OutValueType workbuf[worksize]; - Kokkos::View work(&workbuf[0], worksize); + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; - // evaluate basis using serial interface - Impl::Basis_HGRAD_HEX_Cn_FEM - ::Serial::getValues(output, input, work, vinv); - }); - - // evaluation using high level interface - basis.getValues(outputValuesB, inputPoints, OPERATOR_VALUE); - - // compare - const auto outputValuesA_Host = Kokkos::create_mirror_view(outputValuesA); Kokkos::deep_copy(outputValuesA_Host, outputValuesA); - const auto outputValuesB_Host = Kokkos::create_mirror_view(outputValuesB); Kokkos::deep_copy(outputValuesB_Host, outputValuesB); - - double sum = 0, diff = 0; - for (size_t i=0;i 1.0e-9)) { - std::cout << " order = " << order - << " i = " << i << " j = " << j - << " val A = " << outputValuesA_Host(i,j) - << " val B = " << outputValuesB_Host(i,j) - << " diff = " << (outputValuesA_Host(i,j) - outputValuesB_Host(i,j)) - << std::endl; - } + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); } - if (diff/sum > 1.0e-9) { - errorFlag = -1; + } + + *outStream << "Order: " << order << ": Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Order: " << order << ": Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } } } } catch (std::exception &err) { diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/CMakeLists.txt 
b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/CMakeLists.txt index 940d4ad3ebb4..186c0369d09e 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + # MESSAGE(STATUS "Generating TEST HGRAD_LINE_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME 
${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_LINE_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..69bb74d6a746 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_LINE_C1_FEM team-level get Values. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_LINE_C1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/test_02.hpp new file mode 100644 index 000000000000..c0d1db740ce9 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_LINE_C1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_LINE_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + template + int HGRAD_LINE_C1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_LINE_C1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_LINE_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + // problem setup + // let's say we want to evaluate 1000 points in parallel. output values are stored in outputValuesA and B. + // A is compuated via serial interface and B is computed with top-level interface. + const int ncells = 5, npts = 10, ndim = 1; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/CMakeLists.txt index f26e93eb35f6..47fc663ffd89 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE 
"Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_LINE_C2_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_LINE_C2_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..76e7d225aa79 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_LINE_C2_FEM team-level get Values. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_LINE_C2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/test_02.hpp new file mode 100644 index 000000000000..7c40e6e00dd1 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_LINE_C2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_LINE_C2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_LINE_C2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_LINE_C2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_LINE_C2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 1; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + 
auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/CMakeLists.txt index 40be3eb7ddf0..088e2285ac8f 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_LINE_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_LINE_Cn_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_01_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_01_ETI.in index 070fba1f3916..b662965e7493 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_01_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_01_ETI.in @@ -28,10 +28,8 @@ constexpr int num_deriv = 10; #define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 23) -/// Mauro, the master branch uses this derivative dimension which sounds dummy -constexpr int num_deriv = 0; //9; -constexpr int max_deriv = 1; //10; -#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, max_deriv+1) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 20) constexpr int num_deriv = 2; diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..2ff629694b47 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_LINE_Cn_FEM team-level get Values. 
+ \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_LINE_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/test_02.hpp new file mode 100644 index 000000000000..23dafa935f7e --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/test_02.hpp @@ -0,0 +1,188 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_LINE_Cn_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_LINE_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function.s + template + int HGRAD_LINE_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_LINE_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + for (int order=1;order<=Parameters::MaxOrder;++order) { + using BasisType = Basis_HGRAD_LINE_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 1; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, 
ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Order: " << order << ": Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); 
Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/CMakeLists.txt index aac1913c1e91..60b3eaa1ed20 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME 
"Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_PYR_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_PYR_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND 
Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..ac8e1393df20 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_PYR_C1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_PYR_C1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/test_02.hpp new file mode 100644 index 000000000000..ae1ba8b9b47d --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_PYR_C1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_PYR_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HGRAD_PYR_C1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_PYR_C1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_PYR_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = 
getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/CMakeLists.txt index fdbf58124c2e..813910ef9f3d 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,6 +71,79 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND 
Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_TET_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_TET_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + 
TRIBITS_COPY_FILES_TO_BINARY_DIR(HGRAD_PYR_I2TestDataCopy SOURCE_FILES PYR_I2_D2Vals.dat diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..b1bc686c303d --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_PYR_I2_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_PYR_I2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/test_02.hpp new file mode 100644 index 000000000000..39b7903b384a --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_PYR_I2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_PYR_I2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_PYR_I2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_PYR_I2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_PYR_I2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto 
functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/CMakeLists.txt index ef324d6c681f..593042946b82 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE 
"Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_QUAD_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_QUAD_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..7650cb60968c --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of serial interface Intrepid2::Basis_HGRAD_QUAD_C1_FEM. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_QUAD_C1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/test_02.hpp new file mode 100644 index 000000000000..2dba47d6f022 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/test_02.hpp @@ -0,0 +1,228 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_QUAD_C1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_QUAD_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_QUAD_C1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_QUAD_C1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_QUAD_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device 
= basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, 
outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0) << ", " << outputCurlsA_Host(ic,i,j,1) <<"]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/CMakeLists.txt index 6d92bb337ac4..14d863a19fc7 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/CMakeLists.txt @@ -1,8 +1,16 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "") +LIST(APPEND Intrepid2_TEST_ETI_FILE + "test_01" + "test_01_Serendipity") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,12 +25,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "") -LIST(APPEND Intrepid2_TEST_ETI_FILE - "test_01" - "test_01_Serendipity") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") 
+ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_QUAD_C2_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_QUAD_C2_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") 
SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..3dac2095b0b6 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_QUAD_C2_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...)
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_QUAD_C2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/test_02.hpp new file mode 100644 index 000000000000..cf2ba0043d7b --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/test_02.hpp @@ -0,0 +1,228 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_QUAD_C2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_QUAD_C2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + namespace Test { + + template + int HGRAD_QUAD_C2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_QUAD_C2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_QUAD_C2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device 
= basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, 
outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0) << ", " << outputCurlsA_Host(ic,i,j,1) <<"]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/CMakeLists.txt index 9a6190ea4405..50f38bf22177 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/CMakeLists.txt @@ -7,6 +7,7 @@ SET(Intrepid2_TEST_ETI_FILE "test_01") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -94,11 +95,18 @@ SET(Intrepid2_TEST_ETI_FILE "test_02") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_01_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_01_ETI.in index 
46bd4b13ccfb..0cae06ee9e31 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_01_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_01_ETI.in @@ -28,9 +28,8 @@ constexpr int num_deriv = 10; #define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 23) -constexpr int num_deriv = 9; -constexpr int max_deriv = 10; -#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, max_deriv+1) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 20) constexpr int num_deriv = 2; diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_02_ETI.in index ccb60ba60798..07adf2c5e888 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_02_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_02_ETI.in @@ -8,21 +8,44 @@ // @HEADER /** \file test_01.cpp - \brief Unit test of serial interface Intrepid2::Basis_HGRAD_QUAD_Cn_FEM. + \brief Unit test of Intrepid2::Basis_HGRAD_QUAD_Cn_FEM team-level getValues. \author Kyungjoo Kim */ #include "Kokkos_Core.hpp" +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + #include "test_02.hpp" int main(int argc, char *argv[]) { + const bool verbose = (argc-1) > 0; Kokkos::initialize(); - { - const bool verbose = (argc-1) > 0; - Intrepid2::Test::HGRAD_QUAD_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); - } + + Intrepid2::Test::HGRAD_QUAD_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + Kokkos::finalize(); return 0; } diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/test_02.hpp index 36a858dec901..2e2bdf715470 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/test_02.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/test_02.hpp @@ -7,9 +7,9 @@ // ***************************************************************************** // @HEADER -/** \file test_01.hpp +/** \file test_02.hpp \brief Unit tests for the Intrepid2::HGRAD_QUAD_Cn_FEM class. - \author Created by P. Bochev, D. Ridzal, K. 
Peterson, Kyungjoo Kim + \author Created by Kyungjoo Kim, Mauro Perego */ @@ -23,98 +23,196 @@ #include "Intrepid2_Utils.hpp" #include "Intrepid2_HGRAD_QUAD_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" namespace Intrepid2 { namespace Test { - // This code provides an example to use serial interface of high order elements + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. template int HGRAD_QUAD_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_QUAD_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; Kokkos::print_configuration(std::cout, false); int errorFlag = 0; - + constexpr int maxOrder = 9; try { - for (int order=1;order basis(order); + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HGRAD_QUAD_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); - // problem setup - // let's say we want to evaluate 1000 points in parallel. output values are stored in outputValuesA and B. - // A is compuated via serial interface and B is computed with top-level interface. 
- const int npts = 1000, ndim = 2; - Kokkos::DynRankView outputValuesA("outputValuesA", basis.getCardinality(), npts); - Kokkos::DynRankView outputValuesB("outputValuesB", basis.getCardinality(), npts); + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); - Kokkos::View inputPointsViewToUseRandom("inputPoints", npts, ndim); - Kokkos::DynRankView inputPoints (inputPointsViewToUseRandom.data(), npts, ndim); + using ScalarType = typename ScalarTraits::scalar_type; - // random values between (-1,1) x (-1,1) + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); - // compute setup - // we need vinv and workspace - const auto vinv = basis.getVandermondeInverse(); - - // worksize - // workspace per thread is required for serial interface. 
- // parallel_for with range policy would be good to use stack workspace - // as team policy only can create shared memory - // this part would be tricky as the max size should be determined at compile time - // let's think about this and find out the best practice. for now I use the following. - constexpr int worksize = (Parameters::MaxOrder+1)*3; - - // if you use team policy, worksize can be gathered from the basis object and use - // kokkos shmem_size APIs to create workspace per team or per thread. - //const auto worksize_for_teampolicy = basis.getWorksizePerPoint(OPERATOR_VALUE); - - // extract point range to be evaluated in each thread - typedef Kokkos::pair range_type; - - // parallel execution with serial interface - Kokkos::RangePolicy policy(0, npts); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(int i) { - // we evaluate a single point - const range_type pointRange = range_type(i,i+1); + + *outStream << "Order: " << order << ": Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; - // out (# dofs, # pts), input (# pts, # dims) - auto output = Kokkos::subview(outputValuesA, Kokkos::ALL(), pointRange); - auto input = Kokkos::subview(inputPoints, pointRange, Kokkos::ALL()); + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; - // wrap static workspace with a view; serial interface has a template view interface. - // either view or dynrankview with a right size is okay. - OutValueType workbuf[worksize]; - Kokkos::View work(&workbuf[0], worksize); + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; - // evaluate basis using serial interface - Impl::Basis_HGRAD_QUAD_Cn_FEM - ::Serial::getValues(output, input, work, vinv); - }); - - // evaluation using high level interface - basis.getValues(outputValuesB, inputPoints, OPERATOR_VALUE); - - // compare - const auto outputValuesA_Host = 
Kokkos::create_mirror_view(outputValuesA); Kokkos::deep_copy(outputValuesA_Host, outputValuesA); - const auto outputValuesB_Host = Kokkos::create_mirror_view(outputValuesB); Kokkos::deep_copy(outputValuesB_Host, outputValuesB); - - double sum = 0, diff = 0; - for (size_t i=0;igetValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Order: " << order << ": Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << 
ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0) << ", " << outputCurlsA_Host(ic,i,j,1) <<"]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } } } } catch (std::exception &err) { diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/CMakeLists.txt index 37135caa841f..325ab37afd81 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_TET_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_TET_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/eti/test_02_ETI.in 
b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..7fab72655932 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_TET_C1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_TET_C1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/test_02.hpp new file mode 100644 index 000000000000..48d5b3eb9e2f --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_TET_C1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_TET_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_TET_C1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TET_C1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_TET_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto 
functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/CMakeLists.txt index 2d0041218982..cac75a8505b4 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE 
"Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_TET_C2_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_TET_C2_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..b01c59418753 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_TET_C2_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_TET_C2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/test_02.hpp new file mode 100644 index 000000000000..5b788cc85328 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_TET_C2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_TET_C2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + namespace Test { + + template + int HGRAD_TET_C2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TET_C2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_TET_C2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto 
functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/CMakeLists.txt index 09fddb77ac02..3bc181264b2b 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND 
Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_TET_COMP12_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_TET_COMP12_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..c7a155d2f8a9 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_TET_COMP12_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_TET_COMP12_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/test_02.hpp new file mode 100644 index 000000000000..9120a9bf8b53 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_TET_COMP12_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_TET_COMP12_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_TET_COMP12_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TET_COMP12_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_TET_COMP12_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute 
values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/CMakeLists.txt index cb2c34d9f2e6..b669b1b2ba1f 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/CMakeLists.txt @@ -7,6 +7,7 @@ SET(Intrepid2_TEST_ETI_FILE "test_01") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -94,11 +95,18 @@ SET(Intrepid2_TEST_ETI_FILE "test_02") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_01_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_01_ETI.in index 
74f1bccc00db..c997523b3120 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_01_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_01_ETI.in @@ -28,10 +28,8 @@ constexpr int num_deriv = 10; #define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 23) -/// Mauro, the master branch uses this derivative dimension which sounds dummy -constexpr int num_deriv = 0; //9; -constexpr int max_deriv = 1; //10; -#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, max_deriv+1) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 20) constexpr int num_deriv = 2; diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_02_ETI.in index c26586d323cf..a6b3263eb7c0 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_02_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_02_ETI.in @@ -8,21 +8,44 @@ // @HEADER /** \file test_01.cpp - \brief Unit test of serial interface Intrepid2::Basis_HGRAD_TET_Cn_FEM. + \brief Unit test of Intrepid2::Basis_HGRAD_TET_Cn_FEM team-level getValues. \author Kyungjoo Kim */ #include "Kokkos_Core.hpp" +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + #include "test_02.hpp" int main(int argc, char *argv[]) { + const bool verbose = (argc-1) > 0; Kokkos::initialize(); - { - const bool verbose = (argc-1) > 0; - Intrepid2::Test::HGRAD_TET_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); - } + + Intrepid2::Test::HGRAD_TET_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + Kokkos::finalize(); return 0; } diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/test_02.hpp index 4f6c6c3a33e0..711fd6d35bdb 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/test_02.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/test_02.hpp @@ -7,9 +7,9 @@ // ***************************************************************************** // @HEADER -/** \file test_01.hpp +/** \file test_02.hpp \brief Unit tests for the Intrepid2::HGRAD_TET_Cn_FEM class. - \author Created by P. Bochev, D. Ridzal, K. 
Peterson, Kyungjoo Kim + \author Created by Kyungjoo Kim, Mauro Perego */ @@ -23,99 +23,153 @@ #include "Intrepid2_Utils.hpp" #include "Intrepid2_HGRAD_TET_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" namespace Intrepid2 { namespace Test { - // This code provides an example to use serial interface of high order elements + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. template int HGRAD_TET_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TET_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + using DeviceSpaceType = typename DeviceType::execution_space; Kokkos::print_configuration(std::cout, false); int errorFlag = 0; - + constexpr int maxOrder = 7; try { - for (int order=1;order<10;++order) { - Basis_HGRAD_TET_Cn_FEM basis(order); + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HGRAD_TET_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); - // problem setup - // let's say we want to evaluate 1000 points in parallel. output values are stored in outputValuesA and B. - // A is compuated via serial interface and B is computed with top-level interface. 
- const int npts = 1000, ndim = 3; - Kokkos::DynRankView outputValuesA("outputValuesA", basis.getCardinality(), npts); - Kokkos::DynRankView outputValuesB("outputValuesB", basis.getCardinality(), npts); + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; - Kokkos::View inputPointsViewToUseRandom("inputPoints", npts, ndim); - Kokkos::DynRankView inputPoints (inputPointsViewToUseRandom.data(), npts, ndim); + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); - // random values between (-1,1) x (-1,1) + // random values between (0,1) Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); - // compute setup - // we need vinv and workspace - const auto vinv = basis.getVandermondeInverse(); - - // worksize - // workspace per thread is required for serial interface. - // parallel_for with range policy would be good to use stack workspace - // as team policy only can create shared memory - // this part would be tricky as the max size should be determined at compile time - // let's think about this and find out the best practice. for now I use the following. 
- constexpr int worksize = (Parameters::MaxOrder+1)*(Parameters::MaxOrder+1)*(Parameters::MaxOrder+1); - - // if you use team policy, worksize can be gathered from the basis object and use - // kokkos shmem_size APIs to create workspace per team or per thread. - //const auto worksize_for_teampolicy = basis.getWorksizePerPoint(OPERATOR_VALUE); - - // extract point range to be evaluated in each thread - typedef Kokkos::pair range_type; - - // parallel execution with serial interface - Kokkos::RangePolicy policy(0, npts); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(int i) { - // we evaluate a single point - const range_type pointRange = range_type(i,i+1); - - // out (# dofs, # pts), input (# pts, # dims) - auto output = Kokkos::subview(outputValuesA, Kokkos::ALL(), pointRange); - auto input = Kokkos::subview(inputPoints, pointRange, Kokkos::ALL()); + + *outStream << "Order: " << order << ": Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; - // wrap static workspace with a view; serial interface has a template view interface. - // either view or dynrankview with a right size is okay. - OutValueType workbuf[worksize]; - Kokkos::View work(&workbuf[0], worksize); + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; - // evaluate basis using serial interface - Impl::Basis_HGRAD_TET_Cn_FEM - ::Serial::getValues(output, input, work, vinv); - }); - - // evaluation using high level interface - basis.getValues(outputValuesB, inputPoints, OPERATOR_VALUE); - - // compare - const auto outputValuesA_Host = Kokkos::create_mirror_view(outputValuesA); Kokkos::deep_copy(outputValuesA_Host, outputValuesA); - const auto outputValuesB_Host = Kokkos::create_mirror_view(outputValuesB); Kokkos::deep_copy(outputValuesB_Host, outputValuesB); - - double sum = 0, diff = 0; - for (size_t i=0;igetValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Order: " << order << ": Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); 
Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + + //Note, the PR intel 2021 serial build shows substantially higher errors (possibly due to operation rearrangements). + auto tol = 1.0e6*epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } } } } catch (std::exception &err) { diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/CMakeLists.txt index eb726da6bb26..c5f307a89f52 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME 
"OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_TRI_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_TRI_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND 
Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..1a918203d7cc --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_TRI_C1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_TRI_C1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/test_02.hpp new file mode 100644 index 000000000000..928394ded0a4 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/test_02.hpp @@ -0,0 +1,228 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_TRI_C1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_TRI_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_TRI_C1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TRI_C1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_TRI_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, 
outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0) << ", " << outputCurlsA_Host(ic,i,j,1) <<"]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/CMakeLists.txt index 21c4f220d58a..ae831c937e39 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE 
"Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_TRI_C2_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_TRI_C2_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..cd49ca800b02 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_TRI_C2_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_TRI_C2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/test_02.hpp new file mode 100644 index 000000000000..60b8c49aa454 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/test_02.hpp @@ -0,0 +1,228 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.hpp + \brief Unit tests for the Intrepid2::HGRAD_TRI_C2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_TRI_C2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_TRI_C2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TRI_C2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_TRI_C2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, 
outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0) << ", " << outputCurlsA_Host(ic,i,j,1) <<"]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/CMakeLists.txt index 28b96612c334..4855e54c85a1 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/CMakeLists.txt @@ -7,6 +7,7 @@ SET(Intrepid2_TEST_ETI_FILE "test_01") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -94,11 +95,18 @@ SET(Intrepid2_TEST_ETI_FILE "test_02") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_01_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_01_ETI.in index 
513cf7cb37bc..eaaead469fb6 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_01_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_01_ETI.in @@ -14,7 +14,6 @@ #include "Kokkos_Core.hpp" - #define ETI_SACADO @ETI_SACADO@ #if (ETI_SACADO != 0) /// SACADO #include "Kokkos_ViewFactory.hpp" @@ -29,16 +28,15 @@ constexpr int num_deriv = 10; #define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 23) -/// Mauro, the master branch uses this derivative dimension which sounds dummy -constexpr int num_deriv = 0; //9; -constexpr int max_deriv = 1; //10; -#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, max_deriv+1) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 20) constexpr int num_deriv = 2; #define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) #endif + #include "test_01.hpp" int main(int argc, char *argv[]) { diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_02_ETI.in index c78997d6ea0c..a5343a485d3a 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_02_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_02_ETI.in @@ -14,15 +14,38 @@ #include "Kokkos_Core.hpp" +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + #include "test_02.hpp" int main(int argc, char *argv[]) { + const bool verbose = (argc-1) > 0; Kokkos::initialize(); - { - const bool verbose = (argc-1) > 0; - Intrepid2::Test::HGRAD_TRI_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); - } + + Intrepid2::Test::HGRAD_TRI_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + Kokkos::finalize(); return 0; } diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/test_02.hpp index 80d75c9bf099..a82178b45f9a 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/test_02.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/test_02.hpp @@ -7,9 +7,9 @@ // ***************************************************************************** // @HEADER -/** \file test_01.hpp +/** \file test_02.hpp \brief Unit tests for the Intrepid2::HGRAD_TRI_Cn_FEM class. - \author Created by P. Bochev, D. Ridzal, K. 
Peterson, Kyungjoo Kim + \author Created by Kyungjoo Kim, Mauro Perego */ @@ -23,99 +23,196 @@ #include "Intrepid2_Utils.hpp" #include "Intrepid2_HGRAD_TRI_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" namespace Intrepid2 { namespace Test { - // This code provides an example to use serial interface of high order elements + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. template int HGRAD_TRI_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TRI_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + using DeviceSpaceType = typename DeviceType::execution_space; Kokkos::print_configuration(std::cout, false); int errorFlag = 0; - + constexpr int maxOrder = 9; try { - for (int order=1;order basis(order); + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HGRAD_TRI_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); - // problem setup - // let's say we want to evaluate 1000 points in parallel. output values are stored in outputValuesA and B. - // A is compuated via serial interface and B is computed with top-level interface. 
- const int npts = 1000, ndim = 2; - Kokkos::DynRankView outputValuesA("outputValuesA", basis.getCardinality(), npts); - Kokkos::DynRankView outputValuesB("outputValuesB", basis.getCardinality(), npts); + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); - Kokkos::View inputPointsViewToUseRandom("inputPoints", npts, ndim); - Kokkos::DynRankView inputPoints (inputPointsViewToUseRandom.data(), npts, ndim); + using ScalarType = typename ScalarTraits::scalar_type; - // random values between (-1,1) x (-1,1) + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); - // compute setup - // we need vinv and workspace - const auto vinv = basis.getVandermondeInverse(); - - // worksize - // workspace per thread is required for serial interface. 
- // parallel_for with range policy would be good to use stack workspace - // as team policy only can create shared memory - // this part would be tricky as the max size should be determined at compile time - // let's think about this and find out the best practice. for now I use the following. - constexpr int worksize = (Parameters::MaxOrder+1)*(Parameters::MaxOrder+1); - - // if you use team policy, worksize can be gathered from the basis object and use - // kokkos shmem_size APIs to create workspace per team or per thread. - //const auto worksize_for_teampolicy = basis.getWorksizePerPoint(OPERATOR_VALUE); - - // extract point range to be evaluated in each thread - typedef Kokkos::pair range_type; - - // parallel execution with serial interface - Kokkos::RangePolicy policy(0, npts); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(int i) { - // we evaluate a single point - const range_type pointRange = range_type(i,i+1); + + *outStream << "Order: " << order << ": Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; - // out (# dofs, # pts), input (# pts, # dims) - auto output = Kokkos::subview(outputValuesA, Kokkos::ALL(), pointRange); - auto input = Kokkos::subview(inputPoints, pointRange, Kokkos::ALL()); + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; - // wrap static workspace with a view; serial interface has a template view interface. - // either view or dynrankview with a right size is okay. - OutValueType workbuf[worksize]; - Kokkos::View work(&workbuf[0], worksize); + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; - // evaluate basis using serial interface - Impl::Basis_HGRAD_TRI_Cn_FEM - ::Serial::getValues(output, input, work, vinv); - }); - - // evaluation using high level interface - basis.getValues(outputValuesB, inputPoints, OPERATOR_VALUE); - - // compare - const auto outputValuesA_Host = 
Kokkos::create_mirror_view(outputValuesA); Kokkos::deep_copy(outputValuesA_Host, outputValuesA); - const auto outputValuesB_Host = Kokkos::create_mirror_view(outputValuesB); Kokkos::deep_copy(outputValuesB_Host, outputValuesB); - - double sum = 0, diff = 0; - for (size_t i=0;igetValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Order: " << order << ": Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << 
ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0) << ", " << outputCurlsA_Host(ic,i,j,1) <<"]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } } } } catch (std::exception &err) { diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/CMakeLists.txt index a32463e45988..4b1a7626d4ff 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_WEDGE_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_WEDGE_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/eti/test_02_ETI.in 
b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..759d2a05ad2a --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_WEDGE_C1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_WEDGE_C1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/test_02.hpp new file mode 100644 index 000000000000..33f9bb85137d --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_WEDGE_C1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_WEDGE_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_WEDGE_C1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_WEDGE_C1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_WEDGE_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + 
auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/CMakeLists.txt index 759b1bc6bc22..f82d6a4f7fc4 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/CMakeLists.txt @@ -1,8 +1,16 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "") +LIST(APPEND Intrepid2_TEST_ETI_FILE + "test_01" + "test_01_Serendipity") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,12 +25,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "") -LIST(APPEND Intrepid2_TEST_ETI_FILE - "test_01" - "test_01_Serendipity") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE 
"Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_WEDGE_C2_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_WEDGE_C2_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device 
SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -83,4 +159,3 @@ TRIBITS_COPY_FILES_TO_BINARY_DIR(HGRAD_WEDGE_C2TestDataCopy SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/testdata" DEST_DIR "${CMAKE_CURRENT_BINARY_DIR}/testdata" ) - diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..86de2ff60329 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_WEDGE_C2_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_WEDGE_C2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/test_02.hpp new file mode 100644 index 000000000000..35f0e18c3dbe --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/test_02.hpp @@ -0,0 +1,182 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_WEDGE_C2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_WEDGE_C2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + template + int HGRAD_WEDGE_C2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_WEDGE_C2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_WEDGE_C2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + 
auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/CMakeLists.txt index ebf2144cc03f..fa03caf7d02b 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HVOL_HEX_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HVOL_HEX_Cn_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HVOL_HEX_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..766fea986506 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HVOL_HEX_Cn_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HVOL_HEX_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/test_02.hpp new file mode 100644 index 000000000000..115a371bc82a --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/test_02.hpp @@ -0,0 +1,144 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HVOL_HEX_Cn_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HVOL_HEX_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HVOL_HEX_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HVOL_HEX_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HVOL_HEX_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA 
(typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + + *outStream << "Order: " << order << ": Comparing values on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/CMakeLists.txt index f5caa97dcaf1..24d663415a74 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HVOL_LINE_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HVOL_LINE_Cn_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HVOL_LINE_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..a6f42f8ba7b8 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HVOL_LINE_Cn_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HVOL_LINE_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/test_02.hpp new file mode 100644 index 000000000000..c71f401eda49 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/test_02.hpp @@ -0,0 +1,144 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HVOL_LINE_Cn_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HVOL_LINE_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HVOL_LINE_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HVOL_LINE_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HVOL_LINE_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 1; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA 
(typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + + *outStream << "Order: " << order << ": Comparing values on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/CMakeLists.txt index 769157316641..aedb132dcc82 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HVOL_QUAD_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HVOL_QUAD_Cn_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HVOL_QUAD_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..1c01cd896135 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HVOL_QUAD_Cn_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HVOL_QUAD_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/test_02.hpp new file mode 100644 index 000000000000..044f8fad53a0 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/test_02.hpp @@ -0,0 +1,144 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HVOL_QUAD_Cn_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HVOL_QUAD_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HVOL_QUAD_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HVOL_QUAD_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HVOL_QUAD_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA 
(typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + + *outStream << "Order: " << order << ": Comparing values on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/CMakeLists.txt index 0f61f9a7cfff..7dfea7c9986c 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HVOL_TET_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HVOL_TET_Cn_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HVOL_TET_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..d15050227457 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HVOL_TET_Cn_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HVOL_TET_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/test_02.hpp new file mode 100644 index 000000000000..d27c666355c1 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/test_02.hpp @@ -0,0 +1,144 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HVOL_TET_Cn_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HVOL_TET_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HVOL_TET_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HVOL_TET_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HVOL_TET_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA 
(typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + + *outStream << "Order: " << order << ": Comparing values on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/CMakeLists.txt index 700140bb8337..49398919d584 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HVOL_TRI_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HVOL_TRI_Cn_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HVOL_TRI_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..4b98bc03263a --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HVOL_TRI_Cn_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HVOL_TRI_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/test_02.hpp new file mode 100644 index 000000000000..aaefaa951c33 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/test_02.hpp @@ -0,0 +1,145 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HVOL_TRI_Cn_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HVOL_TRI_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HVOL_TRI_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HVOL_TRI_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HVOL_TRI_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA 
(typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + + *outStream << "Order: " << order << ": Comparing values on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Shared/Polylib/test_01.hpp b/packages/intrepid2/unit-test/Shared/Polylib/test_01.hpp index 5f71f3b5e376..4dcc02da30ce 100644 --- a/packages/intrepid2/unit-test/Shared/Polylib/test_01.hpp +++ b/packages/intrepid2/unit-test/Shared/Polylib/test_01.hpp @@ -233,7 +233,8 @@ namespace Intrepid2 { outStream->precision(5); - const ordinal_type npLower = 5, npUpper = Polylib::MaxPolylibPoint; // npUpper: 31 right now + const ordinal_type npLower = 5, npUpper = Polylib::MaxPolylibPoint; + const ordinal_type npUpperStep1 = 21; // we cover all np values from npLower to npUpperStep1; we only cover every 5th one after that const ValueType tol = 1000.0 * tolerence(); const double lowOrderTol = tol; const double highOrderTol = tol * 100; @@ -268,7 +269,8 @@ namespace Intrepid2 { while (alpha <= 5.0) { ValueType beta = -0.5; while (beta <= 5.0) { - for (auto np = npLower; np <= npUpper; ++np){ + ordinal_type npStep = 1; + for (auto np = npLower; np <= npUpper; np += npStep){ const double localTol = (np > 20) ? 
highOrderTol : lowOrderTol; Polylib::Serial::getCubature(z, w, np, alpha, beta, poly); @@ -281,6 +283,7 @@ namespace Intrepid2 { ", np = " << np << ", n = " << n << " integral was " << sum << "\n"; } } + if (np == npUpperStep1) npStep = 5; } beta += 0.5; } @@ -296,7 +299,8 @@ namespace Intrepid2 { while (alpha <= 5.0) { ValueType beta = -0.5; while (beta <= 5.0) { - for (auto np = npLower; np <= npUpper; ++np) { + ordinal_type npStep = 1; + for (auto np = npLower; np <= npUpper; np += npStep) { Polylib::Serial::getCubature(z, w, np, alpha, beta, poly); const double localTol = (np > 20) ? highOrderTol : lowOrderTol; @@ -316,6 +320,7 @@ namespace Intrepid2 { ", np = " << np << ", n = " << n << " difference " << sum << "\n"; } } + if (np == npUpperStep1) npStep = 5; } beta += 0.5; } @@ -331,8 +336,8 @@ namespace Intrepid2 { while (alpha <= 5.0) { ValueType beta = -0.5; while (beta <= 5.0) { - - for (auto np = npLower; np <= npUpper; ++np) { + ordinal_type npStep = 1; + for (auto np = npLower; np <= npUpper; np += npStep) { const double localTol = (np > 20) ? highOrderTol : lowOrderTol; Polylib::Serial::getCubature(z, w, np, alpha, beta, poly); @@ -353,6 +358,7 @@ namespace Intrepid2 { ", np = " << np << ", n = " << n << " difference " << sum << "\n"; } } + if (np == npUpperStep1) npStep = 5; } beta += 0.5; }