From b1c5bcb83cb5089f8c39293c1d98e5d72d6551f0 Mon Sep 17 00:00:00 2001 From: Theresa Pollinger Date: Wed, 15 Feb 2023 09:37:12 +0100 Subject: [PATCH 1/5] DFG: make general SliceIterator its own class --- src/fullgrid/DistributedFullGrid.hpp | 224 ++++++--------------------- src/fullgrid/SliceIterator.hpp | 144 +++++++++++++++++ 2 files changed, 192 insertions(+), 176 deletions(-) create mode 100644 src/fullgrid/SliceIterator.hpp diff --git a/src/fullgrid/DistributedFullGrid.hpp b/src/fullgrid/DistributedFullGrid.hpp index 481e959ad..883264d26 100644 --- a/src/fullgrid/DistributedFullGrid.hpp +++ b/src/fullgrid/DistributedFullGrid.hpp @@ -8,6 +8,7 @@ #include #include "fullgrid/FullGrid.hpp" +#include "fullgrid/SliceIterator.hpp" #include "mpi/MPICartesianUtils.hpp" #include "mpi/MPISystem.hpp" #include "sparsegrid/DistributedSparseGridUniform.hpp" @@ -18,7 +19,6 @@ #include "utils/Stats.hpp" #include "utils/Types.hpp" - namespace combigrid { /* a regular (equidistant) domain decompositioning for an even number of processes @@ -134,7 +134,6 @@ class DistributedFullGrid { // in contrast to serial implementation we directly create the grid fullgridVector_.resize(nrLocalElements_); - } // explicit DistributedFullGrid(const DistributedFullGrid& other) { @@ -148,182 +147,59 @@ class DistributedFullGrid { // copy construction would need to duplicate communicator_ DistributedFullGrid(const DistributedFullGrid& other) = delete; - DistributedFullGrid& operator=( const DistributedFullGrid & ) = delete; + DistributedFullGrid& operator=(const DistributedFullGrid&) = delete; DistributedFullGrid(DistributedFullGrid&& other) = default; DistributedFullGrid& operator=(DistributedFullGrid&& other) = default; virtual ~DistributedFullGrid() { // MPI_Comm_free(&communicator_); - for (size_t i = 0; i < upwardSubarrays_.size(); ++i) { + for (size_t i = 0; i < upwardSubarrays_.size(); ++i) { MPI_Type_free(&upwardSubarrays_[i]); } - for (size_t i = 0; i < downwardSubarrays_.size(); ++i) { + for (size_t i = 0; i < downwardSubarrays_.size(); ++i) { MPI_Type_free(&downwardSubarrays_[i]); } } - struct SubarrayIterator { - // cf. https://www.internalpointers.com/post/writing-custom-iterators-modern-cpp - using iterator_category = std::forward_iterator_tag; - using difference_type = std::ptrdiff_t; - using value_type = FG_ELEMENT; - using pointer = FG_ELEMENT*; - using reference = FG_ELEMENT&; - - SubarrayIterator(const MPI_Datatype& subarrayType, DistributedFullGrid* dfgPointer) : dfgPointer_(dfgPointer){ - // read out the subarray informaiton - MPI_Datatype tmp; - auto dim = dfgPointer_->getDimension(); - std::vector MPITypeIntegers(3 * dim + 2); - auto success = MPI_Type_get_contents(subarrayType, static_cast(MPITypeIntegers.size()), - 0, 1, MPITypeIntegers.data(), nullptr, &tmp); - assert(success == MPI_SUCCESS); - auto sizes = std::vector(MPITypeIntegers.begin() + 1, MPITypeIntegers.begin() + dim + 1); - subsizes_ = std::vector(MPITypeIntegers.begin() + dim + 1, - MPITypeIntegers.begin() + 2 * dim + 1); - starts_ = std::vector(MPITypeIntegers.begin() + 2 * dim + 1, - MPITypeIntegers.begin() + 3 * dim + 1); - currentLocalIndex_ = starts_; - - // check datatype - assert(tmp == dfgPointer_->getMPIDatatype()); - // // check sizes - // std::cout << " sizes " << sizes << " subsizes " << subsizes_ << " starts " << starts_ - // << " end " << endIndex() << std::endl; - for (DimType d = 0; d < dfgPointer_->getDimension() - 1; ++d) { - assert(sizes[d] == static_cast(dfgPointer_->getLocalSizes()[d])); - assert(subsizes_[d] + starts_[d] <= sizes[d]); - } - assert(std::accumulate(sizes.begin(), sizes.end(), 1, - std::multiplies()) == dfgPointer_->getNrLocalElements()); - // check order - assert(MPITypeIntegers.back() == MPI_ORDER_FORTRAN); - assert(linearize(currentLocalIndex_) <= dfgPointer_->getNrLocalElements()); - } - - SubarrayIterator(const std::vector& subsizes, const std::vector& starts, - DistributedFullGrid* dfgPointer) - : currentLocalIndex_(starts), - subsizes_(subsizes), - starts_(starts), - dfgPointer_(dfgPointer) { - assert(linearize(currentLocalIndex_) <= dfgPointer_->getNrLocalElements()); - } - - // cheap rule of 5 - SubarrayIterator() = delete; - SubarrayIterator(const SubarrayIterator& other) = delete; - SubarrayIterator& operator=( const SubarrayIterator & ) = delete; - SubarrayIterator(SubarrayIterator&& other) = delete; - SubarrayIterator& operator=(SubarrayIterator&& other) = delete; - - reference operator*() const { - // make sure to only dereference when we actually have the data mapped -#ifndef NDEBUG - if(linearize(currentLocalIndex_) > dfgPointer_->getNrLocalElements()) { - std::cout << " subsizes " << subsizes_ << " starts " << starts_ - << " end " << endIndex() << std::endl; - std::cout << " ref waah currentLocalIndex_" << currentLocalIndex_ - << " linearized " << linearize(currentLocalIndex_) - << " numElements " << dfgPointer_->getNrLocalElements() - << std::endl; - } - assert(linearize(currentLocalIndex_) <= dfgPointer_->getNrLocalElements()); - assert(linearize(currentLocalIndex_) < endIndex()); -#endif // ndef NDEBUG - return dfgPointer_->getElementVector()[linearize(currentLocalIndex_)]; - } - pointer operator->() const { - return &(dfgPointer_->getElementVector()[linearize(currentLocalIndex_)]); - } - pointer getPointer() const { - return &(dfgPointer_->getElementVector()[linearize(currentLocalIndex_)]); - } - const std::vector& getVecIndex() const { return currentLocalIndex_; } - IndexType getIndex() const { return linearize(currentLocalIndex_); } - const SubarrayIterator& operator++() { - assert(linearize(currentLocalIndex_) <= dfgPointer_->getNrLocalElements()); -#ifndef NDEBUG - auto cpLocalIdx = currentLocalIndex_; - auto idxbefore = linearize(currentLocalIndex_); -#endif // ndef NDEBUG - // increment - // Fortran ordering - currentLocalIndex_[0] += 1; - for (DimType d = 0; d < dfgPointer_->getDimension() - 1; ++d) { - // wrap around -#ifndef NDEBUG - if(currentLocalIndex_[d] > starts_[d] + subsizes_[d]) { - std::cout << " subsizes " << subsizes_ << " starts " << starts_ - << " end " << endIndex() << " idxbefore " << idxbefore << std::endl; - std::cout << "waah currentLocalIndex_" << currentLocalIndex_ << " before " << cpLocalIdx << std::endl; - } - assert(currentLocalIndex_[d] >= starts_[d]); - assert(!(currentLocalIndex_[d] > starts_[d] + subsizes_[d])); -#endif // ndef NDEBUG - if (currentLocalIndex_[d] == starts_[d] + subsizes_[d]) { - currentLocalIndex_[d] = starts_[d]; - ++currentLocalIndex_[d + 1]; - } - } - assert(idxbefore < linearize(currentLocalIndex_)); - assert(linearize(currentLocalIndex_) <= endIndex()); - return *this; - } - friend bool operator==(const SubarrayIterator& a, const SubarrayIterator& b) { - return a.currentLocalIndex_ == b.currentLocalIndex_; - }; - friend bool operator!=(const SubarrayIterator& a, const SubarrayIterator& b) { - return a.currentLocalIndex_ != b.currentLocalIndex_; - }; - pointer begin() { return &(dfgPointer_->getElementVector()[firstIndex()]); } - pointer end() { return &(dfgPointer_->getElementVector()[endIndex()]); } - bool isAtEnd() { return (linearize(currentLocalIndex_) == endIndex()); } - int size() { - return std::accumulate(subsizes_.begin(), subsizes_.end(), 1, - std::multiplies()); } - - IndexType firstIndex() const { - return linearize(starts_); - } - IndexType endIndex() const { - std::vector endVectorIndex = starts_; - endVectorIndex[dfgPointer_->getDimension() - 1] += subsizes_[dfgPointer_->getDimension() - 1]; - auto linEndIndex = linearize(endVectorIndex); - return linEndIndex; - } - - private: - IndexType linearize(std::vector indexVector) const { - auto offsets = dfgPointer_->getLocalOffsets(); - assert(offsets[0] == 1); - auto dim = dfgPointer_->getDimension(); - IndexType index = 0; - // Fortran ordering - for (DimType d = 0; d < dim; ++d) { - index += offsets[d] * indexVector[d]; - } -// // compare to dfg -// #ifndef NDEBUG -// IndexVector liv; -// liv.assign(indexVector.begin(), indexVector.end()); -// auto li = dfgPointer_->getLocalLinearIndex(liv); -// IndexVector linv(dim); -// dfgPointer_->getLocalVectorIndex(index, linv); -// if (li != index) { -// std::cout << "li " << li << " " << indexVector << " vs " << index << " " << linv << std::endl; -// } -// assert(li == index); -// #endif // ndef NDEBUG - return index; - } - - std::vector currentLocalIndex_; - std::vector subsizes_; - std::vector starts_; - DistributedFullGrid* dfgPointer_; - }; +class SubarrayIterator : public SliceIterator { + public: + SubarrayIterator(const MPI_Datatype& subarrayType, DistributedFullGrid& dfg) : SliceIterator() { + this->offsets_ = dfg.getLocalOffsets(); + this->dataPointer_ = &(dfg.getElementVector()); + + // read out the subarray informaiton + MPI_Datatype tmp; + DimType dim = dfg.getDimension(); + std::vector MPITypeIntegers(3 * dim + 2); + auto success = MPI_Type_get_contents(subarrayType, static_cast(MPITypeIntegers.size()), 0, + 1, MPITypeIntegers.data(), nullptr, &tmp); + assert(success == MPI_SUCCESS); + // check datatype + assert(tmp == dfg.getMPIDatatype()); + // check order + assert(MPITypeIntegers.back() == MPI_ORDER_FORTRAN); + + auto sizes = std::vector(MPITypeIntegers.begin() + 1, MPITypeIntegers.begin() + dim + 1); + this->subsizes_ = + std::vector(MPITypeIntegers.begin() + dim + 1, MPITypeIntegers.begin() + 2 * dim + 1); + this->starts_ = std::vector(MPITypeIntegers.begin() + 2 * dim + 1, + MPITypeIntegers.begin() + 3 * dim + 1); + this->currentLocalIndex_ = this->starts_; + + assert(dim == this->getDimension()); + // check that sizes match + for (DimType d = 0; d < this->getDimension() - 1; ++d) { + assert(sizes[d] == static_cast(dfg.getLocalSizes()[d])); + assert(this->subsizes_[d] + this->starts_[d] <= sizes[d]); + } + assert(std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies()) == + this->dataPointer_->size()); + + this->setEndIndex(); + this->validateSizes(); + } +}; FG_ELEMENT evalLocalIndexOn(const IndexVector& localIndex, const std::vector& coords) const { const auto& lowerBounds = this->getLowerBounds(); @@ -1066,11 +942,11 @@ class DistributedFullGrid { std::vector csubsizes(subsizes.begin(), subsizes.end()); std::vector cstarts(starts.begin(), starts.end()); - // create subarray view on data MPI_Datatype mysubarray; MPI_Type_create_subarray(static_cast(this->getDimension()), &csizes[0], &csubsizes[0], - &cstarts[0], MPI_ORDER_FORTRAN, this->getMPIDatatype(), &mysubarray); + &cstarts[0], MPI_ORDER_FORTRAN, this->getMPIDatatype(), + &mysubarray); MPI_Type_commit(&mysubarray); subarrayTypes.push_back(mysubarray); @@ -1641,7 +1517,6 @@ class DistributedFullGrid { recvbufferFromUp.resize(0); } - // TODO asynchronous?? // send lower boundary values auto success = @@ -1733,8 +1608,8 @@ class DistributedFullGrid { // if both lower and higher are set, it's only me and I can just average // not strictly necessary but good to test the iterators if (recvbufferFromUp.size() > 0 && recvbufferFromDown.size() > 0) { - SubarrayIterator downIt(downSubarrays[d], this); - SubarrayIterator upIt(upSubarrays[d], this); + SubarrayIterator downIt(downSubarrays[d], *this); + SubarrayIterator upIt(upSubarrays[d], *this); assert(downIt.size() == upIt.size()); #ifndef NDEBUG auto initialIndexDiff = upIt.getIndex() - downIt.getIndex(); @@ -1754,7 +1629,7 @@ class DistributedFullGrid { assert(upIt.isAtEnd()); assert(ctr == upIt.size()); } else if (recvbufferFromUp.size() > 0) { - SubarrayIterator downIt(downSubarrays[d], this); + SubarrayIterator downIt(downSubarrays[d], *this); auto upIt = recvbufferFromUp.begin(); while (!downIt.isAtEnd()) { auto avg = 0.5 * ((*upIt) + (*downIt)); @@ -1764,7 +1639,7 @@ class DistributedFullGrid { } assert(upIt == recvbufferFromUp.end()); } else if (recvbufferFromDown.size() > 0) { - SubarrayIterator upIt(upSubarrays[d], this); + SubarrayIterator upIt(upSubarrays[d], *this); auto downIt = recvbufferFromDown.begin(); while (!upIt.isAtEnd()) { auto avg = 0.5 * ((*upIt) + (*downIt)); @@ -1784,7 +1659,6 @@ class DistributedFullGrid { } } - /** * @brief check if given globalLinearIndex is on the boundary of this DistributedFullGrid */ @@ -2108,8 +1982,6 @@ class DistributedFullGrid { } } } - - }; // end class diff --git a/src/fullgrid/SliceIterator.hpp b/src/fullgrid/SliceIterator.hpp new file mode 100644 index 000000000..86a8092a3 --- /dev/null +++ b/src/fullgrid/SliceIterator.hpp @@ -0,0 +1,144 @@ +#pragma once +#include + +#include "utils/IndexVector.hpp" +#include "utils/Types.hpp" + +namespace combigrid { +template +class SliceIterator { + public: + // cf. https://www.internalpointers.com/post/writing-custom-iterators-modern-cpp + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = FG_ELEMENT; + using pointer = FG_ELEMENT*; + using reference = FG_ELEMENT&; + + SliceIterator(const std::vector& subsizes, const std::vector& starts, + const IndexVector& offsets, std::vector* dataPointer) + : currentLocalIndex_(starts), + subsizes_(subsizes), + starts_(starts), + offsets_(offsets), + dataPointer_(dataPointer) { + this->setEndIndex(); + this->validateSizes(); + } + + // cheap rule of 5 + explicit SliceIterator() = default; + SliceIterator(const SliceIterator& other) = delete; + SliceIterator& operator=(const SliceIterator&) = delete; + SliceIterator(SliceIterator&& other) = delete; + SliceIterator& operator=(SliceIterator&& other) = delete; + + reference operator*() const { +#ifndef NDEBUG + // make sure to only dereference when we actually have the data mapped + if (linearize(currentLocalIndex_) > dataPointer_->size()) { + std::cout << " subsizes " << subsizes_ << " starts " << starts_ << " end " << endIndex() + << std::endl; + std::cout << " ref waah currentLocalIndex_" << currentLocalIndex_ << " linearized " + << linearize(currentLocalIndex_) << " numElements " << dataPointer_->size() + << std::endl; + } + assert(linearize(currentLocalIndex_) <= dataPointer_->size()); + assert(linearize(currentLocalIndex_) < endIndex()); +#endif // ndef NDEBUG + return (*dataPointer_)[linearize(currentLocalIndex_)]; + } + + pointer operator->() const { return &((*dataPointer_)[linearize(currentLocalIndex_)]); } + pointer getPointer() const { return &((*dataPointer_)[linearize(currentLocalIndex_)]); } + + const std::vector& getVecIndex() const { return currentLocalIndex_; } + IndexType getIndex() const { return linearize(currentLocalIndex_); } + + const SliceIterator& operator++() { +#ifndef NDEBUG + assert(linearize(currentLocalIndex_) <= dataPointer_->size()); + auto cpLocalIdx = currentLocalIndex_; + auto idxbefore = linearize(currentLocalIndex_); +#endif // ndef NDEBUG + // increment + // Fortran ordering + currentLocalIndex_[0] += 1; + for (DimType d = 0; d < this->getDimension() - 1; ++d) { + // wrap around +#ifndef NDEBUG + if (currentLocalIndex_[d] > starts_[d] + subsizes_[d]) { + std::cout << " subsizes " << subsizes_ << " starts " << starts_ << " end " << endIndex() + << " idxbefore " << idxbefore << std::endl; + std::cout << "waah currentLocalIndex_" << currentLocalIndex_ << " before " << cpLocalIdx + << std::endl; + } + assert(currentLocalIndex_[d] >= starts_[d]); + assert(!(currentLocalIndex_[d] > starts_[d] + subsizes_[d])); +#endif // ndef NDEBUG + // expect that it is more likely that we do not have to wrap around + // if (__builtin_expect((currentLocalIndex_[d] == starts_[d] + subsizes_[d]), false)) { + // (not sure if this is always the case) + if (currentLocalIndex_[d] == starts_[d] + subsizes_[d]) { + currentLocalIndex_[d] = starts_[d]; + ++currentLocalIndex_[d + 1]; + } + } + assert(idxbefore < linearize(currentLocalIndex_)); + assert(linearize(currentLocalIndex_) <= endIndex()); + return *this; + } + friend bool operator==(const SliceIterator& a, const SliceIterator& b) { + return a.currentLocalIndex_ == b.currentLocalIndex_; + }; + friend bool operator!=(const SliceIterator& a, const SliceIterator& b) { + return a.currentLocalIndex_ != b.currentLocalIndex_; + }; + pointer begin() { return &((*dataPointer_)[firstIndex()]); } + pointer end() { return &((*dataPointer_)[endIndex()]); } + bool isAtEnd() const { return (linearize(currentLocalIndex_) == endIndex()); } + int size() const { + return std::accumulate(subsizes_.begin(), subsizes_.end(), 1, std::multiplies()); + } + + IndexType firstIndex() const { return linearize(starts_); } + void setEndIndex() { + std::vector endVectorIndex = this->starts_; + endVectorIndex[this->getDimension() - 1] += this - subsizes_[this->getDimension() - 1]; + linEndIndex_ = linearize(endVectorIndex); + } + IndexType endIndex() const { + assert(linEndIndex_ != -1); + return linEndIndex_; + } + + protected: + inline DimType getDimension() const { return static_cast(subsizes_.size()); } + + inline IndexType linearize(const std::vector& indexVector) const { + IndexType index = 0; + // Fortran ordering + for (DimType d = 0; d < subsizes_.size(); ++d) { + index += offsets_[d] * indexVector[d]; + } + return index; + } + + void validateSizes() { + assert(offsets_[0] == 1); + assert(std::accumulate(this->subsizes_.begin(), this->subsizes_.end(), 1, + std::multiplies()) <= this->dataPointer_->size()); + assert(linearize(this->currentLocalIndex_) <= this->dataPointer_->size()); + assert(currentLocalIndex_.size() == this->getDimension()); + assert(subsizes_.size() == this->getDimension()); + assert(starts_.size() == this->getDimension()); + } + + std::vector currentLocalIndex_; + std::vector subsizes_; + std::vector starts_; + IndexVector offsets_; + std::vector* dataPointer_; + IndexType linearEndIndex_ = -1; +}; +} // namespace combigrid \ No newline at end of file From e4730d3fb18b86cccfb6bb93b97c92919c7efa7c Mon Sep 17 00:00:00 2001 From: Theresa Pollinger Date: Wed, 15 Feb 2023 12:50:16 +0100 Subject: [PATCH 2/5] SliceIterator: fix typo (amends last commit) --- src/fullgrid/SliceIterator.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fullgrid/SliceIterator.hpp b/src/fullgrid/SliceIterator.hpp index 86a8092a3..9eb869c5d 100644 --- a/src/fullgrid/SliceIterator.hpp +++ b/src/fullgrid/SliceIterator.hpp @@ -105,11 +105,11 @@ class SliceIterator { void setEndIndex() { std::vector endVectorIndex = this->starts_; endVectorIndex[this->getDimension() - 1] += this - subsizes_[this->getDimension() - 1]; - linEndIndex_ = linearize(endVectorIndex); + linearEndIndex_ = linearize(endVectorIndex); } IndexType endIndex() const { - assert(linEndIndex_ != -1); - return linEndIndex_; + assert(linearEndIndex_ != -1); + return linearEndIndex_; } protected: From 989e3f08d5a64851bca58790c706da0171f59a63 Mon Sep 17 00:00:00 2001 From: Theresa Pollinger Date: Wed, 15 Feb 2023 13:59:56 +0100 Subject: [PATCH 3/5] SliceIterator: fix typo with -> (amends last commit) --- src/fullgrid/SliceIterator.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fullgrid/SliceIterator.hpp b/src/fullgrid/SliceIterator.hpp index 9eb869c5d..30c57f0e1 100644 --- a/src/fullgrid/SliceIterator.hpp +++ b/src/fullgrid/SliceIterator.hpp @@ -104,7 +104,7 @@ class SliceIterator { IndexType firstIndex() const { return linearize(starts_); } void setEndIndex() { std::vector endVectorIndex = this->starts_; - endVectorIndex[this->getDimension() - 1] += this - subsizes_[this->getDimension() - 1]; + endVectorIndex[this->getDimension() - 1] += this->subsizes_[this->getDimension() - 1]; linearEndIndex_ = linearize(endVectorIndex); } IndexType endIndex() const { From 06c601b3728d7d292d947660a007c6d8041c3066 Mon Sep 17 00:00:00 2001 From: Theresa Pollinger Date: Thu, 16 Feb 2023 09:35:03 +0100 Subject: [PATCH 4/5] advection example: task constructor w/o dim --- examples/distributed_advection/TaskAdvection.hpp | 4 ++-- examples/distributed_advection/combi_example.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/distributed_advection/TaskAdvection.hpp b/examples/distributed_advection/TaskAdvection.hpp index 0fcea21ad..68932b963 100644 --- a/examples/distributed_advection/TaskAdvection.hpp +++ b/examples/distributed_advection/TaskAdvection.hpp @@ -30,8 +30,8 @@ class TaskAdvection : public Task { /* if the constructor of the base task class is not sufficient we can provide an * own implementation. here, we add dt, nsteps, and p as a new parameters. */ - TaskAdvection(DimType dim, const LevelVector& l, const std::vector& boundary, - real coeff, LoadModel* loadModel, real dt, size_t nsteps, + TaskAdvection(const LevelVector& l, const std::vector& boundary, real coeff, + LoadModel* loadModel, real dt, size_t nsteps, std::vector p = std::vector(0), FaultCriterion* faultCrit = (new StaticFaults({0, IndexVector(0), IndexVector(0)}))) : Task(l, boundary, coeff, loadModel, faultCrit), diff --git a/examples/distributed_advection/combi_example.cpp b/examples/distributed_advection/combi_example.cpp index 2b0965a0d..32265a59f 100644 --- a/examples/distributed_advection/combi_example.cpp +++ b/examples/distributed_advection/combi_example.cpp @@ -178,7 +178,7 @@ int main(int argc, char** argv) { TaskContainer tasks; std::vector taskIDs; for (size_t i = 0; i < levels.size(); i++) { - Task* t = new TaskAdvection(dim, levels[i], boundary, coeffs[i], loadmodel.get(), dt, + Task* t = new TaskAdvection(levels[i], boundary, coeffs[i], loadmodel.get(), dt, nsteps, p); static_assert(!isGENE, "isGENE"); From c8645e5c8335712bb76326046ef3971c20cefc27 Mon Sep 17 00:00:00 2001 From: Theresa Pollinger Date: Thu, 16 Feb 2023 09:39:47 +0100 Subject: [PATCH 5/5] dfg: use boost tensor as data storage * this compiles but the tests fail; needs debugging --- examples/combi_example/TaskExample.hpp | 4 +- examples/combi_example_faults/TaskExample.hpp | 4 +- .../distributed_advection/TaskAdvection.hpp | 34 ++--- .../distributed_third_level/TaskAdvection.hpp | 37 +++-- .../distributed_third_level/TaskConst.hpp | 4 +- .../selalib_distributed/src/SelalibTask.hpp | 2 +- src/fullgrid/DistributedFullGrid.hpp | 144 +++++++++++------- src/fullgrid/MultiArray.hpp | 2 +- src/fullgrid/SliceIterator.hpp | 32 ++-- .../DistributedHierarchization.hpp | 20 +-- tests/TaskConst.hpp | 4 +- tests/TaskCount.hpp | 2 +- tests/test_distributedfullgrid.cpp | 5 +- tests/test_ftolerance.cpp | 6 +- tests/test_hierarchization.cpp | 12 +- tests/test_reduce.cpp | 4 +- tests/test_rescheduling.cpp | 6 +- 17 files changed, 184 insertions(+), 138 deletions(-) diff --git a/examples/combi_example/TaskExample.hpp b/examples/combi_example/TaskExample.hpp index d42d7c245..7aa50b5f8 100644 --- a/examples/combi_example/TaskExample.hpp +++ b/examples/combi_example/TaskExample.hpp @@ -87,7 +87,7 @@ class TaskExample : public Task { dfg_ = new DistributedFullGrid(dim, l, lcomm, this->getBoundary(), p); /* loop over local subgrid and set initial values */ - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); for (size_t i = 0; i < elements.size(); ++i) { IndexType globalLinearIndex = dfg_->getGlobalLinearIndex(i); @@ -109,7 +109,7 @@ class TaskExample : public Task { int lrank = theMPISystem()->getLocalRank(); - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); // TODO if your Example uses another data structure, you need to copy // the data from elements to that data structure diff --git a/examples/combi_example_faults/TaskExample.hpp b/examples/combi_example_faults/TaskExample.hpp index a1d20ddf8..0e3b706fb 100644 --- a/examples/combi_example_faults/TaskExample.hpp +++ b/examples/combi_example_faults/TaskExample.hpp @@ -91,7 +91,7 @@ class TaskExample: public Task { dfg_ = new DistributedFullGrid(dim, l, lcomm, this->getBoundary(), p); /* loop over local subgrid and set initial values */ - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); for (size_t i = 0; i < elements.size(); ++i) { IndexType globalLinearIndex = dfg_->getGlobalLinearIndex(i); @@ -115,7 +115,7 @@ class TaskExample: public Task { /* pseudo timestepping to demonstrate the behaviour of your typical * time-dependent simulation problem. */ - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); for (size_t step = stepsTotal_; step < stepsTotal_ + nsteps_; ++step) { real time = (step + 1)* dt_; diff --git a/examples/distributed_advection/TaskAdvection.hpp b/examples/distributed_advection/TaskAdvection.hpp index 68932b963..b692279ac 100644 --- a/examples/distributed_advection/TaskAdvection.hpp +++ b/examples/distributed_advection/TaskAdvection.hpp @@ -1,8 +1,9 @@ #pragma once +#include + #include "fullgrid/DistributedFullGrid.hpp" #include "task/Task.hpp" - namespace combigrid { // exact solution @@ -21,8 +22,9 @@ class TestFn { // leave out normalization, such that maximum is always 1 return std::exp(exponent); // / std::sqrt( std::pow(2*pi*sigmaSquared, coords.size())); } + private: - static constexpr double sigmaSquaredInv_ = 1. / ((1./3.)*(1./3.)); + static constexpr double sigmaSquaredInv_ = 1. / ((1. / 3.) * (1. / 3.)); }; class TaskAdvection : public Task { @@ -59,7 +61,7 @@ class TaskAdvection : public Task { dfg_ = new DistributedFullGrid(dim, l, lcomm, this->getBoundary(), p_, false, decomposition); if (phi_ == nullptr) { - phi_ = new std::vector(dfg_->getNrLocalElements()); + phi_ = new DistributedFullGrid::TensorType(dfg_->getLocalExtents()); } std::vector h = dfg_->getGridSpacing(); @@ -95,8 +97,7 @@ class TaskAdvection : public Task { std::vector h = dfg_->getGridSpacing(); const auto& fullOffsets = dfg_->getLocalOffsets(); - phi_->resize(dfg_->getNrLocalElements()); - std::fill(phi_->begin(), phi_->end(), 0.); + phi_->reshape(dfg_->getLocalExtents(), 0.); for (size_t i = 0; i < nsteps_; ++i) { // compute the gradient in the original dfg_, then update into phi_ and @@ -122,7 +123,7 @@ class TaskAdvection : public Task { dfg_->getLocalVectorIndex(li, locAxisIndex); // TODO can be unrolled into ghost and other part, avoiding if-statement CombiDataType phi_neighbor = 0.; - if (locAxisIndex[d] == 0){ + if (locAxisIndex[d] == 0) { assert(phi_ghost.size() > 0); // then use values from boundary exchange IndexType gli = 0; @@ -132,10 +133,7 @@ class TaskAdvection : public Task { assert(gli > -1); assert(gli < phi_ghost.size()); phi_neighbor = phi_ghost[gli]; - } else{ - // --locAxisIndex[d]; - // IndexType lni = dfg_->getLocalLinearIndex(locAxisIndex); - // assert(lni == (li - fullOffsets[d])); + } else { phi_neighbor = dfg_->getElementVector()[li - fullOffsets[d]]; } // calculate gradient of phi with backward differential quotient @@ -144,10 +142,11 @@ class TaskAdvection : public Task { u_dot_dphi[li] += velocity[d] * dphi; } } + // update all values in phi_ for (IndexType li = 0; li < dfg_->getNrLocalElements(); ++li) { (*phi_)[li] = dfg_->getElementVector()[li] - u_dot_dphi[li] * dt_; } - phi_->swap(dfg_->getElementVector()); + std::swap(*phi_, dfg_->getElementVector()); } stepsTotal_ += nsteps_; @@ -161,7 +160,8 @@ class TaskAdvection : public Task { * solution on one process and then converting it to the full grid representation. * the DistributedFullGrid class offers a convenient function to do this. */ - void getFullGrid(FullGrid& fg, RankType r, CommunicatorType lcomm, int n = 0) override { + void getFullGrid(FullGrid& fg, RankType r, CommunicatorType lcomm, + int n = 0) override { assert(fg.getLevels() == dfg_->getLevels()); dfg_->gatherFullGrid(fg, r); @@ -181,15 +181,13 @@ class TaskAdvection : public Task { phi_ = nullptr; } - real getCurrentTime() const override { - return stepsTotal_ * dt_; - } + real getCurrentTime() const override { return stepsTotal_ * dt_; } CombiDataType analyticalSolution(const std::vector& coords, int n = 0) const override { assert(n == 0); auto coordsCopy = coords; TestFn f; - return f(coordsCopy, stepsTotal_*dt_); + return f(coordsCopy, stepsTotal_ * dt_); } protected: @@ -212,7 +210,7 @@ class TaskAdvection : public Task { bool initialized_; size_t stepsTotal_; DistributedFullGrid* dfg_; - static std::vector* phi_; + static typename DistributedFullGrid::TensorType* phi_; /** * The serialize function has to be extended by the new member variables. @@ -234,6 +232,6 @@ class TaskAdvection : public Task { } }; -std::vector* TaskAdvection::phi_ = nullptr; +typename DistributedFullGrid::TensorType* TaskAdvection::phi_ = nullptr; } // namespace combigrid diff --git a/examples/distributed_third_level/TaskAdvection.hpp b/examples/distributed_third_level/TaskAdvection.hpp index fb0d1ded9..5ee3f9e42 100644 --- a/examples/distributed_third_level/TaskAdvection.hpp +++ b/examples/distributed_third_level/TaskAdvection.hpp @@ -1,8 +1,8 @@ #pragma once + #include "fullgrid/DistributedFullGrid.hpp" #include "task/Task.hpp" - namespace combigrid { // exact solution @@ -21,8 +21,9 @@ class TestFn { // leave out normalization, such that maximum is always 1 return std::exp(exponent); // / std::sqrt( std::pow(2*pi*sigmaSquared, coords.size())); } + private: - static constexpr double sigmaSquaredInv_ = 1. / ((1./3.)*(1./3.)); + static constexpr double sigmaSquaredInv_ = 1. / ((1. / 3.) * (1. / 3.)); }; class TaskAdvection : public Task { @@ -30,8 +31,8 @@ class TaskAdvection : public Task { /* if the constructor of the base task class is not sufficient we can provide an * own implementation. here, we add dt, nsteps, and p as a new parameters. */ - TaskAdvection(const LevelVector& l, const std::vector& boundary, - real coeff, LoadModel* loadModel, real dt, size_t nsteps, + TaskAdvection(const LevelVector& l, const std::vector& boundary, real coeff, + LoadModel* loadModel, real dt, size_t nsteps, std::vector p = std::vector(0), FaultCriterion* faultCrit = (new StaticFaults({0, IndexVector(0), IndexVector(0)}))) : Task(l, boundary, coeff, loadModel, faultCrit), @@ -59,7 +60,7 @@ class TaskAdvection : public Task { dfg_ = new DistributedFullGrid(dim, l, lcomm, this->getBoundary(), p_, false, decomposition); if (phi_ == nullptr) { - phi_ = new std::vector(dfg_->getNrLocalElements()); + phi_ = new DistributedFullGrid::TensorType(dfg_->getLocalExtents()); } std::vector h = dfg_->getGridSpacing(); @@ -95,8 +96,7 @@ class TaskAdvection : public Task { std::vector h = dfg_->getGridSpacing(); const auto& fullOffsets = dfg_->getLocalOffsets(); - phi_->resize(dfg_->getNrLocalElements()); - std::fill(phi_->begin(), phi_->end(), 0.); + phi_->reshape(dfg_->getLocalExtents(), 0.); for (size_t i = 0; i < nsteps_; ++i) { // compute the gradient in the original dfg_, then update into phi_ and @@ -122,7 +122,7 @@ class TaskAdvection : public Task { dfg_->getLocalVectorIndex(li, locAxisIndex); // TODO can be unrolled into ghost and other part, avoiding if-statement CombiDataType phi_neighbor = 0.; - if (locAxisIndex[d] == 0){ + if (locAxisIndex[d] == 0) { assert(phi_ghost.size() > 0); // then use values from boundary exchange IndexType gli = 0; @@ -132,10 +132,7 @@ class TaskAdvection : public Task { assert(gli > -1); assert(gli < phi_ghost.size()); phi_neighbor = phi_ghost[gli]; - } else{ - // --locAxisIndex[d]; - // IndexType lni = dfg_->getLocalLinearIndex(locAxisIndex); - // assert(lni == (li - fullOffsets[d])); + } else { phi_neighbor = dfg_->getElementVector()[li - fullOffsets[d]]; } // calculate gradient of phi with backward differential quotient @@ -144,10 +141,11 @@ class TaskAdvection : public Task { u_dot_dphi[li] += velocity[d] * dphi; } } + // update all values in phi_ for (IndexType li = 0; li < dfg_->getNrLocalElements(); ++li) { (*phi_)[li] = dfg_->getElementVector()[li] - u_dot_dphi[li] * dt_; } - phi_->swap(dfg_->getElementVector()); + std::swap(*phi_, dfg_->getElementVector()); } stepsTotal_ += nsteps_; @@ -161,7 +159,8 @@ class TaskAdvection : public Task { * solution on one process and then converting it to the full grid representation. * the DistributedFullGrid class offers a convenient function to do this. */ - void getFullGrid(FullGrid& fg, RankType r, CommunicatorType lcomm, int n = 0) override { + void getFullGrid(FullGrid& fg, RankType r, CommunicatorType lcomm, + int n = 0) override { assert(fg.getLevels() == dfg_->getLevels()); dfg_->gatherFullGrid(fg, r); @@ -181,15 +180,13 @@ class TaskAdvection : public Task { phi_ = nullptr; } - real getCurrentTime() const override { - return stepsTotal_ * dt_; - } + real getCurrentTime() const override { return stepsTotal_ * dt_; } CombiDataType analyticalSolution(const std::vector& coords, int n = 0) const override { assert(n == 0); auto coordsCopy = coords; TestFn f; - return f(coordsCopy, stepsTotal_*dt_); + return f(coordsCopy, stepsTotal_ * dt_); } protected: @@ -212,7 +209,7 @@ class TaskAdvection : public Task { bool initialized_; size_t stepsTotal_; DistributedFullGrid* dfg_; - static std::vector* phi_; + static typename DistributedFullGrid::TensorType* phi_; /** * The serialize function has to be extended by the new member variables. @@ -234,6 +231,6 @@ class TaskAdvection : public Task { } }; -std::vector* TaskAdvection::phi_ = nullptr; +typename DistributedFullGrid::TensorType* TaskAdvection::phi_ = nullptr; } // namespace combigrid diff --git a/examples/distributed_third_level/TaskConst.hpp b/examples/distributed_third_level/TaskConst.hpp index 7e04b676e..9f874ccc8 100644 --- a/examples/distributed_third_level/TaskConst.hpp +++ b/examples/distributed_third_level/TaskConst.hpp @@ -38,7 +38,7 @@ class TaskConst : public combigrid::Task { dfg_ = new DistributedFullGrid(getDim(), getLevelVector(), lcomm, getBoundary(), p, false, decomposition); - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); for (auto& element : elements) { element = 10; } @@ -49,7 +49,7 @@ class TaskConst : public combigrid::Task { // std::cout << "run " << getCommRank(lcomm) << std::endl; - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); for (auto& element : elements) { element = getLevelVector()[0] / (double)getLevelVector()[1]; // std::cout << "e " << element << std::endl; diff --git a/examples/selalib_distributed/src/SelalibTask.hpp b/examples/selalib_distributed/src/SelalibTask.hpp index f65968908..8fff8e6cb 100644 --- a/examples/selalib_distributed/src/SelalibTask.hpp +++ b/examples/selalib_distributed/src/SelalibTask.hpp @@ -299,7 +299,7 @@ class SelalibTask : public combigrid::Task { void setDFGfromLocalDistribution() { #ifndef NDEBUG - auto& localDFGSize = dfg_->getLocalSizes(); + auto& localDFGSize = dfg_->getLocalExtents(); assert(dim_ == 6); // std::cout << localDFGSize << " " << std::vector(localSize_.begin(), localSize_.end()) << // std::endl; diff --git a/src/fullgrid/DistributedFullGrid.hpp b/src/fullgrid/DistributedFullGrid.hpp index 883264d26..fdb406593 100644 --- a/src/fullgrid/DistributedFullGrid.hpp +++ b/src/fullgrid/DistributedFullGrid.hpp @@ -7,6 +7,8 @@ #include #include +#include + #include "fullgrid/FullGrid.hpp" #include "fullgrid/SliceIterator.hpp" #include "mpi/MPICartesianUtils.hpp" @@ -81,6 +83,11 @@ static inline std::vector getDefaultDecomposition( template class DistributedFullGrid { public: + // cf. https://en.wikipedia.org/wiki/Row-_and_column-major_order#Address_calculation_in_general + // -> column-major order + using TensorType = boost::numeric::ublas::tensor>; + /** dimension adaptive Ctor */ DistributedFullGrid(DimType dim, const LevelVector& levels, CommunicatorType comm, const std::vector& hasBdrPoints, const std::vector& procs, @@ -94,7 +101,7 @@ class DistributedFullGrid { InitMPI(comm, procs); // will also check grids per dim - // set global num of elements and offsets + // set global num of elements and other properties, for the whole grid nrElements_ = 1; offsets_.resize(dim_); nrPoints_.resize(dim_); @@ -117,23 +124,52 @@ class DistributedFullGrid { } else { setDecomposition(decomposition); } - myPartitionsLowerBounds_ = getLowerBounds(rank_); - myPartitionsUpperBounds_ = getUpperBounds(rank_); - // set local elements and local offsets - nrLocalPoints_ = getUpperBounds() - getLowerBounds(); + // set local elements and properties, for our partition of the grid only + myPartitionsLowerBounds_ = getLowerBounds(rank_); + myPartitionsUpperBounds_ = getUpperBounds(rank_); + auto localExtents = myPartitionsUpperBounds_ - myPartitionsLowerBounds_; + std::vector nrLocalPoints(localExtents.begin(), localExtents.end()); nrLocalElements_ = 1; localOffsets_.resize(dim); - // cf. https://en.wikipedia.org/wiki/Row-_and_column-major_order#Address_calculation_in_general - // -> column-major order for (DimType j = 0; j < dim_; ++j) { localOffsets_[j] = nrLocalElements_; - nrLocalElements_ *= nrLocalPoints_[j]; + nrLocalElements_ *= nrLocalPoints[j]; + } + // see if we can have templated dimensions, but selected at runtime + if (dim_ == 1) { + fullgridTensor_ = TensorType{nrLocalPoints[0]}; + } else if (dim_ == 2) { + fullgridTensor_ = TensorType{nrLocalPoints[0], + nrLocalPoints[1]}; + } else if (dim_ == 3) { + fullgridTensor_ = TensorType{nrLocalPoints[0], + nrLocalPoints[1], + nrLocalPoints[2]}; + } else if (dim_ == 4) { + fullgridTensor_ = TensorType{nrLocalPoints[0], + nrLocalPoints[1], + nrLocalPoints[2], + nrLocalPoints[3]}; + } else if (dim_ == 5) { + fullgridTensor_ = TensorType{nrLocalPoints[0], + nrLocalPoints[1], + nrLocalPoints[2], + nrLocalPoints[3], + nrLocalPoints[4]}; + } else if (dim_ == 6) { + fullgridTensor_ = TensorType{nrLocalPoints[0], + nrLocalPoints[1], + nrLocalPoints[2], + nrLocalPoints[3], + nrLocalPoints[4], + nrLocalPoints[5]}; + } else { + throw std::runtime_error("DistributedFullGrid: dimension not (yet) supported, plz add"); } - // in contrast to serial implementation we directly create the grid - fullgridVector_.resize(nrLocalElements_); + assert(fullgridTensor_.size() == nrLocalElements_); } // explicit DistributedFullGrid(const DistributedFullGrid& other) { @@ -145,7 +181,7 @@ class DistributedFullGrid { // } // } - // copy construction would need to duplicate communicator_ + // copy construction would need to duplicate communicator_, so we delete it DistributedFullGrid(const DistributedFullGrid& other) = delete; DistributedFullGrid& operator=(const DistributedFullGrid&) = delete; DistributedFullGrid(DistributedFullGrid&& other) = default; @@ -166,7 +202,8 @@ class SubarrayIterator : public SliceIterator { public: SubarrayIterator(const MPI_Datatype& subarrayType, DistributedFullGrid& dfg) : SliceIterator() { this->offsets_ = dfg.getLocalOffsets(); - this->dataPointer_ = &(dfg.getElementVector()); + this->dataPointer_ = dfg.getElementVector().data(); + this->dataSize_ = dfg.getElementVector().size(); // read out the subarray informaiton MPI_Datatype tmp; @@ -190,11 +227,11 @@ class SubarrayIterator : public SliceIterator { assert(dim == this->getDimension()); // check that sizes match for (DimType d = 0; d < this->getDimension() - 1; ++d) { - assert(sizes[d] == static_cast(dfg.getLocalSizes()[d])); + assert(sizes[d] == static_cast(dfg.getLocalExtents()[d])); assert(this->subsizes_[d] + this->starts_[d] <= sizes[d]); } assert(std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies()) == - this->dataPointer_->size()); + this->dataSize_); this->setEndIndex(); this->validateSizes(); @@ -252,7 +289,7 @@ class SubarrayIterator : public SliceIterator { return evalLocalIndexOn(localIndex, coords); } else { FG_ELEMENT sum = 0.; - auto lastIndexInDim = this->getLocalSizes()[dim] - 1; + auto lastIndexInDim = this->getLocalExtents()[dim] - 1; if (localIndex[dim] >= 0 && localIndex[dim] <= lastIndexInDim) { sum += evalMultiindexRecursively(localIndex, static_cast(dim + 1), coords); } else if (localIndex[dim] > lastIndexInDim && @@ -330,7 +367,7 @@ class SubarrayIterator : public SliceIterator { this->getCartesianUtils().isOnLowerBoundaryInDimension(d)) { // if we have periodic boundary and this process is at the lower end of the dimension d // we need the periodic coordinate => do nothing - } else if (localIndexLowerNonzeroNeighborPoint[d] > this->getLocalSizes()[d] - 1) { + } else if (localIndexLowerNonzeroNeighborPoint[d] > this->getLocalExtents()[d] - 1) { // index too high value = 0.; return; @@ -579,10 +616,10 @@ class SubarrayIterator : public SliceIterator { /** returns the dimension of the full grid */ inline DimType getDimension() const { return dim_; } - /** the getters for the full grid vector */ - inline std::vector& getElementVector() { return fullgridVector_; } + /** the getters for the full grid tensor */ + inline TensorType& getElementVector() { return fullgridTensor_; } - inline const std::vector& getElementVector() const { return fullgridVector_; } + inline const TensorType& getElementVector() const { return fullgridTensor_; } /** return the offset in the full grid vector of the dimension */ inline IndexType getOffset(DimType i) const { return offsets_[i]; } @@ -620,9 +657,9 @@ class SubarrayIterator : public SliceIterator { std::fill(this->getElementVector().begin(), this->getElementVector().end(), 0.); } - inline FG_ELEMENT* getData() { return &fullgridVector_[0]; } + inline FG_ELEMENT* getData() { return &fullgridTensor_[0]; } - inline const FG_ELEMENT* getData() const { return &fullgridVector_[0]; } + inline const FG_ELEMENT* getData() const { return &fullgridTensor_[0]; } /** MPI Communicator*/ inline CommunicatorType getCommunicator() const { return communicator_; } @@ -904,7 +941,11 @@ class SubarrayIterator : public SliceIterator { } // return extents of local grid - inline const IndexVector& getLocalSizes() const { return nrLocalPoints_; } + inline IndexVector getLocalSizes() const { + // throw std::runtime_error("getLocalSizes() may be horribly slow if used often, use + // getLocalExtents() instead"); + return IndexVector(getLocalExtents().begin(), getLocalExtents().end()); + } // return extents of global grid inline const IndexVector& getGlobalSizes() const { return nrPoints_; } @@ -1037,7 +1078,7 @@ class SubarrayIterator : public SliceIterator { auto sPointer = dsg.getData(sIndex); subspaceIndices = getFGPointsOfSubspace(level); for (const auto& fIndex : subspaceIndices) { - fullgridVector_[fIndex] = *sPointer; + fullgridTensor_[fIndex] = *sPointer; ++sPointer; } } @@ -1092,9 +1133,9 @@ class SubarrayIterator : public SliceIterator { inline IndexType getNumPointsOnThisPartition(DimType d, IndexType localStart, IndexType strideForThisLevel) const { assert(d < this->getDimension()); - return (nrLocalPoints_[d] - 1 < localStart) + return (getLocalExtents()[d] - 1 < localStart) ? 0 - : (nrLocalPoints_[d] - 1 - localStart) / strideForThisLevel + 1; + : (getLocalExtents()[d] - 1 - localStart) / strideForThisLevel + 1; } inline IndexType getNumPointsOnThisPartition(LevelType l, DimType d) const { @@ -1185,7 +1226,7 @@ class SubarrayIterator : public SliceIterator { inline real normalizelp(int p) { real norm = getLpNorm(p); - std::vector& data = getElementVector(); + auto& data = getElementVector(); for (size_t i = 0; i < data.size(); ++i) data[i] = data[i] / norm; return norm; @@ -1193,7 +1234,7 @@ class SubarrayIterator : public SliceIterator { // multiply with a constant inline void mul(real c) { - std::vector& data = getElementVector(); + auto& data = getElementVector(); for (size_t i = 0; i < data.size(); ++i) data[i] *= c; } @@ -1354,14 +1395,14 @@ class SubarrayIterator : public SliceIterator { // set lower bounds of subarray IndexVector subarrayLowerBounds = this->getLowerBounds(); IndexVector subarrayUpperBounds = this->getUpperBounds(); - subarrayLowerBounds[d] += this->getLocalSizes()[d] - 1; + subarrayLowerBounds[d] += this->getLocalExtents()[d] - 1; auto subarrayExtents = subarrayUpperBounds - subarrayLowerBounds; assert(subarrayExtents[d] == 1); auto subarrayStarts = subarrayLowerBounds - this->getLowerBounds(); // create MPI datatype - std::vector sizes(this->getLocalSizes().begin(), this->getLocalSizes().end()); + std::vector sizes(this->getLocalExtents().begin(), this->getLocalExtents().end()); std::vector subsizes(subarrayExtents.begin(), subarrayExtents.end()); // the starts are local indices std::vector starts(subarrayStarts.begin(), subarrayStarts.end()); @@ -1390,7 +1431,7 @@ class SubarrayIterator : public SliceIterator { // set upper bounds of subarray IndexVector subarrayLowerBounds = this->getLowerBounds(); IndexVector subarrayUpperBounds = this->getUpperBounds(); - subarrayUpperBounds[d] -= this->getLocalSizes()[d] - 1; + subarrayUpperBounds[d] -= this->getLocalExtents()[d] - 1; auto subarrayExtents = subarrayUpperBounds - subarrayLowerBounds; assert(subarrayExtents[d] == 1); @@ -1398,7 +1439,7 @@ class SubarrayIterator : public SliceIterator { // create MPI datatype // also, the data dimensions are reversed - std::vector sizes(this->getLocalSizes().begin(), this->getLocalSizes().end()); + std::vector sizes(this->getLocalExtents().begin(), this->getLocalExtents().end()); std::vector subsizes(subarrayExtents.begin(), subarrayExtents.end()); // the starts are local indices std::vector starts(subarrayStarts.begin(), subarrayStarts.end()); @@ -1573,7 +1614,7 @@ class SubarrayIterator : public SliceIterator { // create recvbuffer auto numElements = std::accumulate(subarrayExtents.begin(), subarrayExtents.end(), 1, - std::multiplies()); + std::multiplies<>()); if (lower < 0) { numElements = 0; subarrayExtents = IndexVector(this->getDimension(), 0); @@ -1755,8 +1796,10 @@ class SubarrayIterator : public SliceIterator { return values; } - const MPICartesianUtils& getCartesianUtils() const { - return cartesianUtils_; + const MPICartesianUtils& getCartesianUtils() const { return cartesianUtils_; } + + const typename TensorType::extents_type& getLocalExtents() const { + return fullgridTensor_.extents(); } private: @@ -1794,17 +1837,17 @@ class SubarrayIterator : public SliceIterator { IndexVector myPartitionsUpperBounds_; /** the full grid vector, this contains the elements of the full grid */ - std::vector fullgridVector_; + TensorType fullgridTensor_; /** Variables for the distributed Full Grid*/ /** Cartesien MPI Communicator */ CommunicatorType communicator_; - /** - * the decomposition of the full grid over processors - * contains (for every dimension) the grid point indices at - * which a process boundary is assumed - */ + /** + * the decomposition of the full grid over processors + * contains (for every dimension) the grid point indices at + * which a process boundary is assumed + */ std::vector decomposition_; /** utility to get info about cartesian communicator */ @@ -1820,9 +1863,6 @@ class SubarrayIterator : public SliceIterator { std::vector downwardSubarrays_; std::vector upwardSubarrays_; - /** number of local (in this grid cell) points per axis*/ - IndexVector nrLocalPoints_; - /** * @brief sets the MPI-related members rank_, size_, communicator_, and cartesianUtils_ * @@ -1939,11 +1979,11 @@ class SubarrayIterator : public SliceIterator { assert(dim_ == 2); // loop over rows i -> dim2 - for (IndexType i = 0; i < nrLocalPoints_[1]; ++i) { + for (IndexType i = 0; i < getLocalExtents()[1]; ++i) { IndexType offset = localOffsets_[1] * i; - for (IndexType j = 0; j < nrLocalPoints_[0]; ++j) { - os << fullgridVector_[offset + j] << "\t"; + for (IndexType j = 0; j < getLocalExtents()[0]; ++j) { + os << fullgridTensor_[offset + j] << "\t"; } os << std::endl; @@ -1954,8 +1994,8 @@ class SubarrayIterator : public SliceIterator { void print1D(std::ostream& os) const { assert(dim_ == 1); - for (IndexType j = 0; j < nrLocalPoints_[0]; ++j) { - os << fullgridVector_[j] << "\t"; + for (IndexType j = 0; j < getLocalExtents()[0]; ++j) { + os << fullgridTensor_[j] << "\t"; } os << std::endl; @@ -1963,7 +2003,7 @@ class SubarrayIterator : public SliceIterator { // n-dim output. will print 2-dimensional slices of domain void print3D(std::ostream& os) const { - for (IndexType k = 0; k < nrLocalPoints_[2]; ++k) { + for (IndexType k = 0; k < getLocalExtents()[2]; ++k) { assert(dim_ == 3); // output global z index @@ -1971,11 +2011,11 @@ class SubarrayIterator : public SliceIterator { IndexType offsetZ = localOffsets_[2] * k; // loop over rows i -> dim2 - for (IndexType i = 0; i < nrLocalPoints_[1]; ++i) { + for (IndexType i = 0; i < getLocalExtents()[1]; ++i) { IndexType offset = offsetZ + localOffsets_[1] * i; - for (IndexType j = 0; j < nrLocalPoints_[0]; ++j) { - os << fullgridVector_[offset + j] << "\t"; + for (IndexType j = 0; j < getLocalExtents()[0]; ++j) { + os << fullgridTensor_[offset + j] << "\t"; } os << std::endl; diff --git a/src/fullgrid/MultiArray.hpp b/src/fullgrid/MultiArray.hpp index 8befc84c8..3b9c8e1cb 100644 --- a/src/fullgrid/MultiArray.hpp +++ b/src/fullgrid/MultiArray.hpp @@ -16,7 +16,7 @@ using MultiArrayRef = boost::multi_array_ref; template static MultiArrayRef createMultiArrayRef(DistributedFullGrid& dfg) { /* note that the sizes of dfg are in reverse order compared to shape */ - std::vector shape(dfg.getLocalSizes().rbegin(), dfg.getLocalSizes().rend()); + std::vector shape(dfg.getLocalExtents().rbegin(), dfg.getLocalExtents().rend()); if (!reverseOrderingDFGPartitions) { assert(false && "this is not adapted to normal ordering of DFG partitions yet"); } diff --git a/src/fullgrid/SliceIterator.hpp b/src/fullgrid/SliceIterator.hpp index 30c57f0e1..ff148ad37 100644 --- a/src/fullgrid/SliceIterator.hpp +++ b/src/fullgrid/SliceIterator.hpp @@ -4,6 +4,8 @@ #include "utils/IndexVector.hpp" #include "utils/Types.hpp" +// template +// using SliceIterator = typename boost::numeric::ublas::tensor::iterator; namespace combigrid { template class SliceIterator { @@ -16,12 +18,13 @@ class SliceIterator { using reference = FG_ELEMENT&; SliceIterator(const std::vector& subsizes, const std::vector& starts, - const IndexVector& offsets, std::vector* dataPointer) + const IndexVector& offsets, std::vector& data) : currentLocalIndex_(starts), subsizes_(subsizes), starts_(starts), offsets_(offsets), - dataPointer_(dataPointer) { + dataPointer_(data.data(), + dataSize_(data.size())) { this->setEndIndex(); this->validateSizes(); } @@ -36,28 +39,28 @@ class SliceIterator { reference operator*() const { #ifndef NDEBUG // make sure to only dereference when we actually have the data mapped - if (linearize(currentLocalIndex_) > dataPointer_->size()) { + if (linearize(currentLocalIndex_) > dataSize_) { std::cout << " subsizes " << subsizes_ << " starts " << starts_ << " end " << endIndex() << std::endl; std::cout << " ref waah currentLocalIndex_" << currentLocalIndex_ << " linearized " - << linearize(currentLocalIndex_) << " numElements " << dataPointer_->size() + << linearize(currentLocalIndex_) << " numElements " << dataSize_ << std::endl; } - assert(linearize(currentLocalIndex_) <= dataPointer_->size()); + assert(linearize(currentLocalIndex_) <= dataSize_); assert(linearize(currentLocalIndex_) < endIndex()); #endif // ndef NDEBUG - return (*dataPointer_)[linearize(currentLocalIndex_)]; + return dataPointer_[linearize(currentLocalIndex_)]; } - pointer operator->() const { return &((*dataPointer_)[linearize(currentLocalIndex_)]); } - pointer getPointer() const { return &((*dataPointer_)[linearize(currentLocalIndex_)]); } + pointer operator->() const { return &(dataPointer_[linearize(currentLocalIndex_)]); } + pointer getPointer() const { return &(dataPointer_[linearize(currentLocalIndex_)]); } const std::vector& getVecIndex() const { return currentLocalIndex_; } IndexType getIndex() const { return linearize(currentLocalIndex_); } const SliceIterator& operator++() { #ifndef NDEBUG - assert(linearize(currentLocalIndex_) <= dataPointer_->size()); + assert(linearize(currentLocalIndex_) <= dataSize_); auto cpLocalIdx = currentLocalIndex_; auto idxbefore = linearize(currentLocalIndex_); #endif // ndef NDEBUG @@ -94,8 +97,8 @@ class SliceIterator { friend bool operator!=(const SliceIterator& a, const SliceIterator& b) { return a.currentLocalIndex_ != b.currentLocalIndex_; }; - pointer begin() { return &((*dataPointer_)[firstIndex()]); } - pointer end() { return &((*dataPointer_)[endIndex()]); } + pointer begin() { return &(dataPointer_[firstIndex()]); } + pointer end() { return &(dataPointer_[endIndex()]); } bool isAtEnd() const { return (linearize(currentLocalIndex_) == endIndex()); } int size() const { return std::accumulate(subsizes_.begin(), subsizes_.end(), 1, std::multiplies()); @@ -127,8 +130,8 @@ class SliceIterator { void validateSizes() { assert(offsets_[0] == 1); assert(std::accumulate(this->subsizes_.begin(), this->subsizes_.end(), 1, - std::multiplies()) <= this->dataPointer_->size()); - assert(linearize(this->currentLocalIndex_) <= this->dataPointer_->size()); + std::multiplies()) <= this->dataSize_); + assert(linearize(this->currentLocalIndex_) <= this->dataSize_); assert(currentLocalIndex_.size() == this->getDimension()); assert(subsizes_.size() == this->getDimension()); assert(starts_.size() == this->getDimension()); @@ -138,7 +141,8 @@ class SliceIterator { std::vector subsizes_; std::vector starts_; IndexVector offsets_; - std::vector* dataPointer_; + FG_ELEMENT* dataPointer_; + size_t dataSize_; IndexType linearEndIndex_ = -1; }; } // namespace combigrid \ No newline at end of file diff --git a/src/hierarchization/DistributedHierarchization.hpp b/src/hierarchization/DistributedHierarchization.hpp index 6e27ed20d..7b4c0a3ee 100644 --- a/src/hierarchization/DistributedHierarchization.hpp +++ b/src/hierarchization/DistributedHierarchization.hpp @@ -184,7 +184,7 @@ void sendAndReceiveIndices(const std::map>& send1d MPI_Datatype mysubarray; { // sizes of local grid - std::vector sizes(dfg.getLocalSizes().begin(), dfg.getLocalSizes().end()); + std::vector sizes(dfg.getLocalExtents().begin(), dfg.getLocalExtents().end()); // sizes of subarray ( full size except dimension d ) std::vector subsizes = sizes; @@ -274,7 +274,7 @@ void sendAndReceiveIndicesBlock(const std::map>& s MPI_Datatype mysubarray; { // sizes of local grid - std::vector sizes(dfg.getLocalSizes().begin(), dfg.getLocalSizes().end()); + std::vector sizes(dfg.getLocalExtents().begin(), dfg.getLocalExtents().end()); // sizes of subarray ( full size except dimension d ) std::vector subsizes = sizes; @@ -1036,7 +1036,7 @@ void hierarchizeWithBoundary(DistributedFullGrid& dfg, auto lmax = dfg.getLevels()[dim]; auto size = dfg.getNrLocalElements(); auto stride = dfg.getLocalOffsets()[dim]; - auto ndim = dfg.getLocalSizes()[dim]; + auto ndim = dfg.getLocalExtents()[dim]; IndexType jump = stride * ndim; IndexType nbrOfPoles = size / ndim; @@ -1050,7 +1050,7 @@ void hierarchizeWithBoundary(DistributedFullGrid& dfg, bool oneSidedBoundary = dfg.returnBoundaryFlags()[dim] == 1; tmp.resize(dfg.getGlobalSizes()[dim] + (oneSidedBoundary ? 1 : 0), std::numeric_limits::quiet_NaN()); - std::vector& ldata = dfg.getElementVector(); + auto& ldata = dfg.getElementVector(); lldiv_t divresult; IndexType start; IndexType gstart = dfg.getLowerBounds()[dim]; @@ -1107,7 +1107,7 @@ void hierarchizeNoBoundary(DistributedFullGrid& dfg, LevelType lmax = dfg.getLevels()[dim]; IndexType size = dfg.getNrLocalElements(); IndexType stride = dfg.getLocalOffsets()[dim]; - IndexType ndim = dfg.getLocalSizes()[dim]; + IndexType ndim = dfg.getLocalExtents()[dim]; IndexType jump = stride * ndim; IndexType nbrOfPoles = size / ndim; @@ -1115,7 +1115,7 @@ void hierarchizeNoBoundary(DistributedFullGrid& dfg, // loop over poles std::vector tmp(dfg.getGlobalSizes()[dim], std::numeric_limits::quiet_NaN()); - std::vector& ldata = dfg.getElementVector(); + auto& ldata = dfg.getElementVector(); lldiv_t divresult; IndexType start; IndexType gstart = dfg.getLowerBounds()[dim]; @@ -1190,7 +1190,7 @@ void dehierarchizeWithBoundary(DistributedFullGrid& dfg, const auto& lmax = dfg.getLevels()[dim]; const auto& size = dfg.getNrLocalElements(); const auto& stride = dfg.getLocalOffsets()[dim]; - const auto& ndim = dfg.getLocalSizes()[dim]; + const auto& ndim = dfg.getLocalExtents()[dim]; IndexType jump = stride * ndim; IndexType nbrOfPoles = size / ndim; @@ -1204,7 +1204,7 @@ void dehierarchizeWithBoundary(DistributedFullGrid& dfg, bool oneSidedBoundary = dfg.returnBoundaryFlags()[dim] == 1; tmp.resize(dfg.getGlobalSizes()[dim] + (oneSidedBoundary ? 1 : 0), std::numeric_limits::quiet_NaN()); - std::vector& ldata = dfg.getElementVector(); + auto& ldata = dfg.getElementVector(); lldiv_t divresult; IndexType start; IndexType gstart = dfg.getLowerBounds()[dim]; @@ -1257,7 +1257,7 @@ void dehierarchizeNoBoundary(DistributedFullGrid& dfg, auto lmax = dfg.getLevels()[dim]; auto size = dfg.getNrLocalElements(); auto stride = dfg.getLocalOffsets()[dim]; - auto ndim = dfg.getLocalSizes()[dim]; + auto ndim = dfg.getLocalExtents()[dim]; IndexType jump = stride * ndim; IndexType nbrOfPoles = size / ndim; @@ -1266,7 +1266,7 @@ void dehierarchizeNoBoundary(DistributedFullGrid& dfg, // loop over poles static std::vector tmp; tmp.resize(dfg.getGlobalSizes()[dim], std::numeric_limits::quiet_NaN()); - std::vector& ldata = dfg.getElementVector(); + auto& ldata = dfg.getElementVector(); lldiv_t divresult; IndexType start; IndexType gstart = dfg.getLowerBounds()[dim]; diff --git a/tests/TaskConst.hpp b/tests/TaskConst.hpp index 6fd263f78..567e22577 100644 --- a/tests/TaskConst.hpp +++ b/tests/TaskConst.hpp @@ -40,7 +40,7 @@ class TaskConst : public combigrid::Task { dfg_ = new DistributedFullGrid(getDim(), getLevelVector(), lcomm, getBoundary(), p, false, decomposition); - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); for (auto& element : elements) { element = 10; } @@ -51,7 +51,7 @@ class TaskConst : public combigrid::Task { // std::cout << "run " << getCommRank(lcomm) << std::endl; - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); for (auto& element : elements) { element = getLevelVector()[0] / (double)getLevelVector()[1]; // std::cout << "e " << element << std::endl; diff --git a/tests/TaskCount.hpp b/tests/TaskCount.hpp index a7cb75cf2..e8a49107b 100644 --- a/tests/TaskCount.hpp +++ b/tests/TaskCount.hpp @@ -49,7 +49,7 @@ class TaskCount : public combigrid::Task { } dfg_ = new DistributedFullGrid(getDim(), getLevelVector(), lcomm, getBoundary(), p, false, decomposition); - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); for (auto& element : elements) { element = -0.; } diff --git a/tests/test_distributedfullgrid.cpp b/tests/test_distributedfullgrid.cpp index 555aba321..85d82aef4 100644 --- a/tests/test_distributedfullgrid.cpp +++ b/tests/test_distributedfullgrid.cpp @@ -169,8 +169,11 @@ void checkDistributedFullgrid(LevelVector& levels, std::vector& procs, TestFn f; const auto dim = static_cast(levels.size()); - // create dfg + BOOST_TEST_CHECKPOINT("create dfg"); DistributedFullGrid> dfg(dim, levels, comm, boundary, procs, forward); + BOOST_CHECK(dfg.getNrElements() > 0); + BOOST_CHECK(dfg.getNrLocalElements() <= dfg.getNrElements()); + BOOST_CHECK(dfg.getLocalExtents().size() == dim); // set function values for (IndexType li = 0; li < dfg.getNrLocalElements(); ++li) { diff --git a/tests/test_ftolerance.cpp b/tests/test_ftolerance.cpp index 42fd59630..20ec52131 100644 --- a/tests/test_ftolerance.cpp +++ b/tests/test_ftolerance.cpp @@ -73,7 +73,7 @@ class TaskAdvFDM : public combigrid::Task { dfg_ = new DistributedFullGrid(getDim(), getLevelVector(), lcomm, getBoundary(), p); - phi_.resize(dfg_->getNrElements()); + phi_.reshape(dfg_->getLocalExtents(), 0.); for (IndexType li = 0; li < dfg_->getNrElements(); ++li) { std::vector coords(getDim()); @@ -102,7 +102,7 @@ class TaskAdvFDM : public combigrid::Task { double h1 = 1.0 / (double)l1; for (size_t i = 0; i < nsteps_; ++i) { - phi_.swap(dfg_->getElementVector()); + std::swap(phi_, dfg_->getElementVector()); for (IndexType li = 0; li < dfg_->getNrElements(); ++li) { @@ -161,7 +161,7 @@ class TaskAdvFDM : public combigrid::Task { size_t stepsTotal_; size_t combiStep_; DistributedFullGrid* dfg_; - std::vector phi_; + typename DistributedFullGrid::TensorType phi_; template void serialize(Archive& ar, const unsigned int version) { diff --git a/tests/test_hierarchization.cpp b/tests/test_hierarchization.cpp index f08f53b7f..dab2b0ded 100644 --- a/tests/test_hierarchization.cpp +++ b/tests/test_hierarchization.cpp @@ -222,7 +222,9 @@ real checkConservationOfMomentum(DistributedFullGrid& dfg, // mostly stolen from dfg::getCornersValues auto corners = dfgOne->getCornersGlobalVectorIndices(); std::sort(corners.begin(), corners.end()); - std::vector values; + typename DistributedFullGrid::TensorType::extents_type extents( + std::vector(dfgOne->getDimension(), 2)); + typename DistributedFullGrid::TensorType values(extents); std::vector localCornerIndices; for (size_t cornerNo = 0; cornerNo < corners.size(); ++cornerNo) { if (dfgOne->isGlobalIndexHere(corners[cornerNo])) { @@ -236,13 +238,15 @@ real checkConservationOfMomentum(DistributedFullGrid& dfg, } // make sure corner values are in right order std::sort(localCornerIndices.begin(), localCornerIndices.end()); + size_t valuesIndex = 0; for (const auto& index : localCornerIndices) { - values.push_back(dfgOne->getElementVector()[index]); + values[valuesIndex] = dfgOne->getElementVector()[index]; + ++valuesIndex; } auto dfgZero = std::unique_ptr>( new DistributedFullGrid(dim, lmin, comm, boundary, procs)); - BOOST_CHECK(values.size() == dfgZero->getElementVector().size()); + BOOST_CHECK(valuesIndex == dfgZero->getElementVector().size()); dfgZero->getElementVector() = values; // no need to dehierarchize, is nodal/scaling function on coarsest grid anyways @@ -633,7 +637,7 @@ BOOST_AUTO_TEST_CASE(test_exchangeData1d, *boost::unit_test::timeout(20)) { if (procs[d] > 1) { // check that all indices from other ranks along the pole are present BOOST_CHECK_EQUAL(remoteDataAll.size(), - dfg.getGlobalSizes()[d] - dfg.getLocalSizes()[d]); + dfg.getGlobalSizes()[d] - dfg.getLocalExtents()[d]); } MPI_Barrier(comm); TestHelper::testStrayMessages(comm); diff --git a/tests/test_reduce.cpp b/tests/test_reduce.cpp index e28d13ed4..9795e4179 100644 --- a/tests/test_reduce.cpp +++ b/tests/test_reduce.cpp @@ -60,7 +60,7 @@ class TaskConst : public combigrid::Task { dfg_ = new DistributedFullGrid(getDim(), getLevelVector(), lcomm, getBoundary(), p, false, decomposition); - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); for (auto& element : elements) { element = 10; } @@ -70,7 +70,7 @@ class TaskConst : public combigrid::Task { std::cout << "run " << getCommRank(lcomm) << std::endl; - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); for (auto& element : elements) { // BOOST_CHECK(abs(dfg_->getData()[li])); element = static_cast(getLevelVector()[0]) / getLevelVector()[1]; diff --git a/tests/test_rescheduling.cpp b/tests/test_rescheduling.cpp index 2ad00a4b6..c661636f9 100644 --- a/tests/test_rescheduling.cpp +++ b/tests/test_rescheduling.cpp @@ -86,7 +86,7 @@ class TestingTask : public combigrid::Task { dfg_ = new DistributedFullGrid(getDim(), getLevelVector(), lcomm, getBoundary(), p, false, decomposition); - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); for (auto& element : elements) { element = 0; // default state is 0 } @@ -98,7 +98,7 @@ class TestingTask : public combigrid::Task { // std::cout << "run " << getCommRank(lcomm) << std::endl; - std::vector& elements = dfg_->getElementVector(); + auto& elements = dfg_->getElementVector(); for (auto& element : elements) { element = 10; // after run was executed the state is 10 } @@ -251,7 +251,7 @@ void checkRescheduling(size_t ngroup = 1, size_t nprocs = 1) { BOOST_REQUIRE(tasksContainSameValue(pgroup.getTasks())); for (auto& t : pgroup.getTasks()) { - std::vector& elements = t->getDistributedFullGrid().getElementVector(); + auto& elements = t->getDistributedFullGrid().getElementVector(); for (auto e : elements) { // Elements need to be always equal to 10 because // * first run: run is executed