From b1c5bcb83cb5089f8c39293c1d98e5d72d6551f0 Mon Sep 17 00:00:00 2001
From: Theresa Pollinger <theresa.pollinger@ipvs.uni-stuttgart.de>
Date: Wed, 15 Feb 2023 09:37:12 +0100
Subject: [PATCH 1/5] DFG: make general SliceIterator its own class

---
 src/fullgrid/DistributedFullGrid.hpp | 224 ++++++---------------------
 src/fullgrid/SliceIterator.hpp       | 144 +++++++++++++++++
 2 files changed, 192 insertions(+), 176 deletions(-)
 create mode 100644 src/fullgrid/SliceIterator.hpp
diff --git a/src/fullgrid/DistributedFullGrid.hpp b/src/fullgrid/DistributedFullGrid.hpp
index 481e959ad..883264d26 100644
--- a/src/fullgrid/DistributedFullGrid.hpp
+++ b/src/fullgrid/DistributedFullGrid.hpp
@@ -8,6 +8,7 @@
 #include <string>
 
 #include "fullgrid/FullGrid.hpp"
+#include "fullgrid/SliceIterator.hpp"
 #include "mpi/MPICartesianUtils.hpp"
 #include "mpi/MPISystem.hpp"
 #include "sparsegrid/DistributedSparseGridUniform.hpp"
@@ -18,7 +19,6 @@
 #include "utils/Stats.hpp"
 #include "utils/Types.hpp"
 
-
 namespace combigrid {
 
 /* a regular (equidistant) domain decompositioning for an even number of processes
@@ -134,7 +134,6 @@ class DistributedFullGrid {
 
     // in contrast to serial implementation we directly create the grid
     fullgridVector_.resize(nrLocalElements_);
-
   }
 
   // explicit DistributedFullGrid(const DistributedFullGrid& other) {
@@ -148,182 +147,59 @@ class DistributedFullGrid {
 
   // copy construction would need to duplicate communicator_
   DistributedFullGrid(const DistributedFullGrid& other) = delete;
-  DistributedFullGrid& operator=( const DistributedFullGrid & ) = delete;
+  DistributedFullGrid& operator=(const DistributedFullGrid&) = delete;
   DistributedFullGrid(DistributedFullGrid&& other) = default;
   DistributedFullGrid& operator=(DistributedFullGrid&& other) = default;
 
   virtual ~DistributedFullGrid() {
     // MPI_Comm_free(&communicator_);
-    for (size_t i = 0; i < upwardSubarrays_.size(); ++i)  {
+    for (size_t i = 0; i < upwardSubarrays_.size(); ++i) {
       MPI_Type_free(&upwardSubarrays_[i]);
     }
-    for (size_t i = 0; i < downwardSubarrays_.size(); ++i)  {
+    for (size_t i = 0; i < downwardSubarrays_.size(); ++i) {
       MPI_Type_free(&downwardSubarrays_[i]);
     }
   }
 
-  struct SubarrayIterator {
-    // cf. https://www.internalpointers.com/post/writing-custom-iterators-modern-cpp
-    using iterator_category = std::forward_iterator_tag;
-    using difference_type = std::ptrdiff_t;
-    using value_type = FG_ELEMENT;
-    using pointer = FG_ELEMENT*;
-    using reference = FG_ELEMENT&;
-
-    SubarrayIterator(const MPI_Datatype& subarrayType, DistributedFullGrid* dfgPointer) : dfgPointer_(dfgPointer){
-      // read out the subarray informaiton
-      MPI_Datatype tmp;
-      auto dim = dfgPointer_->getDimension();
-      std::vector<int> MPITypeIntegers(3 * dim + 2);
-      auto success = MPI_Type_get_contents(subarrayType, static_cast<int>(MPITypeIntegers.size()),
-                                           0, 1, MPITypeIntegers.data(), nullptr, &tmp);
-      assert(success == MPI_SUCCESS);
-      auto sizes = std::vector<int>(MPITypeIntegers.begin() + 1, MPITypeIntegers.begin() + dim + 1);
-      subsizes_ = std::vector<int>(MPITypeIntegers.begin() + dim + 1,
-                                   MPITypeIntegers.begin() + 2 * dim + 1);
-      starts_ = std::vector<int>(MPITypeIntegers.begin() + 2 * dim + 1,
-                                 MPITypeIntegers.begin() + 3 * dim + 1);
-      currentLocalIndex_ = starts_;
-
-      // check datatype
-      assert(tmp == dfgPointer_->getMPIDatatype());
-      // // check sizes
-      // std::cout << " sizes " << sizes << " subsizes " << subsizes_ << " starts " << starts_
-      //           << " end " << endIndex() << std::endl;
-      for (DimType d = 0; d < dfgPointer_->getDimension() - 1; ++d) {
-        assert(sizes[d] == static_cast<int>(dfgPointer_->getLocalSizes()[d]));
-        assert(subsizes_[d] + starts_[d] <= sizes[d]);
-      }
-      assert(std::accumulate(sizes.begin(), sizes.end(), 1,
-                                       std::multiplies<int>()) == dfgPointer_->getNrLocalElements());
-      // check order
-      assert(MPITypeIntegers.back() == MPI_ORDER_FORTRAN);
-      assert(linearize(currentLocalIndex_) <= dfgPointer_->getNrLocalElements());
-    }
-
-    SubarrayIterator(const std::vector<int>& subsizes, const std::vector<int>& starts,
-                                            DistributedFullGrid* dfgPointer)
-        : currentLocalIndex_(starts),
-          subsizes_(subsizes),
-          starts_(starts),
-          dfgPointer_(dfgPointer) {
-      assert(linearize(currentLocalIndex_) <= dfgPointer_->getNrLocalElements());
-    }
-
-    // cheap rule of 5
-    SubarrayIterator() = delete;
-    SubarrayIterator(const SubarrayIterator& other) = delete;
-    SubarrayIterator& operator=( const SubarrayIterator & ) = delete;
-    SubarrayIterator(SubarrayIterator&& other) = delete;
-    SubarrayIterator& operator=(SubarrayIterator&& other) = delete;
-
-    reference operator*() const {
-      // make sure to only dereference when we actually have the data mapped
-#ifndef NDEBUG
-      if(linearize(currentLocalIndex_) > dfgPointer_->getNrLocalElements()) {
-        std::cout << " subsizes " << subsizes_ << " starts " << starts_
-                  << " end " << endIndex() << std::endl;
-        std::cout << " ref waah currentLocalIndex_" << currentLocalIndex_
-                  << " linearized " << linearize(currentLocalIndex_)
-                  << " numElements " << dfgPointer_->getNrLocalElements()
-                  << std::endl;
-      }
-      assert(linearize(currentLocalIndex_) <= dfgPointer_->getNrLocalElements());
-      assert(linearize(currentLocalIndex_) < endIndex());
-#endif // ndef NDEBUG
-      return dfgPointer_->getElementVector()[linearize(currentLocalIndex_)];
-    }
-    pointer operator->() const {
-      return &(dfgPointer_->getElementVector()[linearize(currentLocalIndex_)]);
-    }
-    pointer getPointer() const {
-      return &(dfgPointer_->getElementVector()[linearize(currentLocalIndex_)]);
-    }
-    const std::vector<int>& getVecIndex() const { return currentLocalIndex_; }
-    IndexType getIndex() const { return linearize(currentLocalIndex_); }
-    const SubarrayIterator& operator++() {
-      assert(linearize(currentLocalIndex_) <= dfgPointer_->getNrLocalElements());
-#ifndef NDEBUG
-      auto cpLocalIdx = currentLocalIndex_;
-      auto idxbefore = linearize(currentLocalIndex_);
-#endif // ndef NDEBUG
-      // increment
-      // Fortran ordering
-      currentLocalIndex_[0] += 1;
-      for (DimType d = 0; d < dfgPointer_->getDimension() - 1; ++d) {
-        // wrap around
-#ifndef NDEBUG
-        if(currentLocalIndex_[d] > starts_[d] + subsizes_[d]) {
-          std::cout << " subsizes " << subsizes_ << " starts " << starts_
-                    << " end " << endIndex() << " idxbefore " << idxbefore << std::endl;
-          std::cout << "waah currentLocalIndex_" << currentLocalIndex_ << " before " << cpLocalIdx << std::endl;
-        }
-        assert(currentLocalIndex_[d] >= starts_[d]);
-        assert(!(currentLocalIndex_[d] > starts_[d] + subsizes_[d]));
-#endif // ndef NDEBUG
-        if (currentLocalIndex_[d] == starts_[d] + subsizes_[d]) {
-          currentLocalIndex_[d] = starts_[d];
-          ++currentLocalIndex_[d + 1];
-        }
-      }
-      assert(idxbefore < linearize(currentLocalIndex_));
-      assert(linearize(currentLocalIndex_) <= endIndex());
-      return *this;
-    }
-    friend bool operator==(const SubarrayIterator& a, const SubarrayIterator& b) {
-      return a.currentLocalIndex_ == b.currentLocalIndex_;
-    };
-    friend bool operator!=(const SubarrayIterator& a, const SubarrayIterator& b) {
-      return a.currentLocalIndex_ != b.currentLocalIndex_;
-    };
-    pointer begin() { return &(dfgPointer_->getElementVector()[firstIndex()]); }
-    pointer end() { return &(dfgPointer_->getElementVector()[endIndex()]); }
-    bool isAtEnd() { return (linearize(currentLocalIndex_) == endIndex()); }
-    int size() {
-      return std::accumulate(subsizes_.begin(), subsizes_.end(), 1,
-                                       std::multiplies<int>()); }
-
-    IndexType firstIndex() const {
-      return linearize(starts_);
-    }
-    IndexType endIndex() const {
-      std::vector<int> endVectorIndex = starts_;
-      endVectorIndex[dfgPointer_->getDimension() - 1] += subsizes_[dfgPointer_->getDimension() - 1];
-      auto linEndIndex = linearize(endVectorIndex);
-      return linEndIndex;
-    }
-
-   private:
-    IndexType linearize(std::vector<int> indexVector) const {
-      auto offsets = dfgPointer_->getLocalOffsets();
-      assert(offsets[0] == 1);
-      auto dim = dfgPointer_->getDimension();
-      IndexType index = 0;
-      // Fortran ordering
-      for (DimType d = 0; d < dim; ++d) {
-        index += offsets[d] * indexVector[d];
-      }
-//       // compare to dfg
-// #ifndef NDEBUG
-//       IndexVector liv;
-//       liv.assign(indexVector.begin(), indexVector.end());
-//       auto li = dfgPointer_->getLocalLinearIndex(liv);
-//       IndexVector linv(dim);
-//       dfgPointer_->getLocalVectorIndex(index, linv);
-//       if (li != index) {
-//         std::cout << "li " << li << " " << indexVector << " vs " << index << " " << linv << std::endl;
-//       }
-//       assert(li == index);
-// #endif // ndef NDEBUG
-      return index;
-    }
-
-    std::vector<int> currentLocalIndex_;
-    std::vector<int> subsizes_;
-    std::vector<int> starts_;
-    DistributedFullGrid* dfgPointer_;
-  };
 
+class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
+ public:
+  SubarrayIterator(const MPI_Datatype& subarrayType, DistributedFullGrid& dfg) : SliceIterator<FG_ELEMENT>() {
+    this->offsets_ = dfg.getLocalOffsets();
+    this->dataPointer_ = &(dfg.getElementVector());
+
+    // read out the subarray informaiton
+    MPI_Datatype tmp;
+    DimType dim = dfg.getDimension();
+    std::vector<int> MPITypeIntegers(3 * dim + 2);
+    auto success = MPI_Type_get_contents(subarrayType, static_cast<int>(MPITypeIntegers.size()), 0,
+                                         1, MPITypeIntegers.data(), nullptr, &tmp);
+    assert(success == MPI_SUCCESS);
+    // check datatype
+    assert(tmp == dfg.getMPIDatatype());
+    // check order
+    assert(MPITypeIntegers.back() == MPI_ORDER_FORTRAN);
+
+    auto sizes = std::vector<int>(MPITypeIntegers.begin() + 1, MPITypeIntegers.begin() + dim + 1);
+    this->subsizes_ =
+        std::vector<int>(MPITypeIntegers.begin() + dim + 1, MPITypeIntegers.begin() + 2 * dim + 1);
+    this->starts_ = std::vector<int>(MPITypeIntegers.begin() + 2 * dim + 1,
+                                     MPITypeIntegers.begin() + 3 * dim + 1);
+    this->currentLocalIndex_ = this->starts_;
+
+    assert(dim == this->getDimension());
+    // check that sizes match
+    for (DimType d = 0; d < this->getDimension() - 1; ++d) {
+      assert(sizes[d] == static_cast<int>(dfg.getLocalSizes()[d]));
+      assert(this->subsizes_[d] + this->starts_[d] <= sizes[d]);
+    }
+    assert(std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies<int>()) ==
+           this->dataPointer_->size());
+
+    this->setEndIndex();
+    this->validateSizes();
+  }
+};
   FG_ELEMENT evalLocalIndexOn(const IndexVector& localIndex,
                               const std::vector<real>& coords) const {
     const auto& lowerBounds = this->getLowerBounds();
@@ -1066,11 +942,11 @@ class DistributedFullGrid {
         std::vector<int> csubsizes(subsizes.begin(), subsizes.end());
         std::vector<int> cstarts(starts.begin(), starts.end());
 
-
         // create subarray view on data
         MPI_Datatype mysubarray;
         MPI_Type_create_subarray(static_cast<int>(this->getDimension()), &csizes[0], &csubsizes[0],
-                                 &cstarts[0], MPI_ORDER_FORTRAN, this->getMPIDatatype(), &mysubarray);
+                                 &cstarts[0], MPI_ORDER_FORTRAN, this->getMPIDatatype(),
+                                 &mysubarray);
         MPI_Type_commit(&mysubarray);
         subarrayTypes.push_back(mysubarray);
 
@@ -1641,7 +1517,6 @@ class DistributedFullGrid {
       recvbufferFromUp.resize(0);
     }
 
-
     // TODO asynchronous??
     // send lower boundary values
     auto success =
@@ -1733,8 +1608,8 @@ class DistributedFullGrid {
     // if both lower and higher are set, it's only me and I can just average
     // not strictly necessary but good to test the iterators
     if (recvbufferFromUp.size() > 0 && recvbufferFromDown.size() > 0) {
-      SubarrayIterator downIt(downSubarrays[d], this);
-      SubarrayIterator upIt(upSubarrays[d], this);
+      SubarrayIterator downIt(downSubarrays[d], *this);
+      SubarrayIterator upIt(upSubarrays[d], *this);
       assert(downIt.size() == upIt.size());
 #ifndef NDEBUG
       auto initialIndexDiff = upIt.getIndex() - downIt.getIndex();
@@ -1754,7 +1629,7 @@ class DistributedFullGrid {
       assert(upIt.isAtEnd());
       assert(ctr == upIt.size());
     } else if (recvbufferFromUp.size() > 0) {
-      SubarrayIterator downIt(downSubarrays[d], this);
+      SubarrayIterator downIt(downSubarrays[d], *this);
       auto upIt = recvbufferFromUp.begin();
       while (!downIt.isAtEnd()) {
         auto avg = 0.5 * ((*upIt) + (*downIt));
@@ -1764,7 +1639,7 @@ class DistributedFullGrid {
       }
       assert(upIt == recvbufferFromUp.end());
     } else if (recvbufferFromDown.size() > 0) {
-      SubarrayIterator upIt(upSubarrays[d], this);
+      SubarrayIterator upIt(upSubarrays[d], *this);
       auto downIt = recvbufferFromDown.begin();
       while (!upIt.isAtEnd()) {
         auto avg = 0.5 * ((*upIt) + (*downIt));
@@ -1784,7 +1659,6 @@ class DistributedFullGrid {
     }
   }
 
-
   /**
    * @brief check if given globalLinearIndex is on the boundary of this DistributedFullGrid
    */
@@ -2108,8 +1982,6 @@ class DistributedFullGrid {
       }
     }
   }
-
-
 };
 // end class
 
diff --git a/src/fullgrid/SliceIterator.hpp b/src/fullgrid/SliceIterator.hpp
new file mode 100644
index 000000000..86a8092a3
--- /dev/null
+++ b/src/fullgrid/SliceIterator.hpp
@@ -0,0 +1,144 @@
+#pragma once
+#include <cassert>
+
+#include "utils/IndexVector.hpp"
+#include "utils/Types.hpp"
+
+namespace combigrid {
+template <typename FG_ELEMENT>
+class SliceIterator {
+ public:
+  // cf. https://www.internalpointers.com/post/writing-custom-iterators-modern-cpp
+  using iterator_category = std::forward_iterator_tag;
+  using difference_type = std::ptrdiff_t;
+  using value_type = FG_ELEMENT;
+  using pointer = FG_ELEMENT*;
+  using reference = FG_ELEMENT&;
+
+  SliceIterator(const std::vector<int>& subsizes, const std::vector<int>& starts,
+                const IndexVector& offsets, std::vector<FG_ELEMENT>* dataPointer)
+      : currentLocalIndex_(starts),
+        subsizes_(subsizes),
+        starts_(starts),
+        offsets_(offsets),
+        dataPointer_(dataPointer) {
+    this->setEndIndex();
+    this->validateSizes();
+  }
+
+  // cheap rule of 5
+  explicit SliceIterator() = default;
+  SliceIterator(const SliceIterator& other) = delete;
+  SliceIterator& operator=(const SliceIterator&) = delete;
+  SliceIterator(SliceIterator&& other) = delete;
+  SliceIterator& operator=(SliceIterator&& other) = delete;
+
+  reference operator*() const {
+#ifndef NDEBUG
+    // make sure to only dereference when we actually have the data mapped
+    if (linearize(currentLocalIndex_) > dataPointer_->size()) {
+      std::cout << " subsizes " << subsizes_ << " starts " << starts_ << " end " << endIndex()
+                << std::endl;
+      std::cout << " ref waah currentLocalIndex_" << currentLocalIndex_ << " linearized "
+                << linearize(currentLocalIndex_) << " numElements " << dataPointer_->size()
+                << std::endl;
+    }
+    assert(linearize(currentLocalIndex_) <= dataPointer_->size());
+    assert(linearize(currentLocalIndex_) < endIndex());
+#endif  // ndef NDEBUG
+    return (*dataPointer_)[linearize(currentLocalIndex_)];
+  }
+
+  pointer operator->() const { return &((*dataPointer_)[linearize(currentLocalIndex_)]); }
+  pointer getPointer() const { return &((*dataPointer_)[linearize(currentLocalIndex_)]); }
+
+  const std::vector<int>& getVecIndex() const { return currentLocalIndex_; }
+  IndexType getIndex() const { return linearize(currentLocalIndex_); }
+
+  const SliceIterator& operator++() {
+#ifndef NDEBUG
+    assert(linearize(currentLocalIndex_) <= dataPointer_->size());
+    auto cpLocalIdx = currentLocalIndex_;
+    auto idxbefore = linearize(currentLocalIndex_);
+#endif  // ndef NDEBUG
+    // increment
+    // Fortran ordering
+    currentLocalIndex_[0] += 1;
+    for (DimType d = 0; d < this->getDimension() - 1; ++d) {
+      // wrap around
+#ifndef NDEBUG
+      if (currentLocalIndex_[d] > starts_[d] + subsizes_[d]) {
+        std::cout << " subsizes " << subsizes_ << " starts " << starts_ << " end " << endIndex()
+                  << " idxbefore " << idxbefore << std::endl;
+        std::cout << "waah currentLocalIndex_" << currentLocalIndex_ << " before " << cpLocalIdx
+                  << std::endl;
+      }
+      assert(currentLocalIndex_[d] >= starts_[d]);
+      assert(!(currentLocalIndex_[d] > starts_[d] + subsizes_[d]));
+#endif  // ndef NDEBUG
+      // expect that it is more likely that we do not have to wrap around
+      // if (__builtin_expect((currentLocalIndex_[d] == starts_[d] + subsizes_[d]), false)) {
+      // (not sure if this is always the case)
+      if (currentLocalIndex_[d] == starts_[d] + subsizes_[d]) {
+        currentLocalIndex_[d] = starts_[d];
+        ++currentLocalIndex_[d + 1];
+      }
+    }
+    assert(idxbefore < linearize(currentLocalIndex_));
+    assert(linearize(currentLocalIndex_) <= endIndex());
+    return *this;
+  }
+  friend bool operator==(const SliceIterator& a, const SliceIterator& b) {
+    return a.currentLocalIndex_ == b.currentLocalIndex_;
+  };
+  friend bool operator!=(const SliceIterator& a, const SliceIterator& b) {
+    return a.currentLocalIndex_ != b.currentLocalIndex_;
+  };
+  pointer begin() { return &((*dataPointer_)[firstIndex()]); }
+  pointer end() { return &((*dataPointer_)[endIndex()]); }
+  bool isAtEnd() const { return (linearize(currentLocalIndex_) == endIndex()); }
+  int size() const {
+    return std::accumulate(subsizes_.begin(), subsizes_.end(), 1, std::multiplies<int>());
+  }
+
+  IndexType firstIndex() const { return linearize(starts_); }
+  void setEndIndex() {
+    std::vector<int> endVectorIndex = this->starts_;
+    endVectorIndex[this->getDimension() - 1] += this - subsizes_[this->getDimension() - 1];
+    linEndIndex_ = linearize(endVectorIndex);
+  }
+  IndexType endIndex() const {
+    assert(linEndIndex_ != -1);
+    return linEndIndex_;
+  }
+
+ protected:
+  inline DimType getDimension() const { return static_cast<DimType>(subsizes_.size()); }
+
+  inline IndexType linearize(const std::vector<int>& indexVector) const {
+    IndexType index = 0;
+    // Fortran ordering
+    for (DimType d = 0; d < subsizes_.size(); ++d) {
+      index += offsets_[d] * indexVector[d];
+    }
+    return index;
+  }
+
+  void validateSizes() {
+    assert(offsets_[0] == 1);
+    assert(std::accumulate(this->subsizes_.begin(), this->subsizes_.end(), 1,
+                           std::multiplies<int>()) <= this->dataPointer_->size());
+    assert(linearize(this->currentLocalIndex_) <= this->dataPointer_->size());
+    assert(currentLocalIndex_.size() == this->getDimension());
+    assert(subsizes_.size() == this->getDimension());
+    assert(starts_.size() == this->getDimension());
+  }
+
+  std::vector<int> currentLocalIndex_;
+  std::vector<int> subsizes_;
+  std::vector<int> starts_;
+  IndexVector offsets_;
+  std::vector<FG_ELEMENT>* dataPointer_;
+  IndexType linearEndIndex_ = -1;
+};
+}  // namespace combigrid
\ No newline at end of file

From e4730d3fb18b86cccfb6bb93b97c92919c7efa7c Mon Sep 17 00:00:00 2001
From: Theresa Pollinger <theresa.pollinger@ipvs.uni-stuttgart.de>
Date: Wed, 15 Feb 2023 12:50:16 +0100
Subject: [PATCH 2/5] SliceIterator: fix typo  (amends last commit)

---
 src/fullgrid/SliceIterator.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/fullgrid/SliceIterator.hpp b/src/fullgrid/SliceIterator.hpp
index 86a8092a3..9eb869c5d 100644
--- a/src/fullgrid/SliceIterator.hpp
+++ b/src/fullgrid/SliceIterator.hpp
@@ -105,11 +105,11 @@ class SliceIterator {
   void setEndIndex() {
     std::vector<int> endVectorIndex = this->starts_;
     endVectorIndex[this->getDimension() - 1] += this - subsizes_[this->getDimension() - 1];
-    linEndIndex_ = linearize(endVectorIndex);
+    linearEndIndex_ = linearize(endVectorIndex);
   }
   IndexType endIndex() const {
-    assert(linEndIndex_ != -1);
-    return linEndIndex_;
+    assert(linearEndIndex_ != -1);
+    return linearEndIndex_;
   }
 
  protected:

From 989e3f08d5a64851bca58790c706da0171f59a63 Mon Sep 17 00:00:00 2001
From: Theresa Pollinger <theresa.pollinger@ipvs.uni-stuttgart.de>
Date: Wed, 15 Feb 2023 13:59:56 +0100
Subject: [PATCH 3/5] SliceIterator: fix typo with ->  (amends last commit)

---
 src/fullgrid/SliceIterator.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fullgrid/SliceIterator.hpp b/src/fullgrid/SliceIterator.hpp
index 9eb869c5d..30c57f0e1 100644
--- a/src/fullgrid/SliceIterator.hpp
+++ b/src/fullgrid/SliceIterator.hpp
@@ -104,7 +104,7 @@ class SliceIterator {
   IndexType firstIndex() const { return linearize(starts_); }
   void setEndIndex() {
     std::vector<int> endVectorIndex = this->starts_;
-    endVectorIndex[this->getDimension() - 1] += this - subsizes_[this->getDimension() - 1];
+    endVectorIndex[this->getDimension() - 1] += this->subsizes_[this->getDimension() - 1];
     linearEndIndex_ = linearize(endVectorIndex);
   }
   IndexType endIndex() const {

From 06c601b3728d7d292d947660a007c6d8041c3066 Mon Sep 17 00:00:00 2001
From: Theresa Pollinger <theresa.pollinger@ipvs.uni-stuttgart.de>
Date: Thu, 16 Feb 2023 09:35:03 +0100
Subject: [PATCH 4/5] advection example: task constructor w/o dim

---
 examples/distributed_advection/TaskAdvection.hpp | 4 ++--
 examples/distributed_advection/combi_example.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/distributed_advection/TaskAdvection.hpp b/examples/distributed_advection/TaskAdvection.hpp
index 0fcea21ad..68932b963 100644
--- a/examples/distributed_advection/TaskAdvection.hpp
+++ b/examples/distributed_advection/TaskAdvection.hpp
@@ -30,8 +30,8 @@ class TaskAdvection : public Task {
   /* if the constructor of the base task class is not sufficient we can provide an
    * own implementation. here, we add dt, nsteps, and p as a new parameters.
    */
-  TaskAdvection(DimType dim, const LevelVector& l, const std::vector<BoundaryType>& boundary,
-                real coeff, LoadModel* loadModel, real dt, size_t nsteps,
+  TaskAdvection(const LevelVector& l, const std::vector<BoundaryType>& boundary, real coeff,
+                LoadModel* loadModel, real dt, size_t nsteps,
                 std::vector<int> p = std::vector<int>(0),
                 FaultCriterion* faultCrit = (new StaticFaults({0, IndexVector(0), IndexVector(0)})))
       : Task(l, boundary, coeff, loadModel, faultCrit),
diff --git a/examples/distributed_advection/combi_example.cpp b/examples/distributed_advection/combi_example.cpp
index 2b0965a0d..32265a59f 100644
--- a/examples/distributed_advection/combi_example.cpp
+++ b/examples/distributed_advection/combi_example.cpp
@@ -178,7 +178,7 @@ int main(int argc, char** argv) {
     TaskContainer tasks;
     std::vector<size_t> taskIDs;
     for (size_t i = 0; i < levels.size(); i++) {
-      Task* t = new TaskAdvection(dim, levels[i], boundary, coeffs[i], loadmodel.get(), dt,
+      Task* t = new TaskAdvection(levels[i], boundary, coeffs[i], loadmodel.get(), dt,
                                   nsteps, p);
 
       static_assert(!isGENE, "isGENE");

From c8645e5c8335712bb76326046ef3971c20cefc27 Mon Sep 17 00:00:00 2001
From: Theresa Pollinger <theresa.pollinger@ipvs.uni-stuttgart.de>
Date: Thu, 16 Feb 2023 09:39:47 +0100
Subject: [PATCH 5/5] dfg: use boost tensor as data storage * this compiles but
 the tests fail; needs debugging

---
 examples/combi_example/TaskExample.hpp        |   4 +-
 examples/combi_example_faults/TaskExample.hpp |   4 +-
 .../distributed_advection/TaskAdvection.hpp   |  34 ++---
 .../distributed_third_level/TaskAdvection.hpp |  37 +++--
 .../distributed_third_level/TaskConst.hpp     |   4 +-
 .../selalib_distributed/src/SelalibTask.hpp   |   2 +-
 src/fullgrid/DistributedFullGrid.hpp          | 144 +++++++++++-------
 src/fullgrid/MultiArray.hpp                   |   2 +-
 src/fullgrid/SliceIterator.hpp                |  32 ++--
 .../DistributedHierarchization.hpp            |  20 +--
 tests/TaskConst.hpp                           |   4 +-
 tests/TaskCount.hpp                           |   2 +-
 tests/test_distributedfullgrid.cpp            |   5 +-
 tests/test_ftolerance.cpp                     |   6 +-
 tests/test_hierarchization.cpp                |  12 +-
 tests/test_reduce.cpp                         |   4 +-
 tests/test_rescheduling.cpp                   |   6 +-
 17 files changed, 184 insertions(+), 138 deletions(-)

diff --git a/examples/combi_example/TaskExample.hpp b/examples/combi_example/TaskExample.hpp
index d42d7c245..7aa50b5f8 100644
--- a/examples/combi_example/TaskExample.hpp
+++ b/examples/combi_example/TaskExample.hpp
@@ -87,7 +87,7 @@ class TaskExample : public Task {
     dfg_ = new DistributedFullGrid<CombiDataType>(dim, l, lcomm, this->getBoundary(), p);
 
     /* loop over local subgrid and set initial values */
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
 
     for (size_t i = 0; i < elements.size(); ++i) {
       IndexType globalLinearIndex = dfg_->getGlobalLinearIndex(i);
@@ -109,7 +109,7 @@ class TaskExample : public Task {
 
     int lrank = theMPISystem()->getLocalRank();
 
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
     // TODO if your Example uses another data structure, you need to copy
     // the data from elements to that data structure
 
diff --git a/examples/combi_example_faults/TaskExample.hpp b/examples/combi_example_faults/TaskExample.hpp
index a1d20ddf8..0e3b706fb 100644
--- a/examples/combi_example_faults/TaskExample.hpp
+++ b/examples/combi_example_faults/TaskExample.hpp
@@ -91,7 +91,7 @@ class TaskExample: public Task {
     dfg_ = new DistributedFullGrid<CombiDataType>(dim, l, lcomm, this->getBoundary(), p);
 
     /* loop over local subgrid and set initial values */
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
 
     for (size_t i = 0; i < elements.size(); ++i) {
       IndexType globalLinearIndex = dfg_->getGlobalLinearIndex(i);
@@ -115,7 +115,7 @@ class TaskExample: public Task {
 
     /* pseudo timestepping to demonstrate the behaviour of your typical
      * time-dependent simulation problem. */
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
 
     for (size_t step = stepsTotal_; step < stepsTotal_ + nsteps_; ++step) {
       real time = (step + 1)* dt_;
diff --git a/examples/distributed_advection/TaskAdvection.hpp b/examples/distributed_advection/TaskAdvection.hpp
index 68932b963..b692279ac 100644
--- a/examples/distributed_advection/TaskAdvection.hpp
+++ b/examples/distributed_advection/TaskAdvection.hpp
@@ -1,8 +1,9 @@
 #pragma once
 
+#include <boost/numeric/ublas/tensor.hpp>
+
 #include "fullgrid/DistributedFullGrid.hpp"
 #include "task/Task.hpp"
-
 namespace combigrid {
 
 // exact solution
@@ -21,8 +22,9 @@ class TestFn {
     // leave out normalization, such that maximum is always 1
     return std::exp(exponent);  // / std::sqrt( std::pow(2*pi*sigmaSquared, coords.size()));
   }
+
  private:
-  static constexpr double sigmaSquaredInv_ = 1. / ((1./3.)*(1./3.));
+  static constexpr double sigmaSquaredInv_ = 1. / ((1. / 3.) * (1. / 3.));
 };
 
 class TaskAdvection : public Task {
@@ -59,7 +61,7 @@ class TaskAdvection : public Task {
     dfg_ = new DistributedFullGrid<CombiDataType>(dim, l, lcomm, this->getBoundary(), p_, false,
                                                   decomposition);
     if (phi_ == nullptr) {
-      phi_ = new std::vector<CombiDataType>(dfg_->getNrLocalElements());
+      phi_ = new DistributedFullGrid<CombiDataType>::TensorType(dfg_->getLocalExtents());
     }
 
     std::vector<double> h = dfg_->getGridSpacing();
@@ -95,8 +97,7 @@ class TaskAdvection : public Task {
     std::vector<double> h = dfg_->getGridSpacing();
     const auto& fullOffsets = dfg_->getLocalOffsets();
 
-    phi_->resize(dfg_->getNrLocalElements());
-    std::fill(phi_->begin(), phi_->end(), 0.);
+    phi_->reshape(dfg_->getLocalExtents(), 0.);
 
     for (size_t i = 0; i < nsteps_; ++i) {
       // compute the gradient in the original dfg_, then update into phi_ and
@@ -122,7 +123,7 @@ class TaskAdvection : public Task {
           dfg_->getLocalVectorIndex(li, locAxisIndex);
           // TODO can be unrolled into ghost and other part, avoiding if-statement
           CombiDataType phi_neighbor = 0.;
-          if (locAxisIndex[d] == 0){
+          if (locAxisIndex[d] == 0) {
             assert(phi_ghost.size() > 0);
             // then use values from boundary exchange
             IndexType gli = 0;
@@ -132,10 +133,7 @@ class TaskAdvection : public Task {
             assert(gli > -1);
             assert(gli < phi_ghost.size());
             phi_neighbor = phi_ghost[gli];
-          } else{
-            // --locAxisIndex[d];
-            // IndexType lni = dfg_->getLocalLinearIndex(locAxisIndex);
-            // assert(lni == (li - fullOffsets[d]));
+          } else {
             phi_neighbor = dfg_->getElementVector()[li - fullOffsets[d]];
           }
           // calculate gradient of phi with backward differential quotient
@@ -144,10 +142,11 @@ class TaskAdvection : public Task {
           u_dot_dphi[li] += velocity[d] * dphi;
         }
       }
+      // update all values in phi_
       for (IndexType li = 0; li < dfg_->getNrLocalElements(); ++li) {
         (*phi_)[li] = dfg_->getElementVector()[li] - u_dot_dphi[li] * dt_;
       }
-      phi_->swap(dfg_->getElementVector());
+      std::swap(*phi_, dfg_->getElementVector());
     }
     stepsTotal_ += nsteps_;
 
@@ -161,7 +160,8 @@ class TaskAdvection : public Task {
    * solution on one process and then converting it to the full grid representation.
    * the DistributedFullGrid class offers a convenient function to do this.
    */
-  void getFullGrid(FullGrid<CombiDataType>& fg, RankType r, CommunicatorType lcomm, int n = 0) override {
+  void getFullGrid(FullGrid<CombiDataType>& fg, RankType r, CommunicatorType lcomm,
+                   int n = 0) override {
     assert(fg.getLevels() == dfg_->getLevels());
 
     dfg_->gatherFullGrid(fg, r);
@@ -181,15 +181,13 @@ class TaskAdvection : public Task {
     phi_ = nullptr;
   }
 
-  real getCurrentTime() const override {
-    return stepsTotal_ * dt_;
-  }
+  real getCurrentTime() const override { return stepsTotal_ * dt_; }
 
   CombiDataType analyticalSolution(const std::vector<real>& coords, int n = 0) const override {
     assert(n == 0);
     auto coordsCopy = coords;
     TestFn f;
-    return f(coordsCopy, stepsTotal_*dt_);
+    return f(coordsCopy, stepsTotal_ * dt_);
   }
 
  protected:
@@ -212,7 +210,7 @@ class TaskAdvection : public Task {
   bool initialized_;
   size_t stepsTotal_;
   DistributedFullGrid<CombiDataType>* dfg_;
-  static std::vector<CombiDataType>* phi_;
+  static typename DistributedFullGrid<CombiDataType>::TensorType* phi_;
 
   /**
    * The serialize function has to be extended by the new member variables.
@@ -234,6 +232,6 @@ class TaskAdvection : public Task {
   }
 };
 
-std::vector<CombiDataType>* TaskAdvection::phi_ = nullptr;
+typename DistributedFullGrid<CombiDataType>::TensorType* TaskAdvection::phi_ = nullptr;
 
 }  // namespace combigrid
diff --git a/examples/distributed_third_level/TaskAdvection.hpp b/examples/distributed_third_level/TaskAdvection.hpp
index fb0d1ded9..5ee3f9e42 100644
--- a/examples/distributed_third_level/TaskAdvection.hpp
+++ b/examples/distributed_third_level/TaskAdvection.hpp
@@ -1,8 +1,8 @@
 #pragma once
 
+
 #include "fullgrid/DistributedFullGrid.hpp"
 #include "task/Task.hpp"
-
 namespace combigrid {
 
 // exact solution
@@ -21,8 +21,9 @@ class TestFn {
     // leave out normalization, such that maximum is always 1
     return std::exp(exponent);  // / std::sqrt( std::pow(2*pi*sigmaSquared, coords.size()));
   }
+
  private:
-  static constexpr double sigmaSquaredInv_ = 1. / ((1./3.)*(1./3.));
+  static constexpr double sigmaSquaredInv_ = 1. / ((1. / 3.) * (1. / 3.));
 };
 
 class TaskAdvection : public Task {
@@ -30,8 +31,8 @@ class TaskAdvection : public Task {
   /* if the constructor of the base task class is not sufficient we can provide an
    * own implementation. here, we add dt, nsteps, and p as a new parameters.
    */
-  TaskAdvection(const LevelVector& l, const std::vector<BoundaryType>& boundary,
-                real coeff, LoadModel* loadModel, real dt, size_t nsteps,
+  TaskAdvection(const LevelVector& l, const std::vector<BoundaryType>& boundary, real coeff,
+                LoadModel* loadModel, real dt, size_t nsteps,
                 std::vector<int> p = std::vector<int>(0),
                 FaultCriterion* faultCrit = (new StaticFaults({0, IndexVector(0), IndexVector(0)})))
       : Task(l, boundary, coeff, loadModel, faultCrit),
@@ -59,7 +60,7 @@ class TaskAdvection : public Task {
     dfg_ = new DistributedFullGrid<CombiDataType>(dim, l, lcomm, this->getBoundary(), p_, false,
                                                   decomposition);
     if (phi_ == nullptr) {
-      phi_ = new std::vector<CombiDataType>(dfg_->getNrLocalElements());
+      phi_ = new DistributedFullGrid<CombiDataType>::TensorType(dfg_->getLocalExtents());
     }
 
     std::vector<double> h = dfg_->getGridSpacing();
@@ -95,8 +96,7 @@ class TaskAdvection : public Task {
     std::vector<double> h = dfg_->getGridSpacing();
     const auto& fullOffsets = dfg_->getLocalOffsets();
 
-    phi_->resize(dfg_->getNrLocalElements());
-    std::fill(phi_->begin(), phi_->end(), 0.);
+    phi_->reshape(dfg_->getLocalExtents(), 0.);
 
     for (size_t i = 0; i < nsteps_; ++i) {
       // compute the gradient in the original dfg_, then update into phi_ and
@@ -122,7 +122,7 @@ class TaskAdvection : public Task {
           dfg_->getLocalVectorIndex(li, locAxisIndex);
           // TODO can be unrolled into ghost and other part, avoiding if-statement
           CombiDataType phi_neighbor = 0.;
-          if (locAxisIndex[d] == 0){
+          if (locAxisIndex[d] == 0) {
             assert(phi_ghost.size() > 0);
             // then use values from boundary exchange
             IndexType gli = 0;
@@ -132,10 +132,7 @@ class TaskAdvection : public Task {
             assert(gli > -1);
             assert(gli < phi_ghost.size());
             phi_neighbor = phi_ghost[gli];
-          } else{
-            // --locAxisIndex[d];
-            // IndexType lni = dfg_->getLocalLinearIndex(locAxisIndex);
-            // assert(lni == (li - fullOffsets[d]));
+          } else {
             phi_neighbor = dfg_->getElementVector()[li - fullOffsets[d]];
           }
           // calculate gradient of phi with backward differential quotient
@@ -144,10 +141,11 @@ class TaskAdvection : public Task {
           u_dot_dphi[li] += velocity[d] * dphi;
         }
       }
+      // update all values in phi_
       for (IndexType li = 0; li < dfg_->getNrLocalElements(); ++li) {
         (*phi_)[li] = dfg_->getElementVector()[li] - u_dot_dphi[li] * dt_;
       }
-      phi_->swap(dfg_->getElementVector());
+      std::swap(*phi_, dfg_->getElementVector());
     }
     stepsTotal_ += nsteps_;
 
@@ -161,7 +159,8 @@ class TaskAdvection : public Task {
    * solution on one process and then converting it to the full grid representation.
    * the DistributedFullGrid class offers a convenient function to do this.
    */
-  void getFullGrid(FullGrid<CombiDataType>& fg, RankType r, CommunicatorType lcomm, int n = 0) override {
+  void getFullGrid(FullGrid<CombiDataType>& fg, RankType r, CommunicatorType lcomm,
+                   int n = 0) override {
     assert(fg.getLevels() == dfg_->getLevels());
 
     dfg_->gatherFullGrid(fg, r);
@@ -181,15 +180,13 @@ class TaskAdvection : public Task {
     phi_ = nullptr;
   }
 
-  real getCurrentTime() const override {
-    return stepsTotal_ * dt_;
-  }
+  real getCurrentTime() const override { return stepsTotal_ * dt_; }
 
   CombiDataType analyticalSolution(const std::vector<real>& coords, int n = 0) const override {
     assert(n == 0);
     auto coordsCopy = coords;
     TestFn f;
-    return f(coordsCopy, stepsTotal_*dt_);
+    return f(coordsCopy, stepsTotal_ * dt_);
   }
 
  protected:
@@ -212,7 +209,7 @@ class TaskAdvection : public Task {
   bool initialized_;
   size_t stepsTotal_;
   DistributedFullGrid<CombiDataType>* dfg_;
-  static std::vector<CombiDataType>* phi_;
+  static typename DistributedFullGrid<CombiDataType>::TensorType* phi_;
 
   /**
    * The serialize function has to be extended by the new member variables.
@@ -234,6 +231,6 @@ class TaskAdvection : public Task {
   }
 };
 
-std::vector<CombiDataType>* TaskAdvection::phi_ = nullptr;
+typename DistributedFullGrid<CombiDataType>::TensorType* TaskAdvection::phi_ = nullptr;
 
 }  // namespace combigrid
diff --git a/examples/distributed_third_level/TaskConst.hpp b/examples/distributed_third_level/TaskConst.hpp
index 7e04b676e..9f874ccc8 100644
--- a/examples/distributed_third_level/TaskConst.hpp
+++ b/examples/distributed_third_level/TaskConst.hpp
@@ -38,7 +38,7 @@ class TaskConst : public combigrid::Task {
     dfg_ = new DistributedFullGrid<CombiDataType>(getDim(), getLevelVector(), lcomm, getBoundary(),
                                                   p, false, decomposition);
 
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
     for (auto& element : elements) {
       element = 10;
     }
@@ -49,7 +49,7 @@ class TaskConst : public combigrid::Task {
 
     // std::cout << "run " << getCommRank(lcomm) << std::endl;    
 
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
     for (auto& element : elements) {
       element = getLevelVector()[0] / (double)getLevelVector()[1];
       // std::cout << "e " << element << std::endl;
diff --git a/examples/selalib_distributed/src/SelalibTask.hpp b/examples/selalib_distributed/src/SelalibTask.hpp
index f65968908..8fff8e6cb 100644
--- a/examples/selalib_distributed/src/SelalibTask.hpp
+++ b/examples/selalib_distributed/src/SelalibTask.hpp
@@ -299,7 +299,7 @@ class SelalibTask : public combigrid::Task {
 
   void setDFGfromLocalDistribution() {
 #ifndef NDEBUG
-    auto& localDFGSize = dfg_->getLocalSizes();
+    auto& localDFGSize = dfg_->getLocalExtents();
     assert(dim_ == 6);
     // std::cout << localDFGSize << " " << std::vector<int>(localSize_.begin(), localSize_.end()) <<
     // std::endl;
diff --git a/src/fullgrid/DistributedFullGrid.hpp b/src/fullgrid/DistributedFullGrid.hpp
index 883264d26..fdb406593 100644
--- a/src/fullgrid/DistributedFullGrid.hpp
+++ b/src/fullgrid/DistributedFullGrid.hpp
@@ -7,6 +7,8 @@
 #include <numeric>
 #include <string>
 
+#include <boost/numeric/ublas/tensor.hpp>
+
 #include "fullgrid/FullGrid.hpp"
 #include "fullgrid/SliceIterator.hpp"
 #include "mpi/MPICartesianUtils.hpp"
@@ -81,6 +83,11 @@ static inline std::vector<IndexVector> getDefaultDecomposition(
 template <typename FG_ELEMENT>
 class DistributedFullGrid {
  public:
+  // cf. https://en.wikipedia.org/wiki/Row-_and_column-major_order#Address_calculation_in_general
+  // -> column-major order
+  using TensorType = boost::numeric::ublas::tensor<FG_ELEMENT, boost::numeric::ublas::first_order,
+                                                   std::vector<FG_ELEMENT>>;
+
   /** dimension adaptive Ctor */
   DistributedFullGrid(DimType dim, const LevelVector& levels, CommunicatorType comm,
                       const std::vector<BoundaryType>& hasBdrPoints, const std::vector<int>& procs,
@@ -94,7 +101,7 @@ class DistributedFullGrid {
 
     InitMPI(comm, procs);  // will also check grids per dim
 
-    // set global num of elements and offsets
+    // set global num of elements and other properties, for the whole grid
     nrElements_ = 1;
     offsets_.resize(dim_);
     nrPoints_.resize(dim_);
@@ -117,23 +124,52 @@ class DistributedFullGrid {
     } else {
       setDecomposition(decomposition);
     }
-    myPartitionsLowerBounds_ = getLowerBounds(rank_);
-    myPartitionsUpperBounds_ = getUpperBounds(rank_);
 
-    // set local elements and local offsets
-    nrLocalPoints_ = getUpperBounds() - getLowerBounds();
 
+    // set local elements and properties, for our partition of the grid only
+    myPartitionsLowerBounds_ = getLowerBounds(rank_);
+    myPartitionsUpperBounds_ = getUpperBounds(rank_);
+    auto localExtents = myPartitionsUpperBounds_ - myPartitionsLowerBounds_;
+    std::vector<size_t> nrLocalPoints(localExtents.begin(), localExtents.end());
     nrLocalElements_ = 1;
     localOffsets_.resize(dim);
-    // cf. https://en.wikipedia.org/wiki/Row-_and_column-major_order#Address_calculation_in_general
-    // -> column-major order
     for (DimType j = 0; j < dim_; ++j) {
       localOffsets_[j] = nrLocalElements_;
-      nrLocalElements_ *= nrLocalPoints_[j];
+      nrLocalElements_ *= nrLocalPoints[j];
+    }
+    // see if we can have templated dimensions, but selected at runtime
+    if (dim_ == 1) {
+      fullgridTensor_ = TensorType{nrLocalPoints[0]};
+    } else if (dim_ == 2) {
+      fullgridTensor_ = TensorType{nrLocalPoints[0],
+                                                                   nrLocalPoints[1]};
+    } else if (dim_ == 3) {
+      fullgridTensor_ = TensorType{nrLocalPoints[0],
+                                                                   nrLocalPoints[1],
+                                                                   nrLocalPoints[2]};
+    } else if (dim_ == 4) {
+      fullgridTensor_ = TensorType{nrLocalPoints[0],
+                                                                   nrLocalPoints[1],
+                                                                   nrLocalPoints[2],
+                                                                   nrLocalPoints[3]};
+    } else if (dim_ == 5) {
+      fullgridTensor_ = TensorType{nrLocalPoints[0],
+                                                                   nrLocalPoints[1],
+                                                                   nrLocalPoints[2],
+                                                                   nrLocalPoints[3],
+                                                                   nrLocalPoints[4]};
+    } else if (dim_ == 6) {
+      fullgridTensor_ = TensorType{nrLocalPoints[0],
+                                                                   nrLocalPoints[1],
+                                                                   nrLocalPoints[2],
+                                                                   nrLocalPoints[3],
+                                                                   nrLocalPoints[4],
+                                                                   nrLocalPoints[5]};
+    } else {
+      throw std::runtime_error("DistributedFullGrid: dimension not (yet) supported, plz add");
     }
 
-    // in contrast to serial implementation we directly create the grid
-    fullgridVector_.resize(nrLocalElements_);
+    assert(fullgridTensor_.size() == nrLocalElements_);
   }
 
   // explicit DistributedFullGrid(const DistributedFullGrid& other) {
@@ -145,7 +181,7 @@ class DistributedFullGrid {
   //   }
   // }
 
-  // copy construction would need to duplicate communicator_
+  // copy construction would need to duplicate communicator_, so we delete it
   DistributedFullGrid(const DistributedFullGrid& other) = delete;
   DistributedFullGrid& operator=(const DistributedFullGrid&) = delete;
   DistributedFullGrid(DistributedFullGrid&& other) = default;
@@ -166,7 +202,8 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
  public:
   SubarrayIterator(const MPI_Datatype& subarrayType, DistributedFullGrid& dfg) : SliceIterator<FG_ELEMENT>() {
     this->offsets_ = dfg.getLocalOffsets();
-    this->dataPointer_ = &(dfg.getElementVector());
+    this->dataPointer_ = dfg.getElementVector().data();
+    this->dataSize_ = dfg.getElementVector().size();
 
     // read out the subarray informaiton
     MPI_Datatype tmp;
@@ -190,11 +227,11 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
     assert(dim == this->getDimension());
     // check that sizes match
     for (DimType d = 0; d < this->getDimension() - 1; ++d) {
-      assert(sizes[d] == static_cast<int>(dfg.getLocalSizes()[d]));
+      assert(sizes[d] == static_cast<int>(dfg.getLocalExtents()[d]));
       assert(this->subsizes_[d] + this->starts_[d] <= sizes[d]);
     }
     assert(std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies<int>()) ==
-           this->dataPointer_->size());
+           this->dataSize_);
 
     this->setEndIndex();
     this->validateSizes();
@@ -252,7 +289,7 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
       return evalLocalIndexOn(localIndex, coords);
     } else {
       FG_ELEMENT sum = 0.;
-      auto lastIndexInDim = this->getLocalSizes()[dim] - 1;
+      auto lastIndexInDim = this->getLocalExtents()[dim] - 1;
       if (localIndex[dim] >= 0 && localIndex[dim] <= lastIndexInDim) {
         sum += evalMultiindexRecursively(localIndex, static_cast<DimType>(dim + 1), coords);
       } else if (localIndex[dim] > lastIndexInDim &&
@@ -330,7 +367,7 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
                  this->getCartesianUtils().isOnLowerBoundaryInDimension(d)) {
         // if we have periodic boundary and this process is at the lower end of the dimension d
         // we need the periodic coordinate => do nothing
-      } else if (localIndexLowerNonzeroNeighborPoint[d] > this->getLocalSizes()[d] - 1) {
+      } else if (localIndexLowerNonzeroNeighborPoint[d] > this->getLocalExtents()[d] - 1) {
         // index too high
         value = 0.;
         return;
@@ -579,10 +616,10 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
   /** returns the dimension of the full grid */
   inline DimType getDimension() const { return dim_; }
 
-  /** the getters for the full grid vector */
-  inline std::vector<FG_ELEMENT>& getElementVector() { return fullgridVector_; }
+  /** the getters for the full grid tensor */
+  inline TensorType& getElementVector() { return fullgridTensor_; }
 
-  inline const std::vector<FG_ELEMENT>& getElementVector() const { return fullgridVector_; }
+  inline const TensorType& getElementVector() const { return fullgridTensor_; }
 
   /** return the offset in the full grid vector of the dimension */
   inline IndexType getOffset(DimType i) const { return offsets_[i]; }
@@ -620,9 +657,9 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
     std::fill(this->getElementVector().begin(), this->getElementVector().end(), 0.);
   }
 
-  inline FG_ELEMENT* getData() { return &fullgridVector_[0]; }
+  inline FG_ELEMENT* getData() { return &fullgridTensor_[0]; }
 
-  inline const FG_ELEMENT* getData() const { return &fullgridVector_[0]; }
+  inline const FG_ELEMENT* getData() const { return &fullgridTensor_[0]; }
 
   /** MPI Communicator*/
   inline CommunicatorType getCommunicator() const { return communicator_; }
@@ -904,7 +941,11 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
   }
 
   // return extents of local grid
-  inline const IndexVector& getLocalSizes() const { return nrLocalPoints_; }
+  inline IndexVector getLocalSizes() const {
+    // throw std::runtime_error("getLocalSizes() may be horribly slow if used often, use
+    // getLocalExtents() instead");
+    return IndexVector(getLocalExtents().begin(), getLocalExtents().end());
+  }
 
   // return extents of global grid
   inline const IndexVector& getGlobalSizes() const { return nrPoints_; }
@@ -1037,7 +1078,7 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
         auto sPointer = dsg.getData(sIndex);
         subspaceIndices = getFGPointsOfSubspace(level);
         for (const auto& fIndex : subspaceIndices) {
-          fullgridVector_[fIndex] = *sPointer;
+          fullgridTensor_[fIndex] = *sPointer;
           ++sPointer;
         }
       }
@@ -1092,9 +1133,9 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
   inline IndexType getNumPointsOnThisPartition(DimType d, IndexType localStart,
                                                IndexType strideForThisLevel) const {
     assert(d < this->getDimension());
-    return (nrLocalPoints_[d] - 1 < localStart)
+    return (getLocalExtents()[d] - 1 < localStart)
                ? 0
-               : (nrLocalPoints_[d] - 1 - localStart) / strideForThisLevel + 1;
+               : (getLocalExtents()[d] - 1 - localStart) / strideForThisLevel + 1;
   }
 
   inline IndexType getNumPointsOnThisPartition(LevelType l, DimType d) const {
@@ -1185,7 +1226,7 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
   inline real normalizelp(int p) {
     real norm = getLpNorm(p);
 
-    std::vector<FG_ELEMENT>& data = getElementVector();
+    auto& data = getElementVector();
     for (size_t i = 0; i < data.size(); ++i) data[i] = data[i] / norm;
 
     return norm;
@@ -1193,7 +1234,7 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
 
   // multiply with a constant
   inline void mul(real c) {
-    std::vector<FG_ELEMENT>& data = getElementVector();
+    auto& data = getElementVector();
     for (size_t i = 0; i < data.size(); ++i) data[i] *= c;
   }
 
@@ -1354,14 +1395,14 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
     // set lower bounds of subarray
     IndexVector subarrayLowerBounds = this->getLowerBounds();
     IndexVector subarrayUpperBounds = this->getUpperBounds();
-    subarrayLowerBounds[d] += this->getLocalSizes()[d] - 1;
+    subarrayLowerBounds[d] += this->getLocalExtents()[d] - 1;
 
     auto subarrayExtents = subarrayUpperBounds - subarrayLowerBounds;
     assert(subarrayExtents[d] == 1);
     auto subarrayStarts = subarrayLowerBounds - this->getLowerBounds();
 
     // create MPI datatype
-    std::vector<int> sizes(this->getLocalSizes().begin(), this->getLocalSizes().end());
+    std::vector<int> sizes(this->getLocalExtents().begin(), this->getLocalExtents().end());
     std::vector<int> subsizes(subarrayExtents.begin(), subarrayExtents.end());
     // the starts are local indices
     std::vector<int> starts(subarrayStarts.begin(), subarrayStarts.end());
@@ -1390,7 +1431,7 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
     // set upper bounds of subarray
     IndexVector subarrayLowerBounds = this->getLowerBounds();
     IndexVector subarrayUpperBounds = this->getUpperBounds();
-    subarrayUpperBounds[d] -= this->getLocalSizes()[d] - 1;
+    subarrayUpperBounds[d] -= this->getLocalExtents()[d] - 1;
 
     auto subarrayExtents = subarrayUpperBounds - subarrayLowerBounds;
     assert(subarrayExtents[d] == 1);
@@ -1398,7 +1439,7 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
 
     // create MPI datatype
     // also, the data dimensions are reversed
-    std::vector<int> sizes(this->getLocalSizes().begin(), this->getLocalSizes().end());
+    std::vector<int> sizes(this->getLocalExtents().begin(), this->getLocalExtents().end());
     std::vector<int> subsizes(subarrayExtents.begin(), subarrayExtents.end());
     // the starts are local indices
     std::vector<int> starts(subarrayStarts.begin(), subarrayStarts.end());
@@ -1573,7 +1614,7 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
 
     // create recvbuffer
     auto numElements = std::accumulate(subarrayExtents.begin(), subarrayExtents.end(), 1,
-                                       std::multiplies<IndexType>());
+                                       std::multiplies<>());
     if (lower < 0) {
       numElements = 0;
       subarrayExtents = IndexVector(this->getDimension(), 0);
@@ -1755,8 +1796,10 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
     return values;
   }
 
-  const MPICartesianUtils& getCartesianUtils() const {
-    return cartesianUtils_;
+  const MPICartesianUtils& getCartesianUtils() const { return cartesianUtils_; }
+
+  const typename TensorType::extents_type& getLocalExtents() const {
+    return fullgridTensor_.extents();
   }
 
  private:
@@ -1794,17 +1837,17 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
   IndexVector myPartitionsUpperBounds_;
 
   /** the full grid vector, this contains the elements of the full grid */
-  std::vector<FG_ELEMENT> fullgridVector_;
+  TensorType fullgridTensor_;
 
   /** Variables for the distributed Full Grid*/
   /** Cartesien MPI Communicator  */
   CommunicatorType communicator_;
 
- /**
-  * the decomposition of the full grid over processors
-  * contains (for every dimension) the grid point indices at
-  * which a process boundary is assumed
-  */
+  /**
+   * the decomposition of the full grid over processors
+   * contains (for every dimension) the grid point indices at
+   * which a process boundary is assumed
+   */
   std::vector<IndexVector> decomposition_;
 
   /** utility to get info about cartesian communicator  */
@@ -1820,9 +1863,6 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
   std::vector<MPI_Datatype> downwardSubarrays_;
   std::vector<MPI_Datatype> upwardSubarrays_;
 
-  /** number of local (in this grid cell) points per axis*/
-  IndexVector nrLocalPoints_;
-
   /**
    * @brief sets the MPI-related members rank_, size_, communicator_, and cartesianUtils_
    *
@@ -1939,11 +1979,11 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
     assert(dim_ == 2);
 
     // loop over rows i -> dim2
-    for (IndexType i = 0; i < nrLocalPoints_[1]; ++i) {
+    for (IndexType i = 0; i < getLocalExtents()[1]; ++i) {
       IndexType offset = localOffsets_[1] * i;
 
-      for (IndexType j = 0; j < nrLocalPoints_[0]; ++j) {
-        os << fullgridVector_[offset + j] << "\t";
+      for (IndexType j = 0; j < getLocalExtents()[0]; ++j) {
+        os << fullgridTensor_[offset + j] << "\t";
       }
 
       os << std::endl;
@@ -1954,8 +1994,8 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
   void print1D(std::ostream& os) const {
     assert(dim_ == 1);
 
-    for (IndexType j = 0; j < nrLocalPoints_[0]; ++j) {
-      os << fullgridVector_[j] << "\t";
+    for (IndexType j = 0; j < getLocalExtents()[0]; ++j) {
+      os << fullgridTensor_[j] << "\t";
     }
 
     os << std::endl;
@@ -1963,7 +2003,7 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
 
   // n-dim output. will print 2-dimensional slices of domain
   void print3D(std::ostream& os) const {
-    for (IndexType k = 0; k < nrLocalPoints_[2]; ++k) {
+    for (IndexType k = 0; k < getLocalExtents()[2]; ++k) {
       assert(dim_ == 3);
 
 // output global z index
@@ -1971,11 +2011,11 @@ class SubarrayIterator : public SliceIterator<FG_ELEMENT> {
       IndexType offsetZ = localOffsets_[2] * k;
 
       // loop over rows i -> dim2
-      for (IndexType i = 0; i < nrLocalPoints_[1]; ++i) {
+      for (IndexType i = 0; i < getLocalExtents()[1]; ++i) {
         IndexType offset = offsetZ + localOffsets_[1] * i;
 
-        for (IndexType j = 0; j < nrLocalPoints_[0]; ++j) {
-          os << fullgridVector_[offset + j] << "\t";
+        for (IndexType j = 0; j < getLocalExtents()[0]; ++j) {
+          os << fullgridTensor_[offset + j] << "\t";
         }
 
         os << std::endl;
diff --git a/src/fullgrid/MultiArray.hpp b/src/fullgrid/MultiArray.hpp
index 8befc84c8..3b9c8e1cb 100644
--- a/src/fullgrid/MultiArray.hpp
+++ b/src/fullgrid/MultiArray.hpp
@@ -16,7 +16,7 @@ using MultiArrayRef = boost::multi_array_ref<T, D>;
 template <typename T, size_t D>
 static MultiArrayRef<T, D> createMultiArrayRef(DistributedFullGrid<T>& dfg) {
   /* note that the sizes of dfg are in reverse order compared to shape */
-  std::vector<size_t> shape(dfg.getLocalSizes().rbegin(), dfg.getLocalSizes().rend());
+  std::vector<size_t> shape(dfg.getLocalExtents().rbegin(), dfg.getLocalExtents().rend());
   if (!reverseOrderingDFGPartitions) {
     assert(false && "this is not adapted to normal ordering of DFG partitions yet");
   }
diff --git a/src/fullgrid/SliceIterator.hpp b/src/fullgrid/SliceIterator.hpp
index 30c57f0e1..ff148ad37 100644
--- a/src/fullgrid/SliceIterator.hpp
+++ b/src/fullgrid/SliceIterator.hpp
@@ -4,6 +4,8 @@
 #include "utils/IndexVector.hpp"
 #include "utils/Types.hpp"
 
+// template <typename FG_ELEMENT>
+// using SliceIterator = typename boost::numeric::ublas::tensor<FG_ELEMENT>::iterator;
 namespace combigrid {
 template <typename FG_ELEMENT>
 class SliceIterator {
@@ -16,12 +18,13 @@ class SliceIterator {
   using reference = FG_ELEMENT&;
 
   SliceIterator(const std::vector<int>& subsizes, const std::vector<int>& starts,
-                const IndexVector& offsets, std::vector<FG_ELEMENT>* dataPointer)
+                const IndexVector& offsets, std::vector<FG_ELEMENT>& data)
       : currentLocalIndex_(starts),
         subsizes_(subsizes),
         starts_(starts),
         offsets_(offsets),
-        dataPointer_(dataPointer) {
+        dataPointer_(data.data(),
+        dataSize_(data.size())) {
     this->setEndIndex();
     this->validateSizes();
   }
@@ -36,28 +39,28 @@ class SliceIterator {
   reference operator*() const {
 #ifndef NDEBUG
     // make sure to only dereference when we actually have the data mapped
-    if (linearize(currentLocalIndex_) > dataPointer_->size()) {
+    if (linearize(currentLocalIndex_) > dataSize_) {
       std::cout << " subsizes " << subsizes_ << " starts " << starts_ << " end " << endIndex()
                 << std::endl;
       std::cout << " ref waah currentLocalIndex_" << currentLocalIndex_ << " linearized "
-                << linearize(currentLocalIndex_) << " numElements " << dataPointer_->size()
+                << linearize(currentLocalIndex_) << " numElements " << dataSize_
                 << std::endl;
     }
-    assert(linearize(currentLocalIndex_) <= dataPointer_->size());
+    assert(linearize(currentLocalIndex_) <= dataSize_);
     assert(linearize(currentLocalIndex_) < endIndex());
 #endif  // ndef NDEBUG
-    return (*dataPointer_)[linearize(currentLocalIndex_)];
+    return dataPointer_[linearize(currentLocalIndex_)];
   }
 
-  pointer operator->() const { return &((*dataPointer_)[linearize(currentLocalIndex_)]); }
-  pointer getPointer() const { return &((*dataPointer_)[linearize(currentLocalIndex_)]); }
+  pointer operator->() const { return &(dataPointer_[linearize(currentLocalIndex_)]); }
+  pointer getPointer() const { return &(dataPointer_[linearize(currentLocalIndex_)]); }
 
   const std::vector<int>& getVecIndex() const { return currentLocalIndex_; }
   IndexType getIndex() const { return linearize(currentLocalIndex_); }
 
   const SliceIterator& operator++() {
 #ifndef NDEBUG
-    assert(linearize(currentLocalIndex_) <= dataPointer_->size());
+    assert(linearize(currentLocalIndex_) <= dataSize_);
     auto cpLocalIdx = currentLocalIndex_;
     auto idxbefore = linearize(currentLocalIndex_);
 #endif  // ndef NDEBUG
@@ -94,8 +97,8 @@ class SliceIterator {
   friend bool operator!=(const SliceIterator& a, const SliceIterator& b) {
     return a.currentLocalIndex_ != b.currentLocalIndex_;
   };
-  pointer begin() { return &((*dataPointer_)[firstIndex()]); }
-  pointer end() { return &((*dataPointer_)[endIndex()]); }
+  pointer begin() { return &(dataPointer_[firstIndex()]); }
+  pointer end() { return &(dataPointer_[endIndex()]); }
   bool isAtEnd() const { return (linearize(currentLocalIndex_) == endIndex()); }
   int size() const {
     return std::accumulate(subsizes_.begin(), subsizes_.end(), 1, std::multiplies<int>());
@@ -127,8 +130,8 @@ class SliceIterator {
   void validateSizes() {
     assert(offsets_[0] == 1);
     assert(std::accumulate(this->subsizes_.begin(), this->subsizes_.end(), 1,
-                           std::multiplies<int>()) <= this->dataPointer_->size());
-    assert(linearize(this->currentLocalIndex_) <= this->dataPointer_->size());
+                           std::multiplies<int>()) <= this->dataSize_);
+    assert(linearize(this->currentLocalIndex_) <= this->dataSize_);
     assert(currentLocalIndex_.size() == this->getDimension());
     assert(subsizes_.size() == this->getDimension());
     assert(starts_.size() == this->getDimension());
@@ -138,7 +141,8 @@ class SliceIterator {
   std::vector<int> subsizes_;
   std::vector<int> starts_;
   IndexVector offsets_;
-  std::vector<FG_ELEMENT>* dataPointer_;
+  FG_ELEMENT* dataPointer_;
+  size_t dataSize_;
   IndexType linearEndIndex_ = -1;
 };
 }  // namespace combigrid
\ No newline at end of file
diff --git a/src/hierarchization/DistributedHierarchization.hpp b/src/hierarchization/DistributedHierarchization.hpp
index 6e27ed20d..7b4c0a3ee 100644
--- a/src/hierarchization/DistributedHierarchization.hpp
+++ b/src/hierarchization/DistributedHierarchization.hpp
@@ -184,7 +184,7 @@ void sendAndReceiveIndices(const std::map<RankType, std::set<IndexType>>& send1d
       MPI_Datatype mysubarray;
       {
         // sizes of local grid
-        std::vector<int> sizes(dfg.getLocalSizes().begin(), dfg.getLocalSizes().end());
+        std::vector<int> sizes(dfg.getLocalExtents().begin(), dfg.getLocalExtents().end());
 
         // sizes of subarray ( full size except dimension d )
         std::vector<int> subsizes = sizes;
@@ -274,7 +274,7 @@ void sendAndReceiveIndicesBlock(const std::map<RankType, std::set<IndexType>>& s
   MPI_Datatype mysubarray;
   {
     // sizes of local grid
-    std::vector<int> sizes(dfg.getLocalSizes().begin(), dfg.getLocalSizes().end());
+    std::vector<int> sizes(dfg.getLocalExtents().begin(), dfg.getLocalExtents().end());
 
     // sizes of subarray ( full size except dimension d )
     std::vector<int> subsizes = sizes;
@@ -1036,7 +1036,7 @@ void hierarchizeWithBoundary(DistributedFullGrid<FG_ELEMENT>& dfg,
   auto lmax = dfg.getLevels()[dim];
   auto size = dfg.getNrLocalElements();
   auto stride = dfg.getLocalOffsets()[dim];
-  auto ndim = dfg.getLocalSizes()[dim];
+  auto ndim = dfg.getLocalExtents()[dim];
   IndexType jump = stride * ndim;
   IndexType nbrOfPoles = size / ndim;
 
@@ -1050,7 +1050,7 @@ void hierarchizeWithBoundary(DistributedFullGrid<FG_ELEMENT>& dfg,
   bool oneSidedBoundary = dfg.returnBoundaryFlags()[dim] == 1;
   tmp.resize(dfg.getGlobalSizes()[dim] + (oneSidedBoundary ? 1 : 0),
              std::numeric_limits<double>::quiet_NaN());
-  std::vector<FG_ELEMENT>& ldata = dfg.getElementVector();
+  auto& ldata = dfg.getElementVector();
   lldiv_t divresult;
   IndexType start;
   IndexType gstart = dfg.getLowerBounds()[dim];
@@ -1107,7 +1107,7 @@ void hierarchizeNoBoundary(DistributedFullGrid<FG_ELEMENT>& dfg,
   LevelType lmax = dfg.getLevels()[dim];
   IndexType size = dfg.getNrLocalElements();
   IndexType stride = dfg.getLocalOffsets()[dim];
-  IndexType ndim = dfg.getLocalSizes()[dim];
+  IndexType ndim = dfg.getLocalExtents()[dim];
   IndexType jump = stride * ndim;
   IndexType nbrOfPoles = size / ndim;
 
@@ -1115,7 +1115,7 @@ void hierarchizeNoBoundary(DistributedFullGrid<FG_ELEMENT>& dfg,
 
   // loop over poles
   std::vector<FG_ELEMENT> tmp(dfg.getGlobalSizes()[dim], std::numeric_limits<double>::quiet_NaN());
-  std::vector<FG_ELEMENT>& ldata = dfg.getElementVector();
+  auto& ldata = dfg.getElementVector();
   lldiv_t divresult;
   IndexType start;
   IndexType gstart = dfg.getLowerBounds()[dim];
@@ -1190,7 +1190,7 @@ void dehierarchizeWithBoundary(DistributedFullGrid<FG_ELEMENT>& dfg,
   const auto& lmax = dfg.getLevels()[dim];
   const auto& size = dfg.getNrLocalElements();
   const auto& stride = dfg.getLocalOffsets()[dim];
-  const auto& ndim = dfg.getLocalSizes()[dim];
+  const auto& ndim = dfg.getLocalExtents()[dim];
   IndexType jump = stride * ndim;
   IndexType nbrOfPoles = size / ndim;
 
@@ -1204,7 +1204,7 @@ void dehierarchizeWithBoundary(DistributedFullGrid<FG_ELEMENT>& dfg,
   bool oneSidedBoundary = dfg.returnBoundaryFlags()[dim] == 1;
   tmp.resize(dfg.getGlobalSizes()[dim] + (oneSidedBoundary ? 1 : 0),
              std::numeric_limits<double>::quiet_NaN());
-  std::vector<FG_ELEMENT>& ldata = dfg.getElementVector();
+  auto& ldata = dfg.getElementVector();
   lldiv_t divresult;
   IndexType start;
   IndexType gstart = dfg.getLowerBounds()[dim];
@@ -1257,7 +1257,7 @@ void dehierarchizeNoBoundary(DistributedFullGrid<FG_ELEMENT>& dfg,
   auto lmax = dfg.getLevels()[dim];
   auto size = dfg.getNrLocalElements();
   auto stride = dfg.getLocalOffsets()[dim];
-  auto ndim = dfg.getLocalSizes()[dim];
+  auto ndim = dfg.getLocalExtents()[dim];
   IndexType jump = stride * ndim;
   IndexType nbrOfPoles = size / ndim;
 
@@ -1266,7 +1266,7 @@ void dehierarchizeNoBoundary(DistributedFullGrid<FG_ELEMENT>& dfg,
   // loop over poles
   static std::vector<FG_ELEMENT> tmp;
   tmp.resize(dfg.getGlobalSizes()[dim], std::numeric_limits<double>::quiet_NaN());
-  std::vector<FG_ELEMENT>& ldata = dfg.getElementVector();
+  auto& ldata = dfg.getElementVector();
   lldiv_t divresult;
   IndexType start;
   IndexType gstart = dfg.getLowerBounds()[dim];
diff --git a/tests/TaskConst.hpp b/tests/TaskConst.hpp
index 6fd263f78..567e22577 100644
--- a/tests/TaskConst.hpp
+++ b/tests/TaskConst.hpp
@@ -40,7 +40,7 @@ class TaskConst : public combigrid::Task {
     dfg_ = new DistributedFullGrid<CombiDataType>(getDim(), getLevelVector(), lcomm, getBoundary(),
                                                   p, false, decomposition);
 
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
     for (auto& element : elements) {
       element = 10;
     }
@@ -51,7 +51,7 @@ class TaskConst : public combigrid::Task {
 
     // std::cout << "run " << getCommRank(lcomm) << std::endl;    
 
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
     for (auto& element : elements) {
       element = getLevelVector()[0] / (double)getLevelVector()[1];
       // std::cout << "e " << element << std::endl;
diff --git a/tests/TaskCount.hpp b/tests/TaskCount.hpp
index a7cb75cf2..e8a49107b 100644
--- a/tests/TaskCount.hpp
+++ b/tests/TaskCount.hpp
@@ -49,7 +49,7 @@ class TaskCount : public combigrid::Task {
     }
     dfg_ = new DistributedFullGrid<CombiDataType>(getDim(), getLevelVector(), lcomm, getBoundary(), p, false, decomposition);
 
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
     for (auto& element : elements) {
       element = -0.;
     }
diff --git a/tests/test_distributedfullgrid.cpp b/tests/test_distributedfullgrid.cpp
index 555aba321..85d82aef4 100644
--- a/tests/test_distributedfullgrid.cpp
+++ b/tests/test_distributedfullgrid.cpp
@@ -169,8 +169,11 @@ void checkDistributedFullgrid(LevelVector& levels, std::vector<int>& procs,
   TestFn f;
   const auto dim = static_cast<DimType>(levels.size());
 
-  // create dfg
+  BOOST_TEST_CHECKPOINT("create dfg");
   DistributedFullGrid<std::complex<double>> dfg(dim, levels, comm, boundary, procs, forward);
+  BOOST_CHECK(dfg.getNrElements() > 0);
+  BOOST_CHECK(dfg.getNrLocalElements() <= dfg.getNrElements());
+  BOOST_CHECK(dfg.getLocalExtents().size() == dim);
 
   // set function values
   for (IndexType li = 0; li < dfg.getNrLocalElements(); ++li) {
diff --git a/tests/test_ftolerance.cpp b/tests/test_ftolerance.cpp
index 42fd59630..20ec52131 100644
--- a/tests/test_ftolerance.cpp
+++ b/tests/test_ftolerance.cpp
@@ -73,7 +73,7 @@ class TaskAdvFDM : public combigrid::Task {
 
     dfg_ =
         new DistributedFullGrid<CombiDataType>(getDim(), getLevelVector(), lcomm, getBoundary(), p);
-    phi_.resize(dfg_->getNrElements());
+    phi_.reshape(dfg_->getLocalExtents(), 0.);
 
     for (IndexType li = 0; li < dfg_->getNrElements(); ++li) {
       std::vector<double> coords(getDim());
@@ -102,7 +102,7 @@ class TaskAdvFDM : public combigrid::Task {
     double h1 = 1.0 / (double)l1;
 
     for (size_t i = 0; i < nsteps_; ++i) {
-      phi_.swap(dfg_->getElementVector());
+      std::swap(phi_, dfg_->getElementVector());
 
       for (IndexType li = 0; li < dfg_->getNrElements(); ++li) {
 
@@ -161,7 +161,7 @@ class TaskAdvFDM : public combigrid::Task {
   size_t stepsTotal_;
   size_t combiStep_;
   DistributedFullGrid<CombiDataType>* dfg_;
-  std::vector<CombiDataType> phi_;
+  typename DistributedFullGrid<CombiDataType>::TensorType phi_;
 
   template <class Archive>
   void serialize(Archive& ar, const unsigned int version) {
diff --git a/tests/test_hierarchization.cpp b/tests/test_hierarchization.cpp
index f08f53b7f..dab2b0ded 100644
--- a/tests/test_hierarchization.cpp
+++ b/tests/test_hierarchization.cpp
@@ -222,7 +222,9 @@ real checkConservationOfMomentum(DistributedFullGrid<FG_ELEMENT>& dfg,
   //  mostly stolen from dfg::getCornersValues
   auto corners = dfgOne->getCornersGlobalVectorIndices();
   std::sort(corners.begin(), corners.end());
-  std::vector<FG_ELEMENT> values;
+  typename DistributedFullGrid<FG_ELEMENT>::TensorType::extents_type extents(
+      std::vector<size_t>(dfgOne->getDimension(), 2));
+  typename DistributedFullGrid<FG_ELEMENT>::TensorType values(extents);
   std::vector<IndexType> localCornerIndices;
   for (size_t cornerNo = 0; cornerNo < corners.size(); ++cornerNo) {
     if (dfgOne->isGlobalIndexHere(corners[cornerNo])) {
@@ -236,13 +238,15 @@ real checkConservationOfMomentum(DistributedFullGrid<FG_ELEMENT>& dfg,
   }
   // make sure corner values are in right order
   std::sort(localCornerIndices.begin(), localCornerIndices.end());
+  size_t valuesIndex = 0;
   for (const auto& index : localCornerIndices) {
-    values.push_back(dfgOne->getElementVector()[index]);
+    values[valuesIndex] = dfgOne->getElementVector()[index];
+    ++valuesIndex;
   }
 
   auto dfgZero = std::unique_ptr<DistributedFullGrid<FG_ELEMENT>>(
       new DistributedFullGrid<FG_ELEMENT>(dim, lmin, comm, boundary, procs));
-  BOOST_CHECK(values.size() == dfgZero->getElementVector().size());
+  BOOST_CHECK(valuesIndex == dfgZero->getElementVector().size());
   dfgZero->getElementVector() = values;
 
   // no need to dehierarchize, is nodal/scaling function on coarsest grid anyways
@@ -633,7 +637,7 @@ BOOST_AUTO_TEST_CASE(test_exchangeData1d, *boost::unit_test::timeout(20)) {
             if (procs[d] > 1) {
               // check that all indices from other ranks along the pole are present
               BOOST_CHECK_EQUAL(remoteDataAll.size(),
-                                dfg.getGlobalSizes()[d] - dfg.getLocalSizes()[d]);
+                                dfg.getGlobalSizes()[d] - dfg.getLocalExtents()[d]);
             }
             MPI_Barrier(comm);
             TestHelper::testStrayMessages(comm);
diff --git a/tests/test_reduce.cpp b/tests/test_reduce.cpp
index e28d13ed4..9795e4179 100644
--- a/tests/test_reduce.cpp
+++ b/tests/test_reduce.cpp
@@ -60,7 +60,7 @@ class TaskConst : public combigrid::Task {
     dfg_ = new DistributedFullGrid<CombiDataType>(getDim(), getLevelVector(), lcomm, getBoundary(),
                                                   p, false, decomposition);
 
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
     for (auto& element : elements) {
       element = 10;
     }
@@ -70,7 +70,7 @@ class TaskConst : public combigrid::Task {
 
     std::cout << "run " << getCommRank(lcomm) << std::endl;
     
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
     for (auto& element : elements) {
       // BOOST_CHECK(abs(dfg_->getData()[li]));
       element = static_cast<double>(getLevelVector()[0]) / getLevelVector()[1];
diff --git a/tests/test_rescheduling.cpp b/tests/test_rescheduling.cpp
index 2ad00a4b6..c661636f9 100644
--- a/tests/test_rescheduling.cpp
+++ b/tests/test_rescheduling.cpp
@@ -86,7 +86,7 @@ class TestingTask : public combigrid::Task {
     dfg_ = new DistributedFullGrid<CombiDataType>(getDim(), getLevelVector(), lcomm, getBoundary(),
                                                   p, false, decomposition);
 
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
     for (auto& element : elements) {
       element = 0; // default state is 0
     }
@@ -98,7 +98,7 @@ class TestingTask : public combigrid::Task {
 
     // std::cout << "run " << getCommRank(lcomm) << std::endl;
     
-    std::vector<CombiDataType>& elements = dfg_->getElementVector();
+    auto& elements = dfg_->getElementVector();
     for (auto& element : elements) {
       element = 10; // after run was executed the state is 10
     }
@@ -251,7 +251,7 @@ void checkRescheduling(size_t ngroup = 1, size_t nprocs = 1) {
       BOOST_REQUIRE(tasksContainSameValue(pgroup.getTasks()));
 
       for (auto& t : pgroup.getTasks()) {
-        std::vector<CombiDataType>& elements = t->getDistributedFullGrid().getElementVector();
+        auto& elements = t->getDistributedFullGrid().getElementVector();
         for (auto e : elements) {
           // Elements need to be always equal to 10 because
           // * first run: run is executed