diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 66c006a28b..455c688420 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: CCACHE_COMPRESS : true CCACHE_COMPRESSLEVEL : 6 OMPI_MCA_btl_vader_single_copy_mechanism : none - PARSEC_MCA_runtime_bind_threads : 0 + PARSEC_MCA_bind_threads : 0 BUILD_CONFIG : > -DMADNESS_TASK_BACKEND=${{ matrix.task_backend }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a1688f5862..735340c596 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -38,6 +38,7 @@ TiledArray/error.h TiledArray/initialize.h TiledArray/perm_index.h TiledArray/permutation.h +TiledArray/platform.h TiledArray/proc_grid.h TiledArray/range.h TiledArray/range1.h @@ -239,7 +240,6 @@ if(TILEDARRAY_HAS_HIP OR TILEDARRAY_HAS_CUDA) TiledArray/device/kernel/reduce_kernel.h TiledArray/device/kernel/thrust/mult_kernel.h TiledArray/device/kernel/thrust/reduce_kernel.h - TiledArray/device/platform.h TiledArray/device/thrust.h TiledArray/device/um_storage.h ) diff --git a/src/TiledArray/dense_shape.h b/src/TiledArray/dense_shape.h index 9ab1ccaf50..147f8ad960 100644 --- a/src/TiledArray/dense_shape.h +++ b/src/TiledArray/dense_shape.h @@ -27,7 +27,10 @@ #define TILEDARRAY_DENSE_SHAPE_H__INCLUDED #include + +#include #include + #include namespace madness { @@ -391,6 +394,11 @@ class DenseShape { std::numeric_limits::epsilon(); }; // class DenseShape +template +std::size_t size_of(const DenseShape& shape) { + return sizeof(shape); +} + constexpr inline bool operator==(const DenseShape& a, const DenseShape& b) { return true; } diff --git a/src/TiledArray/device/btas.h b/src/TiledArray/device/btas.h index b30fdd4edd..0fa0bd593b 100644 --- a/src/TiledArray/device/btas.h +++ b/src/TiledArray/device/btas.h @@ -37,9 +37,9 @@ #include #include -#include #include #include +#include namespace TiledArray { diff --git a/src/TiledArray/device/cpu_cuda_vector.h 
b/src/TiledArray/device/cpu_cuda_vector.h index d7a9ad1422..3742c6b01d 100644 --- a/src/TiledArray/device/cpu_cuda_vector.h +++ b/src/TiledArray/device/cpu_cuda_vector.h @@ -4,8 +4,8 @@ #include -#include #include +#include #include @@ -213,9 +213,9 @@ struct ArchiveLoadImpl> { static inline void load(const Archive& ar, TiledArray::cpu_cuda_vector& x) { typename TiledArray::cpu_cuda_vector::size_type n(0); - ar& n; + ar & n; x.resize(n); - for (auto& xi : x) ar& xi; + for (auto& xi : x) ar & xi; } }; @@ -223,8 +223,8 @@ template struct ArchiveStoreImpl> { static inline void store(const Archive& ar, const TiledArray::cpu_cuda_vector& x) { - ar& x.size(); - for (const auto& xi : x) ar& xi; + ar & x.size(); + for (const auto& xi : x) ar & xi; } }; diff --git a/src/TiledArray/device/um_storage.h b/src/TiledArray/device/um_storage.h index d91c032312..5ce3b8588e 100644 --- a/src/TiledArray/device/um_storage.h +++ b/src/TiledArray/device/um_storage.h @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index bb75523c93..58c774557b 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -1743,6 +1743,20 @@ class DistArray : public madness::archive::ParallelSerializableObject { }; // class DistArray +/// \return the approximate number of bytes used by \p t in this rank's +/// memory space `S` +/// \note this does not account for the TiledRange and some other metadata +template +std::size_t size_of(const DistArray& da) { + std::size_t result = 0; + result += size_of(da.shape()); + // add up local tile's contributions + for (const auto& tile_ref : da) { + result += size_of(tile_ref.get()); + } + return result; +} + #ifndef TILEDARRAY_HEADER_ONLY extern template class DistArray, DensePolicy>; diff --git a/src/TiledArray/device/platform.h b/src/TiledArray/platform.h similarity index 59% rename from src/TiledArray/device/platform.h rename to src/TiledArray/platform.h 
index d30a204fb4..a885addba6 100644 --- a/src/TiledArray/device/platform.h +++ b/src/TiledArray/platform.h @@ -21,8 +21,12 @@ * */ -#ifndef TILEDARRAY_DEVICE_PLATFORM_H__INCLUDED -#define TILEDARRAY_DEVICE_PLATFORM_H__INCLUDED +#ifndef TILEDARRAY_PLATFORM_H__INCLUDED +#define TILEDARRAY_PLATFORM_H__INCLUDED + +#include + +#include namespace TiledArray { @@ -54,6 +58,40 @@ constexpr bool overlap(MemorySpace space1, MemorySpace space2) { return (space1 & space2) != MemorySpace::Null; } +// customization point: is_constexpr_size_of_v reports whether +// size_of(T) is the same for all T +template +inline constexpr bool is_constexpr_size_of_v = detail::is_numeric_v; + +// customization point: size_of(O) -> std::size_t reports the number of +// bytes occupied by O in S +template >> +constexpr std::size_t size_of(const T& t) { + return sizeof(T); +} + +// customization point: allocates_memory_space(A) -> bool reports whether +// allocator A allocates memory in space S +template +constexpr bool allocates_memory_space(const std::allocator& a) { + return S == MemorySpace::Host; +} +template +constexpr bool allocates_memory_space(const Eigen::aligned_allocator& a) { + return S == MemorySpace::Host; +} +template +constexpr bool allocates_memory_space(const host_allocator& a) { + return S == MemorySpace::Host; +} +#ifdef TILEDARRAY_HAS_DEVICE +template +constexpr bool allocates_memory_space(const device_um_allocator& a) { + return S == MemorySpace::Device_UM; +} +#endif + /// enumerates the execution spaces enum class ExecutionSpace { Host, Device }; @@ -62,4 +100,4 @@ enum class ExecutionSpace { Host, Device }; } // namespace TiledArray -#endif // TILEDARRAY_DEVICE_PLATFORM_H__INCLUDED +#endif // TILEDARRAY_PLATFORM_H__INCLUDED diff --git a/src/TiledArray/range.h b/src/TiledArray/range.h index 2235da948e..eea59db2e3 100644 --- a/src/TiledArray/range.h +++ b/src/TiledArray/range.h @@ -21,6 +21,7 @@ #define TILEDARRAY_RANGE_H__INCLUDED #include +#include #include #include 
#include @@ -1247,6 +1248,20 @@ class Range { return ordinal(last) - ordinal(first); } + template + friend constexpr std::size_t size_of(const Range& r) { + std::size_t sz = 0; + if constexpr (S == MemorySpace::Host) { + sz += sizeof(r); + } + // correct for optional dynamic allocation of datavec_ + if constexpr (S == MemorySpace::Host) { + sz -= sizeof(r.datavec_); + } + sz += size_of(r.datavec_); + return sz; + } + }; // class Range // lift Range::index_type and Range::index_view_type into user-land diff --git a/src/TiledArray/range1.h b/src/TiledArray/range1.h index 0086868dc6..47bdef796e 100644 --- a/src/TiledArray/range1.h +++ b/src/TiledArray/range1.h @@ -26,6 +26,7 @@ #include #include +#include namespace TiledArray { @@ -138,7 +139,7 @@ struct Range1 { /// \brief dereferences this iterator /// \return const reference to the current index const auto& dereference() const { return v; } - }; + }; // class Iterator friend class Iterator; typedef Iterator const_iterator; ///< Iterator type @@ -201,6 +202,15 @@ struct Range1 { void serialize(Archive& ar) const { ar & first & second; } + + template + friend constexpr std::size_t size_of(const Range1& r) { + std::size_t sz = 0; + if constexpr (S == MemorySpace::Host) { + sz += sizeof(r); + } + return sz; + } }; inline bool operator==(const Range1& x, const Range1& y) { diff --git a/src/TiledArray/sparse_shape.h b/src/TiledArray/sparse_shape.h index 7da071a88b..ffeacebd5c 100644 --- a/src/TiledArray/sparse_shape.h +++ b/src/TiledArray/sparse_shape.h @@ -28,6 +28,7 @@ #include +#include #include #include #include @@ -1721,6 +1722,9 @@ class SparseShape { return cast_abs_factor; } + template + friend std::size_t size_of(const SparseShape& shape); + }; // class SparseShape // Static member initialization @@ -1728,6 +1732,23 @@ template typename SparseShape::value_type SparseShape::threshold_ = std::numeric_limits::epsilon(); +template +std::size_t size_of(const SparseShape& shape) { + std::size_t sz = 0; + if constexpr 
(S == MemorySpace::Host) { + sz += sizeof(shape); + } + // account for dynamically-allocated content + if constexpr (S == MemorySpace::Host) { + sz -= sizeof(shape.tile_norms_); + } + sz += size_of(shape.tile_norms_); + if (shape.tile_norms_unscaled_) { + sz += size_of(*(shape.tile_norms_unscaled_)); + } + return sz; +} + /// Add the shape to an output stream /// \tparam T the numeric type supporting the type of \c shape diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index 73965a619b..c116241289 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -24,6 +24,7 @@ #include "TiledArray/external/umpire.h" #include "TiledArray/host/env.h" +#include "TiledArray/platform.h" #include "TiledArray/math/blas.h" #include "TiledArray/math/gemm_helper.h" @@ -2776,6 +2777,34 @@ class Tensor { }; // class Tensor +/// \return the number of bytes used by \p t in memory space +/// `S` +template +std::size_t size_of(const Tensor& t) { + std::size_t result = 0; + if constexpr (S == MemorySpace::Host) { + result += sizeof(t); + } + // correct for optional dynamic allocation of Range + if constexpr (S == MemorySpace::Host) { + result -= sizeof(Range); + } + result += size_of(t.range()); + + if (allocates_memory_space(A{})) { + if (!t.empty()) { + if constexpr (is_constexpr_size_of_v) { + result += t.size() * sizeof(T); + } else { + result += std::accumulate( + t.begin(), t.end(), std::size_t{0}, + [](const std::size_t s, const T& t) { return s + size_of(t); }); + } + } + } + return result; +} + #ifdef TA_TENSOR_MEM_TRACE template std::size_t Tensor::trace_if_larger_than_ = diff --git a/src/TiledArray/tiled_range.h b/src/TiledArray/tiled_range.h index d4db1c4911..9718e5c189 100644 --- a/src/TiledArray/tiled_range.h +++ b/src/TiledArray/tiled_range.h @@ -398,6 +398,24 @@ class TiledRange { range_type elements_range_; ///< Range of element indices Ranges ranges_; ///< tiled (1d) range, aka TiledRange1, for each mode ///< `*this` 
is a direct product of these tilings + + template + friend constexpr std::size_t size_of(const TiledRange& r) { + std::size_t sz = 0; + if constexpr (S == MemorySpace::Host) { + sz += sizeof(r); + } + // correct for optional dynamic allocation of range_ and elements_range_ + if constexpr (S == MemorySpace::Host) { + sz -= sizeof(r.range_); + sz -= sizeof(r.elements_range_); + sz -= sizeof(r.ranges_); + } + sz += size_of(r.range_); + sz += size_of(r.elements_range_); + sz += size_of(r.ranges_); + return sz; + } }; /// TiledRange permutation operator. diff --git a/src/TiledArray/tiled_range1.h b/src/TiledArray/tiled_range1.h index 3bd3af1e54..7938688005 100644 --- a/src/TiledArray/tiled_range1.h +++ b/src/TiledArray/tiled_range1.h @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -493,6 +494,27 @@ class TiledRange1 { mutable std::shared_ptr elem2tile_; ///< maps element index to tile index (memoized data). + template + friend constexpr std::size_t size_of(const TiledRange1& r) { + std::size_t sz = 0; + if constexpr (S == MemorySpace::Host) { + sz += sizeof(r); + } + // correct for optional dynamic allocation of range_ and elements_range_ + if constexpr (S == MemorySpace::Host) { + sz -= sizeof(r.range_); + sz -= sizeof(r.elements_range_); + sz -= sizeof(r.tiles_ranges_); + } + sz += size_of(r.range_); + sz += size_of(r.elements_range_); + sz += size_of(r.tiles_ranges_); + if (r.elem2tile_) { + sz += r.elements_range_.extent() * sizeof(index1_type); + } + return sz; + } + }; // class TiledRange1 /// Exchange the data of the two given ranges. 
diff --git a/src/TiledArray/util/vector.h b/src/TiledArray/util/vector.h index d0e0651ecc..88103be8ee 100644 --- a/src/TiledArray/util/vector.h +++ b/src/TiledArray/util/vector.h @@ -45,6 +45,7 @@ #include #include #include "TiledArray/error.h" +#include "TiledArray/platform.h" namespace TiledArray { @@ -55,6 +56,66 @@ using vector = std::vector; template using svector = boost::container::small_vector; +} // namespace container + +// size_of etc. + +template +constexpr std::size_t size_of(const std::vector& t) { + std::size_t sz = 0; + if constexpr (S == MemorySpace::Host) { + sz += sizeof(t); + } + if constexpr (allocates_memory_space(t.get_allocator())) { + if constexpr (is_constexpr_size_of_v) { + sz += sizeof(T) * t.capacity(); + } else { + sz += std::accumulate( + t.begin(), t.end(), std::size_t{0}, + [](const std::size_t s, const T& t) { return s + size_of(t); }); + sz += (t.capacity() - t.size()) * sizeof(T); + } + } + return sz; +} + +template +constexpr bool allocates_memory_space( + const boost::container::small_vector_allocator& a) { + return S == MemorySpace::Host; +} + +template +constexpr std::size_t size_of( + const boost::container::small_vector& t) { + std::size_t sz = 0; + if constexpr (S == MemorySpace::Host) { + sz += sizeof(t); + } + if (allocates_memory_space(t.get_allocator())) { + std::size_t data_sz = 0; + if constexpr (is_constexpr_size_of_v) { + data_sz += sizeof(T) * t.capacity(); + } else { + data_sz += std::accumulate( + t.begin(), t.end(), std::size_t{0}, + [](const std::size_t s, const T& t) { return s + size_of(t); }); + data_sz += (t.capacity() - t.size()) * sizeof(T); + } + if (t.capacity() > N || + S != MemorySpace::Host) { // dynamically allocated buffer => account + // for the entirety of data_sz + sz += data_sz; + } else { // data in internal buffer => account for the + // dynamically-allocated part of data_sz + sz += (data_sz - t.capacity() * sizeof(T)); + } + } + return sz; +} + +namespace container { + +template 
std::enable_if_t && detail::is_sized_range_v, diff --git a/tests/dist_array.cpp b/tests/dist_array.cpp index 64f69e69db..1f5c9fe10c 100644 --- a/tests/dist_array.cpp +++ b/tests/dist_array.cpp @@ -944,4 +944,67 @@ BOOST_AUTO_TEST_CASE(reduction) { BOOST_REQUIRE(array_norm = std::sqrt(TA::dot(array, array))); } +BOOST_AUTO_TEST_CASE(size_of) { + using Numeric = double; + using T = Tensor; + using ToT = Tensor; + using Policy = SparsePolicy; + using ArrayToT = DistArray; + + auto unit_T = [](Range const& rng) { return T(rng, Numeric{1}); }; + + auto unit_ToT = [unit_T](Range const& rngo, Range const& rngi) { + return ToT(rngo, unit_T(rngi)); + }; + + size_t constexpr nrows = 3; + size_t constexpr ncols = 4; + TiledRange const trange({{0, 2, 5, 7}, {0, 5, 7, 10, 12}}); + TA_ASSERT(trange.tiles_range().extent().at(0) == nrows && + trange.tiles_range().extent().at(1) == ncols, + "Following code depends on this condition."); + + // this Range is used to construct all inner tensors of the tile with + // tile index @c tix. + auto inner_dims = [nrows, ncols](Range::index_type const& tix) -> Range { + static std::array const rows{7, 8, 9}; + static std::array const cols{7, 8, 9, 10}; + + TA_ASSERT(tix.size() == 2, "Only rank-2 tensor expected."); + return Range({rows[tix.at(0) % nrows], cols[tix.at(1) % ncols]}); + }; + + // let's make all 'diagonal' tiles zero + auto zero_tile = [](Range::index_type const& tix) -> bool { + return tix.at(0) == tix.at(1); + }; + + auto make_tile = [inner_dims, // + zero_tile, // + &trange, // + unit_ToT](auto& tile, auto const& rng) { + auto&& tix = trange.element_to_tile(rng.lobound()); + if (zero_tile(tix)) + return 0.; + else { + tile = unit_ToT(rng, inner_dims(tix)); + return tile.norm(); + } + }; + + auto& world = get_default_world(); + + // all non-zero inner tensors of this ToT array are unit (ie all + // inner tensors' elements are 1.) 
+ auto array = make_array(world, trange, make_tile); + + auto sz0 = TiledArray::size_of(array); + world.gop.sum(sz0); + const auto sz0_expected = + /* size on 1 rank */ 56728 + + /* size of shape on ranks 1 ... N-1 */ (world.size() - 1) * + TiledArray::size_of(array.shape()); + BOOST_REQUIRE(sz0 == sz0_expected); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/tensor.cpp b/tests/tensor.cpp index 0909004e00..34ea8f5d3d 100644 --- a/tests/tensor.cpp +++ b/tests/tensor.cpp @@ -783,4 +783,32 @@ BOOST_AUTO_TEST_CASE(print) { // std::cout << tb; } +BOOST_AUTO_TEST_CASE(size_of) { + auto sz0h = TiledArray::size_of(TensorN{}); + BOOST_REQUIRE(sz0h == sizeof(TensorN)); + + auto sz0d = TiledArray::size_of(TensorN{}); + BOOST_REQUIRE(sz0d == 0); + + auto sz0um = + TiledArray::size_of(TensorN{}); + BOOST_REQUIRE(sz0um == 0); + + auto sz1 = TiledArray::size_of( + TensorZ(Range(2, 3, 4))); + BOOST_REQUIRE(sz1 == + sizeof(TensorZ) + 2 * 3 * 4 * sizeof(TensorZ::value_type)); + + using TTD = Tensor>; + auto sz2 = + TiledArray::size_of(TTD(Range(2, 3, 4))); + BOOST_REQUIRE(sz2 == sizeof(TTD) + 2 * 3 * 4 * sizeof(TTD::value_type)); + + TTD ttd(Range(2, 3, 4)); + ttd(0, 0, 0) = TensorD(Range(5, 6)); + auto sz3 = TiledArray::size_of(ttd); + BOOST_REQUIRE(sz3 == sizeof(TTD) + 2 * 3 * 4 * sizeof(TTD::value_type) + + 5 * 6 * sizeof(TTD::value_type::value_type)); +} + BOOST_AUTO_TEST_SUITE_END()