diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d6b71126b..e010495570 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -320,10 +320,9 @@ if (TA_TTG) endif(TA_TTG) detect_MADNESS_configuration() include(external/eigen.cmake) -# the FetchContent-based version will not work due to BLT target name conflicts -# include(${PROJECT_SOURCE_DIR}/cmake/modules/FindOrFetchUmpire.cmake) -# use the ExternalProject-based version -include(external/umpire.cmake) + +include(${PROJECT_SOURCE_DIR}/cmake/modules/FindOrFetchUmpireCXXAllocator.cmake) +add_dependencies(External-tiledarray vrg-build-external-projects) ###### discover linear algebra diff --git a/INSTALL.md b/INSTALL.md index fc5c111992..ae0536d2d4 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -2,16 +2,32 @@ ## Synopsis -```.cpp -$ git clone https://github.com/ValeevGroup/TiledArray.git tiledarray -$ cd tiledarray -$ cmake -B build \ - -D CMAKE_INSTALL_PREFIX=/path/to/tiledarray/install \ - -D CMAKE_TOOLCHAIN_FILE=cmake/vg/toolchains/.cmake \ - . 
-$ cmake --build build -(recommended, but optional): $ cmake --build build --target check -$ cmake --build build --target install +Building and installing: +```c++ +$ git clone https://github.com/ValeevGroup/tiledarray.git +$ cmake -S tiledarray -B tiledarray/build \ + -D CMAKE_INSTALL_PREFIX=/path/to/tiledarray/install +(recommended, but optional): $ cmake --build tiledarray/build --target check +$ cmake --build tiledarray/build --target install +``` +After this TA can be consumed from another project's CMake harness: +```cmake +find_package(tiledarray CONFIG REQUIRED) +target_link_libraries(your_executable_or_library_target PUBLIC tiledarray) +``` + +Or simply build TiledArray from source within another project's CMake harness: +```cmake +find_package(tiledarray CONFIG) +if (NOT TARGET tiledarray) + cmake_minimum_required(VERSION 3.14.0) # for FetchContent_MakeAvailable + include(FetchContent) + FetchContent_Declare(tiledarray + GIT_REPOSITORY https://github.com/ValeevGroup/tiledarray + ) + FetchContent_MakeAvailable(tiledarray) +endif() +target_link_libraries(your_executable_or_library_target PUBLIC tiledarray) ``` ## Introduction @@ -40,22 +56,21 @@ Both methods are supported. However, for most users we _strongly_ recommend to b - Boost.Test: header-only or (optionally) as a compiled library, *only used for unit testing* - Boost.Range: header-only, *only used for unit testing* - [Range-V3](https://github.com/ericniebler/range-v3.git) -- a Ranges library that served as the basis for Ranges component of C++20 and later. -- [BTAS](http://github.com/ValeevGroup/BTAS), tag 62d57d9b1e0c733b4b547bc9cfdd07047159dbca . If usable BTAS installation is not found, TiledArray will download and compile +- [BTAS](http://github.com/ValeevGroup/BTAS) -- a generic dense local tensor framework. If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*. 
-- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 8abd78b8a304a88b951449d8cb127f5a91f27721 . - Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. +- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness) -- a multiresolution numerical calculus framework; + TiledArray only uses its distributed task-based programming model ("MADworld"). If usable MADNESS installation is not found, TiledArray will download and compile MADNESS from source. *This is the recommended way to compile MADNESS for all users*. - A detailed list of MADNESS prerequisites can be found at [MADNESS' INSTALL file](https://github.com/m-a-d-n-e-s-s/madness/blob/master/INSTALL_CMake); - it also also contains detailed - MADNESS compilation instructions. + A detailed list of MADNESS dependencies can be found at [MADNESS' INSTALL file](https://github.com/m-a-d-n-e-s-s/madness/blob/master/INSTALL_CMake); + it also contains detailed MADNESS compilation instructions. +- [Umpire C++ allocator](https://github.com/ValeevGroup/umpire-cxx-allocator) -- a C++ allocator for [LLNL/Umpire](https://github.com/LLNL/Umpire), a portable memory manager. *It is recommended to let TiledArray build the Umpire C++ allocator and Umpire itself from source.* Compiling MADNESS requires the following prerequisites: - An implementation of Message Passing Interface version 2 or 3, with support for `MPI_THREAD_MULTIPLE`. - - (optional) - Intel Thread Building Blocks (TBB), available in a [commercial](software.intel.com/tbb) or - an [open-source](https://www.threadingbuildingblocks.org/) form + - (recommended) + [PaRSEC](https://github.com/ICLDisco/parsec) -- a distributed programming model used for local task scheduling in MADNESS. 
Compiling BTAS requires the following prerequisites: - [blaspp](https://bitbucket.org/icl/blaspp.git) -- C++ API for BLAS @@ -68,10 +83,9 @@ Optional prerequisites: - [CUDA compiler and runtime](https://developer.nvidia.com/cuda-zone) -- for execution on NVIDIA's CUDA-enabled accelerators. CUDA 12 or later is required. - [HIP/ROCm compiler and runtime](https://developer.nvidia.com/cuda-zone) -- for execution on AMD's ROCm-enabled accelerators. Note that TiledArray does not use ROCm directly but its C++ Heterogeneous-Compute Interface for Portability, `HIP`; although HIP can also be used to program CUDA-enabled devices, in TiledArray it is used only to program ROCm devices, hence ROCm and HIP will be used interchangeably. - [LibreTT](github.com/victor-anisimov/LibreTT) -- free tensor transpose library for CUDA, ROCm, and SYCL platforms that is based on the [original cuTT library](github.com/ap-hynninen/cutt) extended to provide thread-safety improvements (via github.com/ValeevGroup/cutt) and extended to non-CUDA platforms by [@victor-anisimov](github.com/victor-anisimov) (tag 6eed30d4dd2a5aa58840fe895dcffd80be7fbece). - - [Umpire](github.com/LLNL/Umpire) -- portable memory manager for heterogeneous platforms (tag 8c85866107f78a58403e20a2ae8e1f24c9852287). - [Doxygen](http://www.doxygen.nl/) -- for building documentation (version 1.8.12 or later). - [ScaLAPACK](http://www.netlib.org/scalapack/) -- a distributed-memory linear algebra package. 
If detected, the following C++ components will also be sought and downloaded, if missing: - - [scalapackpp](https://github.com/wavefunction91/scalapackpp.git) -- a modern C++ wrapper for ScaLAPACK (tag 6397f52cf11c0dfd82a79698ee198a2fce515d81); pulls and builds the following additional prerequisite + - [scalapackpp](https://github.com/wavefunction91/scalapackpp.git) -- a modern C++ wrapper for ScaLAPACK; pulls and builds the following additional prerequisite - [blacspp](https://github.com/wavefunction91/blacspp.git) -- a modern C++ wrapper for BLACS - Python3 interpreter -- to test (optionally-built) Python bindings - [TTG](https://github.com/TESSEorg/ttg.git) -- C++ implementation of the Template Task Graph programming model for fine-grained flow-graph composition of distributed memory programs (tag 3fe4a06dbf4b05091269488aab38223da1f8cb8e). diff --git a/README.md b/README.md index 8742d1e774..0aa02e607a 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ $ cmake --build build $ cmake --build build --target install ``` Here `` is the appropriate toolchain file from [the Valeev Group CMake kit](https://github.com/ValeevGroup/kit-cmake/tree/master/toolchains); an alternative is -to provide your own toolchain file. On some standard platforms (e.g. MacOS) the toolchain file can be skipped. +to provide your own toolchain file. On most standard platforms (e.g. Ubuntu, MacOS) the toolchain file can be skipped. The detailed instructions can be found in [INSTALL.md](https://github.com/ValeevGroup/tiledarray/blob/master/INSTALL.md). 
diff --git a/cmake/modules/FindOrFetchUmpireCXXAllocator.cmake b/cmake/modules/FindOrFetchUmpireCXXAllocator.cmake new file mode 100644 index 0000000000..09f4d451e1 --- /dev/null +++ b/cmake/modules/FindOrFetchUmpireCXXAllocator.cmake @@ -0,0 +1,40 @@ +# try find_package +if (NOT TARGET umpire-cxx-allocator) + include (FindPackageRegimport) + find_package_regimport(umpire-cxx-allocator QUIET CONFIG) + if (TARGET umpire-cxx-allocator) + message(STATUS "Found umpire-cxx-allocator CONFIG at ${umpire-cxx-allocator_CONFIG}") + endif (TARGET umpire-cxx-allocator) +endif (NOT TARGET umpire-cxx-allocator) + +# if not found, build via FetchContent +if (NOT TARGET umpire-cxx-allocator) + + if (TA_CUDA) + set(UMPIRE_ENABLE_CUDA ON CACHE BOOL "Enable CUDA support in Umpire") + endif() + if (TA_HIP) + set(UMPIRE_ENABLE_HIP ON CACHE BOOL "Enable HIP support in Umpire") + endif() + + include(FetchContent) + FetchContent_Declare( + umpire-cxx-allocator + GIT_REPOSITORY https://github.com/ValeevGroup/umpire-cxx-allocator.git + GIT_TAG ${TA_TRACKED_UMPIRE-CXX-ALLOCATOR_TAG} + ) + FetchContent_MakeAvailable(umpire-cxx-allocator) + FetchContent_GetProperties(umpire-cxx-allocator + SOURCE_DIR UMPIRE-CXX-ALLOCATOR_SOURCE_DIR + BINARY_DIR UMPIRE-CXX-ALLOCATOR_BINARY_DIR + ) + + # set umpire-cxx-allocator_CONFIG to the install location so that we know where to find it + set(umpire-cxx-allocator_CONFIG ${CMAKE_INSTALL_PREFIX}/${UMPIRE-CXX-ALLOCATOR_CMAKE_DIR}/umpire-cxx-allocator-config.cmake) + +endif(NOT TARGET umpire-cxx-allocator) + +# postcond check +if (NOT TARGET umpire-cxx-allocator) + message(FATAL_ERROR "FindOrFetchUmpireCXXAllocator could not make umpire-cxx-allocator target available") +endif(NOT TARGET umpire-cxx-allocator) diff --git a/cmake/tiledarray-config.cmake.in b/cmake/tiledarray-config.cmake.in index abff1952ea..27b2c18787 100644 --- a/cmake/tiledarray-config.cmake.in +++ b/cmake/tiledarray-config.cmake.in @@ -38,9 +38,6 @@ if(NOT TARGET MADworld) include( 
CMakeFindDependencyMacro ) find_dependency(MADNESS 0.10.1 CONFIG REQUIRED COMPONENTS world PATHS "${MADNESS_CONFIG_DIR}" NO_DEFAULT_PATH) endif() -if(NOT TARGET tiledarray) - include("${CMAKE_CURRENT_LIST_DIR}/tiledarray-targets.cmake") -endif() # if TA is a CUDA-dependent library it needs CUDA to link properly ... unfortunately CMake is not able to do this correctly # see https://gitlab.kitware.com/cmake/cmake/issues/18614 @@ -66,8 +63,8 @@ if(TILEDARRAY_HAS_CUDA) INTERFACE_LINK_LIBRARIES "${_ta_interface_libs}") endif() -set(TILEDARRAY_HAS_SCALAPACK "@ENABLE_SCALAPACK@" ) -if(TILEDARRAY_HAS_SCALAPACK) +set(TA_SCALAPACK "@TA_SCALAPACK@" ) +if(TA_SCALAPACK) include( CMakeFindDependencyMacro ) get_filename_component(blacspp_DIR "@blacspp_CONFIG@" DIRECTORY) find_dependency( blacspp CONFIG REQUIRED HINTS "${blacspp_DIR}" ) @@ -75,6 +72,15 @@ if(TILEDARRAY_HAS_SCALAPACK) find_dependency( scalapackpp CONFIG REQUIRED HINTS "${scalapackpp_DIR}" ) endif() +if (NOT TARGET umpire-cxx-allocator) + get_filename_component(umpire-cxx-allocator_DIR "@umpire-cxx-allocator_CONFIG@" DIRECTORY) + find_dependency(umpire-cxx-allocator 1.0.0 QUIET CONFIG REQUIRED HINTS "${umpire-cxx-allocator_DIR}") +endif() + +if(NOT TARGET tiledarray) + include("${CMAKE_CURRENT_LIST_DIR}/tiledarray-targets.cmake") +endif() + # Set the tiledarray compiled library target set(TILEDARRAY_LIBRARIES tiledarray) diff --git a/examples/device/device_task.cpp b/examples/device/device_task.cpp index bfd75ac51c..08f61edf31 100644 --- a/examples/device/device_task.cpp +++ b/examples/device/device_task.cpp @@ -9,8 +9,8 @@ #include using value_type = double; -using tensor_type = TA::btasUMTensorVarray; -using tile_type = TA::Tile; +using tensor_type = TiledArray::btasUMTensorVarray; +using tile_type = TiledArray::Tile; /// verify the elements in tile is equal to value void verify(const tile_type& tile, value_type value, std::size_t index) { @@ -34,7 +34,7 @@ tile_type scale(const tile_type& arg, value_type a, using 
Storage = typename tile_type::tensor_type::storage_type; Storage result_storage; auto result_range = arg.range(); - make_device_storage(result_storage, arg.size(), stream); + TiledArray::make_device_storage(result_storage, arg.size(), stream); typename tile_type::tensor_type result(std::move(result_range), std::move(result_storage)); @@ -42,10 +42,11 @@ tile_type scale(const tile_type& arg, value_type a, /// copy the original Tensor auto& queue = TiledArray::BLASQueuePool::queue(stream); - blas::copy(result.size(), arg.data(), 1, device_data(result.storage()), 1, - queue); + blas::copy(result.size(), arg.data(), 1, + TiledArray::device_data(result.storage()), 1, queue); - blas::scal(result.size(), a, device_data(result.storage()), 1, queue); + blas::scal(result.size(), a, TiledArray::device_data(result.storage()), 1, + queue); // std::stringstream stream_str; // stream_str << stream; diff --git a/external/cuda.cmake b/external/cuda.cmake index 74bd953e65..d174b67f7a 100644 --- a/external/cuda.cmake +++ b/external/cuda.cmake @@ -44,11 +44,6 @@ sanitize_cuda_implicit_directories() message(STATUS "CMAKE Implicit Include Directories: ${CMAKE_CUDA_IMPLICIT_INCLUDE_DIRECTORIES}") message(STATUS "CMAKE Implicit Link Directories: ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") -## -## Umpire -## -include(external/umpire.cmake) - ## ## LibreTT ## diff --git a/external/hip.cmake b/external/hip.cmake index a76f543454..da75138acb 100644 --- a/external/hip.cmake +++ b/external/hip.cmake @@ -20,11 +20,6 @@ foreach (library hipblas;rocthrust) endif() endforeach() -## -## Umpire -## -include(external/umpire.cmake) - ## ## LibreTT ## diff --git a/external/umpire.cmake b/external/umpire.cmake deleted file mode 100644 index 4a1a98cea9..0000000000 --- a/external/umpire.cmake +++ /dev/null @@ -1,244 +0,0 @@ -## -## find Umpire -## - -if (NOT TARGET TiledArray_UMPIRE) - -find_path(_UMPIRE_INSTALL_DIR NAMES include/umpire/Umpire.hpp HINTS ${UMPIRE_INSTALL_DIR}) - -# if user provides UMPIRE, 
use it -if(_UMPIRE_INSTALL_DIR) - - ## check umpire -# set(umpire_DIR ${UMPIRE_INSTALL_DIR}/share/umpire/cmake) -# find_package(umpire REQUIRED) - message(STATUS "Umpire found at ${_UMPIRE_INSTALL_DIR}") - - add_library(TiledArray_UMPIRE INTERFACE) - - set_target_properties( - TiledArray_UMPIRE - PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES - "${_UMPIRE_INSTALL_DIR}/include" - INTERFACE_LINK_LIBRARIES - "umpire" - INTERFACE_LINK_DIRECTORIES - "${_UMPIRE_INSTALL_DIR}/lib/" - ) - - install(TARGETS TiledArray_UMPIRE EXPORT tiledarray COMPONENT tiledarray) - -elseif(TA_EXPERT) - - message("** Umpire was not found") - message(STATUS "** Downloading and building Umpire is explicitly disabled in EXPERT mode") - -else() - - ## build umpire automatically - - include(ExternalProject) - - # to pass CMAKE_C_* vars to external project - enable_language(C) - - # set source and build path for Umpire in the TiledArray project - set(EXTERNAL_SOURCE_DIR ${FETCHCONTENT_BASE_DIR}/umpire-src) - set(EXTERNAL_BUILD_DIR ${FETCHCONTENT_BASE_DIR}/umpire-build) - set(EXTERNAL_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}) - - if (NOT UMPIRE_URL) - set(UMPIRE_URL https://github.com/LLNL/Umpire.git) - endif (NOT UMPIRE_URL) - if (NOT UMPIRE_TAG) - set(UMPIRE_TAG ${TA_TRACKED_UMPIRE_TAG}) - endif (NOT UMPIRE_TAG) - - message("** Will clone Umpire from ${UMPIRE_URL}") - - if (TA_ASSERT_POLICY STREQUAL TA_ASSERT_IGNORE) - set(enable_umpire_asserts OFF) - else() - set(enable_umpire_asserts ON) - endif() - - # as of now BLT only supports up to C++20, so limit CMAKE_CXX_STANDARD - set(BLT_CXX_STD ${CMAKE_CXX_STANDARD}) - set(BLT_CXX_STD_MAX 20) - if (BLT_CXX_STD GREATER ${BLT_CXX_STD_MAX}) - set(BLT_CXX_STD ${BLT_CXX_STD_MAX}) - endif() - - if (CMAKE_PREFIX_PATH) - set(UMPIRE_CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH}) - endif() - - set(UMPIRE_CMAKE_ARGS - -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_DIR} - -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} - -DCMAKE_POSITION_INDEPENDENT_CODE=${CMAKE_POSITION_INDEPENDENT_CODE} 
- -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_PREFIX_PATH=${UMPIRE_CMAKE_PREFIX_PATH} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} - -DCMAKE_C_FLAGS_RELWITHDEBINFO=${CMAKE_C_FLAGS_RELWITHDEBINFO} - -DCMAKE_C_FLAGS_MINSIZEREL=${CMAKE_C_FLAGS_MINSIZEREL} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_RELWITHDEBINFO=${CMAKE_CXX_FLAGS_RELWITHDEBINFO} - -DCMAKE_CXX_FLAGS_MINSIZEREL=${CMAKE_CXX_FLAGS_MINSIZEREL} - -DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD} - -DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS} - -DCMAKE_AR=${CMAKE_AR} - -DBLT_CXX_STD=c++${BLT_CXX_STD} - -DENABLE_BENCHMARKS=OFF - -DENABLE_OPENMP=OFF - -DENABLE_TESTS=OFF - -DENABLE_EXAMPLES=OFF - -DENABLE_LOGGING=OFF - -DENABLE_ASSERTS=${enable_umpire_asserts} - -DENABLE_CLANGFORMAT=OFF - ) - - # caveat: on recent Ubuntu default libstdc++ provides filesystem, but if using older gcc (gcc-8) must link against - # libstdc++fs: https://bugs.launchpad.net/ubuntu/+source/gcc-8/+bug/1824721 ... skip the use of std::filesystem altogether with pre-9 gcc!!! 
- if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9) - # disable by populating cache with compile test result variable - list(APPEND UMPIRE_CMAKE_ARGS - -DUMPIRE_ENABLE_FILESYSTEM=OFF) - endif() - - if (TA_CUDA) - list(APPEND UMPIRE_CMAKE_ARGS - -DENABLE_CUDA=ON - -DCMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER} - -DCMAKE_CUDA_STANDARD=${CMAKE_CUDA_STANDARD} - -DCMAKE_CUDA_EXTENSIONS=${CMAKE_CUDA_EXTENSIONS} - -DCMAKE_CUDA_HOST_COMPILER=${CMAKE_CUDA_HOST_COMPILER} - -DCUDA_TOOLKIT_ROOT_DIR=${CUDAToolkit_ROOT} - ) - if (DEFINED CMAKE_CUDA_ARCHITECTURES) - list(APPEND UMPIRE_CMAKE_ARGS "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}") - endif(DEFINED CMAKE_CUDA_ARCHITECTURES) - # BLT will need FindCUDA until https://github.com/LLNL/blt/pull/585 is merged - # with CMake 3.28.1 needs to set CMP0146 to OLD - if (POLICY CMP0146) - list(APPEND UMPIRE_CMAKE_ARGS -DCMAKE_POLICY_DEFAULT_CMP0146=OLD) - endif() - # as of CMake 3.28+ FindCUDA seems to require CUDA_TOOLKIT_ROOT_DIR to be defined - if (DEFINED CUDA_TOOLKIT_ROOT_DIR) - list(APPEND UMPIRE_CMAKE_ARGS "-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}") - endif() - endif(TA_CUDA) - if (TA_HIP) - list(APPEND UMPIRE_CMAKE_ARGS - -DENABLE_HIP=ON - -DCMAKE_HIP_COMPILER=${CMAKE_HIP_COMPILER} - -DCMAKE_HIP_STANDARD=${CMAKE_HIP_STANDARD} - -DCMAKE_HIP_EXTENSIONS=${CMAKE_HIP_EXTENSIONS} - ) - if (DEFINED CMAKE_HIP_ARCHITECTURES) - list(APPEND UMPIRE_CMAKE_ARGS "-DCMAKE_HIP_ARCHITECTURES=${CMAKE_HIP_ARCHITECTURES}") - endif(DEFINED CMAKE_HIP_ARCHITECTURES) - endif(TA_HIP) - if (CMAKE_TOOLCHAIN_FILE) - set(UMPIRE_CMAKE_ARGS "${UMPIRE_CMAKE_ARGS}" - "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}" - ) - endif(CMAKE_TOOLCHAIN_FILE) - if (DEFINED CMAKE_INTERPROCEDURAL_OPTIMIZATION) - set(UMPIRE_CMAKE_ARGS "${UMPIRE_CMAKE_ARGS}" - "-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}") - endif(DEFINED CMAKE_INTERPROCEDURAL_OPTIMIZATION) - if (DEFINED 
CMAKE_INTERPROCEDURAL_OPTIMIZATION_${CMAKE_BUILD_TYPE}) - set(UMPIRE_CMAKE_ARGS "${UMPIRE_CMAKE_ARGS}" - "-DCMAKE_INTERPROCEDURAL_OPTIMIZATION_${CMAKE_BUILD_TYPE}=${CMAKE_INTERPROCEDURAL_OPTIMIZATION_${CMAKE_BUILD_TYPE}}") - endif(DEFINED CMAKE_INTERPROCEDURAL_OPTIMIZATION_${CMAKE_BUILD_TYPE}) - - foreach(lang C CXX CUDA) - if (DEFINED CMAKE_${lang}_COMPILER_LAUNCHER) - list(APPEND UMPIRE_CMAKE_ARGS - "-DCMAKE_${lang}_COMPILER_LAUNCHER=${CMAKE_${lang}_COMPILER_LAUNCHER}") - endif() - endforeach() - - if (BUILD_SHARED_LIBS) - set(UMPIRE_DEFAULT_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) - else(BUILD_SHARED_LIBS) - set(UMPIRE_DEFAULT_LIBRARY_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) - endif(BUILD_SHARED_LIBS) - - # N.B. Ninja needs spelling out the byproducts of custom targets, see https://cmake.org/cmake/help/v3.3/policy/CMP0058.html - set(UMPIRE_BUILD_BYPRODUCTS "${EXTERNAL_BUILD_DIR}/lib/libumpire${UMPIRE_DEFAULT_LIBRARY_SUFFIX}") - message(STATUS "custom target Umpire is expected to build these byproducts: ${UMPIRE_BUILD_BYPRODUCTS}") - - ExternalProject_Add(Umpire - PREFIX ${FETCHCONTENT_BASE_DIR} - STAMP_DIR ${FETCHCONTENT_BASE_DIR}/umpire-ep-artifacts - TMP_DIR ${FETCHCONTENT_BASE_DIR}/umpire-ep-artifacts # needed in case CMAKE_INSTALL_PREFIX is not writable - #--Download step-------------- - DOWNLOAD_DIR ${EXTERNAL_SOURCE_DIR} - GIT_REPOSITORY ${UMPIRE_URL} - GIT_TAG ${UMPIRE_TAG} - #--Configure step------------- - SOURCE_DIR ${EXTERNAL_SOURCE_DIR} - LIST_SEPARATOR :: - UPDATE_DISCONNECTED 1 - CMAKE_ARGS - ${UMPIRE_CMAKE_ARGS} - ${EXTERNAL_SOURCE_DIR} - #--Build step----------------- - BINARY_DIR ${EXTERNAL_BUILD_DIR} - BUILD_COMMAND ${CMAKE_COMMAND} --build . -v - BUILD_BYPRODUCTS ${UMPIRE_BUILD_BYPRODUCTS} - #--Install step--------------- - INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "Umpire will be installed during TiledArray's installation." 
- ) - - # TiledArray_UMPIRE target depends on existence of these directories to be usable from the build tree at configure time - execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${EXTERNAL_SOURCE_DIR}/src/tpl/umpire/camp/include") - execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${EXTERNAL_BUILD_DIR}/src/tpl/umpire/camp/include") - execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${EXTERNAL_BUILD_DIR}/include") - - # do install of Umpire as part of building TiledArray's install target - install(CODE - "execute_process( - COMMAND \"${CMAKE_COMMAND}\" \"--build\" \".\" \"--target\" \"install\" - WORKING_DIRECTORY \"${EXTERNAL_BUILD_DIR}\" - RESULT_VARIABLE error_code) - if(error_code) - message(FATAL_ERROR \"Failed to install Umpire\") - endif() - ") - - # Add Umpire dependency to External - add_dependencies(External-tiledarray Umpire) - - set(_UMPIRE_INSTALL_DIR ${EXTERNAL_INSTALL_DIR}) - - - add_library(TiledArray_UMPIRE INTERFACE) - - set_target_properties( - TiledArray_UMPIRE - PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES - "$;$;$;$;$;$;$" - INTERFACE_LINK_LIBRARIES - "$;$" - INTERFACE_COMPILE_DEFINITIONS - FMT_HEADER_ONLY=1 - ) - -install(TARGETS TiledArray_UMPIRE EXPORT tiledarray COMPONENT tiledarray) - -endif(_UMPIRE_INSTALL_DIR) - -#TODO test Umpire - -endif(NOT TARGET TiledArray_UMPIRE) diff --git a/external/versions.cmake b/external/versions.cmake index 84d4d27284..aa0ff63a4b 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -1,7 +1,7 @@ # for each dependency track both current and previous id (the variable for the latter must contain PREVIOUS) # to be able to auto-update them -set(TA_TRACKED_VGCMAKEKIT_TAG 6ecd3689f3d33d4426b47f8b68ba81b0efb7c80b) +set(TA_TRACKED_VGCMAKEKIT_TAG 4c949fd7ccfe4b4f0e103288a5c0f557c6e740c0) # N.B. 
may need to update INSTALL.md manually with the CUDA-specific version set(TA_TRACKED_EIGEN_VERSION 3.3.5) @@ -23,8 +23,8 @@ set(TA_TRACKED_BTAS_PREVIOUS_TAG 1cfcb12647c768ccd83b098c64cda723e1275e49) set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece) set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 354e0ccee54aeb2f191c3ce2c617ebf437e49d83) -set(TA_TRACKED_UMPIRE_TAG 8c85866107f78a58403e20a2ae8e1f24c9852287) -set(TA_TRACKED_UMPIRE_PREVIOUS_TAG v2024.02.1) +set(TA_TRACKED_UMPIRE-CXX-ALLOCATOR_TAG a48ad360e20b9733263768b54aa24afe5894faa4) +set(TA_TRACKED_UMPIRE-CXX-ALLOCATOR_PREVIOUS_TAG 583ec579917103cbfee3ba5b67bab1582711b06e) set(TA_TRACKED_SCALAPACKPP_TAG 6397f52cf11c0dfd82a79698ee198a2fce515d81) set(TA_TRACKED_SCALAPACKPP_PREVIOUS_TAG 711ef363479a90c88788036f9c6c8adb70736cbf ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 735340c596..5380295ea4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -133,7 +133,6 @@ TiledArray/expressions/unary_expr.h TiledArray/expressions/index_list.h TiledArray/external/btas.h TiledArray/external/madness.h -TiledArray/external/umpire.h TiledArray/host/env.h TiledArray/math/blas.h TiledArray/math/gemm_helper.h @@ -262,7 +261,7 @@ set_source_files_properties( # the list of libraries on which TiledArray depends on, will be cached later # when FetchContent umpire: set(_TILEDARRAY_DEPENDENCIES MADworld TiledArray_Eigen BTAS::BTAS blaspp_headers umpire) -set(_TILEDARRAY_DEPENDENCIES MADworld TiledArray_Eigen BTAS::BTAS blaspp_headers TiledArray_UMPIRE range-v3::range-v3) +set(_TILEDARRAY_DEPENDENCIES MADworld TiledArray_Eigen BTAS::BTAS blaspp_headers umpire-cxx-allocator range-v3::range-v3) if(TILEDARRAY_HAS_CUDA OR TILEDARRAY_HAS_HIP) diff --git a/src/TiledArray/external/device.h b/src/TiledArray/external/device.h index 76d769b472..64134f6be0 100644 --- a/src/TiledArray/external/device.h +++ b/src/TiledArray/external/device.h @@ -29,7 +29,7 @@ #include #include -#include +#include #if 
defined(TILEDARRAY_HAS_HIP) #include @@ -49,7 +49,10 @@ #include #include -#include +#include + +#include +#include namespace TiledArray::detail { @@ -908,16 +911,16 @@ device::Stream stream_for(const Range& range) { namespace detail { inline umpire::Allocator& get_um_allocator::operator()() { - return deviceEnv::instance()->um_allocator(); + return TiledArray::device::Env::instance()->um_allocator(); } inline umpire::Allocator& get_pinned_allocator::operator()() { - return deviceEnv::instance()->pinned_allocator(); + return TiledArray::device::Env::instance()->pinned_allocator(); } #endif // TILEDARRAY_HAS_DEVICE -} // namespace detail +} // namespace device #ifdef TILEDARRAY_HAS_CUDA namespace nvidia { diff --git a/src/TiledArray/external/umpire.h b/src/TiledArray/external/umpire.h deleted file mode 100644 index ac23a60260..0000000000 --- a/src/TiledArray/external/umpire.h +++ /dev/null @@ -1,324 +0,0 @@ -/* - * This file is a part of TiledArray. - * Copyright (C) 2021 Virginia Tech - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Eduard Valeyev - * Department of Chemistry, Virginia Tech - * Jan 31, 2018 - * - */ - -#ifndef TILEDARRAY_EXTERNAL_UMPIRE_H___INCLUDED -#define TILEDARRAY_EXTERNAL_UMPIRE_H___INCLUDED - -#include - -#include - -// for memory management -#include -#include -#include - -#include - -#include -#include - -namespace TiledArray { - -namespace detail { - -struct NullLock { - static void lock() {} - static void unlock() {} -}; - -template -class MutexLock { - static std::mutex mtx_; - - public: - static void lock() { mtx_.lock(); } - static void unlock() { mtx_.unlock(); } -}; - -template -std::mutex MutexLock::mtx_; - -} // namespace detail - -/// wraps a Umpire allocator into a -/// *standard-compliant* C++ allocator - -/// Optionally can be made thread safe by providing an appropriate \p StaticLock -/// \details based on the boilerplate by Howard Hinnant -/// (https://howardhinnant.github.io/allocator_boilerplate.html) -/// \tparam T type of allocated objects -/// \tparam StaticLock a type providing static `lock()` and `unlock()` methods ; -/// defaults to NullLock which does not lock -template -class umpire_based_allocator_impl { - public: - using value_type = T; - using pointer = value_type*; - using const_pointer = - typename std::pointer_traits::template rebind; - using void_pointer = - typename std::pointer_traits::template rebind; - using const_void_pointer = - typename std::pointer_traits::template rebind; - - using reference = T&; - using const_reference = const T&; - - using difference_type = - typename std::pointer_traits::difference_type; - using size_type = std::make_unsigned_t; - - umpire_based_allocator_impl(umpire::Allocator* umpalloc) noexcept - : umpalloc_(umpalloc) {} - - template - umpire_based_allocator_impl( - const umpire_based_allocator_impl& rhs) noexcept - : umpalloc_(rhs.umpalloc_) {} - - /// allocates memory using umpire dynamic pool - pointer allocate(size_t n) { - TA_ASSERT(umpalloc_); - - // QuickPool::allocate_internal does not 
handle zero-size allocations - size_t nbytes = n == 0 ? 1 : n * sizeof(T); - pointer result = nullptr; - auto* allocation_strategy = umpalloc_->getAllocationStrategy(); - - // critical section - StaticLock::lock(); - // this, instead of umpalloc_->allocate(n*sizeof(T)), profiles memory use - // even if introspection is off - result = - static_cast(allocation_strategy->allocate_internal(nbytes)); - StaticLock::unlock(); - - return result; - } - - /// deallocate memory using umpire dynamic pool - void deallocate(pointer ptr, size_t n) { - TA_ASSERT(umpalloc_); - - // QuickPool::allocate_internal does not handle zero-size allocations - const auto nbytes = n == 0 ? 1 : n * sizeof(T); - auto* allocation_strategy = umpalloc_->getAllocationStrategy(); - - // N.B. with multiple threads would have to do this test in - // the critical section of Umpire's ThreadSafeAllocator::deallocate - StaticLock::lock(); - TA_ASSERT(nbytes <= allocation_strategy->getCurrentSize()); - // this, instead of umpalloc_->deallocate(ptr, nbytes), profiles memory use - // even if introspection is off - allocation_strategy->deallocate_internal(ptr, nbytes); - StaticLock::unlock(); - } - - /// @return the underlying Umpire allocator - const umpire::Allocator* umpire_allocator() const { return umpalloc_; } - - private: - umpire::Allocator* umpalloc_; -}; // class umpire_based_allocator_impl - -template -bool operator==( - const umpire_based_allocator_impl& lhs, - const umpire_based_allocator_impl& rhs) noexcept { - return lhs.umpire_allocator() == rhs.umpire_allocator(); -} - -template -bool operator!=( - const umpire_based_allocator_impl& lhs, - const umpire_based_allocator_impl& rhs) noexcept { - return !(lhs == rhs); -} - -template -class umpire_based_allocator - : public umpire_based_allocator_impl { - public: - using base_type = umpire_based_allocator_impl; - using typename base_type::const_pointer; - using typename base_type::const_reference; - using typename base_type::pointer; - using 
typename base_type::reference; - using typename base_type::value_type; - - umpire_based_allocator() noexcept : base_type(&UmpireAllocatorAccessor{}()) {} - - template - umpire_based_allocator( - const umpire_based_allocator& - rhs) noexcept - : base_type( - static_cast&>( - rhs)) {} - - template - friend bool operator==( - const umpire_based_allocator& - lhs, - const umpire_based_allocator& - rhs) noexcept; -}; // class umpire_based_allocator - -template -bool operator==( - const umpire_based_allocator& lhs, - const umpire_based_allocator& - rhs) noexcept { - return lhs.umpire_allocator() == rhs.umpire_allocator(); -} - -template -bool operator!=( - const umpire_based_allocator& lhs, - const umpire_based_allocator& - rhs) noexcept { - return !(lhs == rhs); -} - -/// see -/// https://stackoverflow.com/questions/21028299/is-this-behavior-of-vectorresizesize-type-n-under-c11-and-boost-container/21028912#21028912 -template -class default_init_allocator : public A { - using a_t = std::allocator_traits; - - public: - using reference = typename A::reference; // std::allocator::reference - // deprecated in C++17, but thrust - // still relying on this - using const_reference = typename A::const_reference; // ditto - - template - struct rebind { - using other = - default_init_allocator>; - }; - - using A::A; - - default_init_allocator(A const& a) noexcept : A(a) {} - default_init_allocator(A&& a) noexcept : A(std::move(a)) {} - - template - void construct(U* ptr) noexcept( - std::is_nothrow_default_constructible::value) { - ::new (static_cast(ptr)) U; - } - template - void construct(U* ptr, Args&&... 
args) { - a_t::construct(static_cast(*this), ptr, std::forward(args)...); - } -}; - -} // namespace TiledArray - -namespace madness { -namespace archive { - -template -struct ArchiveLoadImpl> { - static inline void load( - const Archive& ar, - TiledArray::umpire_based_allocator_impl& allocator) { - std::string allocator_name; - ar & allocator_name; - allocator = TiledArray::umpire_based_allocator_impl( - umpire::ResourceManager::getInstance().getAllocator(allocator_name)); - } -}; - -template -struct ArchiveStoreImpl< - Archive, TiledArray::umpire_based_allocator_impl> { - static inline void store( - const Archive& ar, - const TiledArray::umpire_based_allocator_impl& allocator) { - ar & allocator.umpire_allocator()->getName(); - } -}; - -template -struct ArchiveLoadImpl> { - static inline void load(const Archive& ar, - TiledArray::default_init_allocator& allocator) { - if constexpr (!std::allocator_traits::is_always_equal::value) { - A base_allocator; - ar & base_allocator; - allocator = TiledArray::default_init_allocator(base_allocator); - } - } -}; - -template -struct ArchiveStoreImpl> { - static inline void store( - const Archive& ar, - const TiledArray::default_init_allocator& allocator) { - if constexpr (!std::allocator_traits::is_always_equal::value) { - ar& static_cast(allocator); - } - } -}; - -} // namespace archive -} // namespace madness - -namespace madness { -namespace archive { - -template -struct ArchiveLoadImpl> { - static inline void load( - const Archive& ar, - TiledArray::umpire_based_allocator& allocator) { - allocator = TiledArray::umpire_based_allocator{}; - } -}; - -template -struct ArchiveStoreImpl> { - static inline void store( - const Archive& ar, - const TiledArray::umpire_based_allocator< - T, StaticLock, UmpireAllocatorAccessor>& allocator) {} -}; - -} // namespace archive -} // namespace madness - -#endif // TILEDARRAY_EXTERNAL_UMPIRE_H___INCLUDED diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h index e33aea5c18..00c36a5092 
100644 --- a/src/TiledArray/fwd.h +++ b/src/TiledArray/fwd.h @@ -35,7 +35,7 @@ class aligned_allocator; } // namespace Eigen // fwddecl host_allocator -namespace TiledArray { +namespace umpire { namespace detail { struct get_host_allocator; struct NullLock; @@ -44,11 +44,19 @@ class MutexLock; } // namespace detail template -class umpire_based_allocator; +class allocator; template > class default_init_allocator; +}; // namespace umpire + +// fwddecl host_allocator +namespace TiledArray { +namespace detail { +struct get_host_allocator; +} // namespace detail + namespace host { class Env; } @@ -56,10 +64,9 @@ using hostEnv = host::Env; /// pooled thread-safe host memory allocator template -using host_allocator = - default_init_allocator, - detail::get_host_allocator>>; +using host_allocator = umpire::default_init_allocator< + T, umpire::allocator, + detail::get_host_allocator>>; } // namespace TiledArray namespace madness { @@ -112,15 +119,15 @@ struct get_pinned_allocator; /// pooled thread-safe unified memory (UM) allocator for device computing template -using device_um_allocator = default_init_allocator< - T, umpire_based_allocator, - detail::get_um_allocator>>; +using device_um_allocator = umpire::default_init_allocator< + T, umpire::allocator, + detail::get_um_allocator>>; /// pooled thread-safe pinned host memory allocator for device computing template -using device_pinned_allocator = default_init_allocator< - T, umpire_based_allocator, - detail::get_pinned_allocator>>; +using device_pinned_allocator = umpire::default_init_allocator< + T, umpire::allocator, + detail::get_pinned_allocator>>; /// \brief a vector that lives in UM, with most operations /// implemented on the CPU diff --git a/src/TiledArray/host/env.cpp b/src/TiledArray/host/env.cpp index 16d3a71a50..47823a0604 100644 --- a/src/TiledArray/host/env.cpp +++ b/src/TiledArray/host/env.cpp @@ -23,6 +23,11 @@ #include +#include + +#include +#include + namespace TiledArray { namespace detail { @@ -33,4 
+38,67 @@ umpire::Allocator& get_host_allocator::operator()() { } // namespace detail +namespace host { + +std::unique_ptr& Env::instance() { + if (!instance_accessor()) { + initialize(); + } + return instance_accessor(); +} + +void Env::initialize(World& world, const std::uint64_t host_alloc_limit, + const std::uint64_t page_size) { + static std::mutex mtx; // to make initialize() reentrant + std::scoped_lock lock{mtx}; + // only the winner of the lock race gets to initialize + if (instance_accessor() == nullptr) { + // uncomment to debug umpire ops + // + // umpire::util::Logger::getActiveLogger()->setLoggingMsgLevel( + // umpire::util::message::Debug); + + // make thread-safe size-limited pool of host memory + + auto& rm = umpire::ResourceManager::getInstance(); + + // N.B. we don't rely on Umpire introspection (even for profiling) + constexpr auto introspect = false; + + // use QuickPool for host memory allocation, with min grain of 1 page + auto host_size_limited_alloc = + rm.makeAllocator( + "SizeLimited_HOST", rm.getAllocator("HOST"), host_alloc_limit); + auto host_dynamic_pool = + rm.makeAllocator( + "QuickPool_SizeLimited_HOST", host_size_limited_alloc, page_size, + page_size, /* alignment */ TILEDARRAY_ALIGN_SIZE); + + auto host_env = std::unique_ptr(new Env(world, host_dynamic_pool)); + instance_accessor() = std::move(host_env); + } +} + +World& Env::world() const { return *world_; } + +umpire::Allocator& Env::host_allocator() { return host_allocator_; } + +std::size_t Env::host_allocator_getActualHighWatermark() { + TA_ASSERT(dynamic_cast( + host_allocator_.getAllocationStrategy()) != nullptr); + return dynamic_cast( + host_allocator_.getAllocationStrategy()) + ->getActualHighwaterMark(); +} + +Env::Env(World& world, umpire::Allocator host_alloc) + : world_(&world), host_allocator_(host_alloc) {} + +std::unique_ptr& Env::instance_accessor() { + static std::unique_ptr instance_{nullptr}; + return instance_; +} + +} // namespace host + } // namespace 
TiledArray diff --git a/src/TiledArray/host/env.h b/src/TiledArray/host/env.h index b469704a72..8e21e5b52a 100644 --- a/src/TiledArray/host/env.h +++ b/src/TiledArray/host/env.h @@ -27,17 +27,9 @@ #include // for memory management -#include -#include -#include -#include +#include #include -#include -#include -#include - -#include namespace TiledArray { @@ -68,12 +60,7 @@ class Env { /// access the singleton instance; if not initialized will be /// initialized via Env::initialize() with the default params - static std::unique_ptr& instance() { - if (!instance_accessor()) { - initialize(); - } - return instance_accessor(); - } + static std::unique_ptr& instance(); // clang-format off /// initialize the instance using explicit params @@ -87,61 +74,25 @@ class Env { // clang-format on static void initialize(World& world = TiledArray::get_default_world(), const std::uint64_t host_alloc_limit = (1ul << 40), - const std::uint64_t page_size = (1ul << 25)) { - static std::mutex mtx; // to make initialize() reentrant - std::scoped_lock lock{mtx}; - // only the winner of the lock race gets to initialize - if (instance_accessor() == nullptr) { - // uncomment to debug umpire ops - // - // umpire::util::Logger::getActiveLogger()->setLoggingMsgLevel( - // umpire::util::message::Debug); - - // make thread-safe size-limited pool of host memory - - auto& rm = umpire::ResourceManager::getInstance(); - - // N.B. 
we don't rely on Umpire introspection (even for profiling) - constexpr auto introspect = false; - - // use QuickPool for host memory allocation, with min grain of 1 page - auto host_size_limited_alloc = - rm.makeAllocator( - "SizeLimited_HOST", rm.getAllocator("HOST"), host_alloc_limit); - auto host_dynamic_pool = - rm.makeAllocator( - "QuickPool_SizeLimited_HOST", host_size_limited_alloc, page_size, - page_size, /* alignment */ TILEDARRAY_ALIGN_SIZE); - - auto host_env = std::unique_ptr(new Env(world, host_dynamic_pool)); - instance_accessor() = std::move(host_env); - } - } - - World& world() const { return *world_; } + const std::uint64_t page_size = (1ul << 25)); + + World& world() const; /// @return an Umpire allocator that allocates from a /// host memory pool /// @warning this is not a thread-safe allocator, should be only used when /// wrapped into umpire_based_allocator_impl - umpire::Allocator& host_allocator() { return host_allocator_; } + umpire::Allocator& host_allocator(); // clang-format off /// @return the max actual amount of memory held by host_allocator() /// @details returns the value provided by `umpire::strategy::QuickPool::getHighWatermark()` /// @note if there is only 1 Umpire allocator using HOST memory this should be identical to the value returned by `umpire::ResourceManager::getInstance().getAllocator("HOST").getHighWatermark()` // clang-format on - std::size_t host_allocator_getActualHighWatermark() { - TA_ASSERT(dynamic_cast( - host_allocator_.getAllocationStrategy()) != nullptr); - return dynamic_cast( - host_allocator_.getAllocationStrategy()) - ->getActualHighwaterMark(); - } + std::size_t host_allocator_getActualHighWatermark(); protected: - Env(World& world, umpire::Allocator host_alloc) - : world_(&world), host_allocator_(host_alloc) {} + Env(World& world, umpire::Allocator host_alloc); private: // the world used to initialize this @@ -151,10 +102,7 @@ class Env { // N.B. 
not thread safe, so must be wrapped into umpire_based_allocator_impl umpire::Allocator host_allocator_; - inline static std::unique_ptr& instance_accessor() { - static std::unique_ptr instance_{nullptr}; - return instance_; - } + inline static std::unique_ptr& instance_accessor(); }; } // namespace host diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index c116241289..019a4e05d1 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -22,7 +22,6 @@ #include "TiledArray/config.h" -#include "TiledArray/external/umpire.h" #include "TiledArray/host/env.h" #include "TiledArray/platform.h" @@ -36,6 +35,8 @@ #include "TiledArray/util/logger.h" #include "TiledArray/util/ptr_registry.h" +#include + namespace TiledArray { namespace detail { diff --git a/src/TiledArray/util/vector.h b/src/TiledArray/util/vector.h index 88103be8ee..0e2c7ffd95 100644 --- a/src/TiledArray/util/vector.h +++ b/src/TiledArray/util/vector.h @@ -39,7 +39,6 @@ #define BOOST_CONTAINER_USE_STD_EXCEPTIONS 1 #endif -#include #include "TiledArray/config.h" #include @@ -47,6 +46,9 @@ #include "TiledArray/error.h" #include "TiledArray/platform.h" +#include +#include + namespace TiledArray { namespace container {