diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index fef82b96..5bedea38 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -28,7 +28,18 @@ if (CHAI_ENABLE_CUDA OR CHAI_ENABLE_HIP)
     NAME chai-example.exe
     SOURCES example.cpp
     DEPENDS_ON ${chai_umpire_example_depends})
-
+  blt_add_executable(
+    NAME resource-depends.exe
+    SOURCES resource-depends.cpp
+    DEPENDS_ON ${chai_umpire_example_depends})
+  blt_add_executable(
+    NAME resource-not-managed.exe
+    SOURCES resource-not-managed.cpp
+    DEPENDS_ON ${chai_umpire_example_depends})
+  blt_add_executable(
+    NAME resource-multi-array.exe
+    SOURCES resource-multi-array.cpp
+    DEPENDS_ON ${chai_umpire_example_depends})
   blt_add_executable(
     NAME pinned.exe
     SOURCES pinned.cpp
diff --git a/examples/resource-depends.cpp b/examples/resource-depends.cpp
new file mode 100644
index 00000000..81cd9d60
--- /dev/null
+++ b/examples/resource-depends.cpp
@@ -0,0 +1,57 @@
+//////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016-20, Lawrence Livermore National Security, LLC and CHAI
+// project contributors. See the COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+//////////////////////////////////////////////////////////////////////////////
+#include "camp/resource.hpp"
+#include "chai/ManagedArray.hpp"
+
+#include "../src/util/forall.hpp"
+#include "../src/util/gpu_clock.hpp"
+
+#include <cstddef>
+#include <cstdio>
+
+
+int main()
+{
+  constexpr std::size_t ARRAY_SIZE{1000};
+  int clockrate{get_clockrate()};
+
+#ifdef CHAI_ENABLE_CUDA
+  camp::resources::Resource dev1{camp::resources::Cuda{}};
+  camp::resources::Resource dev2{camp::resources::Cuda{}};
+#elif defined(CHAI_ENABLE_HIP)
+  camp::resources::Resource dev1{camp::resources::Hip{}};
+  camp::resources::Resource dev2{camp::resources::Hip{}};
+#endif
+  camp::resources::Resource host{camp::resources::Host{}};
+
+  chai::ManagedArray<float> array1(ARRAY_SIZE);
+  chai::ManagedArray<float> array2(ARRAY_SIZE);
+
+  auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    array1[i] = i;
+    gpu_time_wait_for(10, clockrate);
+  });
+
+  auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    array2[i] = -1;
+    gpu_time_wait_for(20, clockrate);
+  });
+
+  dev1.wait_for(&e2);
+
+  forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    array1[i] *= array2[i];
+    gpu_time_wait_for(10, clockrate);
+  });
+
+  array1.move(chai::CPU, &dev1);
+
+  forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) {
+    printf("%f ", array1[i]);
+  });
+  printf("\n");
+}
diff --git a/examples/resource-multi-array.cpp b/examples/resource-multi-array.cpp
new file mode 100644
index 00000000..b450f8c9
--- /dev/null
+++ b/examples/resource-multi-array.cpp
@@ -0,0 +1,69 @@
+//////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016-20, Lawrence Livermore National Security, LLC and CHAI
+// project contributors. See the COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+//////////////////////////////////////////////////////////////////////////////
+#include "../src/util/forall.hpp"
+#include "../src/util/gpu_clock.hpp"
+
+#include "chai/ManagedArray.hpp"
+#include "camp/resource.hpp"
+
+#include <cstdio>
+#include <vector>
+
+
+int main()
+{
+
+  constexpr int NUM_ARRAYS = 16;
+  constexpr std::size_t ARRAY_SIZE{100};
+
+  std::vector<chai::ManagedArray<float>> arrays;
+  camp::resources::Resource host{camp::resources::Host{}};
+
+
+  int clockrate{get_clockrate()};
+
+  for (std::size_t i = 0; i < NUM_ARRAYS; ++i) {
+    arrays.push_back(chai::ManagedArray<float>(ARRAY_SIZE));
+  }
+
+  for (auto array : arrays) {
+    forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+      array[i] = i;
+    });
+  }
+
+  for (auto array : arrays) {
+#ifdef CHAI_ENABLE_CUDA
+    camp::resources::Resource resource{camp::resources::Cuda{}};
+#elif defined(CHAI_ENABLE_HIP)
+    camp::resources::Resource resource{camp::resources::Hip{}};
+#endif
+
+    forall(&resource, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+      array[i] = array[i] * 2.0 + i;
+      gpu_time_wait_for(20, clockrate);
+    });
+  }
+
+  for (auto array : arrays) {
+#ifdef CHAI_ENABLE_CUDA
+    camp::resources::Resource resource{camp::resources::Cuda{}};
+#elif defined(CHAI_ENABLE_HIP)
+    camp::resources::Resource resource{camp::resources::Hip{}};
+#endif
+
+    array.move(chai::CPU, &resource);
+  }
+
+  for (auto array : arrays) {
+    forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+      if (i == 25) {
+        printf("array[%d] = %f \n", i, array[i]);
+      }
+    });
+  }
+}
diff --git a/examples/resource-not-managed.cpp b/examples/resource-not-managed.cpp
new file mode 100644
index 00000000..7aece102
--- /dev/null
+++ b/examples/resource-not-managed.cpp
@@ -0,0 +1,52 @@
+//////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016-20, Lawrence Livermore National Security, LLC and CHAI
+// project contributors. See the COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+//////////////////////////////////////////////////////////////////////////////
+#include "camp/resource.hpp"
+
+#include "../src/util/forall.hpp"
+#include "../src/util/gpu_clock.hpp"
+
+#include <cstddef>
+#include <cstdio>
+
+
+int main()
+{
+  constexpr std::size_t ARRAY_SIZE{1000};
+  int clockrate{get_clockrate()};
+
+  camp::resources::Resource dev1{camp::resources::Cuda{}};
+  camp::resources::Resource dev2{camp::resources::Cuda{}};
+  camp::resources::Resource host{camp::resources::Host{}};
+
+  float * d_array1 = dev1.allocate<float>(1000);
+  float * d_array2 = dev2.allocate<float>(1000);
+  float * h_array1 = host.allocate<float>(1000);
+
+  auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    d_array1[i] = i;
+    gpu_time_wait_for(10, clockrate);
+  });
+
+  auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    d_array2[i] = -1;
+    gpu_time_wait_for(20, clockrate);
+  });
+
+  dev1.wait_for(&e2);
+
+  forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    d_array1[i] *= d_array2[i];
+    gpu_time_wait_for(10, clockrate);
+  });
+
+  dev1.memcpy(h_array1, d_array1, sizeof(float) * 1000);
+
+  forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) {
+    printf("%f ", h_array1[i]);
+  });
+  printf("\n");
+}
diff --git a/scripts/spack_packages/chai/package.py b/scripts/spack_packages/chai/package.py
new file mode 100644
index 00000000..bf596de4
--- /dev/null
+++ b/scripts/spack_packages/chai/package.py
@@ -0,0 +1,293 @@
+# Copyright 2013-2020 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers.
See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + + +from spack import * + +import socket +import os + +from os import environ as env +from os.path import join as pjoin + +import re + +def cmake_cache_entry(name, value, comment=""): + """Generate a string for a cmake cache variable""" + + return 'set(%s "%s" CACHE PATH "%s")\n\n' % (name,value,comment) + + +def cmake_cache_string(name, string, comment=""): + """Generate a string for a cmake cache variable""" + + return 'set(%s "%s" CACHE STRING "%s")\n\n' % (name,string,comment) + + +def cmake_cache_option(name, boolean_value, comment=""): + """Generate a string for a cmake configuration option""" + + value = "ON" if boolean_value else "OFF" + return 'set(%s %s CACHE BOOL "%s")\n\n' % (name,value,comment) + + +def get_spec_path(spec, package_name, path_replacements = {}, use_bin = False) : + """Extracts the prefix path for the given spack package + path_replacements is a dictionary with string replacements for the path. + """ + + if not use_bin: + path = spec[package_name].prefix + else: + path = spec[package_name].prefix.bin + + path = os.path.realpath(path) + + for key in path_replacements: + path = path.replace(key, path_replacements[key]) + + return path + + +class Chai(CMakePackage, CudaPackage, ROCmPackage): + """ + Copy-hiding array interface for data migration between memory spaces + """ + + homepage = "https://github.com/LLNL/CHAI" + git = "https://github.com/LLNL/CHAI.git" + + version('develop', branch='develop', submodules='True') + version('master', branch='main', submodules='True') + version('2.1.1', tag='v2.1.1', submodules='True') + version('2.1.0', tag='v2.1.0', submodules='True') + version('2.0.0', tag='v2.0.0', submodules='True') + version('1.2.0', tag='v1.2.0', submodules='True') + version('1.1.0', tag='v1.1.0', submodules='True') + version('1.0', tag='v1.0', submodules='True') + + variant('shared', default=False, description='Build Shared Libs') + variant('raja', default=True, description='Build plugin for RAJA') + variant('tests', default='basic', values=('none', 'basic', 'benchmarks'), + multi=False, description='Tests to run') + + depends_on('umpire') + depends_on('umpire@main', when='@main') + + depends_on('camp') + depends_on('camp@main', when='@main') + + depends_on('raja', when="+raja") + depends_on('raja@main', when="@main+raja") + + depends_on('cmake@3.14:', type='build') + depends_on('umpire+cuda', when="+cuda") + depends_on('raja+cuda', when="+raja+cuda") + depends_on('umpire+cuda+allow-untested-versions', when="+cuda+allow-untested-versions") + depends_on('raja+cuda+allow-untested-versions', when="+raja+cuda+allow-untested-versions") + + for val in ROCmPackage.amdgpu_targets: + depends_on('raja amdgpu_target=%s' % val, when='amdgpu_target=%s' % val) + depends_on('umpire amdgpu_target=%s' % val, when='amdgpu_target=%s' % val) + + for sm_ in CudaPackage.cuda_arch_values: + depends_on('raja cuda_arch={0}'.format(sm_), + when='cuda_arch={0}'.format(sm_)) + depends_on('umpire cuda_arch={0}'.format(sm_), + when='cuda_arch={0}'.format(sm_)) + + phases = ['hostconfig', 'cmake', 'build', 'install'] + + def _get_sys_type(self, spec): + sys_type = str(spec.architecture) + # if on llnl systems, we can use the SYS_TYPE + if "SYS_TYPE" in env: + sys_type = env["SYS_TYPE"] + return sys_type + + def _get_host_config_path(self, spec): + var='' + if '+cuda' in spec: + var= '-'.join([var,'cuda']) + + host_config_path = "hc-%s-%s-%s%s-%s.cmake" % 
(socket.gethostname().rstrip('1234567890'), + self._get_sys_type(spec), + spec.compiler, + var, + spec.dag_hash()) + dest_dir = self.stage.source_path + host_config_path = os.path.abspath(pjoin(dest_dir, host_config_path)) + return host_config_path + + def hostconfig(self, spec, prefix, py_site_pkgs_dir=None): + """ + This method creates a 'host-config' file that specifies + all of the options used to configure and build CHAI. + + For more details about 'host-config' files see: + http://software.llnl.gov/conduit/building.html + + Note: + The `py_site_pkgs_dir` arg exists to allow a package that + subclasses this package provide a specific site packages + dir when calling this function. `py_site_pkgs_dir` should + be an absolute path or `None`. + + This is necessary because the spack `site_packages_dir` + var will not exist in the base class. For more details + on this issue see: https://github.com/spack/spack/issues/6261 + """ + + ####################### + # Compiler Info + ####################### + c_compiler = env["SPACK_CC"] + cpp_compiler = env["SPACK_CXX"] + + # Even though we don't have fortran code in our project we sometimes + # use the Fortran compiler to determine which libstdc++ to use + f_compiler = "" + if "SPACK_FC" in env.keys(): + # even if this is set, it may not exist + # do one more sanity check + if os.path.isfile(env["SPACK_FC"]): + f_compiler = env["SPACK_FC"] + + ####################################################################### + # By directly fetching the names of the actual compilers we appear + # to doing something evil here, but this is necessary to create a + # 'host config' file that works outside of the spack install env. + ####################################################################### + + sys_type = self._get_sys_type(spec) + + ############################################## + # Find and record what CMake is used + ############################################## + + cmake_exe = spec['cmake'].command.path + cmake_exe = os.path.realpath(cmake_exe) + + host_config_path = self._get_host_config_path(spec) + cfg = open(host_config_path, "w") + cfg.write("###################\n".format("#" * 60)) + cfg.write("# Generated host-config - Edit at own risk!\n") + cfg.write("###################\n".format("#" * 60)) + cfg.write("# Copyright (c) 2020, Lawrence Livermore National Security, LLC and\n") + cfg.write("# other CHAI Project Developers. 
See the top-level LICENSE file for\n") + cfg.write("# details.\n") + cfg.write("#\n") + cfg.write("# SPDX-License-Identifier: (BSD-3-Clause) \n") + cfg.write("###################\n\n".format("#" * 60)) + + cfg.write("#------------------\n".format("-" * 60)) + cfg.write("# SYS_TYPE: {0}\n".format(sys_type)) + cfg.write("# Compiler Spec: {0}\n".format(spec.compiler)) + cfg.write("# CMake executable path: %s\n" % cmake_exe) + cfg.write("#------------------\n\n".format("-" * 60)) + + ####################### + # Compiler Settings + ####################### + + cfg.write("#------------------\n".format("-" * 60)) + cfg.write("# Compilers\n") + cfg.write("#------------------\n\n".format("-" * 60)) + cfg.write(cmake_cache_entry("CMAKE_C_COMPILER", c_compiler)) + cfg.write(cmake_cache_entry("CMAKE_CXX_COMPILER", cpp_compiler)) + + # use global spack compiler flags + cflags = ' '.join(spec.compiler_flags['cflags']) + if cflags: + cfg.write(cmake_cache_entry("CMAKE_C_FLAGS", cflags)) + + cxxflags = ' '.join(spec.compiler_flags['cxxflags']) + if cxxflags: + cfg.write(cmake_cache_entry("CMAKE_CXX_FLAGS", cxxflags)) + + if ("gfortran" in f_compiler) and ("clang" in cpp_compiler): + libdir = pjoin(os.path.dirname( + os.path.dirname(f_compiler)), "lib") + flags = "" + for _libpath in [libdir, libdir + "64"]: + if os.path.exists(_libpath): + flags += " -Wl,-rpath,{0}".format(_libpath) + description = ("Adds a missing libstdc++ rpath") + if flags: + cfg.write(cmake_cache_entry("BLT_EXE_LINKER_FLAGS", flags, + description)) + + gcc_toolchain_regex = re.compile(".*gcc-toolchain.*") + gcc_name_regex = re.compile(".*gcc-name.*") + + using_toolchain = list(filter(gcc_toolchain_regex.match, spec.compiler_flags['cxxflags'])) + using_gcc_name = list(filter(gcc_name_regex.match, spec.compiler_flags['cxxflags'])) + compilers_using_toolchain = ["pgi", "xl", "icpc"] + if any(compiler in cpp_compiler for compiler in compilers_using_toolchain): + if using_toolchain or using_gcc_name: + cfg.write(cmake_cache_entry("BLT_CMAKE_IMPLICIT_LINK_DIRECTORIES_EXCLUDE", + "/usr/tce/packages/gcc/gcc-4.9.3/lib64;/usr/tce/packages/gcc/gcc-4.9.3/gnu/lib64/gcc/powerpc64le-unknown-linux-gnu/4.9.3;/usr/tce/packages/gcc/gcc-4.9.3/gnu/lib64;/usr/tce/packages/gcc/gcc-4.9.3/lib64/gcc/x86_64-unknown-linux-gnu/4.9.3")) + + if "+cuda" in spec: + cfg.write("#------------------{0}\n".format("-" * 60)) + cfg.write("# Cuda\n") + cfg.write("#------------------{0}\n\n".format("-" * 60)) + + cfg.write(cmake_cache_option("ENABLE_CUDA", True)) + + cudatoolkitdir = spec['cuda'].prefix + cfg.write(cmake_cache_entry("CUDA_TOOLKIT_ROOT_DIR", + cudatoolkitdir)) + cudacompiler = "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc" + cfg.write(cmake_cache_entry("CMAKE_CUDA_COMPILER", + cudacompiler)) + + if not spec.satisfies('cuda_arch=none'): + cuda_arch = spec.variants['cuda_arch'].value + cuda_arch = "sm_{0}".format(cuda_arch[0]) + flag = '-arch {0}'.format(cuda_arch) + cfg.write(cmake_cache_string("CUDA_ARCH",cuda_arch)) + cfg.write(cmake_cache_string("CMAKE_CUDA_FLAGS", flag)) + + else: + cfg.write(cmake_cache_option("ENABLE_CUDA", False)) + + if "+raja" in spec: + cfg.write("#------------------{0}\n".format("-" * 60)) + cfg.write("# RAJA\n") + cfg.write("#------------------{0}\n\n".format("-" * 60)) + + cfg.write(cmake_cache_option("CHAI_ENABLE_RAJA_PLUGIN", True)) + raja_dir = spec['raja'].prefix + cfg.write(cmake_cache_entry("RAJA_DIR", raja_dir)) + else: + cfg.write(cmake_cache_option("CHAI_ENABLE_RAJA_PLUGIN", False)) + + # shared vs static libs + 
cfg.write(cmake_cache_option("BUILD_SHARED_LIBS","+shared" in spec)) + + cfg.write(cmake_cache_entry("umpire_DIR",spec['umpire'].prefix)) + camp_conf_path = spec['camp'].prefix + "/lib/cmake/camp" + cfg.write(cmake_cache_entry("camp_DIR",camp_conf_path)) + + cfg.write(cmake_cache_option("ENABLE_BENCHMARKS", 'tests=benchmarks' in spec)) + cfg.write(cmake_cache_option("ENABLE_TESTS", not 'tests=none' in spec)) + + ####################### + # Close and save + ####################### + cfg.write("\n") + cfg.close() + + print("OUT: host-config file {0}".format(host_config_path)) + + def cmake_args(self): + spec = self.spec + host_config_path = self._get_host_config_path(spec) + + options = [] + options.extend(['-C', host_config_path]) + + return options diff --git a/src/chai/ActiveResourceManager.hpp b/src/chai/ActiveResourceManager.hpp new file mode 100644 index 00000000..9512aebd --- /dev/null +++ b/src/chai/ActiveResourceManager.hpp @@ -0,0 +1,90 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-23, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// +#ifndef CHAI_ActiveResourceManager_HPP +#define CHAI_ActiveResourceManager_HPP + +#include "camp/resource.hpp" + +#include +#include + +namespace chai +{ + +/*! + * \Class to store list of Resource pointers. Holds data on the stack + * until a certain threshold, then uses heap memory. + */ +class ActiveResourceManager { + + /*! + * Size of array on the stack. + */ + static constexpr int BASE_SIZE = 16; + + /*! + * Base array on the stack. + */ + std::array m_res_base; + + /*! + * Heap container for extra resources if more than BASE_SIZE pushed. + */ + std::vector m_res_overflow; + + /*! + * Current number of active resources in the list. + */ + int m_size = 0; + +public: + /*! + * Default constructor. + */ + ActiveResourceManager() = default; + + /*! + * Return current size of the resource list. + * + * \return The current size of the resource list. + */ + int size(); + + /*! + * Push a new resource onto the list. + * + * \param res The resource to add. + */ + void push_back(camp::resources::Resource* res); + + /*! + * Clear all values on the heap and set m_size to 0. + */ + void clear(); + + /*! + * Check if empty. + * + * \return Whether or not the resource manager is empty. + */ + bool is_empty() const; + + /*! + * Get resource at given index. + * + * \param i The index at which to get a resource. + * + * \return The resource at the given index. + */ + camp::resources::Resource* operator [](int i) const; +}; + +} // end of namespace chai + +#include "chai/ActiveResourceManager.inl" + +#endif // CHAI_ActiveResourceManager_HPP diff --git a/src/chai/ActiveResourceManager.inl b/src/chai/ActiveResourceManager.inl new file mode 100644 index 00000000..22c9a522 --- /dev/null +++ b/src/chai/ActiveResourceManager.inl @@ -0,0 +1,59 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-23, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// +#ifndef CHAI_ActiveResourceManager_INL +#define CHAI_ActiveResourceManager_INL + +#include "ActiveResourceManager.hpp" + +namespace chai +{ + +CHAI_INLINE +int ActiveResourceManager::size() { + return m_size; +} + + +CHAI_INLINE +void ActiveResourceManager::push_back(camp::resources::Resource * res) { + if (m_size < BASE_SIZE) { + m_res_base[m_size] = res; + } + else { + m_res_overflow.push_back(res); + } + + m_size++; +} + + +CHAI_INLINE +void ActiveResourceManager::clear() { + m_res_overflow.clear(); + m_size = 0; +} + + +CHAI_INLINE +bool ActiveResourceManager::is_empty() const { + return m_size == 0; +} + + +CHAI_INLINE +camp::resources::Resource* ActiveResourceManager::operator[](int i) const { + if (i < 0 || i >= m_size) { + return nullptr; + } + else { + return i < BASE_SIZE ? m_res_base[i] : m_res_overflow[i - BASE_SIZE]; + } +} + +} //end of namespace chai + +#endif // CHAI_ActiveResourceManager_INL diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 64c55037..2a732ccb 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -37,6 +37,7 @@ ArrayManager::ArrayManager() : { m_pointer_map.clear(); m_current_execution_space = NONE; + m_current_resource = nullptr; m_default_allocation_space = CPU; m_allocators[CPU] = @@ -166,6 +167,11 @@ void * ArrayManager::frontOfAllocation(void * pointer) { } void ArrayManager::setExecutionSpace(ExecutionSpace space) +{ + setExecutionSpace(space, nullptr); +} + +void ArrayManager::setExecutionSpace(ExecutionSpace space, camp::resources::Resource* resource) { #if defined(CHAI_ENABLE_GPU_SIMULATION_MODE) if (isGPUSimMode() && chai::NONE != space) { @@ -186,11 +192,20 @@ void ArrayManager::setExecutionSpace(ExecutionSpace space) #endif m_current_execution_space = space; + m_current_resource = resource; } void* ArrayManager::move(void* pointer, PointerRecord* pointer_record, ExecutionSpace space) +{ + return move(pointer, pointer_record, nullptr, space); +} + +void* ArrayManager::move(void* pointer, + PointerRecord* pointer_record, + camp::resources::Resource* resource, + ExecutionSpace space) { // Check for default arg (NONE) if (space == NONE) { @@ -201,7 +216,7 @@ void* ArrayManager::move(void* pointer, return pointer; } - move(pointer_record, space); + move(pointer_record, space, resource); return pointer_record->m_pointers[space]; } @@ -211,6 +226,11 @@ ExecutionSpace ArrayManager::getExecutionSpace() return m_current_execution_space; } +camp::resources::Resource* ArrayManager::getResource() +{ + return m_current_resource; +} + void ArrayManager::registerTouch(PointerRecord* pointer_record) { registerTouch(pointer_record, m_current_execution_space); @@ -243,14 +263,17 @@ void ArrayManager::resetTouch(PointerRecord* pointer_record) /* Not all GPU platform runtimes (notably HIP), will give you asynchronous copies to the device by default, so we leverage * umpire's API for asynchronous copies using camp resources in this method, based off of the CHAI destination space * */ -static void copy(void * dst_pointer, void * src_pointer, umpire::ResourceManager & manager, ExecutionSpace dst_space, ExecutionSpace src_space) { +static void copy(void * dst_pointer, void * src_pointer, umpire::ResourceManager & manager, ExecutionSpace dst_space, ExecutionSpace src_space, camp::resources::Resource* res = nullptr) { #ifdef CHAI_ENABLE_CUDA - camp::resources::Resource 
device_resource(camp::resources::Cuda::get_default()); + camp::resources::Resource device_resource = + (res) ? res->get() : camp::resources::Cuda::get_default(); #elif defined(CHAI_ENABLE_HIP) - camp::resources::Resource device_resource(camp::resources::Hip::get_default()); + camp::resources::Resource device_resource = + (res) ? res->get() : camp::resources::Hip::get_default(); #else - camp::resources::Resource device_resource(camp::resources::Host::get_default()); + camp::resources::Resource device_resource = + (res) ? res->get() : camp::resources::Host::get_default(); #endif camp::resources::Resource host_resource(camp::resources::Host::get_default()); @@ -268,6 +291,13 @@ static void copy(void * dst_pointer, void * src_pointer, umpire::ResourceManager } void ArrayManager::move(PointerRecord* record, ExecutionSpace space) +{ + move(record, space, nullptr); +} + +void ArrayManager::move(PointerRecord* record, + ExecutionSpace space, + camp::resources::Resource* resource) { if (space == NONE) { return; @@ -275,7 +305,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space) callback(record, ACTION_CAPTURED, space); - if (space == record->m_last_space) { + if (space == record->m_last_space && !record->transfer_pending) { return; } @@ -313,7 +343,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space) } else if (dst_pointer != src_pointer) { // Exclude the copy if src and dst are the same (can happen for PINNED memory) { - chai::copy(dst_pointer, src_pointer, m_resource_manager, space, prev_space); + chai::copy(dst_pointer, src_pointer, m_resource_manager, space, prev_space, resource); } callback(record, ACTION_MOVE, space); diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index 9041ee71..cabd4276 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -22,6 +22,8 @@ #include "umpire/Allocator.hpp" #include "umpire/util/MemoryMap.hpp" +#include "camp/resource.hpp" + #if defined(CHAI_ENABLE_CUDA) #include #endif @@ -168,22 +170,55 @@ class ArrayManager */ CHAISHAREDDLL_API void setExecutionSpace(ExecutionSpace space); + /*! + * \brief Set the current execution space. + * + * \param space The space to set as current. + * \param resource The resource to set as current. + */ + CHAISHAREDDLL_API void setExecutionSpace(ExecutionSpace space, camp::resources::Resource *resource); + /*! * \brief Get the current execution space. * - * \return The current execution space.jo + * \return The current execution space. */ CHAISHAREDDLL_API ExecutionSpace getExecutionSpace(); /*! - * \brief Move data in pointer to the current execution space. + * \brief Get the current resource. + * + * \return The current resource. + */ + CHAISHAREDDLL_API camp::resources::Resource* getResource(); + + /*! + * \brief Move data in pointer to the given execution space. * * \param pointer Pointer to data in any execution space. + * \param pointer_record The pointer record. + * \param space The execution space to which to move the data. + * * \return Pointer to data in the current execution space. */ CHAISHAREDDLL_API void* move(void* pointer, PointerRecord* pointer_record, - ExecutionSpace = NONE); + ExecutionSpace space = NONE); + + /*! + * \brief Move data in pointer to the given execution space. + * + * \param pointer Pointer to data in any execution space. + * \param pointer_record The pointer record. + * \param resource The resource to use to move the data. + * \param space The execution space to which to move the data. 
+ * + * \return Pointer to data in the current execution space. + */ + CHAISHAREDDLL_API void* move(void* pointer, + PointerRecord* pointer_record, + camp::resources::Resource* resource, + ExecutionSpace space = NONE); /*! * \brief Register a touch of the pointer in the current execution space. @@ -444,14 +479,23 @@ class ArrayManager /*! - * \brief Move data in PointerRecord to the corresponding ExecutionSpace. + * \brief Move data in the pointer record to the corresponding execution space. * - * \param record - * \param space + * \param record The pointer record. + * \param space The execution space to which to move the data. */ void move(PointerRecord* record, ExecutionSpace space); - - /*! + + /*! + * \brief Move data in the pointer record to the corresponding execution space. + * + * \param record The pointer record. + * \param space The execution space to which to move the data. + * \param resource The resource to use to move the data. + */ + void move(PointerRecord* record, ExecutionSpace space, camp::resources::Resource* resource); + + /*! * \brief Execute a user callback if callbacks are active * * \param record The pointer record containing the callback @@ -480,6 +524,11 @@ class ArrayManager */ static thread_local ExecutionSpace m_current_execution_space; + /*! + * Current resource. + */ + camp::resources::Resource* m_current_resource; + /** * Default space for new allocations. */ diff --git a/src/chai/CMakeLists.txt b/src/chai/CMakeLists.txt index 2285c544..ea540930 100644 --- a/src/chai/CMakeLists.txt +++ b/src/chai/CMakeLists.txt @@ -12,6 +12,8 @@ configure_file( ${PROJECT_BINARY_DIR}/include/chai/config.hpp) set (chai_headers + ActiveResourceManager.hpp + ActiveResourceManager.inl ArrayManager.hpp ArrayManager.inl ChaiMacros.hpp @@ -32,7 +34,8 @@ set (chai_sources ArrayManager.cpp) set (chai_depends - umpire) + umpire + camp) if (CHAI_ENABLE_CUDA) set (chai_depends diff --git a/src/chai/ChaiMacros.hpp b/src/chai/ChaiMacros.hpp index 1d62ba84..e25ccec7 100644 --- a/src/chai/ChaiMacros.hpp +++ b/src/chai/ChaiMacros.hpp @@ -31,6 +31,10 @@ #define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice #define gpuMemcpyDefault cudaMemcpyDefault +#define gpuDeviceProp_t cudaDeviceProp +#define gpuGetDevice cudaGetDevice +#define gpuGetDeviceProperties cudaGetDeviceProperties + // NOTE: Cannot have if defined(__HIPCC__) in the condition below, since __HIPCC__ comes from the included header hip_runtime below. 
#elif defined(CHAI_ENABLE_HIP) @@ -48,6 +52,11 @@ #define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice #define gpuMemcpyDefault hipMemcpyDefault +#define gpuDeviceProp_t hipDeviceProp_t +#define gpuGetDevice hipGetDevice +#define gpuGetDeviceProperties hipGetDeviceProperties + + #else #define CHAI_HOST diff --git a/src/chai/ExecutionSpaces.hpp b/src/chai/ExecutionSpaces.hpp index bdc56492..17f44659 100644 --- a/src/chai/ExecutionSpaces.hpp +++ b/src/chai/ExecutionSpaces.hpp @@ -8,6 +8,7 @@ #define CHAI_ExecutionSpaces_HPP #include "chai/config.hpp" +#include "camp/resource.hpp" namespace chai { @@ -44,6 +45,26 @@ enum ExecutionSpace { #endif }; + +inline bool operator==(const ExecutionSpace& s, + const camp::resources::Platform& p) { + if (s == chai::CPU && p == camp::resources::Platform::host) { + return true; + } +#if defined(CHAI_ENABLE_CUDA) + else if (s == chai::GPU && p == camp::resources::Platform::cuda) { + return true; + } +#elif defined(CHAI_ENABLE_HIP) + else if (s == chai::GPU && p == camp::resources::Platform::hip) { + return true; + } +#endif + else { + return false; + } +} + } // end of namespace chai #endif // CHAI_ExecutionSpaces_HPP diff --git a/src/chai/ManagedArray.hpp b/src/chai/ManagedArray.hpp index 9e402273..c3388969 100644 --- a/src/chai/ManagedArray.hpp +++ b/src/chai/ManagedArray.hpp @@ -161,9 +161,34 @@ class ManagedArray : public CHAICopyable */ CHAI_HOST void registerTouch(ExecutionSpace space); - CHAI_HOST void move(ExecutionSpace space=NONE, - bool registerTouch=!std::is_const::value) const; + /*! + * \brief Move the underlying data to the given execution space using the given resource. + * + * \param space The space to which to move the underlying data. + * \param resource The resource to use to move the underlying data. + * \param registerTouch Whether to mark the data as touched in the given space. + */ + CHAI_HOST void move(ExecutionSpace space, + camp::resources::Resource* resource, + bool registerTouch = !std::is_const::value) const; + /*! + * \brief Move the underlying data to the given execution space. + * + * \param space The space to which to move the underlying data. + * \param registerTouch Whether to mark the data as touched in the given space. + */ + CHAI_HOST void move(ExecutionSpace space = NONE, + bool registerTouch = !std::is_const::value) const; + + /*! + * \brief Get a slice of the ManagedArray. + * + * \param begin The start of the slice. + * \param elems The number of elements in the slice (-1 means use all remaining elements) + * + * \return A slice of the ManagedArray. + */ CHAI_HOST_DEVICE ManagedArray slice(size_t begin, size_t elems=(size_t)-1) const; /*! 
diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl
index b7e78b44..6eecda27 100644
--- a/src/chai/ManagedArray.inl
+++ b/src/chai/ManagedArray.inl
@@ -115,7 +115,7 @@ CHAI_HOST_DEVICE ManagedArray<T>::ManagedArray(ManagedArray const& other):
   if (m_pointer_record && !m_is_slice) {
     m_size = m_pointer_record->m_size;
   }
-  move(m_resource_manager->getExecutionSpace());
+  move(m_resource_manager->getExecutionSpace(), m_resource_manager->getResource());
 }
 #endif
 }
@@ -367,41 +367,68 @@ template <typename T>
 CHAI_INLINE
 CHAI_HOST
 void ManagedArray<T>::move(ExecutionSpace space, bool registerTouch) const
+{
+  move(space, nullptr, registerTouch);
+}
+
+template <typename T>
+CHAI_INLINE
+CHAI_HOST
+void ManagedArray<T>::move(ExecutionSpace space,
+                           camp::resources::Resource* resource,
+                           bool registerTouch) const
 {
   if (m_pointer_record != &ArrayManager::s_null_record) {
-  ExecutionSpace prev_space = m_pointer_record->m_last_space;
-  if (prev_space == CPU || prev_space == NONE) {
-    /// Move nested ManagedArrays first, so they are working with a valid m_active_pointer for the host,
-    // and so the meta data associated with them are updated before we move the other array down.
-    moveInnerImpl();
-  }
-  CHAI_LOG(Debug, "Moving " << m_active_pointer);
-  m_active_base_pointer = static_cast<T*>(m_resource_manager->move((void *)m_active_base_pointer, m_pointer_record, space));
-  m_active_pointer = m_active_base_pointer + m_offset;
+    ExecutionSpace prev_space = m_pointer_record->m_last_space;
+
+    if (prev_space == CPU || prev_space == NONE) {
+      // Move nested ManagedArrays first, so they are working with a valid
+      // m_active_pointer for the host, and so the meta data associated with
+      // them are updated before we move the other array down.
+      moveInnerImpl();
+    }
+
+#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP)
+    if (resource != nullptr &&
+        space == GPU &&
+        m_pointer_record->m_last_resource != resource) {
+      m_pointer_record->m_res_manager.push_back(resource);
+    }
+#endif
+
+    CHAI_LOG(Debug, "Moving " << m_active_pointer);
+    m_active_base_pointer = static_cast<T*>(m_resource_manager->move(const_cast<T_non_const*>(m_active_base_pointer), m_pointer_record, resource, space));
+    m_active_pointer = m_active_base_pointer + m_offset;
+    CHAI_LOG(Debug, "Moved to " << m_active_pointer);
 
-  CHAI_LOG(Debug, "Moved to " << m_active_pointer);
 #if defined(CHAI_ENABLE_UM)
   if (m_pointer_record->m_last_space == UM) {
-    // just because we were allocated in UM doesn't mean our CHAICopyable array values were
-    moveInnerImpl();
+    // Just because we were allocated in UM doesn't mean our CHAICopyable
+    // array values were
+    moveInnerImpl();
   } else
 #endif
 #if defined(CHAI_ENABLE_PINNED)
   if (m_pointer_record->m_last_space == PINNED) {
-    // just because we were allocated in PINNED doesn't mean our CHAICopyable array values were
-    moveInnerImpl();
-  } else
+    // Just because we were allocated in PINNED doesn't mean our CHAICopyable
+    // array values were
+    moveInnerImpl();
+  } else
 #endif
-  if (registerTouch) {
-    CHAI_LOG(Debug, "T is non-const, registering touch of pointer" << m_active_pointer);
-    m_resource_manager->registerTouch(m_pointer_record, space);
-  }
-  if (space != GPU && prev_space == GPU) {
-    /// Move nested ManagedArrays after the move, so they are working with a valid m_active_pointer for the host,
-    // and so the meta data associated with them are updated with live GPU data
-    moveInnerImpl();
-  }
-  }
+    if (registerTouch) {
+      CHAI_LOG(Debug, "Registering touch of pointer " << m_active_pointer);
+      m_resource_manager->registerTouch(m_pointer_record, space);
+    }
+
+#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE)
+    if (space != GPU && prev_space == GPU) {
+      // Move nested ManagedArrays after the move, so they are working with a
+      // valid m_active_pointer for the host, and so the meta data associated
+      // with them are updated with live GPU data
+      moveInnerImpl();
+    }
+#endif
+  }
 }
 
 template <typename T>
diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp
index e46ea899..272b9241 100644
--- a/src/chai/PointerRecord.hpp
+++ b/src/chai/PointerRecord.hpp
@@ -7,9 +7,12 @@
 #ifndef CHAI_PointerRecord_HPP
 #define CHAI_PointerRecord_HPP
 
+#include "chai/ActiveResourceManager.hpp"
 #include "chai/ExecutionSpaces.hpp"
 #include "chai/Types.hpp"
 
+#include "camp/resource.hpp"
+
 #include <cstddef>
 #include <functional>
 
@@ -45,7 +48,6 @@ struct PointerRecord {
    */
   bool m_owned[NUM_EXECUTION_SPACES];
 
-
   /*!
    * User defined callback triggered on memory operations.
    *
@@ -54,11 +56,33 @@ struct PointerRecord {
    */
   UserCallback m_user_callback;
 
+  /*!
+   * Array holding Umpire allocator IDs in each execution space.
+   */
   int m_allocators[NUM_EXECUTION_SPACES];
 
+  /*!
+   * Whether or not a transfer is pending.
+   */
+  bool transfer_pending{false};
+
+  /*!
+   * An event that can be used to control asynchronous flow.
+   */
+  camp::resources::Event m_event{};
+
+  /*!
+   * Last resource used by this array.
+   */
+  camp::resources::Resource* m_last_resource{nullptr};
+
+  /*!
+   * The resource manager.
+   */
+  ActiveResourceManager m_res_manager;
+
   /*!
    * \brief Default constructor
-   *
    */
   PointerRecord() : m_size(0), m_last_space(NONE) {
     m_user_callback = [] (const PointerRecord*, Action, ExecutionSpace) {};
diff --git a/src/util/forall.hpp b/src/util/forall.hpp
index c90b944d..4f66786a 100644
--- a/src/util/forall.hpp
+++ b/src/util/forall.hpp
@@ -10,6 +10,7 @@
 #include "chai/ArrayManager.hpp"
 #include "chai/ExecutionSpaces.hpp"
 #include "chai/config.hpp"
+#include "camp/resource.hpp"
 
 #if defined(CHAI_ENABLE_UM)
 #if !defined(CHAI_THIN_GPU_ALLOCATE)
@@ -54,6 +55,24 @@ void forall(sequential, int begin, int end, LOOP_BODY body)
   rm->setExecutionSpace(chai::NONE);
 }
 
+template <typename LOOP_BODY>
+camp::resources::Event forall_host(camp::resources::Resource* dev, int begin, int end, LOOP_BODY body)
+{
+  chai::ArrayManager* rm = chai::ArrayManager::getInstance();
+
+#if defined(CHAI_ENABLE_UM)
+  cudaDeviceSynchronize();
+#endif
+
+  rm->setExecutionSpace(chai::CPU, dev);
+
+  forall_kernel_cpu(begin, end, body);
+
+  rm->setExecutionSpace(chai::NONE);
+  return dev->get_event();
+}
+
+
 #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP)
 
 template <typename LOOP_BODY>
@@ -123,6 +142,41 @@ void forall(gpu, int begin, int end, LOOP_BODY&& body)
   rm->setExecutionSpace(chai::NONE);
 }
 
+template <typename LOOP_BODY>
+camp::resources::Event forall_gpu(camp::resources::Resource* dev, int begin, int end, LOOP_BODY&& body)
+{
+  chai::ArrayManager* rm = chai::ArrayManager::getInstance();
+
+  rm->setExecutionSpace(chai::GPU, dev);
+
+  size_t blockSize = 32;
+  size_t gridSize = (end - begin + blockSize - 1) / blockSize;
+
+#if defined(CHAI_ENABLE_CUDA)
+  auto cuda = dev->get<camp::resources::Cuda>();
+  forall_kernel_gpu<<<gridSize, blockSize, 0, cuda.get_stream()>>>(begin, end - begin, body);
+#elif defined(CHAI_ENABLE_HIP)
+  auto hip = dev->get<camp::resources::Hip>();
+  hipLaunchKernelGGL(forall_kernel_gpu, dim3(gridSize), dim3(blockSize), 0, hip.get_stream(),
+                     begin, end - begin, body);
+#endif
+
+  rm->setExecutionSpace(chai::NONE);
+  return dev->get_event();
+}
+#endif // if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP)
+
+template <typename LOOP_BODY>
+camp::resources::Event forall(camp::resources::Resource *res, int
begin, int end, LOOP_BODY&& body) +{ + auto platform = res->get_platform(); + switch(platform) { + case camp::resources::Platform::cuda: + case camp::resources::Platform::hip: + return forall_gpu(res, begin, end, body); + default: + return forall_host(res, begin, end, body); + } +} #endif // CHAI_forall_HPP diff --git a/src/util/gpu_clock.hpp b/src/util/gpu_clock.hpp new file mode 100644 index 00000000..bffa8f86 --- /dev/null +++ b/src/util/gpu_clock.hpp @@ -0,0 +1,40 @@ +#ifndef GPU_CLOCK_HPP +#define GPU_CLOCK_HPP + + +inline __host__ __device__ void +gpu_time_wait_for(float time, float clockrate) { + clock_t time_in_clocks = time*clockrate; + + unsigned int start_clock = (unsigned int) clock(); + clock_t clock_offset = 0; + while (clock_offset < time_in_clocks) + { + unsigned int end_clock = (unsigned int) clock(); + clock_offset = (clock_t)(end_clock - start_clock); + } +} + +int get_clockrate() +{ + //TODO: Generalize this... + int gpu_device = 0; + gpuDeviceProp_t deviceProp; + gpuGetDevice(&gpu_device); + gpuGetDeviceProperties(&deviceProp, gpu_device); + if (deviceProp.concurrentKernels == 0) + { + printf("> GPU does not support concurrent kernel execution\n"); + printf(" gpu kernel runs will be serialized\n"); + } + printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", + deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); + +#if defined(__arm__) || defined(__aarch64__) + return deviceProp.clockRate/1000; +#else + return deviceProp.clockRate; +#endif +} + +#endif // GPU_CLOCK_HPP diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index c8338b0c..d50a81d7 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -25,6 +25,21 @@ blt_add_test( NAME managed_array_test COMMAND managed_array_tests) +if (ENABLE_CUDA) + blt_add_executable( + NAME managed_array_resource_tests + SOURCES managed_array_resource_tests.cpp + DEPENDS_ON ${chai_integration_test_depends}) + + target_include_directories( + managed_array_resource_tests + PUBLIC ${PROJECT_BINARY_DIR}/include) + + blt_add_test( + NAME managed_array_resource_test + COMMAND managed_array_resource_tests) +endif() + if (CHAI_ENABLE_MANAGED_PTR) blt_add_executable( NAME managed_ptr_tests diff --git a/tests/integration/managed_array_resource_tests.cpp b/tests/integration/managed_array_resource_tests.cpp new file mode 100644 index 00000000..3fa86ceb --- /dev/null +++ b/tests/integration/managed_array_resource_tests.cpp @@ -0,0 +1,170 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-20, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: BSD-3-Clause
+//////////////////////////////////////////////////////////////////////////////
+#include "gtest/gtest.h"
+
+#define GPU_TEST(X, Y)              \
+  static void gpu_test_##X##Y();    \
+  TEST(X, Y) { gpu_test_##X##Y(); } \
+  static void gpu_test_##X##Y()
+
+#include "../src/util/forall.hpp"
+#include "../src/util/gpu_clock.hpp"
+
+#include "chai/ManagedArray.hpp"
+#include "chai/config.hpp"
+
+#ifdef CHAI_ENABLE_CUDA
+GPU_TEST(ManagedArray, Simple)
+{
+  constexpr std::size_t ARRAY_SIZE{1024};
+
+  camp::resources::Resource host{camp::resources::Host{}};
+  camp::resources::Resource device{camp::resources::Cuda{}};
+
+  chai::ManagedArray<double> array(ARRAY_SIZE);
+
+  forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    array[i] = i;
+  });
+
+  forall(&device, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    array[i] = array[i] * 2.0;
+  });
+
+  // print on host
+  forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    EXPECT_DOUBLE_EQ(array[i], i*2.0);
+  });
+
+  array.free();
+}
+
+GPU_TEST(ManagedArray, SimpleWithAsyncMoveFrom)
+{
+  constexpr std::size_t ARRAY_SIZE{1024};
+
+  camp::resources::Resource host{camp::resources::Host{}};
+  camp::resources::Resource device{camp::resources::Cuda{}};
+
+  chai::ManagedArray<double> array(ARRAY_SIZE);
+
+  forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    array[i] = i;
+  });
+
+  forall(&device, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    array[i] = array[i] * 2.0;
+  });
+
+  array.move(chai::CPU, &device);
+
+  // print on host
+  forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    EXPECT_DOUBLE_EQ(array[i], i*2.0);
+  });
+}
+
+GPU_TEST(ManagedArray, SimpleWithAsyncMoveTo)
+{
+  constexpr std::size_t ARRAY_SIZE{1024};
+
+  camp::resources::Resource host{camp::resources::Host{}};
+  camp::resources::Resource device{camp::resources::Cuda{}};
+
+  chai::ManagedArray<double> array(ARRAY_SIZE);
+
+  forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    array[i] = i;
+  });
+
+  array.move(chai::GPU, &device);
+
+  forall(&device, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    array[i] = array[i] * 2.0;
+  });
+
+  // print on host
+  forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    EXPECT_DOUBLE_EQ(array[i], i*2.0);
+  });
+
+  array.free();
+}
+
+GPU_TEST(ManagedArray, MultiStreamDepends)
+{
+  constexpr std::size_t ARRAY_SIZE{1024};
+  int clockrate{get_clockrate()};
+
+  camp::resources::Resource dev1{camp::resources::Cuda{}};
+  camp::resources::Resource dev2{camp::resources::Cuda{}};
+  camp::resources::Resource host{camp::resources::Host{}};
+
+  chai::ManagedArray<double> array1(ARRAY_SIZE);
+  chai::ManagedArray<double> array2(ARRAY_SIZE);
+
+  forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    array1[i] = i;
+    gpu_time_wait_for(10, clockrate);
+  });
+
+  auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    array2[i] = -1;
+    gpu_time_wait_for(20, clockrate);
+  });
+
+  dev1.wait_for(&e2);
+
+  forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    array1[i] *= array2[i];
+    gpu_time_wait_for(10, clockrate);
+  });
+
+  array1.move(chai::CPU, &dev1);
+
+  forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) {
+    EXPECT_DOUBLE_EQ(array1[i], i*-1.0);
+  });
+
+  array1.free();
+  array2.free();
+}
+
+GPU_TEST(ManagedArray, MultiStreamSingleArray)
+{
+  constexpr std::size_t ARRAY_SIZE{1024};
+  int clockrate{get_clockrate()};
+
+  chai::ManagedArray<double> array1(ARRAY_SIZE);
+
+  camp::resources::Resource dev1{camp::resources::Cuda{}};
+  camp::resources::Resource dev2{camp::resources::Cuda{}};
+
+
+  auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    if (i % 2 == 0) {
+      array1[i] = i;
+      gpu_time_wait_for(10, clockrate);
+    }
+  });
+
+  auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) {
+    if (i % 2 == 1) {
+      gpu_time_wait_for(20, clockrate);
+      array1[i] = i;
+    }
+  });
+
+  array1.move(chai::CPU, &dev1);
+
+  camp::resources::Resource host{camp::resources::Host{}};
+
+  forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) {
+    EXPECT_DOUBLE_EQ(array1[i], (double)i);
+  });
+}
+#endif //#ifdef CHAI_ENABLE_CUDA
diff --git a/tests/integration/managed_array_tests.cpp b/tests/integration/managed_array_tests.cpp
index 49858db9..c8a8aea7 100644
--- a/tests/integration/managed_array_tests.cpp
+++ b/tests/integration/managed_array_tests.cpp
@@ -677,7 +677,9 @@ GPU_TEST(ManagedArray, PodTestGPU)
 
 TEST(ManagedArray, ExternalConstructorUnowned)
 {
-  float* data = static_cast<float*>(std::malloc(100 * sizeof(float)));
+  // The CPU space could be host memory or host pinned memory
+  auto allocator = chai::ArrayManager::getInstance()->getAllocator(chai::CPU);
+  float* data = static_cast<float*>(allocator.allocate(100 * sizeof(float)));
 
   for (int i = 0; i < 100; i++) {
     data[i] = 1.0f * i;
@@ -686,7 +688,9 @@ TEST(ManagedArray, ExternalConstructorUnowned)
   chai::ManagedArray<float> array = chai::makeManagedArray<float>(data, 100, chai::CPU, false);
 
-  forall(sequential(), 0, 20, [=](int i) { ASSERT_EQ(data[i], array[i]); });
+  forall(sequential(), 0, 100, [=] (int i) {
+    ASSERT_EQ(data[i], array[i]);
+  });
 
   array.free();
 
@@ -694,7 +698,7 @@ TEST(ManagedArray, ExternalConstructorUnowned)
     ASSERT_EQ(data[i], 1.0f * i);
   }
 
-  std::free(data);
+  allocator.deallocate(data);
 
   assert_empty_map(true);
 }
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index c453bc67..704d1884 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -4,12 +4,28 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
 ##############################################################################
+
 set (chai_unit_test_depends chai umpire gtest)
 
 blt_list_append(TO chai_unit_test_depends ELEMENTS cuda IF ${CHAI_ENABLE_CUDA})
 blt_list_append(TO chai_unit_test_depends ELEMENTS blt::hip IF ${CHAI_ENABLE_HIP})
 
+# ExecutionSpace tests
+blt_add_executable(
+  NAME execution_space_unit_test
+  SOURCES execution_space_unit_tests.cpp
+  DEPENDS_ON ${chai_unit_test_depends})
+
+target_include_directories(
+  execution_space_unit_test
+  PUBLIC ${PROJECT_BINARY_DIR}/include)
+
+blt_add_test(
+  NAME execution_space_unit_test
+  COMMAND execution_space_unit_test)
+
+# ManagedArray tests
 blt_add_executable(
   NAME managed_array_unit_tests
   SOURCES managed_array_unit_tests.cpp
diff --git a/tests/unit/execution_space_unit_tests.cpp b/tests/unit/execution_space_unit_tests.cpp
new file mode 100644
index 00000000..384edd49
--- /dev/null
+++ b/tests/unit/execution_space_unit_tests.cpp
@@ -0,0 +1,83 @@
+// ---------------------------------------------------------------------
+// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All
+// rights reserved.
+//
+// Produced at the Lawrence Livermore National Laboratory.
+//
+// This file is part of CHAI.
+//
+// LLNL-CODE-705877
+//
+// For details, see https://github.com/LLNL/CHAI
+// Please also see the NOTICE and LICENSE files.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of the LLNS/LLNL nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// --------------------------------------------------------------------- + +#include "gtest/gtest.h" + +#include "chai/config.hpp" +#include "chai/ExecutionSpaces.hpp" + +TEST(ExecutionSpace, Platforms) +{ + ASSERT_TRUE(chai::CPU == camp::resources::Platform::host); + ASSERT_FALSE(chai::CPU == camp::resources::Platform::undefined); +#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) +#if defined(CHAI_ENABLE_CUDA) + ASSERT_TRUE(chai::GPU == camp::resources::Platform::cuda); +#else + ASSERT_TRUE(chai::GPU == camp::resources::Platform::hip); +#endif + ASSERT_FALSE(chai::GPU == camp::resources::Platform::undefined); +#endif +} + +TEST(ExecutionSpace, Host) +{ + camp::resources::Resource res{camp::resources::Host()}; + ASSERT_TRUE( chai::CPU == res.get().get_platform() ); +} + +#if defined(CHAI_ENABLE_CUDA) +TEST(ExecutionSpace, Cuda) +{ + camp::resources::Resource res{camp::resources::Cuda()}; + ASSERT_TRUE( chai::GPU == res.get().get_platform() ); +} +#endif // #if defined(CHAI_ENABLE_CUDA) + +#if defined(CHAI_ENABLE_HIP) +TEST(ExecutionSpace, Hip) +{ + camp::resources::Resource res{camp::resources::Hip()}; + ASSERT_TRUE( chai::GPU == res.get().get_platform() ); +} +#endif // #if defined(CHAI_ENABLE_HIP)
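
Reviewer note: the sketch below is not part of the patch; it is a minimal, illustrative summary of how the resource-aware API introduced above is meant to be used (array names and sizes are made up, and a CUDA build is assumed). See examples/resource-depends.cpp and the new integration tests for the complete, authoritative versions.

    // Launch work on two camp resources (streams); the camp Events returned by
    // the new forall overload are used to order work between them.
    camp::resources::Resource dev1{camp::resources::Cuda{}};
    camp::resources::Resource dev2{camp::resources::Cuda{}};

    chai::ManagedArray<float> a(100);
    chai::ManagedArray<float> b(100);

    auto e1 = forall(&dev1, 0, 100, [=] CHAI_HOST_DEVICE (int i) { a[i] = i; });
    auto e2 = forall(&dev2, 0, 100, [=] CHAI_HOST_DEVICE (int i) { b[i] = 2; });

    dev2.wait_for(&e1);  // order dev2's next kernel behind dev1's kernel

    forall(&dev2, 0, 100, [=] CHAI_HOST_DEVICE (int i) { a[i] += b[i]; });

    // Asynchronous move back to the host using dev2's stream, via the new
    // ManagedArray::move(ExecutionSpace, camp::resources::Resource*) overload.
    a.move(chai::CPU, &dev2);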