From 808172d9c1816176bf2b5101f263c4c905e896d4 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 25 Sep 2019 11:30:12 -0700 Subject: [PATCH 001/118] adding camp submodule --- .gitmodules | 3 +++ src/tpl/camp | 1 + 2 files changed, 4 insertions(+) create mode 160000 src/tpl/camp diff --git a/.gitmodules b/.gitmodules index 8564a4ae..28d0beb3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "src/tpl/umpire"] path = src/tpl/umpire url = https://github.com/LLNL/Umpire.git +[submodule "src/tpl/camp"] + path = src/tpl/camp + url = https://github.com/llnl/camp diff --git a/src/tpl/camp b/src/tpl/camp new file mode 160000 index 00000000..d0c4c754 --- /dev/null +++ b/src/tpl/camp @@ -0,0 +1 @@ +Subproject commit d0c4c754ba7fbcf2c864b57afc3fac9c1d310023 From ee604ad2618cebde5388ea9ad15a6722ea1b5f4b Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 25 Sep 2019 14:01:20 -0700 Subject: [PATCH 002/118] Switching camp branch to device, cmake register camp lib --- cmake/thirdparty/SetupChaiThirdparty.cmake | 11 +++++++++++ src/tpl/camp | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/cmake/thirdparty/SetupChaiThirdparty.cmake b/cmake/thirdparty/SetupChaiThirdparty.cmake index 7c50a86e..66d28162 100644 --- a/cmake/thirdparty/SetupChaiThirdparty.cmake +++ b/cmake/thirdparty/SetupChaiThirdparty.cmake @@ -52,3 +52,14 @@ if (DEFINED umpire_DIR) else () add_subdirectory(${PROJECT_SOURCE_DIR}/src/tpl/umpire) endif() + +if (DEFINED camp_DIR) + find_package(camp REQUIRED) + + blt_register_library( + NAME camp + INCLUDES ${CAMP_INCLUDE_DIRS} + LIBRARIES camp) +else () + add_subdirectory(${PROJECT_SOURCE_DIR}/src/tpl/camp) +endif() diff --git a/src/tpl/camp b/src/tpl/camp index d0c4c754..9569b20e 160000 --- a/src/tpl/camp +++ b/src/tpl/camp @@ -1 +1 @@ -Subproject commit d0c4c754ba7fbcf2c864b57afc3fac9c1d310023 +Subproject commit 9569b20e20a8fa2d5cb7006095c7435d198848f1 From 756b5dbbb64398dfddef138c03f6a44be2d5ca71 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 26 Sep 2019 15:06:01 -0700 Subject: [PATCH 003/118] Setting up example context test file --- examples/CMakeLists.txt | 5 ++++ examples/context.cpp | 21 ++++++++++++++ src/chai/CMakeLists.txt | 3 +- src/chai/ManagedArray.inl | 1 + src/util/forall.hpp | 61 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 examples/context.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 5abdc2e3..604242a7 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -55,6 +55,11 @@ if (ENABLE_HIP) hip) endif() +blt_add_executable( + NAME context-integration.exe + SOURCES context.cpp + DEPENDS_ON ${chai_umpire_example_depends}) + blt_add_executable( NAME chai-umpire-example.exe SOURCES chai-umpire-allocators.cpp diff --git a/examples/context.cpp b/examples/context.cpp new file mode 100644 index 00000000..3ea6971b --- /dev/null +++ b/examples/context.cpp @@ -0,0 +1,21 @@ +#include "camp/device.hpp" +#include "../src/util/forall.hpp" +#include "chai/ManagedArray.hpp" + +int main() +{ + std::cout << "Chai Context Implementation\n"; + + camp::devices::Context res_context{camp::devices::Host()}; + + chai::ManagedArray array(10); + + std::cout << "defining lambda" << std::endl; + auto lambda = [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }; + + std::cout << "calling forall with context" << std::endl; + forall(&res_context, 0, 10, lambda); + + array.free(); + return 0; +} diff --git a/src/chai/CMakeLists.txt b/src/chai/CMakeLists.txt index 7821fa3f..b5d4fefa 100644 --- a/src/chai/CMakeLists.txt +++ b/src/chai/CMakeLists.txt @@ -72,7 +72,8 @@ set (chai_sources ArrayManager.cpp) set (chai_depends - umpire) + umpire + camp) if (ENABLE_CUDA) set (chai_depends diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index 18f4db7a..2b8a1af1 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -175,6 +175,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(ManagedArray const& other): { #if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) move(m_resource_manager->getExecutionSpace()); + std::cout<<"Copying ManagedArray"< @@ -61,6 +63,7 @@ struct gpu { template void forall_kernel_cpu(int begin, int end, LOOP_BODY body) { + std::cout<<"for all loop"<setExecutionSpace(chai::NONE); } +template +camp::devices::Event forall_host(camp::devices::Context* dev, int begin, int end, LOOP_BODY body) +{ + chai::ArrayManager* rm = chai::ArrayManager::getInstance(); + +#if defined(CHAI_ENABLE_UM) + cudaDeviceSynchronize(); +#endif + + rm->setExecutionSpace(chai::CPU); + + auto host = dev->get(); + std::cout << "forall kernel cpu call\n"; + forall_kernel_cpu(begin, end, body); + + rm->setExecutionSpace(chai::NONE); + return dev->get_event(); +} + + #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) template @@ -120,6 +143,42 @@ void forall(gpu, int begin, int end, LOOP_BODY&& body) rm->setExecutionSpace(chai::NONE); } -#endif +template +camp::devices::Event forall_gpu(camp::devices::Context* dev, int begin, int end, LOOP_BODY&& body) +{ +// chai::ArrayManager* rm = chai::ArrayManager::getInstance(); + +// rm->setExecutionSpace(chai::GPU); + + size_t blockSize = 32; + size_t gridSize = (end - begin + blockSize - 1) / blockSize; + +//#if defined(CHAI_ENABLE_CUDA) + auto cuda = dev->get(); + forall_kernel_gpu<<>>(begin, end - begin, body); +//#elif defined(CHAI_ENABLE_HIP) +// hipLaunchKernelGGL(forall_kernel_gpu, dim3(gridSize), dim3(blockSize), 0,0, +// begin, end - begin, body); +// hipDeviceSynchronize(); +//#endif + +// rm->setExecutionSpace(chai::NONE); + return dev->get_event(); +} +#endif // if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) + +template +camp::devices::Event forall(camp::devices::Context *con, int begin, int end, LOOP_BODY&& body) +{ + auto platform = con->get_platform(); + switch(platform) { + case camp::devices::Platform::cuda: + case camp::devices::Platform::hip: + return forall_gpu(con, begin, end, body); + default: + std::cout << "forall host\n"; + return forall_host(con, begin, end, body); + } +} #endif // CHAI_forall_HPP From 71f6f6e541ba7ad2fafb2157918d8974a8ee896d Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Fri, 27 Sep 2019 09:48:00 -0700 Subject: [PATCH 004/118] Passing context down to ArrayManager::move --- src/chai/ArrayManager.cpp | 71 +++++++++++++++++++++++++++++++++++++++ src/chai/ArrayManager.hpp | 25 +++++++++++++- src/chai/ManagedArray.hpp | 1 + src/chai/ManagedArray.inl | 31 ++++++++++++++++- src/util/forall.hpp | 2 +- 5 files changed, 127 insertions(+), 3 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 54a32176..609f915f 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -119,6 +119,15 @@ void ArrayManager::setExecutionSpace(ExecutionSpace space) m_current_execution_space = space; } +void ArrayManager::setExecutionSpace(ExecutionSpace space, camp::devices::Context* context) +{ + CHAI_LOG("ArrayManager", "Setting execution space to " << space); + std::lock_guard lock(m_mutex); + + m_current_execution_space = space; + m_current_context = context; +} + void* ArrayManager::move(void* pointer, PointerRecord* pointer_record, ExecutionSpace space) @@ -136,12 +145,37 @@ void* ArrayManager::move(void* pointer, return pointer_record->m_pointers[space]; } +void* ArrayManager::move(void* pointer, + PointerRecord* pointer_record, + camp::devices::Context* context, + ExecutionSpace space) +{ + // Check for default arg (NONE) + if (space == NONE) { + space = m_current_execution_space; + } + + if (space == NONE) { + return pointer; + } + + move(pointer_record, space, context); + + return pointer_record->m_pointers[space]; +} + ExecutionSpace ArrayManager::getExecutionSpace() { return m_current_execution_space; } +camp::devices::Context* ArrayManager::getContext() +{ + return m_current_context; +} + + void ArrayManager::registerTouch(PointerRecord* pointer_record) { registerTouch(pointer_record, m_current_execution_space); @@ -204,6 +238,43 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space) resetTouch(record); } +void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devices::Context* context) +{ + if (space == NONE) { + return; + } + +#if defined(CHAI_ENABLE_UM) + if (record->m_last_space == UM) { + return; + } +#endif + + if (space == record->m_last_space) { + return; + } + + + void* src_pointer = record->m_pointers[record->m_last_space]; + void* dst_pointer = record->m_pointers[space]; + + if (!dst_pointer) { + allocate(record, space); + dst_pointer = record->m_pointers[space]; + } + + if (!record->m_touched[record->m_last_space]) { + auto dev = context->get(); + return; + } else { + callback(record, ACTION_MOVE, space, record->m_size); + std::lock_guard lock(m_mutex); + m_resource_manager.copy(dst_pointer, src_pointer); + } + + resetTouch(record); +} + void ArrayManager::allocate( PointerRecord* pointer_record, diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index 82221d24..9db79228 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -53,6 +53,8 @@ #include "umpire/Allocator.hpp" #include "umpire/util/MemoryMap.hpp" +#include "camp/device.hpp" + namespace chai { @@ -100,6 +102,12 @@ class ArrayManager * \param space The space to set as current. */ void setExecutionSpace(ExecutionSpace space); + /*! + * \brief Set the current execution space. + * + * \param space The space to set as current. + */ + void setExecutionSpace(ExecutionSpace space, camp::devices::Context *context); /*! * \brief Get the current execution space. @@ -108,6 +116,9 @@ class ArrayManager */ ExecutionSpace getExecutionSpace(); + + camp::devices::Context* getContext(); + /*! * \brief Move data in pointer to the current execution space. * @@ -117,6 +128,11 @@ class ArrayManager void* move(void* pointer, PointerRecord* pointer_record, ExecutionSpace = NONE); + void* move(void* pointer, + PointerRecord* pointer_record, + camp::devices::Context* context, + ExecutionSpace = NONE); + /*! * \brief Register a touch of the pointer in the current execution space. @@ -302,6 +318,7 @@ class ArrayManager * \param space */ void move(PointerRecord* record, ExecutionSpace space); + void move(PointerRecord* record, ExecutionSpace space, camp::devices::Context* context); /*! * \brief Execute a user callback if callbacks are active @@ -321,10 +338,16 @@ class ArrayManager } /*! - * Current execution space. + * current execution space. */ ExecutionSpace m_current_execution_space; + /*! + * current context. + */ + camp::devices::Context* m_current_context; + + /** * Default space for new allocations */ diff --git a/src/chai/ManagedArray.hpp b/src/chai/ManagedArray.hpp index 3bcabdad..6b975463 100644 --- a/src/chai/ManagedArray.hpp +++ b/src/chai/ManagedArray.hpp @@ -191,6 +191,7 @@ class ManagedArray : public CHAICopyable CHAI_HOST void registerTouch(ExecutionSpace space); CHAI_HOST void move(ExecutionSpace space); + CHAI_HOST void move(ExecutionSpace space, camp::devices::Context* context); CHAI_HOST ManagedArray slice(size_t begin, size_t end); /*! diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index 2b8a1af1..7d7d3e33 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -174,7 +174,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(ManagedArray const& other): m_is_slice(other.m_is_slice) { #if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) - move(m_resource_manager->getExecutionSpace()); + move(m_resource_manager->getExecutionSpace(), m_resource_manager->getContext()); std::cout<<"Copying ManagedArray"<::move(ExecutionSpace space) } #endif } +template +CHAI_INLINE +CHAI_HOST +void ManagedArray::move(ExecutionSpace space, camp::devices::Context* context) +{ + ExecutionSpace prev_space = m_pointer_record->m_last_space; + + /* When moving from CPU to GPU we need to move the inner arrays before the outer array. */ + if (prev_space == CPU) { + moveInnerImpl(space); + } + + m_active_base_pointer = static_cast(m_resource_manager->move(const_cast(m_active_base_pointer), m_pointer_record, context, space)); + m_active_pointer = m_active_base_pointer + m_offset; + + if (!std::is_const::value) { + CHAI_LOG("ManagedArray", "T is non-const, registering touch of pointer" << m_active_pointer); + m_resource_manager->registerTouch(m_pointer_record, space); + } + + if (space != NONE) m_pointer_record->m_last_space = space; + + /* When moving from GPU to CPU we need to move the inner arrays after the outer array. */ +#if defined(CHAI_ENABLE_CUDA) + if (prev_space == GPU) { + moveInnerImpl(space); + } +#endif +} template template diff --git a/src/util/forall.hpp b/src/util/forall.hpp index 997dbdef..4e5f005b 100644 --- a/src/util/forall.hpp +++ b/src/util/forall.hpp @@ -96,7 +96,7 @@ camp::devices::Event forall_host(camp::devices::Context* dev, int begin, int end cudaDeviceSynchronize(); #endif - rm->setExecutionSpace(chai::CPU); + rm->setExecutionSpace(chai::CPU, dev); auto host = dev->get(); std::cout << "forall kernel cpu call\n"; From c87454cc767df5ec210eeb7c6e24f9a489eef99d Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Fri, 4 Oct 2019 12:03:59 -0700 Subject: [PATCH 005/118] passing context to umpire copy --- cmake/thirdparty/SetupChaiThirdparty.cmake | 18 ++++++++++-------- src/chai/ArrayManager.cpp | 4 ++-- src/tpl/umpire | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/cmake/thirdparty/SetupChaiThirdparty.cmake b/cmake/thirdparty/SetupChaiThirdparty.cmake index 66d28162..c2e0284b 100644 --- a/cmake/thirdparty/SetupChaiThirdparty.cmake +++ b/cmake/thirdparty/SetupChaiThirdparty.cmake @@ -53,13 +53,15 @@ else () add_subdirectory(${PROJECT_SOURCE_DIR}/src/tpl/umpire) endif() -if (DEFINED camp_DIR) - find_package(camp REQUIRED) +if (NOT TARGET camp) + if (DEFINED camp_DIR) + find_package(camp REQUIRED) - blt_register_library( - NAME camp - INCLUDES ${CAMP_INCLUDE_DIRS} - LIBRARIES camp) -else () - add_subdirectory(${PROJECT_SOURCE_DIR}/src/tpl/camp) + blt_register_library( + NAME camp + INCLUDES ${CAMP_INCLUDE_DIRS} + LIBRARIES camp) + else () + add_subdirectory(${PROJECT_SOURCE_DIR}/src/tpl/camp) + endif() endif() diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 609f915f..c456effe 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -264,12 +264,12 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic } if (!record->m_touched[record->m_last_space]) { - auto dev = context->get(); +// auto dev = context->get(); return; } else { callback(record, ACTION_MOVE, space, record->m_size); std::lock_guard lock(m_mutex); - m_resource_manager.copy(dst_pointer, src_pointer); + m_resource_manager.copy(dst_pointer, src_pointer, *context); } resetTouch(record); diff --git a/src/tpl/umpire b/src/tpl/umpire index 82482fd7..3998730d 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit 82482fd7450ab378db110f06f7e0302112c22c05 +Subproject commit 3998730dbe2c656a2c2133a740b7b322342135ff From a34f1e7075efa78b3c44507ee20ae8ecacd000d6 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 7 Oct 2019 14:01:47 -0700 Subject: [PATCH 006/118] setting exec space in gpu forall --- examples/context.cpp | 14 ++++++++++---- src/util/forall.hpp | 6 +++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/examples/context.cpp b/examples/context.cpp index 3ea6971b..39d8ab2d 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -6,15 +6,21 @@ int main() { std::cout << "Chai Context Implementation\n"; - camp::devices::Context res_context{camp::devices::Host()}; + camp::devices::Context cuda_context{camp::devices::Cua()}; + camp::devices::Context host_context{camp::devices::Host()}; chai::ManagedArray array(10); std::cout << "defining lambda" << std::endl; - auto lambda = [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }; + auto lambda_set = [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }; + auto lambda_check = [=] CHAI_HOST_DEVICE (int i) { array[i] = 123; }; - std::cout << "calling forall with context" << std::endl; - forall(&res_context, 0, 10, lambda); + std::cout << "calling forall with cuda context" << std::endl; + auto e = forall(&cuda_context, 0, 10, lambda_set); + + e.wait(); + std::cout << "calling forall with host context" << std::endl; + forall(&host_context, 0, 10, lambda_check); array.free(); return 0; diff --git a/src/util/forall.hpp b/src/util/forall.hpp index 4e5f005b..fe4fb344 100644 --- a/src/util/forall.hpp +++ b/src/util/forall.hpp @@ -146,9 +146,9 @@ void forall(gpu, int begin, int end, LOOP_BODY&& body) template camp::devices::Event forall_gpu(camp::devices::Context* dev, int begin, int end, LOOP_BODY&& body) { -// chai::ArrayManager* rm = chai::ArrayManager::getInstance(); + chai::ArrayManager* rm = chai::ArrayManager::getInstance(); -// rm->setExecutionSpace(chai::GPU); + rm->setExecutionSpace(chai::GPU, dev); size_t blockSize = 32; size_t gridSize = (end - begin + blockSize - 1) / blockSize; @@ -162,7 +162,7 @@ camp::devices::Event forall_gpu(camp::devices::Context* dev, int begin, int end, // hipDeviceSynchronize(); //#endif -// rm->setExecutionSpace(chai::NONE); + rm->setExecutionSpace(chai::NONE); return dev->get_event(); } #endif // if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) From 98a56fcbda2587a718b31d80dda29e46be00048f Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Tue, 8 Oct 2019 11:02:32 -0700 Subject: [PATCH 007/118] loop over multiple arrays --- examples/context.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/examples/context.cpp b/examples/context.cpp index 39d8ab2d..b4883eb5 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -9,14 +9,25 @@ int main() camp::devices::Context cuda_context{camp::devices::Cua()}; camp::devices::Context host_context{camp::devices::Host()}; - chai::ManagedArray array(10); + std::vector< chai::ManagedArray > arrays(5); + + for (auto array : arrays) { + camp::devices::Context ctx{camp::devices::Cua()}; + auto e = forall(&ctx, 0, 10, + [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }); + array.move(chai::CPU, ctx); + } + + for (auto array : arrays) { + camp::devices::Context ctx{camp::devices::Host{}}; + auto e = forall(&ctx, 0,10, [=] CHAI_HOST_DEVICE (int i) { array[i] = 123; }); + } std::cout << "defining lambda" << std::endl; auto lambda_set = [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }; auto lambda_check = [=] CHAI_HOST_DEVICE (int i) { array[i] = 123; }; std::cout << "calling forall with cuda context" << std::endl; - auto e = forall(&cuda_context, 0, 10, lambda_set); e.wait(); std::cout << "calling forall with host context" << std::endl; From 877c4cdb42e21c630f3779158d1f20a50951bb8f Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 9 Oct 2019 14:33:53 -0700 Subject: [PATCH 008/118] ExecutionSpace/Platform equivalence operator. --- src/chai/ExecutionSpaces.hpp | 11 ++++ tests/unit/CMakeLists.txt | 24 +++++++ tests/unit/execution_space_unit_tests.cpp | 80 +++++++++++++++++++++++ 3 files changed, 115 insertions(+) create mode 100644 tests/unit/execution_space_unit_tests.cpp diff --git a/src/chai/ExecutionSpaces.hpp b/src/chai/ExecutionSpaces.hpp index f75ec0ec..08eec673 100644 --- a/src/chai/ExecutionSpaces.hpp +++ b/src/chai/ExecutionSpaces.hpp @@ -44,6 +44,7 @@ #define CHAI_ExecutionSpaces_HPP #include "chai/config.hpp" +#include "camp/device.hpp" namespace chai { @@ -68,6 +69,16 @@ enum ExecutionSpace { NUM_EXECUTION_SPACES }; +inline bool operator==(const ExecutionSpace& s, const camp::devices::Platform& p) { + if(s == chai::CPU && p == camp::devices::Platform::host) return true; +#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) + /*! Execution in GPU space */ + if (s == chai::GPU && (p == camp::devices::Platform::cuda || + p == camp::devices::Platform::hip)) return true; +#endif + return false; +} + } // end of namespace chai #endif // CHAI_ExecutionSpaces_HPP diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index fea76c55..4900178c 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -41,6 +41,9 @@ # POSSIBILITY OF SUCH DAMAGE. ####################################################################### +set (execution_space_test_depends + chai umpire gtest) + set (managed_array_test_depends chai umpire gtest) @@ -48,6 +51,9 @@ set (array_manager_test_depends chai umpire gtest) if (ENABLE_CUDA) + set (execution_space_test_depends + ${execution_space_test_depends} + cuda) set (managed_array_test_depends ${managed_array_test_depends} cuda) @@ -56,6 +62,9 @@ if (ENABLE_CUDA) cuda) endif () if (ENABLE_HIP) + set (execution_space_test_depends + ${execution_space_test_depends} + hip) set (managed_array_test_depends ${managed_array_test_depends} hip) @@ -64,6 +73,21 @@ if (ENABLE_HIP) hip) endif () +# ExecutionSpace tests +blt_add_executable( + NAME execution_space_unit_test + SOURCES execution_space_unit_tests.cpp + DEPENDS_ON ${execution_space_test_depends}) + +target_include_directories( + execution_space_unit_test + PUBLIC ${PROJECT_BINARY_DIR}/include) + +blt_add_test( + NAME execution_space_unit_test + COMMAND execution_space_unit_test) + +# ManagedArray tests blt_add_executable( NAME managed_array_unit_tests SOURCES managed_array_unit_tests.cpp diff --git a/tests/unit/execution_space_unit_tests.cpp b/tests/unit/execution_space_unit_tests.cpp new file mode 100644 index 00000000..21649e14 --- /dev/null +++ b/tests/unit/execution_space_unit_tests.cpp @@ -0,0 +1,80 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All +// rights reserved. +// +// Produced at the Lawrence Livermore National Laboratory. +// +// This file is part of CHAI. +// +// LLNL-CODE-705877 +// +// For details, see https:://github.com/LLNL/CHAI +// Please also see the NOTICE and LICENSE files. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of the LLNS/LLNL nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// --------------------------------------------------------------------- + +#include "gtest/gtest.h" + +#include "chai/config.hpp" +#include "chai/ExecutionSpaces.hpp" + +TEST(ExecutionSpace, Platforms) +{ + ASSERT_TRUE(chai::CPU == camp::devices::Platform::host); + ASSERT_FALSE(chai::CPU == camp::devices::Platform::undefined); +#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) + ASSERT_TRUE(chai::GPU == camp::devices::Platform::cuda); + ASSERT_TRUE(chai::GPU == camp::devices::Platform::hip); + ASSERT_FALSE(chai::GPU == camp::devices::Platform::undefined); +#endif +} + +TEST(ExecutionSpace, Host) +{ + camp::devices::Context ctx{camp::devices::Host()}; + ASSERT_TRUE( chai::CPU == ctx.get().get_platform() ); +} + +#if defined(CHAI_ENABLE_CUDA) +TEST(ExecutionSpace, Cuda) +{ + camp::devices::Context ctx{camp::devices::Cuda()}; + ASSERT_TRUE( chai::GPU == ctx.get().get_platform() ); +} +#endif // #if defined(CHAI_ENABLE_CUDA) + +#if defined(CHAI_ENABLE_HIP) +TEST(ExecutionSpace, Hip) +{ + camp::devices::Context ctx{camp::devices::Hip()}; + ASSERT_TRUE( chai::GPU == ctx.get().get_platform() ); +} +#endif // #if defined(CHAI_ENABLE_CUDA) From d7f3e44646af66af08b24e28a9141c2871115771 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 10 Oct 2019 11:07:10 -0700 Subject: [PATCH 009/118] Single memory transfer --- examples/context.cpp | 35 +++++++++++++++++------------------ src/chai/ArrayManager.cpp | 24 +++++++++++++++++++++++- src/chai/ManagedArray.inl | 2 +- src/chai/PointerRecord.hpp | 7 +++++++ 4 files changed, 48 insertions(+), 20 deletions(-) diff --git a/examples/context.cpp b/examples/context.cpp index b4883eb5..072a7411 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -6,33 +6,32 @@ int main() { std::cout << "Chai Context Implementation\n"; - camp::devices::Context cuda_context{camp::devices::Cua()}; - camp::devices::Context host_context{camp::devices::Host()}; + //camp::devices::Context cuda_context{camp::devices::Cuda()}; + //camp::devices::Context host_context{camp::devices::Host()}; - std::vector< chai::ManagedArray > arrays(5); + std::vector< chai::ManagedArray > arrays(1); + + std::cout << "calling forall with cuda context" << std::endl; for (auto array : arrays) { - camp::devices::Context ctx{camp::devices::Cua()}; - auto e = forall(&ctx, 0, 10, - [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }); - array.move(chai::CPU, ctx); + auto lambda_set = [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }; + camp::devices::Context ctx{camp::devices::Cuda()}; + array.allocate(10); + auto e = forall(&ctx, 0, 10, lambda_set); + array.move(chai::CPU, &ctx); } + std::cout << "calling forall with host context" << std::endl; for (auto array : arrays) { + auto lambda_check = [=] CHAI_HOST_DEVICE (int i) { array[i] = 123; }; camp::devices::Context ctx{camp::devices::Host{}}; - auto e = forall(&ctx, 0,10, [=] CHAI_HOST_DEVICE (int i) { array[i] = 123; }); + auto e = forall(&ctx, 0, 10, lambda_check); } - std::cout << "defining lambda" << std::endl; - auto lambda_set = [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }; - auto lambda_check = [=] CHAI_HOST_DEVICE (int i) { array[i] = 123; }; - - std::cout << "calling forall with cuda context" << std::endl; - - e.wait(); - std::cout << "calling forall with host context" << std::endl; - forall(&host_context, 0, 10, lambda_check); + for (auto array : arrays) { + std::cout<< array[0] << std::endl; + } - array.free(); + for (auto a : arrays) a.free(); return 0; } diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index c456effe..6f01afff 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -269,7 +269,29 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic } else { callback(record, ACTION_MOVE, space, record->m_size); std::lock_guard lock(m_mutex); - m_resource_manager.copy(dst_pointer, src_pointer, *context); + record->m_last_context->print_platform(); + std::cout << (record->m_last_context == nullptr) << std::endl; + + //if (space == chai::CPU && transfer_pending) { + // context->wait_on(&m_event); + // transfer_pending = false; + //} + camp::devices::Context* ctx; + if (space == chai::CPU){ + ctx = record->m_last_context; + }else{ + ctx = context; + } + auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); + if (space == chai::CPU){ + e.wait(); + } + //if (space == chai::CPU && context->get_platform() == camp::devices::Platform::Cuda){ + // transfer_pending = true; + // m_event = e; + //} + + //if (transfer_pending) context->wait_on(&e); } resetTouch(record); diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index 7d7d3e33..e39894b4 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -175,7 +175,6 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(ManagedArray const& other): { #if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) move(m_resource_manager->getExecutionSpace(), m_resource_manager->getContext()); - std::cout<<"Copying ManagedArray"<::move(ExecutionSpace space, camp::devices::Context* context } if (space != NONE) m_pointer_record->m_last_space = space; + if (space != NONE) m_pointer_record->m_last_context = context; /* When moving from GPU to CPU we need to move the inner arrays after the outer array. */ #if defined(CHAI_ENABLE_CUDA) diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index da4f344e..d8c4eb67 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -46,6 +46,8 @@ #include "chai/ExecutionSpaces.hpp" #include "chai/Types.hpp" +#include "camp/device.hpp" + #include #include @@ -91,6 +93,11 @@ struct PointerRecord { UserCallback m_user_callback; int m_allocators[NUM_EXECUTION_SPACES]; + + + //bool transfer_pending; + //camp::devices::Event m_event; + camp::devices::Context* m_last_context = nullptr; }; } // end of namespace chai From 2c44762b697ea297d9e01a0192dbbed55e6a8625 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 10 Oct 2019 14:39:04 -0700 Subject: [PATCH 010/118] gpu -> cpu transfer, move blocking --- examples/context.cpp | 72 +++++++++++++++++++++++++++++++------- src/chai/ArrayManager.cpp | 22 ++++++------ src/chai/PointerRecord.hpp | 4 +-- 3 files changed, 72 insertions(+), 26 deletions(-) diff --git a/examples/context.cpp b/examples/context.cpp index 072a7411..ffbc335e 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -1,35 +1,83 @@ #include "camp/device.hpp" #include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" +#include int main() { std::cout << "Chai Context Implementation\n"; - //camp::devices::Context cuda_context{camp::devices::Cuda()}; - //camp::devices::Context host_context{camp::devices::Host()}; - std::vector< chai::ManagedArray > arrays(1); + float kernel_time = 20; + int cuda_device = 0; + cudaDeviceProp deviceProp; + cudaGetDevice(&cuda_device); + cudaGetDeviceProperties(&deviceProp, cuda_device); + if ((deviceProp.concurrentKernels == 0)) + { + printf("> GPU does not support concurrent kernel execution\n"); + printf(" CUDA kernel runs will be serialized\n"); + } + printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", + deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); + +#if defined(__arm__) || defined(__aarch64__) + clock_t time_clocks = (clock_t)(kernel_time * (deviceProp.clockRate / 1000)); +#else + clock_t time_clocks = (clock_t)(kernel_time * deviceProp.clockRate); +#endif + + + + const int NUM_ARRAYS = 2; + const int ARRAY_SIZE = 10; + std::vector< chai::ManagedArray > arrays; + std::vector< camp::devices::Context > cuda_ctx; + for (int i = 0; i < NUM_ARRAYS; i++) { + arrays.push_back(chai::ManagedArray(10)); + cuda_ctx.push_back(camp::devices::Context{camp::devices::Cuda()}); + } std::cout << "calling forall with cuda context" << std::endl; - for (auto array : arrays) { - auto lambda_set = [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }; - camp::devices::Context ctx{camp::devices::Cuda()}; - array.allocate(10); - auto e = forall(&ctx, 0, 10, lambda_set); - array.move(chai::CPU, &ctx); + for (int i = 0; i < NUM_ARRAYS; i++) { + auto array = arrays[i]; auto ctx = &cuda_ctx[i]; + + auto clock_lambda_1 = [=] CHAI_HOST_DEVICE (int idx) { + array[idx] = idx * 2; + unsigned int start_clock = (unsigned int) clock(); + clock_t clock_offset = 0; + while (clock_offset < time_clocks) + { + unsigned int end_clock = (unsigned int) clock(); + clock_offset = (clock_t)(end_clock - start_clock); + } + }; + + auto e = forall(ctx, 0, ARRAY_SIZE, clock_lambda_1); + //array.move(chai::CPU, ctx); // asynchronous move + } + + + for (int i = 0; i < NUM_ARRAYS; i++) { + auto array = arrays[i]; auto ctx = &cuda_ctx[i]; + array.move(chai::CPU, ctx); // asynchronous move } std::cout << "calling forall with host context" << std::endl; for (auto array : arrays) { - auto lambda_check = [=] CHAI_HOST_DEVICE (int i) { array[i] = 123; }; + auto clock_lambda_2 = [=] CHAI_HOST_DEVICE (int idx) { + array[idx] *= array[idx]; + }; camp::devices::Context ctx{camp::devices::Host{}}; - auto e = forall(&ctx, 0, 10, lambda_check); + auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_2); } for (auto array : arrays) { - std::cout<< array[0] << std::endl; + for (int i = 0; i < ARRAY_SIZE; i++) { + std::cout<< array[i] << " "; + } + std::cout << std::endl; } for (auto a : arrays) a.free(); diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 6f01afff..a3365201 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -269,13 +269,11 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic } else { callback(record, ACTION_MOVE, space, record->m_size); std::lock_guard lock(m_mutex); - record->m_last_context->print_platform(); - std::cout << (record->m_last_context == nullptr) << std::endl; - //if (space == chai::CPU && transfer_pending) { - // context->wait_on(&m_event); - // transfer_pending = false; - //} + if (space == chai::CPU && record->transfer_pending) { + // record->m_last_context->wait_on(&record->m_event); + record->transfer_pending = false; + } camp::devices::Context* ctx; if (space == chai::CPU){ ctx = record->m_last_context; @@ -283,13 +281,13 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic ctx = context; } auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); - if (space == chai::CPU){ - e.wait(); - } - //if (space == chai::CPU && context->get_platform() == camp::devices::Platform::Cuda){ - // transfer_pending = true; - // m_event = e; + //if (space == chai::CPU){ + // e.wait(); //} + if (space == chai::CPU && chai::GPU == context->get_platform()){ + record->transfer_pending = true; + record->m_event = e; + } //if (transfer_pending) context->wait_on(&e); } diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index d8c4eb67..07f01d6b 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -95,8 +95,8 @@ struct PointerRecord { int m_allocators[NUM_EXECUTION_SPACES]; - //bool transfer_pending; - //camp::devices::Event m_event; + bool transfer_pending; + camp::devices::Event m_event; camp::devices::Context* m_last_context = nullptr; }; From fac090e53e99c7dd02ac02b1b89e6ef3d82ac2ba Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 10 Oct 2019 16:05:47 -0700 Subject: [PATCH 011/118] async copies, segfault on print --- examples/context.cpp | 21 ++++++++++++--------- src/chai/ArrayManager.cpp | 5 +++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/examples/context.cpp b/examples/context.cpp index ffbc335e..7b70acce 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -35,14 +35,16 @@ int main() std::vector< chai::ManagedArray > arrays; std::vector< camp::devices::Context > cuda_ctx; for (int i = 0; i < NUM_ARRAYS; i++) { - arrays.push_back(chai::ManagedArray(10)); + arrays.push_back(chai::ManagedArray(10, chai::GPU)); cuda_ctx.push_back(camp::devices::Context{camp::devices::Cuda()}); } std::cout << "calling forall with cuda context" << std::endl; - for (int i = 0; i < NUM_ARRAYS; i++) { - auto array = arrays[i]; auto ctx = &cuda_ctx[i]; + for (auto array : arrays) { + //for (int i = 0; i < NUM_ARRAYS; i++) { + // auto array = arrays[i]; auto ctx = &cuda_ctx[i]; + camp::devices::Context ctx{camp::devices::Cuda()}; auto clock_lambda_1 = [=] CHAI_HOST_DEVICE (int idx) { array[idx] = idx * 2; unsigned int start_clock = (unsigned int) clock(); @@ -54,15 +56,15 @@ int main() } }; - auto e = forall(ctx, 0, ARRAY_SIZE, clock_lambda_1); - //array.move(chai::CPU, ctx); // asynchronous move + auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_1); + array.move(chai::CPU, &ctx); // asynchronous move } - for (int i = 0; i < NUM_ARRAYS; i++) { - auto array = arrays[i]; auto ctx = &cuda_ctx[i]; - array.move(chai::CPU, ctx); // asynchronous move - } + //for (int i = 0; i < NUM_ARRAYS; i++) { + // auto array = arrays[i]; auto ctx = &cuda_ctx[i]; + // array.move(chai::CPU, ctx); // asynchronous move + //} std::cout << "calling forall with host context" << std::endl; for (auto array : arrays) { @@ -73,6 +75,7 @@ int main() auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_2); } + std::cout << "printing..." << std::endl; for (auto array : arrays) { for (int i = 0; i < ARRAY_SIZE; i++) { std::cout<< array[i] << " "; diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index a3365201..fe6e834f 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -69,7 +69,8 @@ ArrayManager::ArrayManager() : m_default_allocation_space = CPU; m_allocators[CPU] = - new umpire::Allocator(m_resource_manager.getAllocator("HOST")); + //new umpire::Allocator(m_resource_manager.getAllocator("HOST")); + new umpire::Allocator(m_resource_manager.getAllocator("PINNED")); #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) m_allocators[GPU] = new umpire::Allocator(m_resource_manager.getAllocator("DEVICE")); @@ -271,7 +272,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic std::lock_guard lock(m_mutex); if (space == chai::CPU && record->transfer_pending) { - // record->m_last_context->wait_on(&record->m_event); + record->m_last_context->wait_on(&record->m_event); record->transfer_pending = false; } camp::devices::Context* ctx; From 61ef0abc0ca13b9dc41b8635fe1c4b75dcc3f52c Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 17 Oct 2019 13:09:18 -0700 Subject: [PATCH 012/118] switching to forall loop --- examples/context.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/examples/context.cpp b/examples/context.cpp index 7b70acce..28c2ff40 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -30,7 +30,7 @@ int main() - const int NUM_ARRAYS = 2; + const int NUM_ARRAYS = 1; const int ARRAY_SIZE = 10; std::vector< chai::ManagedArray > arrays; std::vector< camp::devices::Context > cuda_ctx; @@ -75,11 +75,15 @@ int main() auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_2); } - std::cout << "printing..." << std::endl; + //std::cout << "printing..." << std::endl; for (auto array : arrays) { - for (int i = 0; i < ARRAY_SIZE; i++) { - std::cout<< array[i] << " "; - } + auto print = [=] (int idx) { + int val = array[idx]; + std::cout<< val << " "; + //printf("%i ", array[idx]); + }; + //camp::devices::Context ctx{camp::devices::Host{}}; + forall(sequential(), 0, ARRAY_SIZE, print); std::cout << std::endl; } From 9fe6a0474ca4b37d469a049a1f43e5d0835e3501 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 17 Oct 2019 15:58:15 -0700 Subject: [PATCH 013/118] trying to print --- examples/context.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/examples/context.cpp b/examples/context.cpp index 28c2ff40..4762d015 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -70,20 +70,28 @@ int main() for (auto array : arrays) { auto clock_lambda_2 = [=] CHAI_HOST_DEVICE (int idx) { array[idx] *= array[idx]; + printf("%i ", array[idx]); }; camp::devices::Context ctx{camp::devices::Host{}}; auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_2); } + for(auto array : arrays) { + auto p = array.getActiveBasePointer(); + auto a = chai::ArrayManager::getPointerRecord((void *)p); + std::cout << a << std::endl; + } + //std::cout << "printing..." << std::endl; for (auto array : arrays) { - auto print = [=] (int idx) { - int val = array[idx]; - std::cout<< val << " "; - //printf("%i ", array[idx]); + auto print = [=] CHAI_HOST_DEVICE (int idx) { + float val = array[idx]; + //std::cout<< val << " "; + printf("%i ", array[idx]); }; - //camp::devices::Context ctx{camp::devices::Host{}}; - forall(sequential(), 0, ARRAY_SIZE, print); + camp::devices::Context ctx{camp::devices::Host{}}; + forall(&ctx, 0, ARRAY_SIZE, print); + //forall(sequential(), 0, ARRAY_SIZE, print); std::cout << std::endl; } From 1ef9f9de240bbb2882e73b405eac99059e1ea3af Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Fri, 18 Oct 2019 12:03:53 -0700 Subject: [PATCH 014/118] Tweak context exampel --- examples/context.cpp | 47 ++++++++++++-------------------------------- 1 file changed, 13 insertions(+), 34 deletions(-) diff --git a/examples/context.cpp b/examples/context.cpp index 4762d015..3539d1a6 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -41,57 +41,36 @@ int main() std::cout << "calling forall with cuda context" << std::endl; for (auto array : arrays) { - //for (int i = 0; i < NUM_ARRAYS; i++) { - // auto array = arrays[i]; auto ctx = &cuda_ctx[i]; - camp::devices::Context ctx{camp::devices::Cuda()}; - auto clock_lambda_1 = [=] CHAI_HOST_DEVICE (int idx) { + auto e = forall(&ctx, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int idx) { array[idx] = idx * 2; unsigned int start_clock = (unsigned int) clock(); clock_t clock_offset = 0; - while (clock_offset < time_clocks) - { - unsigned int end_clock = (unsigned int) clock(); - clock_offset = (clock_t)(end_clock - start_clock); + while (clock_offset < time_clocks) { + unsigned int end_clock = (unsigned int) clock(); + clock_offset = (clock_t)(end_clock - start_clock); } - }; - - auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_1); + }); array.move(chai::CPU, &ctx); // asynchronous move } - - //for (int i = 0; i < NUM_ARRAYS; i++) { - // auto array = arrays[i]; auto ctx = &cuda_ctx[i]; - // array.move(chai::CPU, ctx); // asynchronous move - //} - std::cout << "calling forall with host context" << std::endl; + for (auto array : arrays) { - auto clock_lambda_2 = [=] CHAI_HOST_DEVICE (int idx) { + camp::devices::Context ctx{camp::devices::Host{}}; + auto e = forall(&ctx, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int idx) { array[idx] *= array[idx]; printf("%i ", array[idx]); - }; - camp::devices::Context ctx{camp::devices::Host{}}; - auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_2); - } - - for(auto array : arrays) { - auto p = array.getActiveBasePointer(); - auto a = chai::ArrayManager::getPointerRecord((void *)p); - std::cout << a << std::endl; + }); } - //std::cout << "printing..." << std::endl; + std::cout << "printing..." << std::endl; for (auto array : arrays) { - auto print = [=] CHAI_HOST_DEVICE (int idx) { + camp::devices::Context ctx{camp::devices::Host{}}; + forall(&ctx, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int idx) { float val = array[idx]; - //std::cout<< val << " "; printf("%i ", array[idx]); - }; - camp::devices::Context ctx{camp::devices::Host{}}; - forall(&ctx, 0, ARRAY_SIZE, print); - //forall(sequential(), 0, ARRAY_SIZE, print); + }); std::cout << std::endl; } From 7c20b10e867c5eaaae103e54bb4f36f9f421ad84 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Fri, 18 Oct 2019 12:38:37 -0700 Subject: [PATCH 015/118] print w & w/o context, remove forall debug output --- examples/context.cpp | 60 +++++++++++++++++++++++++++++++------------- src/util/forall.hpp | 3 --- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/examples/context.cpp b/examples/context.cpp index 4762d015..8e370634 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -29,7 +29,6 @@ int main() #endif - const int NUM_ARRAYS = 1; const int ARRAY_SIZE = 10; std::vector< chai::ManagedArray > arrays; @@ -39,6 +38,37 @@ int main() cuda_ctx.push_back(camp::devices::Context{camp::devices::Cuda()}); } + std::cout << "Setting all = 10 w/ Context\n"; + for (auto array : arrays) { + auto clock_lambda_2 = [=] CHAI_HOST_DEVICE (int idx) { + array[idx] = 10; + }; + camp::devices::Context ctx{camp::devices::Host{}}; + auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_2); + } + + std::cout << "std cout WITHOUT Context : "; + for (auto array : arrays) { + auto print = [=] (int idx) { + float val = array[idx]; + std::cout<< val << " "; + }; + camp::devices::Context ctx{camp::devices::Host{}}; + forall(sequential(), 0, ARRAY_SIZE, print); + std::cout << std::endl; + } + + std::cout << "printf w/ Context : "; + for (auto array : arrays) { + auto print = [=] CHAI_HOST_DEVICE (int idx) { + float val = array[idx]; + printf("%i ", array[idx]); + }; + camp::devices::Context ctx{camp::devices::Host{}}; + forall(&ctx, 0, ARRAY_SIZE, print); + printf("\n"); + } + std::cout << "calling forall with cuda context" << std::endl; for (auto array : arrays) { //for (int i = 0; i < NUM_ARRAYS; i++) { @@ -60,41 +90,35 @@ int main() array.move(chai::CPU, &ctx); // asynchronous move } - - //for (int i = 0; i < NUM_ARRAYS; i++) { - // auto array = arrays[i]; auto ctx = &cuda_ctx[i]; - // array.move(chai::CPU, ctx); // asynchronous move - //} - std::cout << "calling forall with host context" << std::endl; for (auto array : arrays) { auto clock_lambda_2 = [=] CHAI_HOST_DEVICE (int idx) { array[idx] *= array[idx]; - printf("%i ", array[idx]); }; camp::devices::Context ctx{camp::devices::Host{}}; auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_2); } - for(auto array : arrays) { - auto p = array.getActiveBasePointer(); - auto a = chai::ArrayManager::getPointerRecord((void *)p); - std::cout << a << std::endl; - } - - //std::cout << "printing..." << std::endl; + std::cout << "printf w/ Context : "; for (auto array : arrays) { auto print = [=] CHAI_HOST_DEVICE (int idx) { float val = array[idx]; - //std::cout<< val << " "; printf("%i ", array[idx]); }; camp::devices::Context ctx{camp::devices::Host{}}; forall(&ctx, 0, ARRAY_SIZE, print); - //forall(sequential(), 0, ARRAY_SIZE, print); + printf("\n"); + } + std::cout << "std cout WITHOUT Context : "; + for (auto array : arrays) { + auto print = [=] (int idx) { + float val = array[idx]; + std::cout<< val << " "; + }; + camp::devices::Context ctx{camp::devices::Host{}}; + forall(sequential(), 0, ARRAY_SIZE, print); std::cout << std::endl; } - for (auto a : arrays) a.free(); return 0; } diff --git a/src/util/forall.hpp b/src/util/forall.hpp index fe4fb344..9d5e33d6 100644 --- a/src/util/forall.hpp +++ b/src/util/forall.hpp @@ -63,7 +63,6 @@ struct gpu { template void forall_kernel_cpu(int begin, int end, LOOP_BODY body) { - std::cout<<"for all loop"<setExecutionSpace(chai::CPU, dev); auto host = dev->get(); - std::cout << "forall kernel cpu call\n"; forall_kernel_cpu(begin, end, body); rm->setExecutionSpace(chai::NONE); @@ -176,7 +174,6 @@ camp::devices::Event forall(camp::devices::Context *con, int begin, int end, LOO case camp::devices::Platform::hip: return forall_gpu(con, begin, end, body); default: - std::cout << "forall host\n"; return forall_host(con, begin, end, body); } } From feaaa5108381d5efa5c3a40973553f801245a2af Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Fri, 18 Oct 2019 12:42:08 -0700 Subject: [PATCH 016/118] forcing wait after copy --- src/chai/ArrayManager.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index fe6e834f..17a74deb 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -282,9 +282,9 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic ctx = context; } auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); - //if (space == chai::CPU){ - // e.wait(); - //} + if (space == chai::CPU){ + e.wait(); + } if (space == chai::CPU && chai::GPU == context->get_platform()){ record->transfer_pending = true; record->m_event = e; From b30d107eaa506f35f80fe1d4400a2e61cedba703 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Fri, 18 Oct 2019 12:45:34 -0700 Subject: [PATCH 017/118] Fixup logging in ManagedArray --- src/chai/ManagedArray.inl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index 8c11c460..c6b79644 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -364,7 +364,7 @@ void ManagedArray::move(ExecutionSpace space, camp::devices::Context* context m_active_pointer = m_active_base_pointer + m_offset; if (!std::is_const::value) { - CHAI_LOG("ManagedArray", "T is non-const, registering touch of pointer" << m_active_pointer); + CHAI_LOG(Debug, "T is non-const, registering touch of pointer" << m_active_pointer); m_resource_manager->registerTouch(m_pointer_record, space); } From d877248606a6a459ffdfe43757f2994ae2dc5e2f Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Fri, 18 Oct 2019 12:45:59 -0700 Subject: [PATCH 018/118] Update logging in ArrayManager --- src/chai/ArrayManager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 1c98202e..c28899fe 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -85,7 +85,7 @@ void ArrayManager::setExecutionSpace(ExecutionSpace space) void ArrayManager::setExecutionSpace(ExecutionSpace space, camp::devices::Context* context) { - CHAI_LOG("ArrayManager", "Setting execution space to " << space); + CHAI_LOG(Debug, "Setting execution space to " << space); std::lock_guard lock(m_mutex); m_current_execution_space = space; From ec14672f8076b6079a3ad541ba76e5cd64e819ae Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Fri, 18 Oct 2019 13:08:15 -0700 Subject: [PATCH 019/118] Add dummy context example --- examples/CMakeLists.txt | 5 +++++ examples/context-simple.cpp | 29 +++++++++++++++++++++++++++++ src/chai/ArrayManager.cpp | 7 +++---- 3 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 examples/context-simple.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 671f419e..1b7ca1ed 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -23,6 +23,11 @@ blt_add_executable( SOURCES context.cpp DEPENDS_ON ${chai_umpire_example_depends}) +blt_add_executable( + NAME context-simple.exe + SOURCES context-simple.cpp + DEPENDS_ON ${chai_umpire_example_depends}) + blt_add_executable( NAME chai-umpire-example.exe SOURCES chai-umpire-allocators.cpp diff --git a/examples/context-simple.cpp b/examples/context-simple.cpp new file mode 100644 index 00000000..55b450e0 --- /dev/null +++ b/examples/context-simple.cpp @@ -0,0 +1,29 @@ +#include "camp/device.hpp" +#include "../src/util/forall.hpp" +#include "chai/ManagedArray.hpp" + +int main() +{ + camp::devices::Context host{camp::devices::Host{}}; + camp::devices::Context device{camp::devices::Cuda{}}; + + constexpr std::size_t ARRAY_SIZE{1024}; + + chai::ManagedArray array(ARRAY_SIZE); + + // set on host + forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + array[i] = i; + }); + + + // double on device + forall(&device, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + array[i] = array[i] * 2.0; + }); + + // print on host + forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + printf("array[%d] = %f \n", i, array[i]); + }); +} diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index cd53474d..8292b726 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -234,7 +234,8 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic std::lock_guard lock(m_mutex); if (space == chai::CPU && record->transfer_pending) { - record->m_last_context->wait_on(&record->m_event); + // record->m_last_context->wait_on(&record->m_event); + record->m_event.wait(); record->transfer_pending = false; } camp::devices::Context* ctx; @@ -244,9 +245,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic ctx = context; } auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); - if (space == chai::CPU){ - e.wait(); - } + if (space == chai::CPU && chai::GPU == context->get_platform()){ record->transfer_pending = true; record->m_event = e; From f073f76ca1452b980293f4be58bb3e8cdce38ec6 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Fri, 18 Oct 2019 13:12:23 -0700 Subject: [PATCH 020/118] Move simple more complex --- examples/context-simple.cpp | 25 +++++++++++++++++++------ src/chai/ArrayManager.cpp | 5 ++++- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/examples/context-simple.cpp b/examples/context-simple.cpp index 55b450e0..eecf4914 100644 --- a/examples/context-simple.cpp +++ b/examples/context-simple.cpp @@ -5,25 +5,38 @@ int main() { camp::devices::Context host{camp::devices::Host{}}; - camp::devices::Context device{camp::devices::Cuda{}}; + + camp::devices::Context device_one{camp::devices::Cuda{}}; + camp::devices::Context device_two{camp::devices::Cuda{}}; constexpr std::size_t ARRAY_SIZE{1024}; - chai::ManagedArray array(ARRAY_SIZE); + chai::ManagedArray array_one(ARRAY_SIZE); + chai::ManagedArray array_two(ARRAY_SIZE); // set on host forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { - array[i] = i; + array_one[i] = i; + array_two[i] = i; }); // double on device - forall(&device, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { - array[i] = array[i] * 2.0; + forall(&device_one, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + array_one[i] = array_one[i] * 2.0; + }); + forall(&device_two, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + array_two[i] = array_two[i] / 2.0; }); + array_one.move(chai::CPU, &device_one); + array_two.move(chai::CPU, &device_two); + // print on host forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { - printf("array[%d] = %f \n", i, array[i]); + if (i == 256) { + printf("array_one[%d] = %f \n", i, array_one[i]); + printf("array_two[%d] = %f \n", i, array_two[i]); + } }); } diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 8292b726..f544ca7d 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -213,7 +213,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic } #endif - if (space == record->m_last_space) { + if (space == record->m_last_space && !record->transfer_pending) { return; } @@ -237,7 +237,10 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic // record->m_last_context->wait_on(&record->m_event); record->m_event.wait(); record->transfer_pending = false; + + return; } + camp::devices::Context* ctx; if (space == chai::CPU){ ctx = record->m_last_context; From 0058134df2113089bc1bd9b328be01c89617ee6d Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Fri, 18 Oct 2019 14:02:07 -0700 Subject: [PATCH 021/118] Cleaning context-integration example code --- examples/context.cpp | 54 +++----------------------------------------- 1 file changed, 3 insertions(+), 51 deletions(-) diff --git a/examples/context.cpp b/examples/context.cpp index 8e370634..ac0f5c5f 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -5,9 +5,6 @@ int main() { - std::cout << "Chai Context Implementation\n"; - - float kernel_time = 20; int cuda_device = 0; @@ -28,51 +25,16 @@ int main() clock_t time_clocks = (clock_t)(kernel_time * deviceProp.clockRate); #endif - - const int NUM_ARRAYS = 1; + const int NUM_ARRAYS = 8; const int ARRAY_SIZE = 10; std::vector< chai::ManagedArray > arrays; - std::vector< camp::devices::Context > cuda_ctx; + for (int i = 0; i < NUM_ARRAYS; i++) { arrays.push_back(chai::ManagedArray(10, chai::GPU)); - cuda_ctx.push_back(camp::devices::Context{camp::devices::Cuda()}); - } - - std::cout << "Setting all = 10 w/ Context\n"; - for (auto array : arrays) { - auto clock_lambda_2 = [=] CHAI_HOST_DEVICE (int idx) { - array[idx] = 10; - }; - camp::devices::Context ctx{camp::devices::Host{}}; - auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_2); - } - - std::cout << "std cout WITHOUT Context : "; - for (auto array : arrays) { - auto print = [=] (int idx) { - float val = array[idx]; - std::cout<< val << " "; - }; - camp::devices::Context ctx{camp::devices::Host{}}; - forall(sequential(), 0, ARRAY_SIZE, print); - std::cout << std::endl; - } - - std::cout << "printf w/ Context : "; - for (auto array : arrays) { - auto print = [=] CHAI_HOST_DEVICE (int idx) { - float val = array[idx]; - printf("%i ", array[idx]); - }; - camp::devices::Context ctx{camp::devices::Host{}}; - forall(&ctx, 0, ARRAY_SIZE, print); - printf("\n"); } std::cout << "calling forall with cuda context" << std::endl; for (auto array : arrays) { - //for (int i = 0; i < NUM_ARRAYS; i++) { - // auto array = arrays[i]; auto ctx = &cuda_ctx[i]; camp::devices::Context ctx{camp::devices::Cuda()}; auto clock_lambda_1 = [=] CHAI_HOST_DEVICE (int idx) { @@ -99,17 +61,6 @@ int main() auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_2); } - std::cout << "printf w/ Context : "; - for (auto array : arrays) { - auto print = [=] CHAI_HOST_DEVICE (int idx) { - float val = array[idx]; - printf("%i ", array[idx]); - }; - camp::devices::Context ctx{camp::devices::Host{}}; - forall(&ctx, 0, ARRAY_SIZE, print); - printf("\n"); - } - std::cout << "std cout WITHOUT Context : "; for (auto array : arrays) { auto print = [=] (int idx) { float val = array[idx]; @@ -119,6 +70,7 @@ int main() forall(sequential(), 0, ARRAY_SIZE, print); std::cout << std::endl; } + for (auto a : arrays) a.free(); return 0; } From 2344529cd59eec84057ad0fb227afb6ee7ae7637 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Fri, 18 Oct 2019 15:13:18 -0700 Subject: [PATCH 022/118] Start adding context tests --- src/chai/ArrayManager.cpp | 3 + tests/integration/CMakeLists.txt | 15 ++++ .../managed_array_context_tests.cpp | 90 +++++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 tests/integration/managed_array_context_tests.cpp diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index f544ca7d..454df618 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -252,6 +252,9 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic if (space == chai::CPU && chai::GPU == context->get_platform()){ record->transfer_pending = true; record->m_event = e; + } else if ( (space == chai::CPU) + && (chai::CPU == context->get_platform())) { + e.wait(); } //if (transfer_pending) context->wait_on(&e); diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index bd12651b..21b5d647 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -30,3 +30,18 @@ target_include_directories( blt_add_test( NAME managed_array_test COMMAND managed_array_tests) + +if (ENABLE_CUDA) +blt_add_executable( + NAME managed_array_context_tests + SOURCES managed_array_context_tests.cpp + DEPENDS_ON ${managed_array_test_depends}) + +target_include_directories( + managed_array_context_tests + PUBLIC ${PROJECT_BINARY_DIR}/include) + +blt_add_test( + NAME managed_array_context_test + COMMAND managed_array_context_tests) +endif () diff --git a/tests/integration/managed_array_context_tests.cpp b/tests/integration/managed_array_context_tests.cpp new file mode 100644 index 00000000..5071db56 --- /dev/null +++ b/tests/integration/managed_array_context_tests.cpp @@ -0,0 +1,90 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// +#include "gtest/gtest.h" + +#define GPU_TEST(X, Y) \ + static void gpu_test_##X##Y(); \ + TEST(X, Y) { gpu_test_##X##Y(); } \ + static void gpu_test_##X##Y() + +#include "../src/util/forall.hpp" + +#include "chai/ManagedArray.hpp" +#include "chai/config.hpp" + +GPU_TEST(ManagedArray, Simple) +{ + constexpr std::size_t ARRAY_SIZE{1024}; + + camp::devices::Context host{camp::devices::Host{}}; + camp::devices::Context device{camp::devices::Cuda{}}; + + chai::ManagedArray array(ARRAY_SIZE); + + forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + array[i] = i; + }); + + forall(&device, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + array[i] = array[i] * 2.0; + }); + + // print on host + forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + EXPECT_DOUBLE_EQ(array[i], i*2.0); + }); +} + +GPU_TEST(ManagedArray, SimpleWithAsyncMoveFrom) +{ + constexpr std::size_t ARRAY_SIZE{1024}; + + camp::devices::Context host{camp::devices::Host{}}; + camp::devices::Context device{camp::devices::Cuda{}}; + + chai::ManagedArray array(ARRAY_SIZE); + + forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + array[i] = i; + }); + + forall(&device, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + array[i] = array[i] * 2.0; + }); + + array.move(chai::CPU, &device); + + // print on host + forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + EXPECT_DOUBLE_EQ(array[i], i*2.0); + }); +} + +GPU_TEST(ManagedArray, SimpleWithAsyncMoveTo) +{ + constexpr std::size_t ARRAY_SIZE{1024}; + + camp::devices::Context host{camp::devices::Host{}}; + camp::devices::Context device{camp::devices::Cuda{}}; + + chai::ManagedArray array(ARRAY_SIZE); + + forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + array[i] = i; + }); + + array.move(chai::GPU, &device); + + forall(&device, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + array[i] = array[i] * 2.0; + }); + + // print on host + forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + EXPECT_DOUBLE_EQ(array[i], i*2.0); + }); +} From 3de01e2e9a00b1d486cc07f5ee28e794d3e97915 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 21 Oct 2019 07:50:13 -0700 Subject: [PATCH 023/118] Removing camp submodule --- src/tpl/camp | 1 - 1 file changed, 1 deletion(-) delete mode 160000 src/tpl/camp diff --git a/src/tpl/camp b/src/tpl/camp deleted file mode 160000 index 9569b20e..00000000 --- a/src/tpl/camp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9569b20e20a8fa2d5cb7006095c7435d198848f1 From 52b4599e4b931ba4a0d66b1354c504d4cca602d1 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 21 Oct 2019 10:00:05 -0700 Subject: [PATCH 024/118] Check if Cuda is enabled before building Cuda Context examples. --- examples/CMakeLists.txt | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 1b7ca1ed..4c42bd01 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -18,16 +18,6 @@ if (ENABLE_HIP) hip) endif() -blt_add_executable( - NAME context-integration.exe - SOURCES context.cpp - DEPENDS_ON ${chai_umpire_example_depends}) - -blt_add_executable( - NAME context-simple.exe - SOURCES context-simple.cpp - DEPENDS_ON ${chai_umpire_example_depends}) - blt_add_executable( NAME chai-umpire-example.exe SOURCES chai-umpire-allocators.cpp @@ -38,4 +28,13 @@ if (ENABLE_CUDA OR ENABLE_HIP) NAME chai-example.exe SOURCES example.cpp DEPENDS_ON ${chai_umpire_example_depends}) + blt_add_executable( + NAME context-integration.exe + SOURCES context.cpp + DEPENDS_ON ${chai_umpire_example_depends}) + + blt_add_executable( + NAME context-simple.exe + SOURCES context-simple.cpp + DEPENDS_ON ${chai_umpire_example_depends}) endif () From 73d35579202c532ac95427afe98b580338ea314d Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 21 Oct 2019 10:15:08 -0700 Subject: [PATCH 025/118] Context namespace -> Resources --- examples/context-simple.cpp | 8 ++++---- examples/context.cpp | 8 ++++---- src/chai/ArrayManager.cpp | 14 +++++++------- src/chai/ArrayManager.hpp | 12 ++++++------ src/chai/ExecutionSpaces.hpp | 10 +++++----- src/chai/ManagedArray.hpp | 2 +- src/chai/ManagedArray.inl | 2 +- src/chai/PointerRecord.hpp | 6 +++--- src/tpl/umpire | 2 +- src/util/forall.hpp | 16 ++++++++-------- tests/unit/execution_space_unit_tests.cpp | 22 +++++++++++----------- 11 files changed, 51 insertions(+), 51 deletions(-) diff --git a/examples/context-simple.cpp b/examples/context-simple.cpp index eecf4914..89ea5133 100644 --- a/examples/context-simple.cpp +++ b/examples/context-simple.cpp @@ -1,13 +1,13 @@ -#include "camp/device.hpp" +#include "camp/resources.hpp" #include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" int main() { - camp::devices::Context host{camp::devices::Host{}}; + camp::resources::Context host{camp::resources::Host{}}; - camp::devices::Context device_one{camp::devices::Cuda{}}; - camp::devices::Context device_two{camp::devices::Cuda{}}; + camp::resources::Context device_one{camp::resources::Cuda{}}; + camp::resources::Context device_two{camp::resources::Cuda{}}; constexpr std::size_t ARRAY_SIZE{1024}; diff --git a/examples/context.cpp b/examples/context.cpp index ac0f5c5f..f9904bde 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -1,4 +1,4 @@ -#include "camp/device.hpp" +#include "camp/resources.hpp" #include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" #include @@ -36,7 +36,7 @@ int main() std::cout << "calling forall with cuda context" << std::endl; for (auto array : arrays) { - camp::devices::Context ctx{camp::devices::Cuda()}; + camp::resources::Context ctx{camp::resources::Cuda()}; auto clock_lambda_1 = [=] CHAI_HOST_DEVICE (int idx) { array[idx] = idx * 2; unsigned int start_clock = (unsigned int) clock(); @@ -57,7 +57,7 @@ int main() auto clock_lambda_2 = [=] CHAI_HOST_DEVICE (int idx) { array[idx] *= array[idx]; }; - camp::devices::Context ctx{camp::devices::Host{}}; + camp::resources::Context ctx{camp::resources::Host{}}; auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_2); } @@ -66,7 +66,7 @@ int main() float val = array[idx]; std::cout<< val << " "; }; - camp::devices::Context ctx{camp::devices::Host{}}; + camp::resources::Context ctx{camp::resources::Host{}}; forall(sequential(), 0, ARRAY_SIZE, print); std::cout << std::endl; } diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index f544ca7d..34d769cb 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -83,7 +83,7 @@ void ArrayManager::setExecutionSpace(ExecutionSpace space) m_current_execution_space = space; } -void ArrayManager::setExecutionSpace(ExecutionSpace space, camp::devices::Context* context) +void ArrayManager::setExecutionSpace(ExecutionSpace space, camp::resources::Context* context) { CHAI_LOG(Debug, "Setting execution space to " << space); std::lock_guard lock(m_mutex); @@ -111,7 +111,7 @@ void* ArrayManager::move(void* pointer, } void* ArrayManager::move(void* pointer, PointerRecord* pointer_record, - camp::devices::Context* context, + camp::resources::Context* context, ExecutionSpace space) { // Check for default arg (NONE) @@ -134,7 +134,7 @@ ExecutionSpace ArrayManager::getExecutionSpace() return m_current_execution_space; } -camp::devices::Context* ArrayManager::getContext() +camp::resources::Context* ArrayManager::getContext() { return m_current_context; } @@ -201,7 +201,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space) resetTouch(record); } -void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devices::Context* context) +void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resources::Context* context) { if (space == NONE) { return; @@ -227,7 +227,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic } if (!record->m_touched[record->m_last_space]) { -// auto dev = context->get(); +// auto dev = context->get(); return; } else { callback(record, ACTION_MOVE, space, record->m_size); @@ -241,7 +241,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic return; } - camp::devices::Context* ctx; + camp::resources::Context* ctx; if (space == chai::CPU){ ctx = record->m_last_context; }else{ @@ -249,7 +249,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::devic } auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); - if (space == chai::CPU && chai::GPU == context->get_platform()){ + if (space == chai::CPU && context->is_async()){ record->transfer_pending = true; record->m_event = e; } diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index b70ff130..67ae55e1 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -17,7 +17,7 @@ #include "umpire/Allocator.hpp" #include "umpire/util/MemoryMap.hpp" -#include "camp/device.hpp" +#include "camp/resources.hpp" namespace chai { @@ -71,7 +71,7 @@ class ArrayManager * * \param space The space to set as current. */ - void setExecutionSpace(ExecutionSpace space, camp::devices::Context *context); + void setExecutionSpace(ExecutionSpace space, camp::resources::Context *context); /*! * \brief Get the current execution space. @@ -81,7 +81,7 @@ class ArrayManager ExecutionSpace getExecutionSpace(); - camp::devices::Context* getContext(); + camp::resources::Context* getContext(); /*! * \brief Move data in pointer to the current execution space. @@ -94,7 +94,7 @@ class ArrayManager ExecutionSpace = NONE); void* move(void* pointer, PointerRecord* pointer_record, - camp::devices::Context* context, + camp::resources::Context* context, ExecutionSpace = NONE); @@ -282,7 +282,7 @@ class ArrayManager * \param space */ void move(PointerRecord* record, ExecutionSpace space); - void move(PointerRecord* record, ExecutionSpace space, camp::devices::Context* context); + void move(PointerRecord* record, ExecutionSpace space, camp::resources::Context* context); /*! * \brief Execute a user callback if callbacks are active @@ -309,7 +309,7 @@ class ArrayManager /*! * current context. */ - camp::devices::Context* m_current_context; + camp::resources::Context* m_current_context; /** diff --git a/src/chai/ExecutionSpaces.hpp b/src/chai/ExecutionSpaces.hpp index 0bf07300..b4ddfc52 100644 --- a/src/chai/ExecutionSpaces.hpp +++ b/src/chai/ExecutionSpaces.hpp @@ -8,7 +8,7 @@ #define CHAI_ExecutionSpaces_HPP #include "chai/config.hpp" -#include "camp/device.hpp" +#include "camp/resources.hpp" namespace chai { @@ -33,12 +33,12 @@ enum ExecutionSpace { NUM_EXECUTION_SPACES }; -inline bool operator==(const ExecutionSpace& s, const camp::devices::Platform& p) { - if(s == chai::CPU && p == camp::devices::Platform::host) return true; +inline bool operator==(const ExecutionSpace& s, const camp::resources::Platform& p) { + if(s == chai::CPU && p == camp::resources::Platform::host) return true; #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) /*! Execution in GPU space */ - if (s == chai::GPU && (p == camp::devices::Platform::cuda || - p == camp::devices::Platform::hip)) return true; + if (s == chai::GPU && (p == camp::resources::Platform::cuda || + p == camp::resources::Platform::hip)) return true; #endif return false; } diff --git a/src/chai/ManagedArray.hpp b/src/chai/ManagedArray.hpp index a5e683f0..e7baf7fd 100644 --- a/src/chai/ManagedArray.hpp +++ b/src/chai/ManagedArray.hpp @@ -155,7 +155,7 @@ class ManagedArray : public CHAICopyable CHAI_HOST void registerTouch(ExecutionSpace space); CHAI_HOST void move(ExecutionSpace space); - CHAI_HOST void move(ExecutionSpace space, camp::devices::Context* context); + CHAI_HOST void move(ExecutionSpace space, camp::resources::Context* context); CHAI_HOST ManagedArray slice(size_t begin, size_t end); /*! diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index c6b79644..c2b48751 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -351,7 +351,7 @@ void ManagedArray::move(ExecutionSpace space) template CHAI_INLINE CHAI_HOST -void ManagedArray::move(ExecutionSpace space, camp::devices::Context* context) +void ManagedArray::move(ExecutionSpace space, camp::resources::Context* context) { ExecutionSpace prev_space = m_pointer_record->m_last_space; diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index 0a4c9722..9027ae22 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -10,7 +10,7 @@ #include "chai/ExecutionSpaces.hpp" #include "chai/Types.hpp" -#include "camp/device.hpp" +#include "camp/resources.hpp" #include #include @@ -60,8 +60,8 @@ struct PointerRecord { bool transfer_pending; - camp::devices::Event m_event; - camp::devices::Context* m_last_context = nullptr; + camp::resources::Event m_event; + camp::resources::Context* m_last_context = nullptr; }; } // end of namespace chai diff --git a/src/tpl/umpire b/src/tpl/umpire index 3998730d..ae559c82 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit 3998730dbe2c656a2c2133a740b7b322342135ff +Subproject commit ae559c8239df4a42866650e6e44f370865b8e489 diff --git a/src/util/forall.hpp b/src/util/forall.hpp index 60d7a7f6..2ea81ff9 100644 --- a/src/util/forall.hpp +++ b/src/util/forall.hpp @@ -11,7 +11,7 @@ #include "chai/ExecutionSpaces.hpp" #include "chai/config.hpp" //#include "camp/device.hpp" -#include "camp/device.hpp" +#include "camp/resources.hpp" #if defined(CHAI_ENABLE_UM) #include @@ -51,7 +51,7 @@ void forall(sequential, int begin, int end, LOOP_BODY body) rm->setExecutionSpace(chai::NONE); } template -camp::devices::Event forall_host(camp::devices::Context* dev, int begin, int end, LOOP_BODY body) +camp::resources::Event forall_host(camp::resources::Context* dev, int begin, int end, LOOP_BODY body) { chai::ArrayManager* rm = chai::ArrayManager::getInstance(); @@ -61,7 +61,7 @@ camp::devices::Event forall_host(camp::devices::Context* dev, int begin, int end rm->setExecutionSpace(chai::CPU, dev); - auto host = dev->get(); + auto host = dev->get(); forall_kernel_cpu(begin, end, body); rm->setExecutionSpace(chai::NONE); @@ -106,7 +106,7 @@ void forall(gpu, int begin, int end, LOOP_BODY&& body) rm->setExecutionSpace(chai::NONE); } template -camp::devices::Event forall_gpu(camp::devices::Context* dev, int begin, int end, LOOP_BODY&& body) +camp::resources::Event forall_gpu(camp::resources::Context* dev, int begin, int end, LOOP_BODY&& body) { chai::ArrayManager* rm = chai::ArrayManager::getInstance(); @@ -116,7 +116,7 @@ camp::devices::Event forall_gpu(camp::devices::Context* dev, int begin, int end, size_t gridSize = (end - begin + blockSize - 1) / blockSize; //#if defined(CHAI_ENABLE_CUDA) - auto cuda = dev->get(); + auto cuda = dev->get(); forall_kernel_gpu<<>>(begin, end - begin, body); //#elif defined(CHAI_ENABLE_HIP) // hipLaunchKernelGGL(forall_kernel_gpu, dim3(gridSize), dim3(blockSize), 0,0, @@ -130,12 +130,12 @@ camp::devices::Event forall_gpu(camp::devices::Context* dev, int begin, int end, #endif // if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) template -camp::devices::Event forall(camp::devices::Context *con, int begin, int end, LOOP_BODY&& body) +camp::resources::Event forall(camp::resources::Context *con, int begin, int end, LOOP_BODY&& body) { auto platform = con->get_platform(); switch(platform) { - case camp::devices::Platform::cuda: - case camp::devices::Platform::hip: + case camp::resources::Platform::cuda: + case camp::resources::Platform::hip: return forall_gpu(con, begin, end, body); default: return forall_host(con, begin, end, body); diff --git a/tests/unit/execution_space_unit_tests.cpp b/tests/unit/execution_space_unit_tests.cpp index 21649e14..c3f667da 100644 --- a/tests/unit/execution_space_unit_tests.cpp +++ b/tests/unit/execution_space_unit_tests.cpp @@ -48,33 +48,33 @@ TEST(ExecutionSpace, Platforms) { - ASSERT_TRUE(chai::CPU == camp::devices::Platform::host); - ASSERT_FALSE(chai::CPU == camp::devices::Platform::undefined); + ASSERT_TRUE(chai::CPU == camp::resources::Platform::host); + ASSERT_FALSE(chai::CPU == camp::resources::Platform::undefined); #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) - ASSERT_TRUE(chai::GPU == camp::devices::Platform::cuda); - ASSERT_TRUE(chai::GPU == camp::devices::Platform::hip); - ASSERT_FALSE(chai::GPU == camp::devices::Platform::undefined); + ASSERT_TRUE(chai::GPU == camp::resources::Platform::cuda); + ASSERT_TRUE(chai::GPU == camp::resources::Platform::hip); + ASSERT_FALSE(chai::GPU == camp::resources::Platform::undefined); #endif } TEST(ExecutionSpace, Host) { - camp::devices::Context ctx{camp::devices::Host()}; - ASSERT_TRUE( chai::CPU == ctx.get().get_platform() ); + camp::resources::Context ctx{camp::resources::Host()}; + ASSERT_TRUE( chai::CPU == ctx.get().get_platform() ); } #if defined(CHAI_ENABLE_CUDA) TEST(ExecutionSpace, Cuda) { - camp::devices::Context ctx{camp::devices::Cuda()}; - ASSERT_TRUE( chai::GPU == ctx.get().get_platform() ); + camp::resources::Context ctx{camp::resources::Cuda()}; + ASSERT_TRUE( chai::GPU == ctx.get().get_platform() ); } #endif // #if defined(CHAI_ENABLE_CUDA) #if defined(CHAI_ENABLE_HIP) TEST(ExecutionSpace, Hip) { - camp::devices::Context ctx{camp::devices::Hip()}; - ASSERT_TRUE( chai::GPU == ctx.get().get_platform() ); + camp::resources::Context ctx{camp::resources::Hip()}; + ASSERT_TRUE( chai::GPU == ctx.get().get_platform() ); } #endif // #if defined(CHAI_ENABLE_CUDA) From 94cb802b765b5224801601b330391d7e0b785d83 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Mon, 21 Oct 2019 10:31:59 -0700 Subject: [PATCH 026/118] Update simple example --- examples/context-simple.cpp | 91 ++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 27 deletions(-) diff --git a/examples/context-simple.cpp b/examples/context-simple.cpp index eecf4914..d9522944 100644 --- a/examples/context-simple.cpp +++ b/examples/context-simple.cpp @@ -2,41 +2,78 @@ #include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" +#include +#include + +inline __host__ __device__ void +wait_for(float time, float clockrate) { + clock_t time_in_clocks = time*clockrate; + + unsigned int start_clock = (unsigned int) clock(); + clock_t clock_offset = 0; + while (clock_offset < time_in_clocks) + { + unsigned int end_clock = (unsigned int) clock(); + clock_offset = (clock_t)(end_clock - start_clock); + } +} + +int get_clockrate() +{ + int cuda_device = 0; + cudaDeviceProp deviceProp; + cudaGetDevice(&cuda_device); + cudaGetDeviceProperties(&deviceProp, cuda_device); + if ((deviceProp.concurrentKernels == 0)) + { + printf("> GPU does not support concurrent kernel execution\n"); + printf(" CUDA kernel runs will be serialized\n"); + } + printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", + deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); + +#if defined(__arm__) || defined(__aarch64__) + return deviceProp.clockRate/1000; +#else + return deviceProp.clockRate; +#endif +} + int main() { + constexpr std::size_t ARRAY_SIZE{1000000}; + std::vector> arrays; camp::devices::Context host{camp::devices::Host{}}; - camp::devices::Context device_one{camp::devices::Cuda{}}; - camp::devices::Context device_two{camp::devices::Cuda{}}; - - constexpr std::size_t ARRAY_SIZE{1024}; + int clockrate{get_clockrate()}; - chai::ManagedArray array_one(ARRAY_SIZE); - chai::ManagedArray array_two(ARRAY_SIZE); + for (std::size_t i = 0; i < 8; ++i) { + arrays.push_back(chai::ManagedArray(ARRAY_SIZE)); + } - // set on host - forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { - array_one[i] = i; - array_two[i] = i; - }); + for (auto array : arrays) { + // set on host + forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + array[i] = i; + }); + } + for (auto array : arrays) { + camp::devices::Context context{camp::devices::Cuda{}}; - // double on device - forall(&device_one, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { - array_one[i] = array_one[i] * 2.0; - }); - forall(&device_two, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { - array_two[i] = array_two[i] / 2.0; - }); + forall(&context, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + array[i] = array[i] * 2.0; + wait_for(1000, clockrate); + }); - array_one.move(chai::CPU, &device_one); - array_two.move(chai::CPU, &device_two); + array.move(chai::CPU, &context); + } - // print on host - forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { - if (i == 256) { - printf("array_one[%d] = %f \n", i, array_one[i]); - printf("array_two[%d] = %f \n", i, array_two[i]); - } - }); + for (auto array : arrays) { + forall(&host, 255, 257, [=] __host__ __device__ (int i) { + if (i == 256) { + printf("array[%d] = %f \n", i, array[i]); + } + }); + } } From ba87903e240cea3ca06185c42efc5cc76c373960 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 21 Oct 2019 11:37:22 -0700 Subject: [PATCH 027/118] Namespace change for tests --- tests/integration/managed_array_context_tests.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/integration/managed_array_context_tests.cpp b/tests/integration/managed_array_context_tests.cpp index 5071db56..c9792bb2 100644 --- a/tests/integration/managed_array_context_tests.cpp +++ b/tests/integration/managed_array_context_tests.cpp @@ -20,8 +20,8 @@ GPU_TEST(ManagedArray, Simple) { constexpr std::size_t ARRAY_SIZE{1024}; - camp::devices::Context host{camp::devices::Host{}}; - camp::devices::Context device{camp::devices::Cuda{}}; + camp::resources::Context host{camp::resources::Host{}}; + camp::resources::Context device{camp::resources::Cuda{}}; chai::ManagedArray array(ARRAY_SIZE); @@ -43,8 +43,8 @@ GPU_TEST(ManagedArray, SimpleWithAsyncMoveFrom) { constexpr std::size_t ARRAY_SIZE{1024}; - camp::devices::Context host{camp::devices::Host{}}; - camp::devices::Context device{camp::devices::Cuda{}}; + camp::resources::Context host{camp::resources::Host{}}; + camp::resources::Context device{camp::resources::Cuda{}}; chai::ManagedArray array(ARRAY_SIZE); @@ -68,8 +68,8 @@ GPU_TEST(ManagedArray, SimpleWithAsyncMoveTo) { constexpr std::size_t ARRAY_SIZE{1024}; - camp::devices::Context host{camp::devices::Host{}}; - camp::devices::Context device{camp::devices::Cuda{}}; + camp::resources::Context host{camp::resources::Host{}}; + camp::resources::Context device{camp::resources::Cuda{}}; chai::ManagedArray array(ARRAY_SIZE); From 99919f7f590bfed4bfba44a8bf8ee0a5bc01995e Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 21 Oct 2019 11:39:04 -0700 Subject: [PATCH 028/118] Example array size and time change --- examples/context-simple.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/context-simple.cpp b/examples/context-simple.cpp index 2bfd4830..af353f25 100644 --- a/examples/context-simple.cpp +++ b/examples/context-simple.cpp @@ -41,7 +41,7 @@ int get_clockrate() int main() { - constexpr std::size_t ARRAY_SIZE{1000000}; + constexpr std::size_t ARRAY_SIZE{1000}; std::vector> arrays; camp::resources::Context host{camp::resources::Host{}}; @@ -63,7 +63,7 @@ int main() forall(&context, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { array[i] = array[i] * 2.0; - wait_for(1000, clockrate); + wait_for(20, clockrate); }); array.move(chai::CPU, &context); From b1ed7ac73206bcdc28259ce22531b81cbf5ada3b Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 21 Oct 2019 13:58:53 -0700 Subject: [PATCH 029/118] Fixing Non Cuda build tests. --- src/chai/ArrayManager.cpp | 7 ++++++- tests/integration/managed_array_context_tests.cpp | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 07a6b1fd..a093299b 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -33,12 +33,17 @@ ArrayManager::ArrayManager() : m_default_allocation_space = CPU; m_allocators[CPU] = - //new umpire::Allocator(m_resource_manager.getAllocator("HOST")); +#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) new umpire::Allocator(m_resource_manager.getAllocator("PINNED")); +#else + new umpire::Allocator(m_resource_manager.getAllocator("HOST")); +#endif + #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) m_allocators[GPU] = new umpire::Allocator(m_resource_manager.getAllocator("DEVICE")); #endif + #if defined(CHAI_ENABLE_UM) m_allocators[UM] = new umpire::Allocator(m_resource_manager.getAllocator("UM")); diff --git a/tests/integration/managed_array_context_tests.cpp b/tests/integration/managed_array_context_tests.cpp index c9792bb2..0d703f4b 100644 --- a/tests/integration/managed_array_context_tests.cpp +++ b/tests/integration/managed_array_context_tests.cpp @@ -16,6 +16,7 @@ #include "chai/ManagedArray.hpp" #include "chai/config.hpp" +#ifdef CHAI_ENABLE_CUDA GPU_TEST(ManagedArray, Simple) { constexpr std::size_t ARRAY_SIZE{1024}; @@ -88,3 +89,4 @@ GPU_TEST(ManagedArray, SimpleWithAsyncMoveTo) EXPECT_DOUBLE_EQ(array[i], i*2.0); }); } +#endif //#ifdef CHAI_ENABLE_CUDA From 33172ef5ba319e51f86d4c9d849a5be0aaebeb0f Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 21 Oct 2019 15:34:57 -0700 Subject: [PATCH 030/118] Fixing no context gpu call --- src/chai/ArrayManager.cpp | 6 ++++++ src/util/forall.hpp | 16 ++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index a093299b..3c601dad 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -252,6 +252,12 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou }else{ ctx = context; } + + if (ctx == nullptr){ + m_resource_manager.copy(dst_pointer, src_pointer); + return; + } + auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); if (space == chai::CPU && context->is_async()){ diff --git a/src/util/forall.hpp b/src/util/forall.hpp index 2ea81ff9..fcdffa76 100644 --- a/src/util/forall.hpp +++ b/src/util/forall.hpp @@ -115,14 +115,14 @@ camp::resources::Event forall_gpu(camp::resources::Context* dev, int begin, int size_t blockSize = 32; size_t gridSize = (end - begin + blockSize - 1) / blockSize; -//#if defined(CHAI_ENABLE_CUDA) - auto cuda = dev->get(); - forall_kernel_gpu<<>>(begin, end - begin, body); -//#elif defined(CHAI_ENABLE_HIP) -// hipLaunchKernelGGL(forall_kernel_gpu, dim3(gridSize), dim3(blockSize), 0,0, -// begin, end - begin, body); -// hipDeviceSynchronize(); -//#endif +#if defined(CHAI_ENABLE_CUDA) +auto cuda = dev->get(); +forall_kernel_gpu<<>>(begin, end - begin, body); +#elif defined(CHAI_ENABLE_HIP) + hipLaunchKernelGGL(forall_kernel_gpu, dim3(gridSize), dim3(blockSize), 0,0, + begin, end - begin, body); + hipDeviceSynchronize(); +#endif rm->setExecutionSpace(chai::NONE); return dev->get_event(); From 6ecb678da595c3059e6ab5a50b90311a87b65bce Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 21 Oct 2019 15:53:49 -0700 Subject: [PATCH 031/118] updating umpire --- src/tpl/umpire | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tpl/umpire b/src/tpl/umpire index ae559c82..b29760cc 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit ae559c8239df4a42866650e6e44f370865b8e489 +Subproject commit b29760cc4f380ec9ce041e863caf1a70d42b571d From 7d85179e20c31a2c14c97e48bc1b1bcb06734424 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 21 Oct 2019 16:32:10 -0700 Subject: [PATCH 032/118] Dependent context example. --- examples/CMakeLists.txt | 5 ++- examples/context-depends.cpp | 78 ++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 examples/context-depends.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 4c42bd01..1457bbe3 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -32,7 +32,10 @@ if (ENABLE_CUDA OR ENABLE_HIP) NAME context-integration.exe SOURCES context.cpp DEPENDS_ON ${chai_umpire_example_depends}) - + blt_add_executable( + NAME context-depends.exe + SOURCES context-depends.cpp + DEPENDS_ON ${chai_umpire_example_depends}) blt_add_executable( NAME context-simple.exe SOURCES context-simple.cpp diff --git a/examples/context-depends.cpp b/examples/context-depends.cpp new file mode 100644 index 00000000..1667eb9b --- /dev/null +++ b/examples/context-depends.cpp @@ -0,0 +1,78 @@ +#include "camp/resources.hpp" +#include "../src/util/forall.hpp" +#include "chai/ManagedArray.hpp" + +#include +#include + +inline __host__ __device__ void +wait_for(float time, float clockrate) { + clock_t time_in_clocks = time*clockrate; + + unsigned int start_clock = (unsigned int) clock(); + clock_t clock_offset = 0; + while (clock_offset < time_in_clocks) + { + unsigned int end_clock = (unsigned int) clock(); + clock_offset = (clock_t)(end_clock - start_clock); + } +} + +int get_clockrate() +{ + int cuda_device = 0; + cudaDeviceProp deviceProp; + cudaGetDevice(&cuda_device); + cudaGetDeviceProperties(&deviceProp, cuda_device); + if ((deviceProp.concurrentKernels == 0)) + { + printf("> GPU does not support concurrent kernel execution\n"); + printf(" CUDA kernel runs will be serialized\n"); + } + printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", + deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); + +#if defined(__arm__) || defined(__aarch64__) + return deviceProp.clockRate/1000; +#else + return deviceProp.clockRate; +#endif +} + +int main() +{ + constexpr std::size_t ARRAY_SIZE{1000}; + int clockrate{get_clockrate()}; + + chai::ManagedArray array1(ARRAY_SIZE); + chai::ManagedArray array2(ARRAY_SIZE); + + camp::resources::Context dev1{camp::resources::Cuda{}}; + camp::resources::Context dev2{camp::resources::Cuda{}}; + + auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + array1[i] = i; + wait_for(10, clockrate); + }); + + auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + array2[i] = -1; + wait_for(20, clockrate); + }); + + e2.wait(); + e1.wait(); + + forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + array1[i] *= array2[i]; + wait_for(10, clockrate); + }); + + array1.move(chai::CPU, &dev1); + camp::resources::Context host{camp::resources::Host{}}; + + forall(sequential(), 0, 10, [=] (int i) { + std::cout << array1[i] << " "; + }); + std::cout << std::endl; +} From cab243537caef852d6f583f164302915dce13797 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 21 Oct 2019 17:17:25 -0700 Subject: [PATCH 033/118] Using context to print on depends example. --- examples/context-depends.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/context-depends.cpp b/examples/context-depends.cpp index 1667eb9b..584e646a 100644 --- a/examples/context-depends.cpp +++ b/examples/context-depends.cpp @@ -69,10 +69,11 @@ int main() }); array1.move(chai::CPU, &dev1); + camp::resources::Context host{camp::resources::Host{}}; - forall(sequential(), 0, 10, [=] (int i) { - std::cout << array1[i] << " "; + forall(&host, 0, 10, [=] __host__ __device__ (int i) { + printf("%f \n", array1[i]); }); - std::cout << std::endl; + printf("\n"); } From eeb9b82d70ec13f0e4a1d38663f8b629f84007df Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 21 Oct 2019 17:27:49 -0700 Subject: [PATCH 034/118] Fixing all example prints. --- examples/context-depends.cpp | 4 ++-- examples/context-simple.cpp | 2 +- examples/context.cpp | 12 +++++------- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/examples/context-depends.cpp b/examples/context-depends.cpp index 584e646a..0529c561 100644 --- a/examples/context-depends.cpp +++ b/examples/context-depends.cpp @@ -72,8 +72,8 @@ int main() camp::resources::Context host{camp::resources::Host{}}; - forall(&host, 0, 10, [=] __host__ __device__ (int i) { - printf("%f \n", array1[i]); + forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { + printf("%f ", array1[i]); }); printf("\n"); } diff --git a/examples/context-simple.cpp b/examples/context-simple.cpp index af353f25..4f5da65d 100644 --- a/examples/context-simple.cpp +++ b/examples/context-simple.cpp @@ -70,7 +70,7 @@ int main() } for (auto array : arrays) { - forall(&host, 255, 257, [=] __host__ __device__ (int i) { + forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { if (i == 256) { printf("array[%d] = %f \n", i, array[i]); } diff --git a/examples/context.cpp b/examples/context.cpp index f9904bde..dc9df914 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -61,14 +61,12 @@ int main() auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_2); } + camp::resources::Context host{camp::resources::Host{}}; for (auto array : arrays) { - auto print = [=] (int idx) { - float val = array[idx]; - std::cout<< val << " "; - }; - camp::resources::Context ctx{camp::resources::Host{}}; - forall(sequential(), 0, ARRAY_SIZE, print); - std::cout << std::endl; + forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { + printf("%i ", int(array[i]) ); + }); + printf("\n"); } for (auto a : arrays) a.free(); From de9f7b6a352b7acfd985f517bdf4e04c885212d9 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Tue, 22 Oct 2019 15:58:04 -0700 Subject: [PATCH 035/118] New wait_on strategy, first pass. --- examples/context-simple.cpp | 2 +- examples/context.cpp | 8 ++++---- src/chai/ArrayManager.cpp | 36 ++++++++++++++++++++---------------- src/tpl/umpire | 2 +- 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/examples/context-simple.cpp b/examples/context-simple.cpp index 4f5da65d..046d898b 100644 --- a/examples/context-simple.cpp +++ b/examples/context-simple.cpp @@ -47,7 +47,7 @@ int main() int clockrate{get_clockrate()}; - for (std::size_t i = 0; i < 8; ++i) { + for (std::size_t i = 0; i < 10; ++i) { arrays.push_back(chai::ManagedArray(ARRAY_SIZE)); } diff --git a/examples/context.cpp b/examples/context.cpp index dc9df914..c4a38bc3 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -25,7 +25,7 @@ int main() clock_t time_clocks = (clock_t)(kernel_time * deviceProp.clockRate); #endif - const int NUM_ARRAYS = 8; + const int NUM_ARRAYS = 16; const int ARRAY_SIZE = 10; std::vector< chai::ManagedArray > arrays; @@ -43,8 +43,8 @@ int main() clock_t clock_offset = 0; while (clock_offset < time_clocks) { - unsigned int end_clock = (unsigned int) clock(); - clock_offset = (clock_t)(end_clock - start_clock); + unsigned int end_clock = (unsigned int) clock(); + clock_offset = (clock_t)(end_clock - start_clock); } }; @@ -64,7 +64,7 @@ int main() camp::resources::Context host{camp::resources::Host{}}; for (auto array : arrays) { forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { - printf("%i ", int(array[i]) ); + printf("%i ", int(array[i]) ); }); printf("\n"); } diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 3c601dad..b330c11d 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -238,35 +238,39 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou callback(record, ACTION_MOVE, space, record->m_size); std::lock_guard lock(m_mutex); - if (space == chai::CPU && record->transfer_pending) { - // record->m_last_context->wait_on(&record->m_event); - record->m_event.wait(); + //if (space == chai::CPU && record->transfer_pending) { + if (record->transfer_pending) { + context->wait_on(&record->m_event); + //record->m_last_context->wait_on(&record->m_event); + //record->m_event.wait(); record->transfer_pending = false; return; } - camp::resources::Context* ctx; - if (space == chai::CPU){ - ctx = record->m_last_context; - }else{ - ctx = context; - } + //camp::resources::Context* ctx; + //if (space == chai::CPU){ + // ctx = record->m_last_context; + //}else{ + // ctx = context; + //} - if (ctx == nullptr){ + //if (ctx == nullptr){ + if (context == nullptr){ m_resource_manager.copy(dst_pointer, src_pointer); return; } - auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); + //auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); + auto e = m_resource_manager.copy(dst_pointer, src_pointer, *context); - if (space == chai::CPU && context->is_async()){ + //if (space == chai::CPU && context->is_async()){ record->transfer_pending = true; record->m_event = e; - } else if ( (space == chai::CPU) - && (chai::CPU == context->get_platform())) { - e.wait(); - } + //} else if ( (space == chai::CPU) + // && (chai::CPU == context->get_platform())) { + // e.wait(); + //} //if (transfer_pending) context->wait_on(&e); } diff --git a/src/tpl/umpire b/src/tpl/umpire index b29760cc..3e91af07 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit b29760cc4f380ec9ce041e863caf1a70d42b571d +Subproject commit 3e91af077889077923b3185076c16c4493bdeef5 From 5897d649efef572381f582f771c29ebdf92202bd Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 23 Oct 2019 13:28:11 -0700 Subject: [PATCH 036/118] Reducing example array size. --- examples/context-simple.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/context-simple.cpp b/examples/context-simple.cpp index 046d898b..bce9242e 100644 --- a/examples/context-simple.cpp +++ b/examples/context-simple.cpp @@ -41,7 +41,7 @@ int get_clockrate() int main() { - constexpr std::size_t ARRAY_SIZE{1000}; + constexpr std::size_t ARRAY_SIZE{100}; std::vector> arrays; camp::resources::Context host{camp::resources::Host{}}; From 4f0aebccd2a1af106d7c0a3af1b14474e1fc1106 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 23 Oct 2019 13:48:42 -0700 Subject: [PATCH 037/118] Change context on CPU space. Fix test Segfault. --- src/chai/ArrayManager.cpp | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index b330c11d..f2072586 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -232,47 +232,33 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou } if (!record->m_touched[record->m_last_space]) { -// auto dev = context->get(); return; } else { callback(record, ACTION_MOVE, space, record->m_size); std::lock_guard lock(m_mutex); - //if (space == chai::CPU && record->transfer_pending) { if (record->transfer_pending) { context->wait_on(&record->m_event); - //record->m_last_context->wait_on(&record->m_event); - //record->m_event.wait(); record->transfer_pending = false; - return; } - //camp::resources::Context* ctx; - //if (space == chai::CPU){ - // ctx = record->m_last_context; - //}else{ - // ctx = context; - //} + camp::resources::Context* ctx; + if (space == chai::CPU){ + ctx = record->m_last_context; + }else{ + ctx = context; + } - //if (ctx == nullptr){ - if (context == nullptr){ + if (ctx == nullptr){ m_resource_manager.copy(dst_pointer, src_pointer); return; } - //auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); - auto e = m_resource_manager.copy(dst_pointer, src_pointer, *context); - - //if (space == chai::CPU && context->is_async()){ - record->transfer_pending = true; - record->m_event = e; - //} else if ( (space == chai::CPU) - // && (chai::CPU == context->get_platform())) { - // e.wait(); - //} - - //if (transfer_pending) context->wait_on(&e); + auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); + record->transfer_pending = true; + record->m_event = e; + } resetTouch(record); From 1b95c0f725ccef5e50614f2a0e04e7d94e40039f Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 23 Oct 2019 15:24:01 -0700 Subject: [PATCH 038/118] Adding non ManagedArray context example --- examples/CMakeLists.txt | 5 ++ examples/context-not-managed.cpp | 78 ++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 examples/context-not-managed.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 1457bbe3..9fc0a25c 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -40,4 +40,9 @@ if (ENABLE_CUDA OR ENABLE_HIP) NAME context-simple.exe SOURCES context-simple.cpp DEPENDS_ON ${chai_umpire_example_depends}) + blt_add_executable( + NAME context-not-managed.exe + SOURCES context-not-managed.cpp + DEPENDS_ON ${chai_umpire_example_depends}) + endif () diff --git a/examples/context-not-managed.cpp b/examples/context-not-managed.cpp new file mode 100644 index 00000000..c4325268 --- /dev/null +++ b/examples/context-not-managed.cpp @@ -0,0 +1,78 @@ +#include "camp/resources.hpp" +#include "../src/util/forall.hpp" + +#include +#include + +inline __host__ __device__ void +wait_for(float time, float clockrate) { + clock_t time_in_clocks = time*clockrate; + + unsigned int start_clock = (unsigned int) clock(); + clock_t clock_offset = 0; + while (clock_offset < time_in_clocks) + { + unsigned int end_clock = (unsigned int) clock(); + clock_offset = (clock_t)(end_clock - start_clock); + } +} + +int get_clockrate() +{ + int cuda_device = 0; + cudaDeviceProp deviceProp; + cudaGetDevice(&cuda_device); + cudaGetDeviceProperties(&deviceProp, cuda_device); + if ((deviceProp.concurrentKernels == 0)) + { + printf("> GPU does not support concurrent kernel execution\n"); + printf(" CUDA kernel runs will be serialized\n"); + } + printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", + deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); + +#if defined(__arm__) || defined(__aarch64__) + return deviceProp.clockRate/1000; +#else + return deviceProp.clockRate; +#endif +} + +int main() +{ + constexpr std::size_t ARRAY_SIZE{1000}; + int clockrate{get_clockrate()}; + + camp::resources::Context dev1{camp::resources::Cuda{}}; + camp::resources::Context dev2{camp::resources::Cuda{}}; + camp::resources::Context host{camp::resources::Host{}}; + + float * d_array1 = dev1.allocate(1000); + float * d_array2 = dev2.allocate(1000); + + auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + d_array1[i] = i; + wait_for(10, clockrate); + }); + + auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + d_array2[i] = -1; + wait_for(20, clockrate); + }); + + e2.wait(); + + forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + d_array1[i] *= d_array2[i]; + wait_for(10, clockrate); + }); + + + float * h_array1 = host.allocate(1000); + dev1.memcpy(h_array1, d_array1, sizeof(float) * 1000); + + forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { + printf("%f ", h_array1[i]); + }); + printf("\n"); +} From 2f2e5783616b3eae12002f44f08fd7c46b23d1ce Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 24 Oct 2019 10:28:54 -0700 Subject: [PATCH 039/118] Example trying to access out of bounds --- examples/context-simple.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/context-simple.cpp b/examples/context-simple.cpp index bce9242e..3bc38149 100644 --- a/examples/context-simple.cpp +++ b/examples/context-simple.cpp @@ -71,7 +71,7 @@ int main() for (auto array : arrays) { forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { - if (i == 256) { + if (i == 25) { printf("array[%d] = %f \n", i, array[i]); } }); From 2365e1e3b9725b8f2b8367ae8015de69f0a84a20 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 24 Oct 2019 10:43:09 -0700 Subject: [PATCH 040/118] Updating umpire --- src/tpl/umpire | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tpl/umpire b/src/tpl/umpire index 3e91af07..4530ccd3 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit 3e91af077889077923b3185076c16c4493bdeef5 +Subproject commit 4530ccd348c54a552000cbb16bbff155662cb27d From 5d16effdcdb187195ca15aa36fd786d85a1c8d72 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 24 Oct 2019 14:43:24 -0700 Subject: [PATCH 041/118] Updating Umpire --- src/tpl/umpire | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tpl/umpire b/src/tpl/umpire index 4530ccd3..6ba3c9e8 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit 4530ccd348c54a552000cbb16bbff155662cb27d +Subproject commit 6ba3c9e8b3ff87e44365afd4cd35f2cef4894283 From b3ced7a5399ddb14792a3499a45bd40cc8130965 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 24 Oct 2019 17:00:56 -0700 Subject: [PATCH 042/118] Updating Camp with seperated resource/context headers --- examples/context-depends.cpp | 2 +- examples/context-not-managed.cpp | 2 +- examples/context-simple.cpp | 2 +- examples/context.cpp | 2 +- src/chai/ArrayManager.hpp | 2 +- src/chai/ExecutionSpaces.hpp | 2 +- src/chai/PointerRecord.hpp | 2 +- src/tpl/umpire | 2 +- src/util/forall.hpp | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/context-depends.cpp b/examples/context-depends.cpp index 0529c561..2a00f90a 100644 --- a/examples/context-depends.cpp +++ b/examples/context-depends.cpp @@ -1,4 +1,4 @@ -#include "camp/resources.hpp" +#include "camp/contexts.hpp" #include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" diff --git a/examples/context-not-managed.cpp b/examples/context-not-managed.cpp index c4325268..4ed1adf2 100644 --- a/examples/context-not-managed.cpp +++ b/examples/context-not-managed.cpp @@ -1,4 +1,4 @@ -#include "camp/resources.hpp" +#include "camp/contexts.hpp" #include "../src/util/forall.hpp" #include diff --git a/examples/context-simple.cpp b/examples/context-simple.cpp index 3bc38149..8e7639cb 100644 --- a/examples/context-simple.cpp +++ b/examples/context-simple.cpp @@ -1,4 +1,4 @@ -#include "camp/resources.hpp" +#include "camp/contexts.hpp" #include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" diff --git a/examples/context.cpp b/examples/context.cpp index c4a38bc3..e7eb2d64 100644 --- a/examples/context.cpp +++ b/examples/context.cpp @@ -1,4 +1,4 @@ -#include "camp/resources.hpp" +#include "camp/contexts.hpp" #include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" #include diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index 67ae55e1..4f0b1c02 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -17,7 +17,7 @@ #include "umpire/Allocator.hpp" #include "umpire/util/MemoryMap.hpp" -#include "camp/resources.hpp" +#include "camp/contexts.hpp" namespace chai { diff --git a/src/chai/ExecutionSpaces.hpp b/src/chai/ExecutionSpaces.hpp index b4ddfc52..3e49d62a 100644 --- a/src/chai/ExecutionSpaces.hpp +++ b/src/chai/ExecutionSpaces.hpp @@ -8,7 +8,7 @@ #define CHAI_ExecutionSpaces_HPP #include "chai/config.hpp" -#include "camp/resources.hpp" +#include "camp/contexts.hpp" namespace chai { diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index 9027ae22..c1c95b7f 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -10,7 +10,7 @@ #include "chai/ExecutionSpaces.hpp" #include "chai/Types.hpp" -#include "camp/resources.hpp" +#include "camp/contexts.hpp" #include #include diff --git a/src/tpl/umpire b/src/tpl/umpire index 6ba3c9e8..c7835a8c 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit 6ba3c9e8b3ff87e44365afd4cd35f2cef4894283 +Subproject commit c7835a8cee5928c3d9fa511278d94f2289b007e4 diff --git a/src/util/forall.hpp b/src/util/forall.hpp index fcdffa76..01a6024e 100644 --- a/src/util/forall.hpp +++ b/src/util/forall.hpp @@ -11,7 +11,7 @@ #include "chai/ExecutionSpaces.hpp" #include "chai/config.hpp" //#include "camp/device.hpp" -#include "camp/resources.hpp" +#include "camp/contexts.hpp" #if defined(CHAI_ENABLE_UM) #include From 5926b9ee24f7903a728017e0d3bfdbe056d17a5e Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Tue, 10 Dec 2019 16:18:57 -0800 Subject: [PATCH 043/118] Using a list of events for handling multi-context management --- src/chai/ArrayManager.cpp | 10 +++++++--- src/chai/PointerRecord.hpp | 4 +++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index f2072586..c3992d35 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -238,7 +238,11 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou std::lock_guard lock(m_mutex); if (record->transfer_pending) { - context->wait_on(&record->m_event); + //if (!record->m_active_context_events.empty()) { + for (auto e : record->m_active_context_events){ + context->wait_on(&e); + } + record->m_active_context_events.clear(); record->transfer_pending = false; return; } @@ -257,8 +261,8 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); record->transfer_pending = true; - record->m_event = e; - + //record->m_event = e; + record->m_active_context_events.push_back(e); } resetTouch(record); diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index c1c95b7f..435f976a 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -14,6 +14,7 @@ #include #include +#include namespace chai { @@ -60,8 +61,9 @@ struct PointerRecord { bool transfer_pending; - camp::resources::Event m_event; + //camp::resources::Event m_event; camp::resources::Context* m_last_context = nullptr; + std::list m_active_context_events; }; } // end of namespace chai From 9b72e0b62e16fb0635d399ced464094b49a56229 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 8 Jan 2020 08:49:45 -0800 Subject: [PATCH 044/118] Adding multi array/context example and reverting last changes. --- examples/CMakeLists.txt | 4 ++ examples/context-multi-array.cpp | 76 ++++++++++++++++++++++++++++++++ src/chai/ArrayManager.cpp | 11 ++--- src/chai/PointerRecord.hpp | 4 +- 4 files changed, 84 insertions(+), 11 deletions(-) create mode 100644 examples/context-multi-array.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 9fc0a25c..ed8c9526 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -44,5 +44,9 @@ if (ENABLE_CUDA OR ENABLE_HIP) NAME context-not-managed.exe SOURCES context-not-managed.cpp DEPENDS_ON ${chai_umpire_example_depends}) + blt_add_executable( + NAME context-multi-array.exe + SOURCES context-multi-array.cpp + DEPENDS_ON ${chai_umpire_example_depends}) endif () diff --git a/examples/context-multi-array.cpp b/examples/context-multi-array.cpp new file mode 100644 index 00000000..80030533 --- /dev/null +++ b/examples/context-multi-array.cpp @@ -0,0 +1,76 @@ +#include "camp/contexts.hpp" +#include "../src/util/forall.hpp" +#include "chai/ManagedArray.hpp" + +#include +#include + +inline __host__ __device__ void +wait_for(float time, float clockrate) { + clock_t time_in_clocks = time*clockrate; + + unsigned int start_clock = (unsigned int) clock(); + clock_t clock_offset = 0; + while (clock_offset < time_in_clocks) + { + unsigned int end_clock = (unsigned int) clock(); + clock_offset = (clock_t)(end_clock - start_clock); + } +} + +int get_clockrate() +{ + int cuda_device = 0; + cudaDeviceProp deviceProp; + cudaGetDevice(&cuda_device); + cudaGetDeviceProperties(&deviceProp, cuda_device); + if ((deviceProp.concurrentKernels == 0)) + { + printf("> GPU does not support concurrent kernel execution\n"); + printf(" CUDA kernel runs will be serialized\n"); + } + printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", + deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); + +#if defined(__arm__) || defined(__aarch64__) + return deviceProp.clockRate/1000; +#else + return deviceProp.clockRate; +#endif +} + +int main() +{ + constexpr std::size_t ARRAY_SIZE{1000}; + int clockrate{get_clockrate()}; + + chai::ManagedArray array1(ARRAY_SIZE); + + camp::resources::Context dev1{camp::resources::Cuda{}}; + camp::resources::Context dev2{camp::resources::Cuda{}}; + + auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + if (i % 2 == 1) { + wait_for(20, clockrate); + array1[i] = i; + } + }); + + auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + if (i % 2 == 0) { + array1[i] = i; + wait_for(10, clockrate); + } + }); + + e1.wait(); + e2.wait(); + array1.move(chai::CPU, &dev1); + + camp::resources::Context host{camp::resources::Host{}}; + + forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { + printf("%f ", array1[i]); + }); + printf("\n"); +} diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index c3992d35..69332205 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -117,7 +117,7 @@ void* ArrayManager::move(void* pointer, void* ArrayManager::move(void* pointer, PointerRecord* pointer_record, camp::resources::Context* context, - ExecutionSpace space) + ExecutionSpace space) { // Check for default arg (NONE) if (space == NONE) { @@ -238,11 +238,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou std::lock_guard lock(m_mutex); if (record->transfer_pending) { - //if (!record->m_active_context_events.empty()) { - for (auto e : record->m_active_context_events){ - context->wait_on(&e); - } - record->m_active_context_events.clear(); + context->wait_on(&record->m_event); record->transfer_pending = false; return; } @@ -261,8 +257,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); record->transfer_pending = true; - //record->m_event = e; - record->m_active_context_events.push_back(e); + record->m_event = e; } resetTouch(record); diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index 435f976a..c1c95b7f 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -14,7 +14,6 @@ #include #include -#include namespace chai { @@ -61,9 +60,8 @@ struct PointerRecord { bool transfer_pending; - //camp::resources::Event m_event; + camp::resources::Event m_event; camp::resources::Context* m_last_context = nullptr; - std::list m_active_context_events; }; } // end of namespace chai From 1de6885ff715cd5102a0fb249c8bc56d7944495d Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 8 Jan 2020 10:26:28 -0800 Subject: [PATCH 045/118] Working multi context per array --- examples/context-multi-array.cpp | 2 -- src/chai/ArrayManager.cpp | 6 ++++++ src/chai/ManagedArray.inl | 4 ++++ src/chai/PointerRecord.hpp | 2 ++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/examples/context-multi-array.cpp b/examples/context-multi-array.cpp index 80030533..07157b3b 100644 --- a/examples/context-multi-array.cpp +++ b/examples/context-multi-array.cpp @@ -63,8 +63,6 @@ int main() } }); - e1.wait(); - e2.wait(); array1.move(chai::CPU, &dev1); camp::resources::Context host{camp::resources::Host{}}; diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 69332205..235b7687 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -255,6 +255,12 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou return; } + if (record->m_active_contexts.size() > 1) { + for (auto c : record->m_active_contexts) { + c->get_event().wait(); + } + } + auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); record->transfer_pending = true; record->m_event = e; diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index c2b48751..38fe0200 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -360,6 +360,10 @@ void ManagedArray::move(ExecutionSpace space, camp::resources::Context* conte moveInnerImpl(space); } + if (space == GPU && m_pointer_record->m_last_context != context ){ + m_pointer_record->m_active_contexts.push_back(context); + } + m_active_base_pointer = static_cast(m_resource_manager->move(const_cast(m_active_base_pointer), m_pointer_record, context, space)); m_active_pointer = m_active_base_pointer + m_offset; diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index c1c95b7f..09a5004c 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -14,6 +14,7 @@ #include #include +#include namespace chai { @@ -62,6 +63,7 @@ struct PointerRecord { bool transfer_pending; camp::resources::Event m_event; camp::resources::Context* m_last_context = nullptr; + std::list m_active_contexts; }; } // end of namespace chai From 20019123dde19ba0d0ab67ee83b1cce21f17c5ad Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 8 Jan 2020 10:34:42 -0800 Subject: [PATCH 046/118] Clearing active context/resource list on copy --- src/chai/ArrayManager.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 235b7687..b8674a79 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -239,6 +239,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou if (record->transfer_pending) { context->wait_on(&record->m_event); + record->m_active_contexts.clear(); record->transfer_pending = false; return; } From 27eadce94fc800040b047a4acb77269b768b25d5 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 8 Jan 2020 10:50:43 -0800 Subject: [PATCH 047/118] Enqueuing waits on stream --- src/chai/ArrayManager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index b8674a79..83fc5754 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -258,7 +258,8 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou if (record->m_active_contexts.size() > 1) { for (auto c : record->m_active_contexts) { - c->get_event().wait(); + auto c_event = c->get_event(); + ctx->wait_on(&c_event); } } From 7e2ffb50b19c49cfc250d83b271d0a49ef41b179 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 15 Jan 2020 08:39:25 -0800 Subject: [PATCH 048/118] Changing active context list to an array --- src/chai/ArrayManager.cpp | 8 ++++---- src/chai/ManagedArray.inl | 3 ++- src/chai/PointerRecord.hpp | 5 +++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 83fc5754..dbafed50 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -239,7 +239,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou if (record->transfer_pending) { context->wait_on(&record->m_event); - record->m_active_contexts.clear(); + record->m_active_count = 0; record->transfer_pending = false; return; } @@ -256,9 +256,9 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou return; } - if (record->m_active_contexts.size() > 1) { - for (auto c : record->m_active_contexts) { - auto c_event = c->get_event(); + if (record->m_active_count > 1) { + for (int i = 0; i < record->m_active_count; i++) { + auto c_event = record->m_active_contexts[i]->get_event(); ctx->wait_on(&c_event); } } diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index 38fe0200..f964ac28 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -361,7 +361,8 @@ void ManagedArray::move(ExecutionSpace space, camp::resources::Context* conte } if (space == GPU && m_pointer_record->m_last_context != context ){ - m_pointer_record->m_active_contexts.push_back(context); + m_pointer_record->m_active_contexts[m_pointer_record->m_active_count] = context; + m_pointer_record->m_active_count++; } m_active_base_pointer = static_cast(m_resource_manager->move(const_cast(m_active_base_pointer), m_pointer_record, context, space)); diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index 09a5004c..ae10777f 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -14,7 +14,7 @@ #include #include -#include +#include namespace chai { @@ -63,7 +63,8 @@ struct PointerRecord { bool transfer_pending; camp::resources::Event m_event; camp::resources::Context* m_last_context = nullptr; - std::list m_active_contexts; + std::array m_active_contexts; + int m_active_count = 0; }; } // end of namespace chai From 521505e5c408d31a7b1c83e2de70e6c301be8a7b Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 29 Jan 2020 15:20:48 -0800 Subject: [PATCH 049/118] Removing commented code for camp submodule --- cmake/thirdparty/SetupChaiThirdparty.cmake | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/cmake/thirdparty/SetupChaiThirdparty.cmake b/cmake/thirdparty/SetupChaiThirdparty.cmake index 24a43dc8..21a63f9a 100644 --- a/cmake/thirdparty/SetupChaiThirdparty.cmake +++ b/cmake/thirdparty/SetupChaiThirdparty.cmake @@ -20,18 +20,6 @@ if (NOT TARGET umpire) endif() endif() -#if (NOT TARGET camp) -# if (DEFINED camp_DIR) -# find_package(camp REQUIRED) -# -# blt_register_library( -# NAME camp -# INCLUDES ${CAMP_INCLUDE_DIRS} -# LIBRARIES camp) -# else () -# add_subdirectory(${PROJECT_SOURCE_DIR}/src/tpl/camp) -# endif() -#endif() if (ENABLE_RAJA_PLUGIN) if (NOT TARGET RAJA) if (DEFINED RAJA_DIR) From abc8d718a9bdc52dab13a482d3ff163f3f6945fe Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 29 Jan 2020 16:47:03 -0800 Subject: [PATCH 050/118] Fixing errors when compiling without Cuda/Hip --- src/chai/ManagedArray.inl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index f964ac28..23e4620f 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -360,10 +360,12 @@ void ManagedArray::move(ExecutionSpace space, camp::resources::Context* conte moveInnerImpl(space); } +#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) if (space == GPU && m_pointer_record->m_last_context != context ){ m_pointer_record->m_active_contexts[m_pointer_record->m_active_count] = context; m_pointer_record->m_active_count++; } +#endif m_active_base_pointer = static_cast(m_resource_manager->move(const_cast(m_active_base_pointer), m_pointer_record, context, space)); m_active_pointer = m_active_base_pointer + m_offset; @@ -377,7 +379,7 @@ void ManagedArray::move(ExecutionSpace space, camp::resources::Context* conte if (space != NONE) m_pointer_record->m_last_context = context; /* When moving from GPU to CPU we need to move the inner arrays after the outer array. */ -#if defined(CHAI_ENABLE_CUDA) +#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) if (prev_space == GPU) { moveInnerImpl(space); } From 543062955e500ba8b69df5b2305b7d8477563311 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 3 Feb 2020 13:48:22 -0800 Subject: [PATCH 051/118] Adding ActiveResourceManager Container class for handling multiple resources per ManagedArray --- src/chai/ActiveResourceManager.hpp | 77 ++++++++++++++++++++++++++++++ src/chai/ActiveResourceManager.inl | 59 +++++++++++++++++++++++ src/chai/ArrayManager.cpp | 8 ++-- src/chai/ManagedArray.inl | 3 +- src/chai/PointerRecord.hpp | 4 +- 5 files changed, 143 insertions(+), 8 deletions(-) create mode 100644 src/chai/ActiveResourceManager.hpp create mode 100644 src/chai/ActiveResourceManager.inl diff --git a/src/chai/ActiveResourceManager.hpp b/src/chai/ActiveResourceManager.hpp new file mode 100644 index 00000000..20b07ff8 --- /dev/null +++ b/src/chai/ActiveResourceManager.hpp @@ -0,0 +1,77 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// +#ifndef CHAI_ActiveResourceManager_HPP +#define CHAI_ActiveResourceManager_HPP + +#include "camp/contexts.hpp" + +#include +#include + +namespace chai +{ + +/*! + * \Class to store list of Resource pointers. Holds data on the stack + * until a certain threshold, then uses heap memory. + */ +class ActiveResourceManager { + + /*! + * Size of array on the stack. + */ + static constexpr int BASE_SIZE = 16; + + /*! + * Base array on the stack. + */ + std::array m_res_base; + + /*! + * Heap containter for extra resources if more than BASE_SIZE pushed. + */ + std::vector m_res_overflow; + + /*! + * Current number of active resources in the list. + */ + int m_size = 0; + +public: + ActiveResourceManager(); + + /*! + * Retrun current size of the resource list. + */ + int size(); + + /*! + * Push a new resource onto the list. + */ + void push_back(camp::resources::Context* res); + + /*! + * Clear all values on the heap and set m_size to 0. + */ + void clear(); + + /*! + * Check if empty. + */ + bool is_empty() const; + + /*! + * Get resource at given index. + */ + camp::resources::Context* operator [](int i) const; +}; + +} // end of namespace chai + +#include "chai/ActiveResourceManager.inl" + +#endif // CHAI_ActiveResourceManager_HPP diff --git a/src/chai/ActiveResourceManager.inl b/src/chai/ActiveResourceManager.inl new file mode 100644 index 00000000..24343d7c --- /dev/null +++ b/src/chai/ActiveResourceManager.inl @@ -0,0 +1,59 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// +#ifndef CHAI_ActiveResourceManager_INL +#define CHAI_ActiveResourceManager_INL + +#include "ActiveResourceManager.hpp" + +namespace chai +{ + +CHAI_INLINE +ActiveResourceManager::ActiveResourceManager(): + m_size(0) +{ +} + + +CHAI_INLINE +int ActiveResourceManager::size() { + return m_size; +} + + +CHAI_INLINE +void ActiveResourceManager::push_back(camp::resources::Context * res) { + if (m_size < BASE_SIZE) + m_res_base[m_size] = res; + else + m_res_overflow.push_back(res); + m_size++; +} + + +CHAI_INLINE +void ActiveResourceManager::clear() { + m_res_overflow.clear(); + m_size = 0; +} + + +CHAI_INLINE +bool ActiveResourceManager::is_empty() const { + return m_size < 1; +} + + +CHAI_INLINE +camp::resources::Context* ActiveResourceManager::operator [](int i) const { + if (i >= m_size) return nullptr; + return i < BASE_SIZE ? m_res_base[i] : m_res_overflow[i - BASE_SIZE]; +} + +} //end of namespace chai + +#endif // CHAI_ActiveResourceManager_INL diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index dbafed50..286d68f4 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -239,7 +239,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou if (record->transfer_pending) { context->wait_on(&record->m_event); - record->m_active_count = 0; + record->m_res_manager.clear(); record->transfer_pending = false; return; } @@ -256,9 +256,9 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou return; } - if (record->m_active_count > 1) { - for (int i = 0; i < record->m_active_count; i++) { - auto c_event = record->m_active_contexts[i]->get_event(); + if (!record->m_res_manager.is_empty()) { + for (int i = 0; i < record->m_res_manager.size(); i++) { + auto c_event = record->m_res_manager[i]->get_event(); ctx->wait_on(&c_event); } } diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index 23e4620f..0baddc99 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -362,8 +362,7 @@ void ManagedArray::move(ExecutionSpace space, camp::resources::Context* conte #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) if (space == GPU && m_pointer_record->m_last_context != context ){ - m_pointer_record->m_active_contexts[m_pointer_record->m_active_count] = context; - m_pointer_record->m_active_count++; + m_pointer_record->m_res_manager.push_back(context); } #endif diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index ae10777f..a5beafad 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -7,6 +7,7 @@ #ifndef CHAI_PointerRecord_HPP #define CHAI_PointerRecord_HPP +#include "chai/ActiveResourceManager.hpp" #include "chai/ExecutionSpaces.hpp" #include "chai/Types.hpp" @@ -63,8 +64,7 @@ struct PointerRecord { bool transfer_pending; camp::resources::Event m_event; camp::resources::Context* m_last_context = nullptr; - std::array m_active_contexts; - int m_active_count = 0; + ActiveResourceManager m_res_manager; }; } // end of namespace chai From fbc00a27a7c7dca5af3807060c542be3e9d4125f Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 5 Feb 2020 13:40:30 -0800 Subject: [PATCH 052/118] Updating Umpire --- src/tpl/umpire | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tpl/umpire b/src/tpl/umpire index c7835a8c..fcecbb8a 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit c7835a8cee5928c3d9fa511278d94f2289b007e4 +Subproject commit fcecbb8ada267d9e16635d86b848dea4b8f16314 From 9b07d5ddff4d331f5d358be34dd508d76a9c06da Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 5 Feb 2020 14:31:55 -0800 Subject: [PATCH 053/118] Updating camp Resource objects --- examples/CMakeLists.txt | 20 +++++++------- ...ntext-depends.cpp => resource-depends.cpp} | 8 +++--- ...lti-array.cpp => resource-multi-array.cpp} | 8 +++--- ...t-managed.cpp => resource-not-managed.cpp} | 8 +++--- ...context-simple.cpp => resource-simple.cpp} | 10 +++---- examples/{context.cpp => resource.cpp} | 14 +++++----- src/chai/ArrayManager.cpp | 26 +++++++++---------- src/chai/ArrayManager.hpp | 14 +++++----- src/chai/ExecutionSpaces.hpp | 2 +- src/chai/ManagedArray.hpp | 2 +- src/chai/ManagedArray.inl | 8 +++--- src/chai/PointerRecord.hpp | 4 +-- src/tpl/umpire | 2 +- src/util/forall.hpp | 15 +++++------ tests/integration/CMakeLists.txt | 10 +++---- ...s.cpp => managed_array_resource_tests.cpp} | 12 ++++----- tests/unit/execution_space_unit_tests.cpp | 12 ++++----- 17 files changed, 87 insertions(+), 88 deletions(-) rename examples/{context-depends.cpp => resource-depends.cpp} (89%) rename examples/{context-multi-array.cpp => resource-multi-array.cpp} (89%) rename examples/{context-not-managed.cpp => resource-not-managed.cpp} (90%) rename examples/{context-simple.cpp => resource-simple.cpp} (87%) rename examples/{context.cpp => resource.cpp} (83%) rename tests/integration/{managed_array_context_tests.cpp => managed_array_resource_tests.cpp} (85%) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 098be12d..b285cbe6 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -33,24 +33,24 @@ if (ENABLE_CUDA OR ENABLE_HIP) SOURCES example.cpp DEPENDS_ON ${chai_umpire_example_depends}) blt_add_executable( - NAME context-integration.exe - SOURCES context.cpp + NAME resource-integration.exe + SOURCES resource.cpp DEPENDS_ON ${chai_umpire_example_depends}) blt_add_executable( - NAME context-depends.exe - SOURCES context-depends.cpp + NAME resource-depends.exe + SOURCES resource-depends.cpp DEPENDS_ON ${chai_umpire_example_depends}) blt_add_executable( - NAME context-simple.exe - SOURCES context-simple.cpp + NAME resource-simple.exe + SOURCES resource-simple.cpp DEPENDS_ON ${chai_umpire_example_depends}) blt_add_executable( - NAME context-not-managed.exe - SOURCES context-not-managed.cpp + NAME resource-not-managed.exe + SOURCES resource-not-managed.cpp DEPENDS_ON ${chai_umpire_example_depends}) blt_add_executable( - NAME context-multi-array.exe - SOURCES context-multi-array.cpp + NAME resource-multi-array.exe + SOURCES resource-multi-array.cpp DEPENDS_ON ${chai_umpire_example_depends}) endif () diff --git a/examples/context-depends.cpp b/examples/resource-depends.cpp similarity index 89% rename from examples/context-depends.cpp rename to examples/resource-depends.cpp index 2a00f90a..85285865 100644 --- a/examples/context-depends.cpp +++ b/examples/resource-depends.cpp @@ -1,4 +1,4 @@ -#include "camp/contexts.hpp" +#include "camp/resource.hpp" #include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" @@ -47,8 +47,8 @@ int main() chai::ManagedArray array1(ARRAY_SIZE); chai::ManagedArray array2(ARRAY_SIZE); - camp::resources::Context dev1{camp::resources::Cuda{}}; - camp::resources::Context dev2{camp::resources::Cuda{}}; + camp::resources::Resource dev1{camp::resources::Cuda{}}; + camp::resources::Resource dev2{camp::resources::Cuda{}}; auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { array1[i] = i; @@ -70,7 +70,7 @@ int main() array1.move(chai::CPU, &dev1); - camp::resources::Context host{camp::resources::Host{}}; + camp::resources::Resource host{camp::resources::Host{}}; forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { printf("%f ", array1[i]); diff --git a/examples/context-multi-array.cpp b/examples/resource-multi-array.cpp similarity index 89% rename from examples/context-multi-array.cpp rename to examples/resource-multi-array.cpp index 80030533..4f2ad408 100644 --- a/examples/context-multi-array.cpp +++ b/examples/resource-multi-array.cpp @@ -1,4 +1,4 @@ -#include "camp/contexts.hpp" +#include "camp/resource.hpp" #include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" @@ -46,8 +46,8 @@ int main() chai::ManagedArray array1(ARRAY_SIZE); - camp::resources::Context dev1{camp::resources::Cuda{}}; - camp::resources::Context dev2{camp::resources::Cuda{}}; + camp::resources::Resource dev1{camp::resources::Cuda{}}; + camp::resources::Resource dev2{camp::resources::Cuda{}}; auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { if (i % 2 == 1) { @@ -67,7 +67,7 @@ int main() e2.wait(); array1.move(chai::CPU, &dev1); - camp::resources::Context host{camp::resources::Host{}}; + camp::resources::Resource host{camp::resources::Host{}}; forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { printf("%f ", array1[i]); diff --git a/examples/context-not-managed.cpp b/examples/resource-not-managed.cpp similarity index 90% rename from examples/context-not-managed.cpp rename to examples/resource-not-managed.cpp index 4ed1adf2..4a34460a 100644 --- a/examples/context-not-managed.cpp +++ b/examples/resource-not-managed.cpp @@ -1,4 +1,4 @@ -#include "camp/contexts.hpp" +#include "camp/resource.hpp" #include "../src/util/forall.hpp" #include @@ -43,9 +43,9 @@ int main() constexpr std::size_t ARRAY_SIZE{1000}; int clockrate{get_clockrate()}; - camp::resources::Context dev1{camp::resources::Cuda{}}; - camp::resources::Context dev2{camp::resources::Cuda{}}; - camp::resources::Context host{camp::resources::Host{}}; + camp::resources::Resource dev1{camp::resources::Cuda{}}; + camp::resources::Resource dev2{camp::resources::Cuda{}}; + camp::resources::Resource host{camp::resources::Host{}}; float * d_array1 = dev1.allocate(1000); float * d_array2 = dev2.allocate(1000); diff --git a/examples/context-simple.cpp b/examples/resource-simple.cpp similarity index 87% rename from examples/context-simple.cpp rename to examples/resource-simple.cpp index 8e7639cb..2806c65f 100644 --- a/examples/context-simple.cpp +++ b/examples/resource-simple.cpp @@ -1,4 +1,4 @@ -#include "camp/contexts.hpp" +#include "camp/resource.hpp" #include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" @@ -43,7 +43,7 @@ int main() { constexpr std::size_t ARRAY_SIZE{100}; std::vector> arrays; - camp::resources::Context host{camp::resources::Host{}}; + camp::resources::Resource host{camp::resources::Host{}}; int clockrate{get_clockrate()}; @@ -59,14 +59,14 @@ int main() } for (auto array : arrays) { - camp::resources::Context context{camp::resources::Cuda{}}; + camp::resources::Resource resource{camp::resources::Cuda{}}; - forall(&context, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&resource, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { array[i] = array[i] * 2.0; wait_for(20, clockrate); }); - array.move(chai::CPU, &context); + array.move(chai::CPU, &resource); } for (auto array : arrays) { diff --git a/examples/context.cpp b/examples/resource.cpp similarity index 83% rename from examples/context.cpp rename to examples/resource.cpp index e7eb2d64..5a630d45 100644 --- a/examples/context.cpp +++ b/examples/resource.cpp @@ -1,4 +1,4 @@ -#include "camp/contexts.hpp" +#include "camp/resource.hpp" #include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" #include @@ -36,7 +36,7 @@ int main() std::cout << "calling forall with cuda context" << std::endl; for (auto array : arrays) { - camp::resources::Context ctx{camp::resources::Cuda()}; + camp::resources::Resource res{camp::resources::Cuda()}; auto clock_lambda_1 = [=] CHAI_HOST_DEVICE (int idx) { array[idx] = idx * 2; unsigned int start_clock = (unsigned int) clock(); @@ -48,8 +48,8 @@ int main() } }; - auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_1); - array.move(chai::CPU, &ctx); // asynchronous move + auto e = forall(&res, 0, ARRAY_SIZE, clock_lambda_1); + array.move(chai::CPU, &res); // asynchronous move } std::cout << "calling forall with host context" << std::endl; @@ -57,11 +57,11 @@ int main() auto clock_lambda_2 = [=] CHAI_HOST_DEVICE (int idx) { array[idx] *= array[idx]; }; - camp::resources::Context ctx{camp::resources::Host{}}; - auto e = forall(&ctx, 0, ARRAY_SIZE, clock_lambda_2); + camp::resources::Resource res{camp::resources::Host{}}; + auto e = forall(&res, 0, ARRAY_SIZE, clock_lambda_2); } - camp::resources::Context host{camp::resources::Host{}}; + camp::resources::Resource host{camp::resources::Host{}}; for (auto array : arrays) { forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { printf("%i ", int(array[i]) ); diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 69332205..c4ea9b88 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -88,13 +88,13 @@ void ArrayManager::setExecutionSpace(ExecutionSpace space) m_current_execution_space = space; } -void ArrayManager::setExecutionSpace(ExecutionSpace space, camp::resources::Context* context) +void ArrayManager::setExecutionSpace(ExecutionSpace space, camp::resources::Resource* resource) { CHAI_LOG(Debug, "Setting execution space to " << space); std::lock_guard lock(m_mutex); m_current_execution_space = space; - m_current_context = context; + m_current_resource = resource; } void* ArrayManager::move(void* pointer, @@ -116,7 +116,7 @@ void* ArrayManager::move(void* pointer, } void* ArrayManager::move(void* pointer, PointerRecord* pointer_record, - camp::resources::Context* context, + camp::resources::Resource* resource, ExecutionSpace space) { // Check for default arg (NONE) @@ -128,7 +128,7 @@ void* ArrayManager::move(void* pointer, return pointer; } - move(pointer_record, space, context); + move(pointer_record, space, resource); return pointer_record->m_pointers[space]; } @@ -139,9 +139,9 @@ ExecutionSpace ArrayManager::getExecutionSpace() return m_current_execution_space; } -camp::resources::Context* ArrayManager::getContext() +camp::resources::Resource* ArrayManager::getResource() { - return m_current_context; + return m_current_resource; } @@ -206,7 +206,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space) resetTouch(record); } -void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resources::Context* context) +void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resources::Resource* resource) { if (space == NONE) { return; @@ -238,24 +238,24 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou std::lock_guard lock(m_mutex); if (record->transfer_pending) { - context->wait_on(&record->m_event); + resource->wait_for(&record->m_event); record->transfer_pending = false; return; } - camp::resources::Context* ctx; + camp::resources::Resource* res; if (space == chai::CPU){ - ctx = record->m_last_context; + res = record->m_last_resource; }else{ - ctx = context; + res = resource; } - if (ctx == nullptr){ + if (res == nullptr){ m_resource_manager.copy(dst_pointer, src_pointer); return; } - auto e = m_resource_manager.copy(dst_pointer, src_pointer, *ctx); + auto e = m_resource_manager.copy(dst_pointer, src_pointer, *res); record->transfer_pending = true; record->m_event = e; } diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index dc317151..fbcd6e30 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -22,7 +22,7 @@ #include "umpire/Allocator.hpp" #include "umpire/util/MemoryMap.hpp" -#include "camp/contexts.hpp" +#include "camp/resource.hpp" namespace chai { @@ -76,7 +76,7 @@ class ArrayManager * * \param space The space to set as current. */ - void setExecutionSpace(ExecutionSpace space, camp::resources::Context *context); + void setExecutionSpace(ExecutionSpace space, camp::resources::Resource *resource); /*! * \brief Get the current execution space. @@ -86,7 +86,7 @@ class ArrayManager ExecutionSpace getExecutionSpace(); - camp::resources::Context* getContext(); + camp::resources::Resource* getResource(); /*! * \brief Move data in pointer to the current execution space. @@ -99,7 +99,7 @@ class ArrayManager ExecutionSpace = NONE); void* move(void* pointer, PointerRecord* pointer_record, - camp::resources::Context* context, + camp::resources::Resource* resource, ExecutionSpace = NONE); @@ -302,7 +302,7 @@ class ArrayManager * \param space */ void move(PointerRecord* record, ExecutionSpace space); - void move(PointerRecord* record, ExecutionSpace space, camp::resources::Context* context); + void move(PointerRecord* record, ExecutionSpace space, camp::resources::Resource* resource); /*! * \brief Execute a user callback if callbacks are active @@ -327,9 +327,9 @@ class ArrayManager ExecutionSpace m_current_execution_space; /*! - * current context. + * current resource. */ - camp::resources::Context* m_current_context; + camp::resources::Resource* m_current_resource; /** diff --git a/src/chai/ExecutionSpaces.hpp b/src/chai/ExecutionSpaces.hpp index 3e49d62a..28d26af6 100644 --- a/src/chai/ExecutionSpaces.hpp +++ b/src/chai/ExecutionSpaces.hpp @@ -8,7 +8,7 @@ #define CHAI_ExecutionSpaces_HPP #include "chai/config.hpp" -#include "camp/contexts.hpp" +#include "camp/resource.hpp" namespace chai { diff --git a/src/chai/ManagedArray.hpp b/src/chai/ManagedArray.hpp index e7baf7fd..84b5dfbf 100644 --- a/src/chai/ManagedArray.hpp +++ b/src/chai/ManagedArray.hpp @@ -155,7 +155,7 @@ class ManagedArray : public CHAICopyable CHAI_HOST void registerTouch(ExecutionSpace space); CHAI_HOST void move(ExecutionSpace space); - CHAI_HOST void move(ExecutionSpace space, camp::resources::Context* context); + CHAI_HOST void move(ExecutionSpace space, camp::resources::Resource* resource); CHAI_HOST ManagedArray slice(size_t begin, size_t end); /*! diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index c2b48751..60bd5f92 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -138,7 +138,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(ManagedArray const& other): m_is_slice(other.m_is_slice) { #if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) - move(m_resource_manager->getExecutionSpace(), m_resource_manager->getContext()); + move(m_resource_manager->getExecutionSpace(), m_resource_manager->getResource()); #endif } @@ -351,7 +351,7 @@ void ManagedArray::move(ExecutionSpace space) template CHAI_INLINE CHAI_HOST -void ManagedArray::move(ExecutionSpace space, camp::resources::Context* context) +void ManagedArray::move(ExecutionSpace space, camp::resources::Resource* resource) { ExecutionSpace prev_space = m_pointer_record->m_last_space; @@ -360,7 +360,7 @@ void ManagedArray::move(ExecutionSpace space, camp::resources::Context* conte moveInnerImpl(space); } - m_active_base_pointer = static_cast(m_resource_manager->move(const_cast(m_active_base_pointer), m_pointer_record, context, space)); + m_active_base_pointer = static_cast(m_resource_manager->move(const_cast(m_active_base_pointer), m_pointer_record, resource, space)); m_active_pointer = m_active_base_pointer + m_offset; if (!std::is_const::value) { @@ -369,7 +369,7 @@ void ManagedArray::move(ExecutionSpace space, camp::resources::Context* conte } if (space != NONE) m_pointer_record->m_last_space = space; - if (space != NONE) m_pointer_record->m_last_context = context; + if (space != NONE) m_pointer_record->m_last_resource = resource; /* When moving from GPU to CPU we need to move the inner arrays after the outer array. */ #if defined(CHAI_ENABLE_CUDA) diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index c1c95b7f..f3a9a681 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -10,7 +10,7 @@ #include "chai/ExecutionSpaces.hpp" #include "chai/Types.hpp" -#include "camp/contexts.hpp" +#include "camp/resource.hpp" #include #include @@ -61,7 +61,7 @@ struct PointerRecord { bool transfer_pending; camp::resources::Event m_event; - camp::resources::Context* m_last_context = nullptr; + camp::resources::Resource* m_last_resource = nullptr; }; } // end of namespace chai diff --git a/src/tpl/umpire b/src/tpl/umpire index fcecbb8a..06feec33 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit fcecbb8ada267d9e16635d86b848dea4b8f16314 +Subproject commit 06feec33c6a06517e81c1b8fb0934007923c2c44 diff --git a/src/util/forall.hpp b/src/util/forall.hpp index 01a6024e..3804c0c6 100644 --- a/src/util/forall.hpp +++ b/src/util/forall.hpp @@ -10,8 +10,7 @@ #include "chai/ArrayManager.hpp" #include "chai/ExecutionSpaces.hpp" #include "chai/config.hpp" -//#include "camp/device.hpp" -#include "camp/contexts.hpp" +#include "camp/resource.hpp" #if defined(CHAI_ENABLE_UM) #include @@ -51,7 +50,7 @@ void forall(sequential, int begin, int end, LOOP_BODY body) rm->setExecutionSpace(chai::NONE); } template -camp::resources::Event forall_host(camp::resources::Context* dev, int begin, int end, LOOP_BODY body) +camp::resources::Event forall_host(camp::resources::Resource* dev, int begin, int end, LOOP_BODY body) { chai::ArrayManager* rm = chai::ArrayManager::getInstance(); @@ -106,7 +105,7 @@ void forall(gpu, int begin, int end, LOOP_BODY&& body) rm->setExecutionSpace(chai::NONE); } template -camp::resources::Event forall_gpu(camp::resources::Context* dev, int begin, int end, LOOP_BODY&& body) +camp::resources::Event forall_gpu(camp::resources::Resource* dev, int begin, int end, LOOP_BODY&& body) { chai::ArrayManager* rm = chai::ArrayManager::getInstance(); @@ -130,15 +129,15 @@ forall_kernel_gpu<<>>(begin, end - be #endif // if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) template -camp::resources::Event forall(camp::resources::Context *con, int begin, int end, LOOP_BODY&& body) +camp::resources::Event forall(camp::resources::Resource *res, int begin, int end, LOOP_BODY&& body) { - auto platform = con->get_platform(); + auto platform = res->get_platform(); switch(platform) { case camp::resources::Platform::cuda: case camp::resources::Platform::hip: - return forall_gpu(con, begin, end, body); + return forall_gpu(res, begin, end, body); default: - return forall_host(con, begin, end, body); + return forall_host(res, begin, end, body); } } diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index c6f458f1..b0314585 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -26,17 +26,17 @@ blt_add_test( if (ENABLE_CUDA) blt_add_executable( - NAME managed_array_context_tests - SOURCES managed_array_context_tests.cpp + NAME managed_array_resource_tests + SOURCES managed_array_resource_tests.cpp DEPENDS_ON ${chai_integration_test_depends}) target_include_directories( - managed_array_context_tests + managed_array_resource_tests PUBLIC ${PROJECT_BINARY_DIR}/include) blt_add_test( - NAME managed_array_context_test - COMMAND managed_array_context_tests) + NAME managed_array_resource_test + COMMAND managed_array_resource_tests) endif() blt_add_executable( diff --git a/tests/integration/managed_array_context_tests.cpp b/tests/integration/managed_array_resource_tests.cpp similarity index 85% rename from tests/integration/managed_array_context_tests.cpp rename to tests/integration/managed_array_resource_tests.cpp index 0d703f4b..ae65e9bf 100644 --- a/tests/integration/managed_array_context_tests.cpp +++ b/tests/integration/managed_array_resource_tests.cpp @@ -21,8 +21,8 @@ GPU_TEST(ManagedArray, Simple) { constexpr std::size_t ARRAY_SIZE{1024}; - camp::resources::Context host{camp::resources::Host{}}; - camp::resources::Context device{camp::resources::Cuda{}}; + camp::resources::Resource host{camp::resources::Host{}}; + camp::resources::Resource device{camp::resources::Cuda{}}; chai::ManagedArray array(ARRAY_SIZE); @@ -44,8 +44,8 @@ GPU_TEST(ManagedArray, SimpleWithAsyncMoveFrom) { constexpr std::size_t ARRAY_SIZE{1024}; - camp::resources::Context host{camp::resources::Host{}}; - camp::resources::Context device{camp::resources::Cuda{}}; + camp::resources::Resource host{camp::resources::Host{}}; + camp::resources::Resource device{camp::resources::Cuda{}}; chai::ManagedArray array(ARRAY_SIZE); @@ -69,8 +69,8 @@ GPU_TEST(ManagedArray, SimpleWithAsyncMoveTo) { constexpr std::size_t ARRAY_SIZE{1024}; - camp::resources::Context host{camp::resources::Host{}}; - camp::resources::Context device{camp::resources::Cuda{}}; + camp::resources::Resource host{camp::resources::Host{}}; + camp::resources::Resource device{camp::resources::Cuda{}}; chai::ManagedArray array(ARRAY_SIZE); diff --git a/tests/unit/execution_space_unit_tests.cpp b/tests/unit/execution_space_unit_tests.cpp index c3f667da..adf23dd4 100644 --- a/tests/unit/execution_space_unit_tests.cpp +++ b/tests/unit/execution_space_unit_tests.cpp @@ -59,22 +59,22 @@ TEST(ExecutionSpace, Platforms) TEST(ExecutionSpace, Host) { - camp::resources::Context ctx{camp::resources::Host()}; - ASSERT_TRUE( chai::CPU == ctx.get().get_platform() ); + camp::resources::Resource res{camp::resources::Host()}; + ASSERT_TRUE( chai::CPU == res.get().get_platform() ); } #if defined(CHAI_ENABLE_CUDA) TEST(ExecutionSpace, Cuda) { - camp::resources::Context ctx{camp::resources::Cuda()}; - ASSERT_TRUE( chai::GPU == ctx.get().get_platform() ); + camp::resources::Resource res{camp::resources::Cuda()}; + ASSERT_TRUE( chai::GPU == res.get().get_platform() ); } #endif // #if defined(CHAI_ENABLE_CUDA) #if defined(CHAI_ENABLE_HIP) TEST(ExecutionSpace, Hip) { - camp::resources::Context ctx{camp::resources::Hip()}; - ASSERT_TRUE( chai::GPU == ctx.get().get_platform() ); + camp::resources::Resource res{camp::resources::Hip()}; + ASSERT_TRUE( chai::GPU == res.get().get_platform() ); } #endif // #if defined(CHAI_ENABLE_CUDA) From 1a7148d54922445c1ec0e01a3164f14d886f18f0 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 19 Feb 2020 13:19:49 -0800 Subject: [PATCH 054/118] Bumping umpire --- src/tpl/umpire | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tpl/umpire b/src/tpl/umpire index 06feec33..1b99fc32 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit 06feec33c6a06517e81c1b8fb0934007923c2c44 +Subproject commit 1b99fc32b4f3e64cc49938495ad5404e7acf52c1 From bdb74655de3aab357e8787621d823ac473265c89 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 6 Apr 2020 15:59:12 -0700 Subject: [PATCH 055/118] Updating Umpire and BLTv0.3.0 --- blt | 2 +- src/tpl/umpire | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/blt b/blt index 30ccea5a..2c192774 160000 --- a/blt +++ b/blt @@ -1 +1 @@ -Subproject commit 30ccea5ad9853bd6397d8c67deed88b55916d2be +Subproject commit 2c192774b587c245ec2d7022b2e862395ffa8a21 diff --git a/src/tpl/umpire b/src/tpl/umpire index 1b99fc32..4b102ad2 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit 1b99fc32b4f3e64cc49938495ad5404e7acf52c1 +Subproject commit 4b102ad26c1d81d708f6673c133f511b1ccc5f63 From f20b59d132ca46fbe652fc1ff8a1cd1c3d144415 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 6 Apr 2020 15:59:45 -0700 Subject: [PATCH 056/118] Fixing benchmark_api header name change. --- benchmarks/chai_arraymanager_benchmarks.cpp | 2 +- benchmarks/chai_managedarray_benchmarks.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/chai_arraymanager_benchmarks.cpp b/benchmarks/chai_arraymanager_benchmarks.cpp index c58a4987..a49eeb65 100644 --- a/benchmarks/chai_arraymanager_benchmarks.cpp +++ b/benchmarks/chai_arraymanager_benchmarks.cpp @@ -6,7 +6,7 @@ ////////////////////////////////////////////////////////////////////////////// #include -#include "benchmark/benchmark_api.h" +#include "benchmark/benchmark.h" #include "chai/ArrayManager.hpp" diff --git a/benchmarks/chai_managedarray_benchmarks.cpp b/benchmarks/chai_managedarray_benchmarks.cpp index 4fcb33bf..89576257 100644 --- a/benchmarks/chai_managedarray_benchmarks.cpp +++ b/benchmarks/chai_managedarray_benchmarks.cpp @@ -6,7 +6,7 @@ ////////////////////////////////////////////////////////////////////////////// #include -#include "benchmark/benchmark_api.h" +#include "benchmark/benchmark.h" #include "chai/ManagedArray.hpp" #include "chai/config.hpp" From 1a39b70590e106b5faa38c347154c94df7588960 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Tue, 26 May 2020 13:41:22 -0700 Subject: [PATCH 057/118] Fixing failed tests on Managed_Array_Tests. --- src/chai/ArrayManager.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index d14faeae..8b241772 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -36,11 +36,7 @@ ArrayManager::ArrayManager() : m_default_allocation_space = CPU; m_allocators[CPU] = -#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) - new umpire::Allocator(m_resource_manager.getAllocator("PINNED")); -#else new umpire::Allocator(m_resource_manager.getAllocator("HOST")); -#endif #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE) #if defined(CHAI_ENABLE_GPU_SIMULATION_MODE) From 93053085d99d0a2dddc4c55bfe0cc3b4b599200f Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 4 Jun 2020 12:39:18 -0700 Subject: [PATCH 058/118] Trying to unify CHAI move --- src/chai/ArrayManager.cpp | 114 ++++++++-------------- tests/integration/managed_array_tests.cpp | 9 +- 2 files changed, 49 insertions(+), 74 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 8b241772..c0b95065 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -36,7 +36,11 @@ ArrayManager::ArrayManager() : m_default_allocation_space = CPU; m_allocators[CPU] = +#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE) + new umpire::Allocator(m_resource_manager.getAllocator("PINNED")); +#else new umpire::Allocator(m_resource_manager.getAllocator("HOST")); +#endif #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE) #if defined(CHAI_ENABLE_GPU_SIMULATION_MODE) @@ -257,55 +261,12 @@ void ArrayManager::resetTouch(PointerRecord* pointer_record) void ArrayManager::move(PointerRecord* record, ExecutionSpace space) { - if (space == NONE) { - return; - } - - callback(record, ACTION_CAPTURED, space); - - if (space == record->m_last_space) { - return; - } - -#if defined(CHAI_ENABLE_UM) - if (record->m_last_space == UM) { - return; - } -#endif - -#if defined(CHAI_ENABLE_PINNED) - if (record->m_last_space == PINNED) { - if (space == CPU) { - syncIfNeeded(); - } - return; - } -#endif - - void* src_pointer = record->m_pointers[record->m_last_space]; - void* dst_pointer = record->m_pointers[space]; - - if (!dst_pointer) { - allocate(record, space); - dst_pointer = record->m_pointers[space]; - } - - if (!record->m_touched[record->m_last_space]) { - return; - } else if (dst_pointer != src_pointer) { - // Exclude the copy if src and dst are the same (can happen for PINNED memory) - { - std::lock_guard lock(m_mutex); - m_resource_manager.copy(dst_pointer, src_pointer); - } - - callback(record, ACTION_MOVE, space); - } - - resetTouch(record); + move(record,space,nullptr); } + void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resources::Resource* resource) { + if (space == NONE) { return; } @@ -341,40 +302,47 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou if (!record->m_touched[record->m_last_space]) { return; - } else if (dst_pointer != src_pointer) { - // Exclude the copy if src and dst are the same (can happen for PINNED memory) - { + } else { + // Logical flow for when we are using resources. + // This is terrible and needs re-evaluation. + if (resource){ + callback(record, ACTION_MOVE, space); std::lock_guard lock(m_mutex); - m_resource_manager.copy(dst_pointer, src_pointer); - } - callback(record, ACTION_MOVE, space); + if (record->transfer_pending) { + resource->wait_for(&record->m_event); + record->transfer_pending = false; + return; + } - } else { - callback(record, ACTION_MOVE, space); - std::lock_guard lock(m_mutex); + camp::resources::Resource* res; + if (space == chai::CPU){ + res = record->m_last_resource; + }else{ + res = resource; + } - if (record->transfer_pending) { - resource->wait_for(&record->m_event); - record->transfer_pending = false; - return; - } + if (res == nullptr){ + m_resource_manager.copy(dst_pointer, src_pointer); + return; + } - camp::resources::Resource* res; - if (space == chai::CPU){ - res = record->m_last_resource; - }else{ - res = resource; - } + auto e = m_resource_manager.copy(dst_pointer, src_pointer, *res); + record->transfer_pending = true; + record->m_event = e; - if (res == nullptr){ - m_resource_manager.copy(dst_pointer, src_pointer); - return; - } + // Default logical flow when not using non resource move. + } else { + if (dst_pointer != src_pointer) { + // Exclude the copy if src and dst are the same (can happen for PINNED memory) + { + std::lock_guard lock(m_mutex); + m_resource_manager.copy(dst_pointer, src_pointer); + } - auto e = m_resource_manager.copy(dst_pointer, src_pointer, *res); - record->transfer_pending = true; - record->m_event = e; + callback(record, ACTION_MOVE, space); + } + } } resetTouch(record); diff --git a/tests/integration/managed_array_tests.cpp b/tests/integration/managed_array_tests.cpp index 957c8fe9..d3e5b046 100644 --- a/tests/integration/managed_array_tests.cpp +++ b/tests/integration/managed_array_tests.cpp @@ -779,7 +779,12 @@ TEST(ManagedArray, ExternalConstructorUnowned) TEST(ManagedArray, ExternalConstructorOwned) { +#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) + float* data; + cudaMallocHost(&data, 20*sizeof(float)); +#else float* data = static_cast(std::malloc(20 * sizeof(float))); +#endif for (int i = 0; i < 20; i++) { data[i] = 1.0f * i; @@ -796,7 +801,9 @@ TEST(ManagedArray, ExternalConstructorOwned) #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) GPU_TEST(ManagedArray, ExternalUnownedMoveToGPU) { - float data[20]; + float* data; + cudaMallocHost(&data, 20*sizeof(float)); + for (int i = 0; i < 20; i++) { data[i] = 0.; } From f41551e422faccb3b79ea0066c83ceb7a7b21f24 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Fri, 5 Jun 2020 13:52:32 -0700 Subject: [PATCH 059/118] Printable callback Types and ExecutionSpaces --- src/chai/ExecutionSpaces.hpp | 15 ++++++++ src/chai/Types.hpp | 9 +++++ tests/integration/managed_array_tests.cpp | 42 +++++++++++------------ 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/chai/ExecutionSpaces.hpp b/src/chai/ExecutionSpaces.hpp index 206c6635..7fd024ad 100644 --- a/src/chai/ExecutionSpaces.hpp +++ b/src/chai/ExecutionSpaces.hpp @@ -44,6 +44,21 @@ enum ExecutionSpace { ,PINNED #endif }; +static std::vector PrintExecSpace = { + (char *)"NONE", + (char *)"CPU", +#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE) + (char *)"GPU", +#endif +#if defined(CHAI_ENABLE_UM) + (char *)"UM", +#endif +#if defined(CHAI_ENABLE_PINNED) + (char *)"PINNED", +#endif + (char *)"NUM_EXECUTION_SPACES" +}; + inline bool operator==(const ExecutionSpace& s, const camp::resources::Platform& p) { if(s == chai::CPU && p == camp::resources::Platform::host) return true; diff --git a/src/chai/Types.hpp b/src/chai/Types.hpp index 0164a2e0..343263e5 100644 --- a/src/chai/Types.hpp +++ b/src/chai/Types.hpp @@ -9,6 +9,7 @@ // Std library headers #include +#include // CHAI headers #include "chai/ExecutionSpaces.hpp" @@ -30,6 +31,14 @@ namespace chai typedef unsigned int uint; enum Action { ACTION_ALLOC, ACTION_FREE, ACTION_MOVE, ACTION_CAPTURED, ACTION_FOUND_ABANDONED, ACTION_LEAKED }; + static std::vector PrintAction = { + (char *)"ACTION_ALLOC", + (char *)"ACTION_FREE", + (char *)"ACTION_MOVE", + (char *)"ACTION_CAPTURED", + (char *)"ACTION_FOUND_ABANDONED", + (char *)"ACTION_LEAKED" + }; using UserCallback = std::function; } // end of namespace chai diff --git a/tests/integration/managed_array_tests.cpp b/tests/integration/managed_array_tests.cpp index d3e5b046..ad72907a 100644 --- a/tests/integration/managed_array_tests.cpp +++ b/tests/integration/managed_array_tests.cpp @@ -777,27 +777,27 @@ TEST(ManagedArray, ExternalConstructorUnowned) assert_empty_map(true); } -TEST(ManagedArray, ExternalConstructorOwned) -{ -#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) - float* data; - cudaMallocHost(&data, 20*sizeof(float)); -#else - float* data = static_cast(std::malloc(20 * sizeof(float))); -#endif - - for (int i = 0; i < 20; i++) { - data[i] = 1.0f * i; - } - - chai::ManagedArray array = - chai::makeManagedArray(data, 20, chai::CPU, true); - - forall(sequential(), 0, 20, [=](int i) { ASSERT_EQ(data[i], array[i]); }); - - array.free(); - assert_empty_map(true); -} +//TEST(ManagedArray, ExternalConstructorOwned) +//{ +//#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) +// float* data; +// cudaMallocHost(&data, 20*sizeof(float)); +//#else +// float* data = static_cast(std::malloc(20 * sizeof(float))); +//#endif +// +// for (int i = 0; i < 20; i++) { +// data[i] = 1.0f * i; +// } +// +// chai::ManagedArray array = +// chai::makeManagedArray(data, 20, chai::CPU, true); +// +// forall(sequential(), 0, 20, [=](int i) { ASSERT_EQ(data[i], array[i]); }); +// +// array.free(); +// assert_empty_map(true); +//} #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) GPU_TEST(ManagedArray, ExternalUnownedMoveToGPU) { From 00ef5e588e92fcaa4cbba1e2f6ec81018be38d81 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Fri, 5 Jun 2020 13:56:11 -0700 Subject: [PATCH 060/118] Adding name of array to callback, need to remove later. --- examples/resource-simple.cpp | 26 ++++++++++++++++++++++++++ src/chai/ManagedArray.hpp | 3 ++- src/chai/PointerRecord.hpp | 2 ++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/examples/resource-simple.cpp b/examples/resource-simple.cpp index 2806c65f..5fbf89a2 100644 --- a/examples/resource-simple.cpp +++ b/examples/resource-simple.cpp @@ -41,16 +41,41 @@ int get_clockrate() int main() { + auto callBack = [&](const chai::PointerRecord* record, chai::Action act, chai::ExecutionSpace s) + { + const size_t bytes = record->m_size; + printf("%s cback: act=%s, space=%s, bytes=%ld\n", record->name.c_str(), chai::PrintAction[(int) act], chai::PrintExecSpace[(int) s], (long) bytes); + if (act == chai::ACTION_MOVE) + { + if (s == chai::CPU) + { + printf("Moved to host\n"); + } + else if (s == chai::GPU) + { + printf("Moved to device\n"); + } + } + if (act == chai::ACTION_FOUND_ABANDONED) { + printf("in abandoned!\n"); + //ASSERT_EQ(false,true); + } + }; + constexpr std::size_t ARRAY_SIZE{100}; std::vector> arrays; camp::resources::Resource host{camp::resources::Host{}}; + int clockrate{get_clockrate()}; for (std::size_t i = 0; i < 10; ++i) { arrays.push_back(chai::ManagedArray(ARRAY_SIZE)); + arrays[i].m_pointer_record->name = "array "+ std::to_string(i); + arrays[i].setUserCallback(callBack); } + for (auto array : arrays) { // set on host forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { @@ -66,6 +91,7 @@ int main() wait_for(20, clockrate); }); + std::cout<< "Move to CPU called" << std::endl; array.move(chai::CPU, &resource); } diff --git a/src/chai/ManagedArray.hpp b/src/chai/ManagedArray.hpp index 8bf580dc..53e87046 100644 --- a/src/chai/ManagedArray.hpp +++ b/src/chai/ManagedArray.hpp @@ -421,9 +421,10 @@ class ManagedArray : public CHAICopyable /*! * Pointer to PointerRecord data. */ - mutable PointerRecord* m_pointer_record = nullptr; mutable bool m_is_slice = false; +public: + mutable PointerRecord* m_pointer_record = nullptr; }; /*! diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index f56492fa..89885997 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -75,6 +75,8 @@ struct PointerRecord { m_allocators[space] = 0; } } + + std::string name; }; } // end of namespace chai From bc0177fd6b211b0190cd2a9d67a546734f5960b1 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Fri, 5 Jun 2020 13:57:30 -0700 Subject: [PATCH 061/118] More debug statements --- src/chai/ArrayManager.cpp | 28 ++++++++++++++++++---------- src/chai/ManagedArray.inl | 3 ++- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index c0b95065..a50d59e4 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -37,7 +37,7 @@ ArrayManager::ArrayManager() : m_allocators[CPU] = #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE) - new umpire::Allocator(m_resource_manager.getAllocator("PINNED")); + new umpire::Allocator(m_resource_manager.getAllocator("HOST")); #else new umpire::Allocator(m_resource_manager.getAllocator("HOST")); #endif @@ -261,6 +261,7 @@ void ArrayManager::resetTouch(PointerRecord* pointer_record) void ArrayManager::move(PointerRecord* record, ExecutionSpace space) { + std::cout<< "Resource null"<m_last_space == PINNED) { - if (space == CPU) { - syncIfNeeded(); - } - return; - } -#endif +//#if defined(CHAI_ENABLE_PINNED) +// if (record->m_last_space == PINNED) { +// if (space == CPU) { +// syncIfNeeded(); +// } +// return; +// } +//#endif void* src_pointer = record->m_pointers[record->m_last_space]; void* dst_pointer = record->m_pointers[space]; @@ -306,11 +307,12 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou // Logical flow for when we are using resources. // This is terrible and needs re-evaluation. if (resource){ - callback(record, ACTION_MOVE, space); std::lock_guard lock(m_mutex); if (record->transfer_pending) { resource->wait_for(&record->m_event); + //record->m_event.wait(); + std::cout<< " - "<name<<" Resource copy end" << std::endl; record->transfer_pending = false; return; } @@ -324,15 +326,21 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou if (res == nullptr){ m_resource_manager.copy(dst_pointer, src_pointer); + std::cout << " + res null "; + callback(record, ACTION_MOVE, space); return; } auto e = m_resource_manager.copy(dst_pointer, src_pointer, *res); + std::cout<< " - "<name<<" Resource copy start" << std::endl; + callback(record, ACTION_MOVE, space); record->transfer_pending = true; record->m_event = e; // Default logical flow when not using non resource move. } else { + std::cout<< "Resource null 2"< CHAI_INLINE CHAI_HOST void ManagedArray::move(ExecutionSpace space) const -{ +{ + std::cout<< "ManageArray Move Null res" << std::endl; if (m_pointer_record != &ArrayManager::s_null_record) { ExecutionSpace prev_space = m_pointer_record->m_last_space; if (prev_space == CPU || prev_space == NONE) { From 13095be75f47e5dc0af8e08afa36e3dcac53682d Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Fri, 5 Jun 2020 13:58:14 -0700 Subject: [PATCH 062/118] Callbacks for managed_array_resource_tests --- .../managed_array_resource_tests.cpp | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/integration/managed_array_resource_tests.cpp b/tests/integration/managed_array_resource_tests.cpp index ae65e9bf..8adbc699 100644 --- a/tests/integration/managed_array_resource_tests.cpp +++ b/tests/integration/managed_array_resource_tests.cpp @@ -19,12 +19,33 @@ #ifdef CHAI_ENABLE_CUDA GPU_TEST(ManagedArray, Simple) { + auto callBack = [&](const chai::PointerRecord* record, chai::Action act, chai::ExecutionSpace s) + { + const size_t bytes = record->m_size; + printf("cback: act=%s, space=%s, bytes=%ld\n", chai::PrintAction[(int) act], chai::PrintExecSpace[(int) s], (long) bytes); + if (act == chai::ACTION_MOVE) + { + if (s == chai::CPU) + { + printf("Moved to host\n"); + } + else if (s == chai::GPU) + { + printf("Moved to device\n"); + } + } + if (act == chai::ACTION_FOUND_ABANDONED) { + printf("in abandoned!\n"); + ASSERT_EQ(false,true); + } + }; constexpr std::size_t ARRAY_SIZE{1024}; camp::resources::Resource host{camp::resources::Host{}}; camp::resources::Resource device{camp::resources::Cuda{}}; chai::ManagedArray array(ARRAY_SIZE); + array.setUserCallback(callBack); forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { array[i] = i; @@ -34,6 +55,8 @@ GPU_TEST(ManagedArray, Simple) array[i] = array[i] * 2.0; }); + array.move(chai::CPU, &device); + // print on host forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { EXPECT_DOUBLE_EQ(array[i], i*2.0); From 6d73a40d33cccd45ed15d61fecf035b0514b8689 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 8 Jun 2020 15:25:54 -0700 Subject: [PATCH 063/118] Lots of prints for debugging... --- examples/resource.cpp | 27 +++++++++++++++++++++++++++ src/chai/ArrayManager.cpp | 7 +++++++ 2 files changed, 34 insertions(+) diff --git a/examples/resource.cpp b/examples/resource.cpp index 5a630d45..91f30d48 100644 --- a/examples/resource.cpp +++ b/examples/resource.cpp @@ -29,14 +29,38 @@ int main() const int ARRAY_SIZE = 10; std::vector< chai::ManagedArray > arrays; + auto callBack = [&](const chai::PointerRecord* record, chai::Action act, chai::ExecutionSpace s) + { + const size_t bytes = record->m_size; + printf("%s cback: act=%s, space=%s, bytes=%ld\n", record->name.c_str(), chai::PrintAction[(int) act], chai::PrintExecSpace[(int) s], (long) bytes); + if (act == chai::ACTION_MOVE) + { + if (s == chai::CPU) + { + printf("Moved to host\n"); + } + else if (s == chai::GPU) + { + printf("Moved to device\n"); + } + } + if (act == chai::ACTION_FOUND_ABANDONED) { + printf("in abandoned!\n"); + //ASSERT_EQ(false,true); + } + }; + for (int i = 0; i < NUM_ARRAYS; i++) { arrays.push_back(chai::ManagedArray(10, chai::GPU)); + arrays[i].m_pointer_record->name = "array "+ std::to_string(i); + arrays[i].setUserCallback(callBack); } std::cout << "calling forall with cuda context" << std::endl; for (auto array : arrays) { camp::resources::Resource res{camp::resources::Cuda()}; + auto clock_lambda_1 = [=] CHAI_HOST_DEVICE (int idx) { array[idx] = idx * 2; unsigned int start_clock = (unsigned int) clock(); @@ -48,7 +72,10 @@ int main() } }; + + std::cout << "Calling forall" << std::endl; auto e = forall(&res, 0, ARRAY_SIZE, clock_lambda_1); + std::cout << "Move to CPU called" << std::endl; array.move(chai::CPU, &res); // asynchronous move } diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index a50d59e4..a7e005e8 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -297,6 +297,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou void* dst_pointer = record->m_pointers[space]; if (!dst_pointer) { + std::cout<<"ALLLOCATING!!!!"<m_pointers[space]; } @@ -310,8 +311,12 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou std::lock_guard lock(m_mutex); if (record->transfer_pending) { + if (!&record->m_event) + std::cout<< "Event NULL" << std::endl; + resource->wait_for(&record->m_event); //record->m_event.wait(); + std::cout<< " - "<name<<" Resource copy end" << std::endl; record->transfer_pending = false; return; @@ -332,6 +337,8 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou } auto e = m_resource_manager.copy(dst_pointer, src_pointer, *res); + //m_resource_manager.copy(dst_pointer, src_pointer); + //auto e = res->get_event(); std::cout<< " - "<name<<" Resource copy start" << std::endl; callback(record, ACTION_MOVE, space); record->transfer_pending = true; From d76456b8aac6c0c36ae0d753fe29eae90d319448 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 8 Jun 2020 15:33:15 -0700 Subject: [PATCH 064/118] Change to stop segfaults, incorrect values returned. --- src/chai/ArrayManager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index a7e005e8..1714dcce 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -314,7 +314,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou if (!&record->m_event) std::cout<< "Event NULL" << std::endl; - resource->wait_for(&record->m_event); + //resource->wait_for(&record->m_event); //record->m_event.wait(); std::cout<< " - "<name<<" Resource copy end" << std::endl; From b1047b8f20f6e04a43fd212fe53ef0859ef9e259 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Thu, 11 Jun 2020 08:40:55 -0700 Subject: [PATCH 065/118] Make sure PointerRecord is initialized correctly --- src/chai/ArrayManager.cpp | 9 ++------- src/chai/PointerRecord.hpp | 6 +++--- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 1714dcce..99b8c578 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -33,6 +33,7 @@ ArrayManager::ArrayManager() : { m_pointer_map.clear(); m_current_execution_space = NONE; + m_current_resource = nullptr; m_default_allocation_space = CPU; m_allocators[CPU] = @@ -311,13 +312,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou std::lock_guard lock(m_mutex); if (record->transfer_pending) { - if (!&record->m_event) - std::cout<< "Event NULL" << std::endl; - - //resource->wait_for(&record->m_event); - //record->m_event.wait(); - - std::cout<< " - "<name<<" Resource copy end" << std::endl; + resource->wait_for(&record->m_event); record->transfer_pending = false; return; } diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index 89885997..cd118359 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -58,9 +58,9 @@ struct PointerRecord { int m_allocators[NUM_EXECUTION_SPACES]; - bool transfer_pending; - camp::resources::Event m_event; - camp::resources::Resource* m_last_resource = nullptr; + bool transfer_pending{false}; + camp::resources::Event m_event{}; + camp::resources::Resource* m_last_resource{nullptr}; /*! * \brief Default constructor From f0186d43aa621fdfb52a1e31204814861b32ea6e Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 29 Jun 2020 10:01:29 -0700 Subject: [PATCH 066/118] Bumping Umpire w/ cuda device fix. --- src/tpl/umpire | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tpl/umpire b/src/tpl/umpire index 4b102ad2..28af4940 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit 4b102ad26c1d81d708f6673c133f511b1ccc5f63 +Subproject commit 28af49401262b8637f8b8b6c5439474e99a70610 From 8f63be9c95b8ed76394987e7e34accaf1006c655 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 29 Jun 2020 10:07:26 -0700 Subject: [PATCH 067/118] Use pinned host memory when possible if GPU enabled. --- src/chai/ArrayManager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 99b8c578..dedaa427 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -38,7 +38,7 @@ ArrayManager::ArrayManager() : m_allocators[CPU] = #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE) - new umpire::Allocator(m_resource_manager.getAllocator("HOST")); + new umpire::Allocator(m_resource_manager.getAllocator("PINNED")); #else new umpire::Allocator(m_resource_manager.getAllocator("HOST")); #endif From 44101f5e6d68eae7b7b205247aa72bcfdf002fca Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 29 Jun 2020 11:04:58 -0700 Subject: [PATCH 068/118] Cleaning up resource examples. --- examples/CMakeLists.txt | 4 -- examples/resource-depends.cpp | 61 +++++------------ examples/resource-example-util.hpp | 39 +++++++++++ examples/resource-multi-array.cpp | 51 ++++----------- examples/resource-not-managed.cpp | 52 ++++----------- examples/resource-simple.cpp | 80 +++++------------------ examples/resource.cpp | 101 ----------------------------- 7 files changed, 96 insertions(+), 292 deletions(-) create mode 100644 examples/resource-example-util.hpp delete mode 100644 examples/resource.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 5d9829d1..a9b09f4e 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -28,10 +28,6 @@ if (ENABLE_CUDA OR ENABLE_HIP) NAME chai-example.exe SOURCES example.cpp DEPENDS_ON ${chai_umpire_example_depends}) - blt_add_executable( - NAME resource-integration.exe - SOURCES resource.cpp - DEPENDS_ON ${chai_umpire_example_depends}) blt_add_executable( NAME resource-depends.exe SOURCES resource-depends.cpp diff --git a/examples/resource-depends.cpp b/examples/resource-depends.cpp index 85285865..6b08e891 100644 --- a/examples/resource-depends.cpp +++ b/examples/resource-depends.cpp @@ -1,77 +1,50 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-20, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #include "camp/resource.hpp" -#include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" +#include "../src/util/forall.hpp" +#include "resource-example-util.hpp" + #include #include -inline __host__ __device__ void -wait_for(float time, float clockrate) { - clock_t time_in_clocks = time*clockrate; - - unsigned int start_clock = (unsigned int) clock(); - clock_t clock_offset = 0; - while (clock_offset < time_in_clocks) - { - unsigned int end_clock = (unsigned int) clock(); - clock_offset = (clock_t)(end_clock - start_clock); - } -} - -int get_clockrate() -{ - int cuda_device = 0; - cudaDeviceProp deviceProp; - cudaGetDevice(&cuda_device); - cudaGetDeviceProperties(&deviceProp, cuda_device); - if ((deviceProp.concurrentKernels == 0)) - { - printf("> GPU does not support concurrent kernel execution\n"); - printf(" CUDA kernel runs will be serialized\n"); - } - printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", - deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); - -#if defined(__arm__) || defined(__aarch64__) - return deviceProp.clockRate/1000; -#else - return deviceProp.clockRate; -#endif -} int main() { constexpr std::size_t ARRAY_SIZE{1000}; int clockrate{get_clockrate()}; - chai::ManagedArray array1(ARRAY_SIZE); - chai::ManagedArray array2(ARRAY_SIZE); - camp::resources::Resource dev1{camp::resources::Cuda{}}; camp::resources::Resource dev2{camp::resources::Cuda{}}; + camp::resources::Resource host{camp::resources::Host{}}; + + chai::ManagedArray array1(ARRAY_SIZE); + chai::ManagedArray array2(ARRAY_SIZE); auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { array1[i] = i; - wait_for(10, clockrate); + gpu_time_wait_for(10, clockrate); }); auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { array2[i] = -1; - wait_for(20, clockrate); + gpu_time_wait_for(20, clockrate); }); - e2.wait(); - e1.wait(); + dev1.wait_for(&e2); forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { array1[i] *= array2[i]; - wait_for(10, clockrate); + gpu_time_wait_for(10, clockrate); }); array1.move(chai::CPU, &dev1); - camp::resources::Resource host{camp::resources::Host{}}; - forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { printf("%f ", array1[i]); }); diff --git a/examples/resource-example-util.hpp b/examples/resource-example-util.hpp new file mode 100644 index 00000000..d7e0e8e9 --- /dev/null +++ b/examples/resource-example-util.hpp @@ -0,0 +1,39 @@ +#ifndef RESOURCE_EXAMPLE_UTIL_HPP +#define RESOURCE_EXAMPLE_UTIL_HPP + + +inline __host__ __device__ void +gpu_time_wait_for(float time, float clockrate) { + clock_t time_in_clocks = time*clockrate; + + unsigned int start_clock = (unsigned int) clock(); + clock_t clock_offset = 0; + while (clock_offset < time_in_clocks) + { + unsigned int end_clock = (unsigned int) clock(); + clock_offset = (clock_t)(end_clock - start_clock); + } +} + +int get_clockrate() +{ + int cuda_device = 0; + cudaDeviceProp deviceProp; + cudaGetDevice(&cuda_device); + cudaGetDeviceProperties(&deviceProp, cuda_device); + if ((deviceProp.concurrentKernels == 0)) + { + printf("> GPU does not support concurrent kernel execution\n"); + printf(" CUDA kernel runs will be serialized\n"); + } + printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", + deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); + +#if defined(__arm__) || defined(__aarch64__) + return deviceProp.clockRate/1000; +#else + return deviceProp.clockRate; +#endif +} + +#endif // RESOURCE_EXAMPLE_UTIL_HPP diff --git a/examples/resource-multi-array.cpp b/examples/resource-multi-array.cpp index 4f2ad408..22665c9b 100644 --- a/examples/resource-multi-array.cpp +++ b/examples/resource-multi-array.cpp @@ -1,43 +1,12 @@ #include "camp/resource.hpp" -#include "../src/util/forall.hpp" #include "chai/ManagedArray.hpp" +#include "../src/util/forall.hpp" +#include "resource-example-util.hpp" + #include #include -inline __host__ __device__ void -wait_for(float time, float clockrate) { - clock_t time_in_clocks = time*clockrate; - - unsigned int start_clock = (unsigned int) clock(); - clock_t clock_offset = 0; - while (clock_offset < time_in_clocks) - { - unsigned int end_clock = (unsigned int) clock(); - clock_offset = (clock_t)(end_clock - start_clock); - } -} - -int get_clockrate() -{ - int cuda_device = 0; - cudaDeviceProp deviceProp; - cudaGetDevice(&cuda_device); - cudaGetDeviceProperties(&deviceProp, cuda_device); - if ((deviceProp.concurrentKernels == 0)) - { - printf("> GPU does not support concurrent kernel execution\n"); - printf(" CUDA kernel runs will be serialized\n"); - } - printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", - deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); - -#if defined(__arm__) || defined(__aarch64__) - return deviceProp.clockRate/1000; -#else - return deviceProp.clockRate; -#endif -} int main() { @@ -49,22 +18,24 @@ int main() camp::resources::Resource dev1{camp::resources::Cuda{}}; camp::resources::Resource dev2{camp::resources::Cuda{}}; - auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { - if (i % 2 == 1) { - wait_for(20, clockrate); + + auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + if (i % 2 == 0) { array1[i] = i; + gpu_time_wait_for(10, clockrate); } }); - auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { - if (i % 2 == 0) { + auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + if (i % 2 == 1) { + gpu_time_wait_for(20, clockrate); array1[i] = i; - wait_for(10, clockrate); } }); e1.wait(); e2.wait(); + array1.move(chai::CPU, &dev1); camp::resources::Resource host{camp::resources::Host{}}; diff --git a/examples/resource-not-managed.cpp b/examples/resource-not-managed.cpp index 4a34460a..3053803e 100644 --- a/examples/resource-not-managed.cpp +++ b/examples/resource-not-managed.cpp @@ -1,42 +1,17 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-20, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #include "camp/resource.hpp" + #include "../src/util/forall.hpp" +#include "resource-example-util.hpp" #include #include -inline __host__ __device__ void -wait_for(float time, float clockrate) { - clock_t time_in_clocks = time*clockrate; - - unsigned int start_clock = (unsigned int) clock(); - clock_t clock_offset = 0; - while (clock_offset < time_in_clocks) - { - unsigned int end_clock = (unsigned int) clock(); - clock_offset = (clock_t)(end_clock - start_clock); - } -} - -int get_clockrate() -{ - int cuda_device = 0; - cudaDeviceProp deviceProp; - cudaGetDevice(&cuda_device); - cudaGetDeviceProperties(&deviceProp, cuda_device); - if ((deviceProp.concurrentKernels == 0)) - { - printf("> GPU does not support concurrent kernel execution\n"); - printf(" CUDA kernel runs will be serialized\n"); - } - printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", - deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); - -#if defined(__arm__) || defined(__aarch64__) - return deviceProp.clockRate/1000; -#else - return deviceProp.clockRate; -#endif -} int main() { @@ -49,26 +24,25 @@ int main() float * d_array1 = dev1.allocate(1000); float * d_array2 = dev2.allocate(1000); + float * h_array1 = host.allocate(1000); auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { d_array1[i] = i; - wait_for(10, clockrate); + gpu_time_wait_for(10, clockrate); }); auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { d_array2[i] = -1; - wait_for(20, clockrate); + gpu_time_wait_for(20, clockrate); }); - e2.wait(); + dev1.wait_for(&e2); forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { d_array1[i] *= d_array2[i]; - wait_for(10, clockrate); + gpu_time_wait_for(10, clockrate); }); - - float * h_array1 = host.allocate(1000); dev1.memcpy(h_array1, d_array1, sizeof(float) * 1000); forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { diff --git a/examples/resource-simple.cpp b/examples/resource-simple.cpp index 5fbf89a2..fdf2e599 100644 --- a/examples/resource-simple.cpp +++ b/examples/resource-simple.cpp @@ -1,84 +1,37 @@ -#include "camp/resource.hpp" +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-20, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #include "../src/util/forall.hpp" +#include "resource-example-util.hpp" + #include "chai/ManagedArray.hpp" +#include "camp/resource.hpp" #include #include -inline __host__ __device__ void -wait_for(float time, float clockrate) { - clock_t time_in_clocks = time*clockrate; - - unsigned int start_clock = (unsigned int) clock(); - clock_t clock_offset = 0; - while (clock_offset < time_in_clocks) - { - unsigned int end_clock = (unsigned int) clock(); - clock_offset = (clock_t)(end_clock - start_clock); - } -} - -int get_clockrate() -{ - int cuda_device = 0; - cudaDeviceProp deviceProp; - cudaGetDevice(&cuda_device); - cudaGetDeviceProperties(&deviceProp, cuda_device); - if ((deviceProp.concurrentKernels == 0)) - { - printf("> GPU does not support concurrent kernel execution\n"); - printf(" CUDA kernel runs will be serialized\n"); - } - printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", - deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); - -#if defined(__arm__) || defined(__aarch64__) - return deviceProp.clockRate/1000; -#else - return deviceProp.clockRate; -#endif -} int main() { - auto callBack = [&](const chai::PointerRecord* record, chai::Action act, chai::ExecutionSpace s) - { - const size_t bytes = record->m_size; - printf("%s cback: act=%s, space=%s, bytes=%ld\n", record->name.c_str(), chai::PrintAction[(int) act], chai::PrintExecSpace[(int) s], (long) bytes); - if (act == chai::ACTION_MOVE) - { - if (s == chai::CPU) - { - printf("Moved to host\n"); - } - else if (s == chai::GPU) - { - printf("Moved to device\n"); - } - } - if (act == chai::ACTION_FOUND_ABANDONED) { - printf("in abandoned!\n"); - //ASSERT_EQ(false,true); - } - }; + constexpr int NUM_ARRAYS = 16; constexpr std::size_t ARRAY_SIZE{100}; + std::vector> arrays; camp::resources::Resource host{camp::resources::Host{}}; int clockrate{get_clockrate()}; - for (std::size_t i = 0; i < 10; ++i) { + for (std::size_t i = 0; i < NUM_ARRAYS; ++i) { arrays.push_back(chai::ManagedArray(ARRAY_SIZE)); - arrays[i].m_pointer_record->name = "array "+ std::to_string(i); - arrays[i].setUserCallback(callBack); } - for (auto array : arrays) { - // set on host - forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }); } @@ -86,17 +39,16 @@ int main() for (auto array : arrays) { camp::resources::Resource resource{camp::resources::Cuda{}}; - forall(&resource, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&resource, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { array[i] = array[i] * 2.0; - wait_for(20, clockrate); + gpu_time_wait_for(20, clockrate); }); - std::cout<< "Move to CPU called" << std::endl; array.move(chai::CPU, &resource); } for (auto array : arrays) { - forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { if (i == 25) { printf("array[%d] = %f \n", i, array[i]); } diff --git a/examples/resource.cpp b/examples/resource.cpp deleted file mode 100644 index 91f30d48..00000000 --- a/examples/resource.cpp +++ /dev/null @@ -1,101 +0,0 @@ -#include "camp/resource.hpp" -#include "../src/util/forall.hpp" -#include "chai/ManagedArray.hpp" -#include - -int main() -{ - float kernel_time = 20; - int cuda_device = 0; - - cudaDeviceProp deviceProp; - cudaGetDevice(&cuda_device); - cudaGetDeviceProperties(&deviceProp, cuda_device); - if ((deviceProp.concurrentKernels == 0)) - { - printf("> GPU does not support concurrent kernel execution\n"); - printf(" CUDA kernel runs will be serialized\n"); - } - printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", - deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); - -#if defined(__arm__) || defined(__aarch64__) - clock_t time_clocks = (clock_t)(kernel_time * (deviceProp.clockRate / 1000)); -#else - clock_t time_clocks = (clock_t)(kernel_time * deviceProp.clockRate); -#endif - - const int NUM_ARRAYS = 16; - const int ARRAY_SIZE = 10; - std::vector< chai::ManagedArray > arrays; - - auto callBack = [&](const chai::PointerRecord* record, chai::Action act, chai::ExecutionSpace s) - { - const size_t bytes = record->m_size; - printf("%s cback: act=%s, space=%s, bytes=%ld\n", record->name.c_str(), chai::PrintAction[(int) act], chai::PrintExecSpace[(int) s], (long) bytes); - if (act == chai::ACTION_MOVE) - { - if (s == chai::CPU) - { - printf("Moved to host\n"); - } - else if (s == chai::GPU) - { - printf("Moved to device\n"); - } - } - if (act == chai::ACTION_FOUND_ABANDONED) { - printf("in abandoned!\n"); - //ASSERT_EQ(false,true); - } - }; - - for (int i = 0; i < NUM_ARRAYS; i++) { - arrays.push_back(chai::ManagedArray(10, chai::GPU)); - arrays[i].m_pointer_record->name = "array "+ std::to_string(i); - arrays[i].setUserCallback(callBack); - } - - std::cout << "calling forall with cuda context" << std::endl; - for (auto array : arrays) { - - camp::resources::Resource res{camp::resources::Cuda()}; - - auto clock_lambda_1 = [=] CHAI_HOST_DEVICE (int idx) { - array[idx] = idx * 2; - unsigned int start_clock = (unsigned int) clock(); - clock_t clock_offset = 0; - while (clock_offset < time_clocks) - { - unsigned int end_clock = (unsigned int) clock(); - clock_offset = (clock_t)(end_clock - start_clock); - } - }; - - - std::cout << "Calling forall" << std::endl; - auto e = forall(&res, 0, ARRAY_SIZE, clock_lambda_1); - std::cout << "Move to CPU called" << std::endl; - array.move(chai::CPU, &res); // asynchronous move - } - - std::cout << "calling forall with host context" << std::endl; - for (auto array : arrays) { - auto clock_lambda_2 = [=] CHAI_HOST_DEVICE (int idx) { - array[idx] *= array[idx]; - }; - camp::resources::Resource res{camp::resources::Host{}}; - auto e = forall(&res, 0, ARRAY_SIZE, clock_lambda_2); - } - - camp::resources::Resource host{camp::resources::Host{}}; - for (auto array : arrays) { - forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { - printf("%i ", int(array[i]) ); - }); - printf("\n"); - } - - for (auto a : arrays) a.free(); - return 0; -} From 467b43c3a8436e658cf6ab114599438e7fa744c3 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 29 Jun 2020 11:06:35 -0700 Subject: [PATCH 069/118] Removing print statements from ArrayManager.cpp --- src/chai/ArrayManager.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index dedaa427..47164fad 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -262,7 +262,6 @@ void ArrayManager::resetTouch(PointerRecord* pointer_record) void ArrayManager::move(PointerRecord* record, ExecutionSpace space) { - std::cout<< "Resource null"<m_last_space == PINNED) { -// if (space == CPU) { -// syncIfNeeded(); -// } -// return; -// } -//#endif +#if defined(CHAI_ENABLE_PINNED) + if (record->m_last_space == PINNED) { + if (space == CPU) { + syncIfNeeded(); + } + return; + } +#endif void* src_pointer = record->m_pointers[record->m_last_space]; void* dst_pointer = record->m_pointers[space]; if (!dst_pointer) { - std::cout<<"ALLLOCATING!!!!"<m_pointers[space]; } @@ -326,22 +324,17 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou if (res == nullptr){ m_resource_manager.copy(dst_pointer, src_pointer); - std::cout << " + res null "; callback(record, ACTION_MOVE, space); return; } auto e = m_resource_manager.copy(dst_pointer, src_pointer, *res); - //m_resource_manager.copy(dst_pointer, src_pointer); - //auto e = res->get_event(); - std::cout<< " - "<name<<" Resource copy start" << std::endl; callback(record, ACTION_MOVE, space); record->transfer_pending = true; record->m_event = e; // Default logical flow when not using non resource move. } else { - std::cout<< "Resource null 2"< Date: Mon, 29 Jun 2020 11:12:39 -0700 Subject: [PATCH 070/118] Moving resource example helper header into util. --- examples/resource-depends.cpp | 2 +- examples/resource-multi-array.cpp | 2 +- examples/resource-not-managed.cpp | 2 +- examples/resource-simple.cpp | 2 +- .../resource-example-util.hpp => src/util/gpu_clock.hpp | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) rename examples/resource-example-util.hpp => src/util/gpu_clock.hpp (90%) diff --git a/examples/resource-depends.cpp b/examples/resource-depends.cpp index 6b08e891..4817102e 100644 --- a/examples/resource-depends.cpp +++ b/examples/resource-depends.cpp @@ -8,7 +8,7 @@ #include "chai/ManagedArray.hpp" #include "../src/util/forall.hpp" -#include "resource-example-util.hpp" +#include "../src/util/gpu_clock.hpp" #include #include diff --git a/examples/resource-multi-array.cpp b/examples/resource-multi-array.cpp index 22665c9b..a5c5279b 100644 --- a/examples/resource-multi-array.cpp +++ b/examples/resource-multi-array.cpp @@ -2,7 +2,7 @@ #include "chai/ManagedArray.hpp" #include "../src/util/forall.hpp" -#include "resource-example-util.hpp" +#include "../src/util/gpu_clock.hpp" #include #include diff --git a/examples/resource-not-managed.cpp b/examples/resource-not-managed.cpp index 3053803e..7aece102 100644 --- a/examples/resource-not-managed.cpp +++ b/examples/resource-not-managed.cpp @@ -7,7 +7,7 @@ #include "camp/resource.hpp" #include "../src/util/forall.hpp" -#include "resource-example-util.hpp" +#include "../src/util/gpu_clock.hpp" #include #include diff --git a/examples/resource-simple.cpp b/examples/resource-simple.cpp index fdf2e599..c2254c63 100644 --- a/examples/resource-simple.cpp +++ b/examples/resource-simple.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: BSD-3-Clause ////////////////////////////////////////////////////////////////////////////// #include "../src/util/forall.hpp" -#include "resource-example-util.hpp" +#include "../src/util/gpu_clock.hpp" #include "chai/ManagedArray.hpp" #include "camp/resource.hpp" diff --git a/examples/resource-example-util.hpp b/src/util/gpu_clock.hpp similarity index 90% rename from examples/resource-example-util.hpp rename to src/util/gpu_clock.hpp index d7e0e8e9..81a7c97d 100644 --- a/examples/resource-example-util.hpp +++ b/src/util/gpu_clock.hpp @@ -1,5 +1,5 @@ -#ifndef RESOURCE_EXAMPLE_UTIL_HPP -#define RESOURCE_EXAMPLE_UTIL_HPP +#ifndef GPU_CLOCK_HPP +#define GPU_CLOCK_HPP inline __host__ __device__ void @@ -36,4 +36,4 @@ int get_clockrate() #endif } -#endif // RESOURCE_EXAMPLE_UTIL_HPP +#endif // GPU_CLOCK_HPP From 0ca9c6e0f23fba28dffec3961e3099fb63181055 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 29 Jun 2020 11:35:48 -0700 Subject: [PATCH 071/118] Adding Resource tests. --- examples/resource-multi-array.cpp | 6 + .../managed_array_resource_tests.cpp | 124 +++++++++++++----- 2 files changed, 97 insertions(+), 33 deletions(-) diff --git a/examples/resource-multi-array.cpp b/examples/resource-multi-array.cpp index a5c5279b..f9587fc8 100644 --- a/examples/resource-multi-array.cpp +++ b/examples/resource-multi-array.cpp @@ -1,3 +1,9 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-20, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #include "camp/resource.hpp" #include "chai/ManagedArray.hpp" diff --git a/tests/integration/managed_array_resource_tests.cpp b/tests/integration/managed_array_resource_tests.cpp index 8adbc699..c78f1cd5 100644 --- a/tests/integration/managed_array_resource_tests.cpp +++ b/tests/integration/managed_array_resource_tests.cpp @@ -1,5 +1,5 @@ ////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// Copyright (c) 2016-20, Lawrence Livermore National Security, LLC and CHAI // project contributors. See the COPYRIGHT file for details. // // SPDX-License-Identifier: BSD-3-Clause @@ -12,6 +12,7 @@ static void gpu_test_##X##Y() #include "../src/util/forall.hpp" +#include "../src/util/gpu_clock.hpp" #include "chai/ManagedArray.hpp" #include "chai/config.hpp" @@ -19,48 +20,27 @@ #ifdef CHAI_ENABLE_CUDA GPU_TEST(ManagedArray, Simple) { - auto callBack = [&](const chai::PointerRecord* record, chai::Action act, chai::ExecutionSpace s) - { - const size_t bytes = record->m_size; - printf("cback: act=%s, space=%s, bytes=%ld\n", chai::PrintAction[(int) act], chai::PrintExecSpace[(int) s], (long) bytes); - if (act == chai::ACTION_MOVE) - { - if (s == chai::CPU) - { - printf("Moved to host\n"); - } - else if (s == chai::GPU) - { - printf("Moved to device\n"); - } - } - if (act == chai::ACTION_FOUND_ABANDONED) { - printf("in abandoned!\n"); - ASSERT_EQ(false,true); - } - }; constexpr std::size_t ARRAY_SIZE{1024}; camp::resources::Resource host{camp::resources::Host{}}; camp::resources::Resource device{camp::resources::Cuda{}}; chai::ManagedArray array(ARRAY_SIZE); - array.setUserCallback(callBack); - forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }); - forall(&device, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&device, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { array[i] = array[i] * 2.0; }); - array.move(chai::CPU, &device); - // print on host - forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { EXPECT_DOUBLE_EQ(array[i], i*2.0); }); + + array.free(); } GPU_TEST(ManagedArray, SimpleWithAsyncMoveFrom) @@ -72,18 +52,18 @@ GPU_TEST(ManagedArray, SimpleWithAsyncMoveFrom) chai::ManagedArray array(ARRAY_SIZE); - forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }); - forall(&device, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&device, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { array[i] = array[i] * 2.0; }); array.move(chai::CPU, &device); // print on host - forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { EXPECT_DOUBLE_EQ(array[i], i*2.0); }); } @@ -97,19 +77,97 @@ GPU_TEST(ManagedArray, SimpleWithAsyncMoveTo) chai::ManagedArray array(ARRAY_SIZE); - forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { array[i] = i; }); array.move(chai::GPU, &device); - forall(&device, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&device, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { array[i] = array[i] * 2.0; }); // print on host - forall(&host, 0, ARRAY_SIZE, [=] __host__ __device__ (int i) { + forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { EXPECT_DOUBLE_EQ(array[i], i*2.0); }); + + array.free(); +} + +GPU_TEST(ManagedArray, MultiStreamDepends) +{ + constexpr std::size_t ARRAY_SIZE{1024}; + int clockrate{get_clockrate()}; + + camp::resources::Resource dev1{camp::resources::Cuda{}}; + camp::resources::Resource dev2{camp::resources::Cuda{}}; + camp::resources::Resource host{camp::resources::Host{}}; + + chai::ManagedArray array1(ARRAY_SIZE); + chai::ManagedArray array2(ARRAY_SIZE); + + forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + array1[i] = i; + gpu_time_wait_for(10, clockrate); + }); + + auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + array2[i] = -1; + gpu_time_wait_for(20, clockrate); + }); + + dev1.wait_for(&e2); + + forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + array1[i] *= array2[i]; + gpu_time_wait_for(10, clockrate); + }); + + array1.move(chai::CPU, &dev1); + + forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { + EXPECT_DOUBLE_EQ(array1[i], i*-1.0); + }); + + array1.free(); + array2.free(); +} + +GPU_TEST(ManagedArray, MultiStreamSingleArray) +{ + constexpr std::size_t ARRAY_SIZE{1024}; + int clockrate{get_clockrate()}; + + chai::ManagedArray array1(ARRAY_SIZE); + + camp::resources::Resource dev1{camp::resources::Cuda{}}; + camp::resources::Resource dev2{camp::resources::Cuda{}}; + + + auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + if (i % 2 == 0) { + array1[i] = i; + gpu_time_wait_for(10, clockrate); + } + }); + + auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + if (i % 2 == 1) { + gpu_time_wait_for(20, clockrate); + array1[i] = i; + } + }); + + e1.wait(); + e2.wait(); + + array1.move(chai::CPU, &dev1); + + camp::resources::Resource host{camp::resources::Host{}}; + + forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { + EXPECT_DOUBLE_EQ(array1[i], (double)i); + }); } #endif //#ifdef CHAI_ENABLE_CUDA From 1c9f6509198a9171ba83d64d3cdd2894918d1d82 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Mon, 13 Jul 2020 15:27:21 -0700 Subject: [PATCH 072/118] Re-run tests. --- src/chai/ArrayManager.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index 416ec32a..f538abdd 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -446,12 +446,12 @@ class ArrayManager } /*! - * current execution space. + * Current execution space. */ ExecutionSpace m_current_execution_space; /*! - * current resource. + * Current resource. */ camp::resources::Resource* m_current_resource; From 1914a316852e494ce32c64eaa7a2883ff78b5eb6 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 6 Aug 2020 14:13:07 -0700 Subject: [PATCH 073/118] Moving m_pointer_record back. --- src/chai/ManagedArray.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/chai/ManagedArray.hpp b/src/chai/ManagedArray.hpp index daa3b2b9..8dee0ab5 100644 --- a/src/chai/ManagedArray.hpp +++ b/src/chai/ManagedArray.hpp @@ -450,10 +450,9 @@ class ManagedArray : public CHAICopyable /*! * Pointer to PointerRecord data. */ + mutable PointerRecord* m_pointer_record = nullptr; mutable bool m_is_slice = false; -public: - mutable PointerRecord* m_pointer_record = nullptr; }; /*! From 1385bddd52b1f4ef6a2fb49c4860a99cda62cf90 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 6 Aug 2020 14:30:04 -0700 Subject: [PATCH 074/118] Pointing Umpire to v3.0.0. --- src/tpl/umpire | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tpl/umpire b/src/tpl/umpire index 28af4940..65767608 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit 28af49401262b8637f8b8b6c5439474e99a70610 +Subproject commit 657676087574f61f9d90b996a3bdbf4e1cdfc92e From 5e11b4db9f78bc1caef41f51d7477d4b5f496afe Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Fri, 7 Aug 2020 10:47:51 -0700 Subject: [PATCH 075/118] Removing old debugging code. --- src/chai/ExecutionSpaces.hpp | 16 +--------------- src/chai/ManagedArray.inl | 1 - src/chai/PointerRecord.hpp | 1 - src/chai/Types.hpp | 8 -------- 4 files changed, 1 insertion(+), 25 deletions(-) diff --git a/src/chai/ExecutionSpaces.hpp b/src/chai/ExecutionSpaces.hpp index 7fd024ad..c4a29404 100644 --- a/src/chai/ExecutionSpaces.hpp +++ b/src/chai/ExecutionSpaces.hpp @@ -44,20 +44,6 @@ enum ExecutionSpace { ,PINNED #endif }; -static std::vector PrintExecSpace = { - (char *)"NONE", - (char *)"CPU", -#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE) - (char *)"GPU", -#endif -#if defined(CHAI_ENABLE_UM) - (char *)"UM", -#endif -#if defined(CHAI_ENABLE_PINNED) - (char *)"PINNED", -#endif - (char *)"NUM_EXECUTION_SPACES" -}; inline bool operator==(const ExecutionSpace& s, const camp::resources::Platform& p) { @@ -65,7 +51,7 @@ inline bool operator==(const ExecutionSpace& s, const camp::resources::Platform& #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) /*! Execution in GPU space */ if (s == chai::GPU && (p == camp::resources::Platform::cuda || - p == camp::resources::Platform::hip)) return true; + p == camp::resources::Platform::hip)) return true; #endif return false; } diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index 8f8d3120..a211fb59 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -388,7 +388,6 @@ CHAI_INLINE CHAI_HOST void ManagedArray::move(ExecutionSpace space) const { - std::cout<< "ManageArray Move Null res" << std::endl; if (m_pointer_record != &ArrayManager::s_null_record) { ExecutionSpace prev_space = m_pointer_record->m_last_space; if (prev_space == CPU || prev_space == NONE) { diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index cd118359..a1c6b42b 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -76,7 +76,6 @@ struct PointerRecord { } } - std::string name; }; } // end of namespace chai diff --git a/src/chai/Types.hpp b/src/chai/Types.hpp index 343263e5..d1c88fb6 100644 --- a/src/chai/Types.hpp +++ b/src/chai/Types.hpp @@ -31,14 +31,6 @@ namespace chai typedef unsigned int uint; enum Action { ACTION_ALLOC, ACTION_FREE, ACTION_MOVE, ACTION_CAPTURED, ACTION_FOUND_ABANDONED, ACTION_LEAKED }; - static std::vector PrintAction = { - (char *)"ACTION_ALLOC", - (char *)"ACTION_FREE", - (char *)"ACTION_MOVE", - (char *)"ACTION_CAPTURED", - (char *)"ACTION_FOUND_ABANDONED", - (char *)"ACTION_LEAKED" - }; using UserCallback = std::function; } // end of namespace chai From 99c74d0d7d2fb88047557389e0aba95c39e5c7f9 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Fri, 7 Aug 2020 10:51:21 -0700 Subject: [PATCH 076/118] Removing include vector. --- src/chai/Types.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/chai/Types.hpp b/src/chai/Types.hpp index d1c88fb6..0164a2e0 100644 --- a/src/chai/Types.hpp +++ b/src/chai/Types.hpp @@ -9,7 +9,6 @@ // Std library headers #include -#include // CHAI headers #include "chai/ExecutionSpaces.hpp" From 44a04a473d43f0e5015a3d7d271a9ca91c0cb5db Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Fri, 7 Aug 2020 10:53:30 -0700 Subject: [PATCH 077/118] Removing commented out test. --- tests/integration/managed_array_tests.cpp | 25 ++++------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/tests/integration/managed_array_tests.cpp b/tests/integration/managed_array_tests.cpp index 1ae588d0..7979a6b9 100644 --- a/tests/integration/managed_array_tests.cpp +++ b/tests/integration/managed_array_tests.cpp @@ -777,32 +777,15 @@ TEST(ManagedArray, ExternalConstructorUnowned) assert_empty_map(true); } -//TEST(ManagedArray, ExternalConstructorOwned) -//{ -//#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) -// float* data; -// cudaMallocHost(&data, 20*sizeof(float)); -//#else -// float* data = static_cast(std::malloc(20 * sizeof(float))); -//#endif -// -// for (int i = 0; i < 20; i++) { -// data[i] = 1.0f * i; -// } -// -// chai::ManagedArray array = -// chai::makeManagedArray(data, 20, chai::CPU, true); -// -// forall(sequential(), 0, 20, [=](int i) { ASSERT_EQ(data[i], array[i]); }); -// -// array.free(); -// assert_empty_map(true); -//} #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) GPU_TEST(ManagedArray, ExternalUnownedMoveToGPU) { float* data; +#if defined(CHAI_ENABLE_CUDA) cudaMallocHost(&data, 20*sizeof(float)); +#elif defined(CHAI_ENABLE_HIP) + hipMallocHost(&data, 20*sizeof(float)); +#endif for (int i = 0; i < 20; i++) { data[i] = 0.; From 6f3cf8b9500db40935453ed7594e595b849fa0e9 Mon Sep 17 00:00:00 2001 From: Kristi Belcher Date: Mon, 13 Dec 2021 16:38:02 -0800 Subject: [PATCH 078/118] change uberenv to also depend on camp --- scripts/spack_packages/chai/package.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/spack_packages/chai/package.py b/scripts/spack_packages/chai/package.py index d5b164dd..3d2012c3 100644 --- a/scripts/spack_packages/chai/package.py +++ b/scripts/spack_packages/chai/package.py @@ -74,9 +74,12 @@ class Chai(CMakePackage, CudaPackage, ROCmPackage): multi=False, description='Tests to run') depends_on('umpire') - depends_on('raja', when="+raja") - depends_on('umpire@main', when='@main') + + depends_on('camp') + depends_on('camp@0.3.0') + + depends_on('raja', when="+raja") depends_on('raja@main', when="@main+raja") depends_on('cmake@3.14:', type='build') From 82469986911c480cddcca8284c74bc1b5cd74d43 Mon Sep 17 00:00:00 2001 From: Kristi Belcher Date: Tue, 14 Dec 2021 15:47:35 -0800 Subject: [PATCH 079/118] changing version of camp in CI --- scripts/spack_packages/chai/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/spack_packages/chai/package.py b/scripts/spack_packages/chai/package.py index f9ab1776..70580882 100644 --- a/scripts/spack_packages/chai/package.py +++ b/scripts/spack_packages/chai/package.py @@ -77,7 +77,7 @@ class Chai(CMakePackage, CudaPackage, ROCmPackage): depends_on('umpire@main', when='@main') depends_on('camp') - depends_on('camp@0.3.0') + depends_on('camp@0.2.2') depends_on('raja', when="+raja") depends_on('raja@main', when="@main+raja") From e0015202533cae035ee2fb2e1751b147313161e1 Mon Sep 17 00:00:00 2001 From: Kristi Belcher Date: Tue, 14 Dec 2021 16:25:32 -0800 Subject: [PATCH 080/118] trying to fix camp in CI... --- src/tpl/umpire | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tpl/umpire b/src/tpl/umpire index 5f886b42..5f474c75 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit 5f886b4299496b7cb6f9d62dc1372ce6d3832fbc +Subproject commit 5f474c7501daf365d6015a2141821d83bd799ffa From c3b06bba7d19a59def72cf209dc21004b7e6d31b Mon Sep 17 00:00:00 2001 From: Kristi Belcher Date: Wed, 15 Dec 2021 11:45:06 -0800 Subject: [PATCH 081/118] updating from develop --- src/tpl/umpire | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tpl/umpire b/src/tpl/umpire index 5f474c75..5f886b42 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit 5f474c7501daf365d6015a2141821d83bd799ffa +Subproject commit 5f886b4299496b7cb6f9d62dc1372ce6d3832fbc From 0f4cea7143ed8cc6ddf67931676d983a1097beb8 Mon Sep 17 00:00:00 2001 From: Kristi Belcher Date: Wed, 15 Dec 2021 16:32:13 -0800 Subject: [PATCH 082/118] had to change the camp version in umpire package too --- scripts/spack_packages/umpire/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/spack_packages/umpire/package.py b/scripts/spack_packages/umpire/package.py index 7a4a6914..1ddf12aa 100644 --- a/scripts/spack_packages/umpire/package.py +++ b/scripts/spack_packages/umpire/package.py @@ -88,7 +88,7 @@ class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage): depends_on('camp cuda_arch={0}'.format(sm_), when='cuda_arch={0}'.format(sm_)) - depends_on('camp@main') + depends_on('camp@0.2.2') conflicts('+numa', when='@:0.3.2') conflicts('~c', when='+fortran', msg='Fortran API requires C API') From 103d537fbfd564b1943d2b91dc73e3beb9aac732 Mon Sep 17 00:00:00 2001 From: Kristi Belcher Date: Thu, 16 Dec 2021 10:57:29 -0800 Subject: [PATCH 083/118] changing the tpl job to also depend on correct camp version --- .gitlab/quartz-jobs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab/quartz-jobs.yml b/.gitlab/quartz-jobs.yml index eae1a553..9952f4b2 100644 --- a/.gitlab/quartz-jobs.yml +++ b/.gitlab/quartz-jobs.yml @@ -38,5 +38,5 @@ gcc_4_9_3: clang_10_develop_tpls: variables: - SPEC: " %clang@10.0.1 ^umpire@develop ^raja@develop ^camp@main" - extends: .build_and_test_on_quartz \ No newline at end of file + SPEC: " %clang@10.0.1 ^umpire@develop ^raja@develop ^camp@0.2.2" + extends: .build_and_test_on_quartz From 2188b12788764d1697bdc22df3a570173c9c08ac Mon Sep 17 00:00:00 2001 From: Kristi Belcher Date: Thu, 16 Dec 2021 13:27:31 -0800 Subject: [PATCH 084/118] removing the quartz tpl job for now --- .gitlab/quartz-jobs.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitlab/quartz-jobs.yml b/.gitlab/quartz-jobs.yml index 9952f4b2..b7ec7f0b 100644 --- a/.gitlab/quartz-jobs.yml +++ b/.gitlab/quartz-jobs.yml @@ -36,7 +36,7 @@ gcc_4_9_3: SPEC: "%gcc@4.9.3" extends: .build_and_test_on_quartz -clang_10_develop_tpls: - variables: - SPEC: " %clang@10.0.1 ^umpire@develop ^raja@develop ^camp@0.2.2" - extends: .build_and_test_on_quartz +#clang_10_develop_tpls: +# variables: +# SPEC: " %clang@10.0.1 ^umpire@develop ^raja@develop ^camp@0.2.2" +# extends: .build_and_test_on_quartz From d53c05f2c83ac2ba118dccad27caf16095f1d25b Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 17 Nov 2022 09:09:24 -0800 Subject: [PATCH 085/118] Add missing files to CMakeLists.txt --- src/chai/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/chai/CMakeLists.txt b/src/chai/CMakeLists.txt index e10763c4..2cf61ba0 100644 --- a/src/chai/CMakeLists.txt +++ b/src/chai/CMakeLists.txt @@ -9,6 +9,8 @@ configure_file( ${PROJECT_BINARY_DIR}/include/chai/config.hpp) set (chai_headers + ActiveResourceManager.hpp + ActiveResourceManager.inl ArrayManager.hpp ArrayManager.inl ChaiMacros.hpp From 54c1f1e2d5e3d701615fff6cffe3a7d6e7035f39 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 17 Nov 2022 09:13:01 -0800 Subject: [PATCH 086/118] Don't tie CHAI spack package to specific version of CAMP --- scripts/spack_packages/chai/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/spack_packages/chai/package.py b/scripts/spack_packages/chai/package.py index e4e8a332..bf596de4 100644 --- a/scripts/spack_packages/chai/package.py +++ b/scripts/spack_packages/chai/package.py @@ -77,7 +77,7 @@ class Chai(CMakePackage, CudaPackage, ROCmPackage): depends_on('umpire@main', when='@main') depends_on('camp') - depends_on('camp@0.2.2') + depends_on('camp@main', when='@main') depends_on('raja', when="+raja") depends_on('raja@main', when="@main+raja") From cb5d1e3989b109be7dddf6707f3b228664012230 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 17 Nov 2022 09:19:06 -0800 Subject: [PATCH 087/118] Stylistic fixes --- src/chai/ActiveResourceManager.inl | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/chai/ActiveResourceManager.inl b/src/chai/ActiveResourceManager.inl index a80f933b..c4fd051a 100644 --- a/src/chai/ActiveResourceManager.inl +++ b/src/chai/ActiveResourceManager.inl @@ -27,10 +27,13 @@ int ActiveResourceManager::size() { CHAI_INLINE void ActiveResourceManager::push_back(camp::resources::Resource * res) { - if (m_size < BASE_SIZE) + if (m_size < BASE_SIZE) { m_res_base[m_size] = res; - else + } + else { m_res_overflow.push_back(res); + } + m_size++; } @@ -49,9 +52,13 @@ bool ActiveResourceManager::is_empty() const { CHAI_INLINE -camp::resources::Resource* ActiveResourceManager::operator [](int i) const { - if (i >= m_size) return nullptr; - return i < BASE_SIZE ? m_res_base[i] : m_res_overflow[i - BASE_SIZE]; +camp::resources::Resource* ActiveResourceManager::operator[](int i) const { + if (i >= m_size) { + return nullptr; + } + else { + return i < BASE_SIZE ? m_res_base[i] : m_res_overflow[i - BASE_SIZE]; + } } } //end of namespace chai From 74a4117d62b0321db297e2b578df27765e250ac7 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 17 Nov 2022 09:25:12 -0800 Subject: [PATCH 088/118] Clean up in unit tests --- tests/unit/CMakeLists.txt | 33 +---------------------- tests/unit/execution_space_unit_tests.cpp | 2 +- 2 files changed, 2 insertions(+), 33 deletions(-) diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 1146410b..1d540216 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -4,38 +4,7 @@ # # SPDX-License-Identifier: BSD-3-Clause ############################################################################## -set (managed_array_test_depends - chai umpire gtest) -set (execution_space_test_depends - chai umpire gtest) - - -set (array_manager_test_depends - chai umpire gtest) - -if (ENABLE_CUDA) - set (execution_space_test_depends - ${execution_space_test_depends} - cuda) - set (managed_array_test_depends - ${managed_array_test_depends} - cuda) - set (array_manager_test_depends - ${array_manager_test_depends} - cuda) -endif () -if (ENABLE_HIP) - set (execution_space_test_depends - ${execution_space_test_depends} - hip) - set (managed_array_test_depends - ${managed_array_test_depends} - hip) - set (array_manager_test_depends - ${array_manager_test_depends} - hip) -endif () set (chai_unit_test_depends chai umpire gtest) @@ -46,7 +15,7 @@ blt_list_append(TO chai_unit_test_depends ELEMENTS blt::hip IF ${CHAI_ENABLE_HIP blt_add_executable( NAME execution_space_unit_test SOURCES execution_space_unit_tests.cpp - DEPENDS_ON ${execution_space_test_depends}) + DEPENDS_ON ${chai_unit_test_depends}) target_include_directories( execution_space_unit_test diff --git a/tests/unit/execution_space_unit_tests.cpp b/tests/unit/execution_space_unit_tests.cpp index adf23dd4..c6fc1a87 100644 --- a/tests/unit/execution_space_unit_tests.cpp +++ b/tests/unit/execution_space_unit_tests.cpp @@ -77,4 +77,4 @@ TEST(ExecutionSpace, Hip) camp::resources::Resource res{camp::resources::Hip()}; ASSERT_TRUE( chai::GPU == res.get().get_platform() ); } -#endif // #if defined(CHAI_ENABLE_CUDA) +#endif // #if defined(CHAI_ENABLE_HIP) From 60d254d2445deda52f7ba004ab2b45954309738e Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 17 Nov 2022 09:56:11 -0800 Subject: [PATCH 089/118] Document parameter --- src/chai/ArrayManager.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index 89a3975e..2dab5708 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -161,10 +161,12 @@ class ArrayManager * \param space The space to set as current. */ CHAISHAREDDLL_API void setExecutionSpace(ExecutionSpace space); + /*! * \brief Set the current execution space. * * \param space The space to set as current. + * \param resource The resource to set as current. */ CHAISHAREDDLL_API void setExecutionSpace(ExecutionSpace space, camp::resources::Resource *resource); From 0a0c74564b273ace06a90c844a6dd1efaa952526 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 29 Nov 2022 13:00:02 -0800 Subject: [PATCH 090/118] Fix GPU simulation mode --- src/chai/ArrayManager.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index f5c92606..0d27af23 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -37,7 +37,7 @@ ArrayManager::ArrayManager() : m_default_allocation_space = CPU; m_allocators[CPU] = -#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE) +#if (defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP)) && !defined(CHAI_ENABLE_GPU_SIMULATION_MODE) new umpire::Allocator(m_resource_manager.getAllocator("PINNED")); #else new umpire::Allocator(m_resource_manager.getAllocator("HOST")); @@ -164,9 +164,9 @@ void * ArrayManager::frontOfAllocation(void * pointer) { void ArrayManager::setExecutionSpace(ExecutionSpace space) { #if defined(CHAI_ENABLE_GPU_SIMULATION_MODE) - if (isGPUSimMode()) { - space = chai::GPU; - } + if (isGPUSimMode()) { + space = chai::GPU; + } #endif CHAI_LOG(Debug, "Setting execution space to " << space); @@ -180,7 +180,14 @@ void ArrayManager::setExecutionSpace(ExecutionSpace space) void ArrayManager::setExecutionSpace(ExecutionSpace space, camp::resources::Resource* resource) { +#if defined(CHAI_ENABLE_GPU_SIMULATION_MODE) + if (isGPUSimMode()) { + space = chai::GPU; + } +#endif + CHAI_LOG(Debug, "Setting execution space to " << space); + std::lock_guard lock(m_mutex); m_current_execution_space = space; From 13c4e2f4210a01ca02037f22189e093a3dcd747a Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Wed, 29 Mar 2023 11:14:18 -0700 Subject: [PATCH 091/118] Fix synchronization for unified/pinned memory --- src/chai/ArrayManager.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 0d27af23..da877429 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -188,8 +188,11 @@ void ArrayManager::setExecutionSpace(ExecutionSpace space, camp::resources::Reso CHAI_LOG(Debug, "Setting execution space to " << space); - std::lock_guard lock(m_mutex); + if (chai::GPU == space) { + m_synced_since_last_kernel = false; + } + std::lock_guard lock(m_mutex); m_current_execution_space = space; m_current_resource = resource; } From cc1eee7e81c37316f6222765b56bd802d9a12979 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 08:05:59 -0700 Subject: [PATCH 092/118] Update to RAJA 2022.10.5 --- src/tpl/raja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tpl/raja b/src/tpl/raja index c2a6b174..3774f513 160000 --- a/src/tpl/raja +++ b/src/tpl/raja @@ -1 +1 @@ -Subproject commit c2a6b1740759ae3ae7c85b35e20dbffbe235355d +Subproject commit 3774f51339459bbbdb77055aa23f82919b6335b6 From 2bebbf73cdb5bd2a72a3b15766292ea10ae816f2 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 09:03:37 -0700 Subject: [PATCH 093/118] Fix typo --- src/chai/ActiveResourceManager.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chai/ActiveResourceManager.hpp b/src/chai/ActiveResourceManager.hpp index 696c0662..0ba7bf88 100644 --- a/src/chai/ActiveResourceManager.hpp +++ b/src/chai/ActiveResourceManager.hpp @@ -32,7 +32,7 @@ class ActiveResourceManager { std::array m_res_base; /*! - * Heap containter for extra resources if more than BASE_SIZE pushed. + * Heap container for extra resources if more than BASE_SIZE pushed. */ std::vector m_res_overflow; From 897f7633a781e6e06735e7bf8fd781f206d1e2e1 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 09:07:13 -0700 Subject: [PATCH 094/118] More strict checking --- src/chai/ActiveResourceManager.inl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/chai/ActiveResourceManager.inl b/src/chai/ActiveResourceManager.inl index c4fd051a..f156a475 100644 --- a/src/chai/ActiveResourceManager.inl +++ b/src/chai/ActiveResourceManager.inl @@ -47,13 +47,13 @@ void ActiveResourceManager::clear() { CHAI_INLINE bool ActiveResourceManager::is_empty() const { - return m_size < 1; + return m_size == 0; } CHAI_INLINE camp::resources::Resource* ActiveResourceManager::operator[](int i) const { - if (i >= m_size) { + if (i < 0 || i >= m_size) { return nullptr; } else { From 2a99083ac24fd142016ba7ec800f7ec4ef9f08d1 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 09:10:18 -0700 Subject: [PATCH 095/118] Use default constructor --- src/chai/ActiveResourceManager.hpp | 2 +- src/chai/ActiveResourceManager.inl | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/chai/ActiveResourceManager.hpp b/src/chai/ActiveResourceManager.hpp index 0ba7bf88..0bc6d6df 100644 --- a/src/chai/ActiveResourceManager.hpp +++ b/src/chai/ActiveResourceManager.hpp @@ -42,7 +42,7 @@ class ActiveResourceManager { int m_size = 0; public: - ActiveResourceManager(); + ActiveResourceManager() = default; /*! * Retrun current size of the resource list. diff --git a/src/chai/ActiveResourceManager.inl b/src/chai/ActiveResourceManager.inl index f156a475..c01067e5 100644 --- a/src/chai/ActiveResourceManager.inl +++ b/src/chai/ActiveResourceManager.inl @@ -12,13 +12,6 @@ namespace chai { -CHAI_INLINE -ActiveResourceManager::ActiveResourceManager(): - m_size(0) -{ -} - - CHAI_INLINE int ActiveResourceManager::size() { return m_size; From cd5e8a52af27ab2c1ee81fd00f382e87f581614b Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 09:15:03 -0700 Subject: [PATCH 096/118] Reduce duplicate code --- src/chai/ArrayManager.cpp | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index da877429..9c889197 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -163,19 +163,7 @@ void * ArrayManager::frontOfAllocation(void * pointer) { void ArrayManager::setExecutionSpace(ExecutionSpace space) { -#if defined(CHAI_ENABLE_GPU_SIMULATION_MODE) - if (isGPUSimMode()) { - space = chai::GPU; - } -#endif - - CHAI_LOG(Debug, "Setting execution space to " << space); - - if (chai::GPU == space) { - m_synced_since_last_kernel = false; - } - - m_current_execution_space = space; + setExecutionSpace(space, nullptr); } void ArrayManager::setExecutionSpace(ExecutionSpace space, camp::resources::Resource* resource) From 3aad2cd73c17ea153c8b4736f2031b19fdd14f4e Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 09:36:44 -0700 Subject: [PATCH 097/118] Documentation and clean up --- src/chai/ArrayManager.cpp | 1 - src/chai/ArrayManager.hpp | 10 +++++++--- src/chai/ManagedArray.hpp | 25 +++++++++++++++++++++++-- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 9c889197..78c8756a 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -232,7 +232,6 @@ camp::resources::Resource* ArrayManager::getResource() return m_current_resource; } - void ArrayManager::registerTouch(PointerRecord* pointer_record) { registerTouch(pointer_record, m_current_execution_space); diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index 2dab5708..97c6e75a 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -173,12 +173,16 @@ class ArrayManager /*! * \brief Get the current execution space. * - * \return The current execution space.jo + * \return The current execution space. */ CHAISHAREDDLL_API ExecutionSpace getExecutionSpace(); - - camp::resources::Resource* getResource(); + /*! + * \brief Get the current resource. + * + * \return The current resource. + */ + CHAISHAREDDLL_API camp::resources::Resource* getResource(); /*! * \brief Move data in pointer to the current execution space. diff --git a/src/chai/ManagedArray.hpp b/src/chai/ManagedArray.hpp index af2df38a..47299be2 100644 --- a/src/chai/ManagedArray.hpp +++ b/src/chai/ManagedArray.hpp @@ -173,11 +173,32 @@ class ManagedArray : public CHAICopyable */ CHAI_HOST void registerTouch(ExecutionSpace space); - //CHAI_HOST void move(ExecutionSpace space=NONE) const; - CHAI_HOST void move(ExecutionSpace space, camp::resources::Resource* resource); + /*! + * \brief Move the underlying data to the given execution space using the given resource. + * + * \param space The space to which to move the underlying data. + * \param resource The resource to use to move the underlying data. + */ + CHAI_HOST void move(ExecutionSpace space, + camp::resources::Resource* resource); + + /*! + * \brief Move the underlying data to the given execution space. + * + * \param space The space to which to move the underlying data. + * \param registerTouch Whether to mark the data as touched in the given space. + */ CHAI_HOST void move(ExecutionSpace space=NONE, bool registerTouch=!std::is_const::value) const; + /*! + * \brief Get a slice of the ManagedArray. + * + * \param begin The start of the slice. + * \param elems The number of elements in the slice (-1 means use all remaining elements) + * + * \return A slice of the ManagedArray. + */ CHAI_HOST_DEVICE ManagedArray slice(size_t begin, size_t elems=(size_t)-1) const; /*! From 79174e228b281b0d089152424b280139811fc24a Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 09:46:35 -0700 Subject: [PATCH 098/118] More cleanup and documentation --- src/chai/ActiveResourceManager.hpp | 17 +++++++++++++++-- src/chai/ActiveResourceManager.inl | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/chai/ActiveResourceManager.hpp b/src/chai/ActiveResourceManager.hpp index 0bc6d6df..9512aebd 100644 --- a/src/chai/ActiveResourceManager.hpp +++ b/src/chai/ActiveResourceManager.hpp @@ -1,5 +1,5 @@ ////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// Copyright (c) 2016-23, Lawrence Livermore National Security, LLC and CHAI // project contributors. See the COPYRIGHT file for details. // // SPDX-License-Identifier: BSD-3-Clause @@ -42,15 +42,22 @@ class ActiveResourceManager { int m_size = 0; public: + /*! + * Default constructor. + */ ActiveResourceManager() = default; /*! - * Retrun current size of the resource list. + * Return current size of the resource list. + * + * \return The current size of the resource list. */ int size(); /*! * Push a new resource onto the list. + * + * \param res The resource to add. */ void push_back(camp::resources::Resource* res); @@ -61,11 +68,17 @@ class ActiveResourceManager { /*! * Check if empty. + * + * \return Whether or not the resource manager is empty. */ bool is_empty() const; /*! * Get resource at given index. + * + * \param i The index at which to get a resource. + * + * \return The resource at the given index. */ camp::resources::Resource* operator [](int i) const; }; diff --git a/src/chai/ActiveResourceManager.inl b/src/chai/ActiveResourceManager.inl index c01067e5..22c9a522 100644 --- a/src/chai/ActiveResourceManager.inl +++ b/src/chai/ActiveResourceManager.inl @@ -1,5 +1,5 @@ ////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// Copyright (c) 2016-23, Lawrence Livermore National Security, LLC and CHAI // project contributors. See the COPYRIGHT file for details. // // SPDX-License-Identifier: BSD-3-Clause From 7b0f907ae91dbcd69f3c080d71ce75ef0719b360 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 09:59:10 -0700 Subject: [PATCH 099/118] Document more functions --- src/chai/ArrayManager.hpp | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index 97c6e75a..7f5580ca 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -185,18 +185,32 @@ class ArrayManager CHAISHAREDDLL_API camp::resources::Resource* getResource(); /*! - * \brief Move data in pointer to the current execution space. + * \brief Move data in pointer to the given execution space. * * \param pointer Pointer to data in any execution space. + * \param pointer_record The pointer record. + * \param space The execution space to which to move the data. + * * \return Pointer to data in the current execution space. */ CHAISHAREDDLL_API void* move(void* pointer, PointerRecord* pointer_record, - ExecutionSpace = NONE); + ExecutionSpace space = NONE); + + /*! + * \brief Move data in pointer to the given execution space. + * + * \param pointer Pointer to data in any execution space. + * \param pointer_record The pointer record. + * \param resource The resource to use to move the data. + * \param space The execution space to which to move the data. + * + * \return Pointer to data in the current execution space. + */ CHAISHAREDDLL_API void* move(void* pointer, PointerRecord* pointer_record, camp::resources::Resource* resource, - ExecutionSpace = NONE); + ExecutionSpace space = NONE); /*! * \brief Register a touch of the pointer in the current execution space. @@ -476,10 +490,18 @@ class ArrayManager /*! * \brief Move data in PointerRecord to the corresponding ExecutionSpace. * - * \param record - * \param space + * \param record The pointer record. + * \param space The execution space to which to move the data. */ void move(PointerRecord* record, ExecutionSpace space); + + /*! + * \brief Move data in PointerRecord to the corresponding ExecutionSpace. + * + * \param record The pointer record. + * \param space The execution space to which to move the data. + * \param resource The resource to use to move the data. + */ void move(PointerRecord* record, ExecutionSpace space, camp::resources::Resource* resource); /*! From e606ad736c113a63ed8f89407536c4608b9ac84d Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 10:01:23 -0700 Subject: [PATCH 100/118] More clean up --- src/chai/ArrayManager.cpp | 1 - src/chai/ArrayManager.hpp | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 78c8756a..26d85b29 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -359,7 +359,6 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resou resetTouch(record); } - void ArrayManager::allocate( PointerRecord* pointer_record, ExecutionSpace space) diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index 7f5580ca..cdcffccb 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -488,7 +488,7 @@ class ArrayManager /*! - * \brief Move data in PointerRecord to the corresponding ExecutionSpace. + * \brief Move data in the pointer record to the corresponding execution space. * * \param record The pointer record. * \param space The execution space to which to move the data. @@ -496,7 +496,7 @@ class ArrayManager void move(PointerRecord* record, ExecutionSpace space); /*! - * \brief Move data in PointerRecord to the corresponding ExecutionSpace. + * \brief Move data in the pointer record to the corresponding execution space. * * \param record The pointer record. * \param space The execution space to which to move the data. @@ -538,7 +538,6 @@ class ArrayManager */ camp::resources::Resource* m_current_resource; - /** * Default space for new allocations. */ From 7691033e0bbe99cf7bb72c56c07515919b867228 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 10:07:31 -0700 Subject: [PATCH 101/118] More strict checking --- src/chai/ExecutionSpaces.hpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/chai/ExecutionSpaces.hpp b/src/chai/ExecutionSpaces.hpp index c4a29404..555b3ba7 100644 --- a/src/chai/ExecutionSpaces.hpp +++ b/src/chai/ExecutionSpaces.hpp @@ -46,14 +46,23 @@ enum ExecutionSpace { }; -inline bool operator==(const ExecutionSpace& s, const camp::resources::Platform& p) { - if(s == chai::CPU && p == camp::resources::Platform::host) return true; -#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) - /*! Execution in GPU space */ - if (s == chai::GPU && (p == camp::resources::Platform::cuda || - p == camp::resources::Platform::hip)) return true; +inline bool operator==(const ExecutionSpace& s, + const camp::resources::Platform& p) { + if (s == chai::CPU && p == camp::resources::Platform::host) { + return true; + } +#if defined(CHAI_ENABLE_CUDA) + else if (s == chai::GPU && p == camp::resources::Platform::cuda) { + return true; + } +#elif defined(CHAI_ENABLE_HIP) + else if (s == chai::GPU && p == camp::resources::Platform::hip) { + return true; + } #endif - return false; + else { + return false; + } } } // end of namespace chai From 69a589086b47eecc7f5026b07b05983ec5e24df5 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 10:12:12 -0700 Subject: [PATCH 102/118] Document variables added to PointerRecord --- src/chai/PointerRecord.hpp | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index 8890c7d8..8dcaccb1 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -49,7 +49,6 @@ struct PointerRecord { */ bool m_owned[NUM_EXECUTION_SPACES]; - /*! * User defined callback triggered on memory operations. * @@ -58,16 +57,33 @@ struct PointerRecord { */ UserCallback m_user_callback; + /*! + * Array holding Umpire allocator IDs in each execution space. + */ int m_allocators[NUM_EXECUTION_SPACES]; + /*! + * Whether or not a transfer is pending. + */ bool transfer_pending{false}; + + /*! + * An event that can be used to control asynchronous flow. + */ camp::resources::Event m_event{}; + + /*! + * Last resource used by this array. + */ camp::resources::Resource* m_last_resource{nullptr}; + + /*! + * The resource manager. + */ ActiveResourceManager m_res_manager; /*! * \brief Default constructor - * */ PointerRecord() : m_size(0), m_last_space(NONE) { m_user_callback = [] (const PointerRecord*, Action, ExecutionSpace) {}; From 5b7e9df6b4dd8c108d2b0e5a71f19b971d81e1ee Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 10:12:46 -0700 Subject: [PATCH 103/118] Remove unnecessary include --- src/chai/PointerRecord.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index 8dcaccb1..51c1bd45 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -15,7 +15,6 @@ #include #include -#include namespace chai { From 17478efd75543e72da6540ead3ca151e5c34bfa6 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 10:13:53 -0700 Subject: [PATCH 104/118] Clean up --- src/chai/PointerRecord.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index 51c1bd45..fce8ebc6 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -93,7 +93,6 @@ struct PointerRecord { m_allocators[space] = 0; } } - }; } // end of namespace chai From 131654fbb5a054fea93042ed2458e1d87f1c8505 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 10:20:25 -0700 Subject: [PATCH 105/118] Reuse move implementation --- src/chai/ArrayManager.cpp | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 26d85b29..04b51504 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -189,19 +189,9 @@ void* ArrayManager::move(void* pointer, PointerRecord* pointer_record, ExecutionSpace space) { - // Check for default arg (NONE) - if (space == NONE) { - space = m_current_execution_space; - } - - if (space == NONE) { - return pointer; - } - - move(pointer_record, space); - - return pointer_record->m_pointers[space]; + return move(pointer, pointer_record, nullptr, space); } + void* ArrayManager::move(void* pointer, PointerRecord* pointer_record, camp::resources::Resource* resource, @@ -221,7 +211,6 @@ void* ArrayManager::move(void* pointer, return pointer_record->m_pointers[space]; } - ExecutionSpace ArrayManager::getExecutionSpace() { return m_current_execution_space; @@ -262,7 +251,7 @@ void ArrayManager::resetTouch(PointerRecord* pointer_record) void ArrayManager::move(PointerRecord* record, ExecutionSpace space) { - move(record,space,nullptr); + move(record, space, nullptr); } void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resources::Resource* resource) From 48442389558b8202a39d8dbbbf13106a49dba34a Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 10:22:51 -0700 Subject: [PATCH 106/118] Clean up --- src/chai/ArrayManager.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 04b51504..e02d00bd 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -254,24 +254,25 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space) move(record, space, nullptr); } -void ArrayManager::move(PointerRecord* record, ExecutionSpace space, camp::resources::Resource* resource) +void ArrayManager::move(PointerRecord* record, + ExecutionSpace space, + camp::resources::Resource* resource) { - if (space == NONE) { return; } callback(record, ACTION_CAPTURED, space); -#if defined(CHAI_ENABLE_UM) - if (record->m_last_space == UM) { + if (space == record->m_last_space && !record->transfer_pending) { return; } -#endif - if (space == record->m_last_space && !record->transfer_pending) { +#if defined(CHAI_ENABLE_UM) + if (record->m_last_space == UM) { return; } +#endif #if defined(CHAI_ENABLE_PINNED) if (record->m_last_space == PINNED) { From 8f4d31a728409cf3a0394f10a4238ce7628f55de Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 10:49:57 -0700 Subject: [PATCH 107/118] Clean up move function --- src/chai/ManagedArray.inl | 107 +++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 59 deletions(-) diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index d9d808ee..222bc7ca 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -392,78 +392,67 @@ CHAI_INLINE CHAI_HOST void ManagedArray::move(ExecutionSpace space, bool registerTouch) const { - if (m_pointer_record != &ArrayManager::s_null_record) { - ExecutionSpace prev_space = m_pointer_record->m_last_space; - if (prev_space == CPU || prev_space == NONE) { - /// Move nested ManagedArrays first, so they are working with a valid m_active_pointer for the host, - // and so the meta data associated with them are updated before we move the other array down. - moveInnerImpl(); - } - CHAI_LOG(Debug, "Moving " << m_active_pointer); - m_active_base_pointer = static_cast(m_resource_manager->move((void *)m_active_base_pointer, m_pointer_record, space)); - m_active_pointer = m_active_base_pointer + m_offset; - - CHAI_LOG(Debug, "Moved to " << m_active_pointer); -#if defined(CHAI_ENABLE_UM) - if (m_pointer_record->m_last_space == UM) { - // just because we were allocated in UM doesn't mean our CHAICopyable array values were - moveInnerImpl(); - } else -#endif -#if defined(CHAI_ENABLE_PINNED) - if (m_pointer_record->m_last_space == PINNED) { - // just because we were allocated in PINNED doesn't mean our CHAICopyable array values were - moveInnerImpl(); - } else -#endif - if (registerTouch) { - CHAI_LOG(Debug, "T is non-const, registering touch of pointer" << m_active_pointer); - m_resource_manager->registerTouch(m_pointer_record, space); - } - if (space != GPU && prev_space == GPU) { - /// Move nested ManagedArrays after the move, so they are working with a valid m_active_pointer for the host, - // and so the meta data associated with them are updated with live GPU data - moveInnerImpl(); - } - } + move(space, nullptr, registerTouch); } + template CHAI_INLINE CHAI_HOST -void ManagedArray::move(ExecutionSpace space, camp::resources::Resource* resource) +void ManagedArray::move(ExecutionSpace space, + camp::resources::Resource* resource, + bool registerTouch) const { - ExecutionSpace prev_space = m_pointer_record->m_last_space; + if (m_pointer_record != &ArrayManager::s_null_record) { + ExecutionSpace prev_space = m_pointer_record->m_last_space; - /* When moving from CPU to GPU we need to move the inner arrays before the outer array. */ - if (prev_space == CPU) { - moveInnerImpl(); - //moveInnerImpl(space); - } + if (prev_space == CPU || prev_space == NONE) { + // Move nested ManagedArrays first, so they are working with a valid + // m_active_pointer for the host, and so the meta data associated with + // them are updated before we move the other array down. + moveInnerImpl(); + } #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) - if (space == GPU && m_pointer_record->m_last_resource != resource ){ - m_pointer_record->m_res_manager.push_back(resource); - } + if (resource != nullptr && + space == GPU && + m_pointer_record->m_last_resource != resource) { + m_pointer_record->m_res_manager.push_back(resource); + } #endif - m_active_base_pointer = static_cast(m_resource_manager->move(const_cast(m_active_base_pointer), m_pointer_record, resource, space)); - m_active_pointer = m_active_base_pointer + m_offset; - - if (!std::is_const::value) { - CHAI_LOG(Debug, "T is non-const, registering touch of pointer" << m_active_pointer); - m_resource_manager->registerTouch(m_pointer_record, space); - } + CHAI_LOG(Debug, "Moving " << m_active_pointer); + m_active_base_pointer = static_cast(m_resource_manager->move(const_cast(m_active_base_pointer), m_pointer_record, resource, space)); + m_active_pointer = m_active_base_pointer + m_offset; + CHAI_LOG(Debug, "Moved to " << m_active_pointer); - if (space != NONE) m_pointer_record->m_last_space = space; - if (space != NONE) m_pointer_record->m_last_resource = resource; +#if defined(CHAI_ENABLE_UM) + if (m_pointer_record->m_last_space == UM) { + // Just because we were allocated in UM doesn't mean our CHAICopyable + // array values were + moveInnerImpl(); + } else +#endif +#if defined(CHAI_ENABLE_PINNED) + if (m_pointer_record->m_last_space == PINNED) { + // Just because we were allocated in PINNED doesn't mean our CHAICopyable + // array values were + moveInnerImpl(); + } else +#endif + if (registerTouch) { + CHAI_LOG(Debug, "Registering touch of pointer " << m_active_pointer); + m_resource_manager->registerTouch(m_pointer_record, space); + } - /* When moving from GPU to CPU we need to move the inner arrays after the outer array. */ -#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) - if (prev_space == GPU) { - moveInnerImpl(); - //moveInnerImpl(space); - } +#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE) + if (space != GPU && prev_space == GPU) { + // Move nested ManagedArrays after the move, so they are working with a + // valid m_active_pointer for the host, and so the meta data associated + // with them are updated with live GPU data + moveInnerImpl(); + } #endif + } } template From 1b1c7359baa9867c0c19aef9a9f8b7c5a4772642 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 10:52:21 -0700 Subject: [PATCH 108/118] Clean up --- src/chai/ManagedArray.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/chai/ManagedArray.hpp b/src/chai/ManagedArray.hpp index 47299be2..0e100cf8 100644 --- a/src/chai/ManagedArray.hpp +++ b/src/chai/ManagedArray.hpp @@ -178,9 +178,11 @@ class ManagedArray : public CHAICopyable * * \param space The space to which to move the underlying data. * \param resource The resource to use to move the underlying data. + * \param registerTouch Whether to mark the data as touched in the given space. */ CHAI_HOST void move(ExecutionSpace space, - camp::resources::Resource* resource); + camp::resources::Resource* resource, + bool registerTouch=!std::is_const::value) const; /*! * \brief Move the underlying data to the given execution space. From e071ca47a0c121258d727523ad14a2bdebd32311 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 14 Apr 2023 10:56:40 -0700 Subject: [PATCH 109/118] Reorder arguments for better intuition --- src/chai/ManagedArray.hpp | 12 ++++++------ src/chai/ManagedArray.inl | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/chai/ManagedArray.hpp b/src/chai/ManagedArray.hpp index 0e100cf8..c14159d8 100644 --- a/src/chai/ManagedArray.hpp +++ b/src/chai/ManagedArray.hpp @@ -176,13 +176,13 @@ class ManagedArray : public CHAICopyable /*! * \brief Move the underlying data to the given execution space using the given resource. * - * \param space The space to which to move the underlying data. * \param resource The resource to use to move the underlying data. + * \param space The space to which to move the underlying data. * \param registerTouch Whether to mark the data as touched in the given space. */ - CHAI_HOST void move(ExecutionSpace space, - camp::resources::Resource* resource, - bool registerTouch=!std::is_const::value) const; + CHAI_HOST void move(camp::resources::Resource* resource, + ExecutionSpace space = NONE, + bool registerTouch = !std::is_const::value) const; /*! * \brief Move the underlying data to the given execution space. @@ -190,8 +190,8 @@ class ManagedArray : public CHAICopyable * \param space The space to which to move the underlying data. * \param registerTouch Whether to mark the data as touched in the given space. */ - CHAI_HOST void move(ExecutionSpace space=NONE, - bool registerTouch=!std::is_const::value) const; + CHAI_HOST void move(ExecutionSpace space = NONE, + bool registerTouch = !std::is_const::value) const; /*! * \brief Get a slice of the ManagedArray. diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index 222bc7ca..bc12e43e 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -392,14 +392,14 @@ CHAI_INLINE CHAI_HOST void ManagedArray::move(ExecutionSpace space, bool registerTouch) const { - move(space, nullptr, registerTouch); + move(nullptr, space, registerTouch); } template CHAI_INLINE CHAI_HOST -void ManagedArray::move(ExecutionSpace space, - camp::resources::Resource* resource, +void ManagedArray::move(camp::resources::Resource* resource, + ExecutionSpace space, bool registerTouch) const { if (m_pointer_record != &ArrayManager::s_null_record) { From 87957a59a8daf441befb811826b89661bcf08f0e Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 24 Apr 2023 10:14:36 -0700 Subject: [PATCH 110/118] Fix execution space unit tests --- tests/unit/execution_space_unit_tests.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/execution_space_unit_tests.cpp b/tests/unit/execution_space_unit_tests.cpp index c6fc1a87..384edd49 100644 --- a/tests/unit/execution_space_unit_tests.cpp +++ b/tests/unit/execution_space_unit_tests.cpp @@ -51,8 +51,11 @@ TEST(ExecutionSpace, Platforms) ASSERT_TRUE(chai::CPU == camp::resources::Platform::host); ASSERT_FALSE(chai::CPU == camp::resources::Platform::undefined); #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) +#if defined(CHAI_ENABLE_CUDA) ASSERT_TRUE(chai::GPU == camp::resources::Platform::cuda); +#else ASSERT_TRUE(chai::GPU == camp::resources::Platform::hip); +#endif ASSERT_FALSE(chai::GPU == camp::resources::Platform::undefined); #endif } From 13032551a80cc57476d7431a33e3105df073a0da Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 24 Apr 2023 14:34:34 -0700 Subject: [PATCH 111/118] Fix ManagedArray::move argument order --- src/chai/ManagedArray.hpp | 6 +++--- src/chai/ManagedArray.inl | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/chai/ManagedArray.hpp b/src/chai/ManagedArray.hpp index c14159d8..0f88ab5b 100644 --- a/src/chai/ManagedArray.hpp +++ b/src/chai/ManagedArray.hpp @@ -176,12 +176,12 @@ class ManagedArray : public CHAICopyable /*! * \brief Move the underlying data to the given execution space using the given resource. * - * \param resource The resource to use to move the underlying data. * \param space The space to which to move the underlying data. + * \param resource The resource to use to move the underlying data. * \param registerTouch Whether to mark the data as touched in the given space. */ - CHAI_HOST void move(camp::resources::Resource* resource, - ExecutionSpace space = NONE, + CHAI_HOST void move(ExecutionSpace space, + camp::resources::Resource* resource, bool registerTouch = !std::is_const::value) const; /*! diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index bc12e43e..222bc7ca 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -392,14 +392,14 @@ CHAI_INLINE CHAI_HOST void ManagedArray::move(ExecutionSpace space, bool registerTouch) const { - move(nullptr, space, registerTouch); + move(space, nullptr, registerTouch); } template CHAI_INLINE CHAI_HOST -void ManagedArray::move(camp::resources::Resource* resource, - ExecutionSpace space, +void ManagedArray::move(ExecutionSpace space, + camp::resources::Resource* resource, bool registerTouch) const { if (m_pointer_record != &ArrayManager::s_null_record) { From a24145a32409ce990863b7a0a2454e662e21717a Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 24 Apr 2023 14:42:22 -0700 Subject: [PATCH 112/118] Add tests back in --- tests/integration/managed_array_tests.cpp | 60 ++++++++++++++++++++--- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/tests/integration/managed_array_tests.cpp b/tests/integration/managed_array_tests.cpp index 5021c5e0..32a03217 100644 --- a/tests/integration/managed_array_tests.cpp +++ b/tests/integration/managed_array_tests.cpp @@ -851,17 +851,63 @@ TEST(ManagedArray, ExternalConstructorUnowned) assert_empty_map(true); } +TEST(ManagedArray, ExternalConstructorOwned) +{ + float* data = static_cast(std::malloc(20 * sizeof(float))); + + for (int i = 0; i < 20; i++) { + data[i] = 1.0f * i; + } + + chai::ManagedArray array = + chai::makeManagedArray(data, 20, chai::CPU, true); + + forall(sequential(), 0, 20, [=](int i) { ASSERT_EQ(data[i], array[i]); }); + + array.free(); + assert_empty_map(true); +} + +TEST(ManagedArray, ExternalOwnedFromManagedArray) +{ + chai::ManagedArray array(20); + + forall(sequential(), 0, 20, [=](int i) { array[i] = 1.0f * i; }); + + chai::ManagedArray arrayCopy = + chai::makeManagedArray(array.getPointer(chai::CPU), 20, chai::CPU, true); + +#if defined(CHAI_ENABLE_IMPLICIT_CONVERSIONS) + ASSERT_EQ(array, arrayCopy); +#else + ASSERT_EQ(array.data(), arrayCopy.data()); +#endif + // should be able to free through the new ManagedArray + arrayCopy.free(); + assert_empty_map(true); +} + +TEST(ManagedArray, ExternalUnownedFromManagedArray) +{ + chai::ManagedArray array(20); + + forall(sequential(), 0, 20, [=](int i) { array[i] = 1.0f * i; }); + + chai::ManagedArray arrayCopy = + chai::makeManagedArray(array.getPointer(chai::CPU), 20, chai::CPU, false); + + forall(sequential(), 0, 20, [=](int i) { ASSERT_EQ(arrayCopy[i], 1.0f * i); }); + // freeing from an unowned pointer should leave the original ManagedArray intact + arrayCopy.free(); + array.free(); + assert_empty_map(true); +} + #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) #ifndef CHAI_DISABLE_RM GPU_TEST(ManagedArray, ExternalUnownedMoveToGPU) { - float* data; -#if defined(CHAI_ENABLE_CUDA) - cudaMallocHost(&data, 20*sizeof(float)); -#elif defined(CHAI_ENABLE_HIP) - hipMallocHost(&data, 20*sizeof(float)); -#endif - + float data[20]; for (int i = 0; i < 20; i++) { data[i] = 0.; } From 67b279dfe0dd41fb7f32e646fba030177a9bcf2a Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 24 Apr 2023 15:36:26 -0700 Subject: [PATCH 113/118] Attempt to fix ExternalConstructorUnowned test --- tests/integration/managed_array_tests.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integration/managed_array_tests.cpp b/tests/integration/managed_array_tests.cpp index 32a03217..b07c7c4c 100644 --- a/tests/integration/managed_array_tests.cpp +++ b/tests/integration/managed_array_tests.cpp @@ -830,7 +830,9 @@ GPU_TEST(ManagedArray, PodTestGPU) TEST(ManagedArray, ExternalConstructorUnowned) { - float* data = static_cast(std::malloc(100 * sizeof(float))); + // The CPU space could be host memory or host pinned memory + auto allocator = chai::ArrayManager::getInstance()->getAllocator(chai::CPU); + float* data = static_cast(allocator.allocate(100 * sizeof(float))); for (int i = 0; i < 100; i++) { data[i] = 1.0f * i; @@ -839,7 +841,9 @@ TEST(ManagedArray, ExternalConstructorUnowned) chai::ManagedArray array = chai::makeManagedArray(data, 100, chai::CPU, false); - forall(sequential(), 0, 20, [=](int i) { ASSERT_EQ(data[i], array[i]); }); + forall(sequential(), 0, 100, [=] (int i) { + ASSERT_EQ(data[i], array[i]); + }); array.free(); @@ -847,7 +851,7 @@ TEST(ManagedArray, ExternalConstructorUnowned) ASSERT_EQ(data[i], 1.0f * i); } - std::free(data); + allocator.deallocate(data); assert_empty_map(true); } From 7197a45cf2ad2efab606e558a9fa06263377a131 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 5 Feb 2025 10:10:07 -0800 Subject: [PATCH 114/118] Fixing compilation errors from develop merge. --- src/chai/ArrayManager.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 1dda4277..f3fc61a9 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -298,7 +298,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space) void ArrayManager::move(PointerRecord* record, ExecutionSpace space, - camp::resources::Resource* resource) + camp::resources::Resource* ) { if (space == NONE) { return; @@ -347,9 +347,7 @@ void ArrayManager::move(PointerRecord* record, chai::copy(dst_pointer, src_pointer, m_resource_manager, space, prev_space); } - callback(record, ACTION_MOVE, space); - } - } + callback(record, ACTION_MOVE, space); } resetTouch(record); From 38c4489d2612e762611dba38f1bb2447654818f9 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 5 Feb 2025 10:34:46 -0800 Subject: [PATCH 115/118] Remove old logic for PINNED memory. --- src/chai/ArrayManager.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index f3fc61a9..bec5f6a2 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -41,11 +41,7 @@ ArrayManager::ArrayManager() : m_default_allocation_space = CPU; m_allocators[CPU] = -#if (defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP)) && !defined(CHAI_ENABLE_GPU_SIMULATION_MODE) - new umpire::Allocator(m_resource_manager.getAllocator("PINNED")); -#else new umpire::Allocator(m_resource_manager.getAllocator("HOST")); -#endif #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE) #if defined(CHAI_ENABLE_GPU_SIMULATION_MODE) From 390d11f3d517af2f937ec433a9e01b7e815904d3 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Tue, 11 Feb 2025 13:07:07 -0800 Subject: [PATCH 116/118] resource-simple example executing async across 16 calls w/ HIP. --- examples/resource-simple.cpp | 18 +++++++++++++++--- src/chai/ArrayManager.cpp | 15 +++++++++------ src/chai/ChaiMacros.hpp | 9 +++++++++ src/util/forall.hpp | 9 ++++----- src/util/gpu_clock.hpp | 13 +++++++------ 5 files changed, 44 insertions(+), 20 deletions(-) diff --git a/examples/resource-simple.cpp b/examples/resource-simple.cpp index c2254c63..b450f8c9 100644 --- a/examples/resource-simple.cpp +++ b/examples/resource-simple.cpp @@ -21,7 +21,7 @@ int main() constexpr std::size_t ARRAY_SIZE{100}; std::vector> arrays; - camp::resources::Resource host{camp::resources::Host{}}; + camp::resources::Resource host{camp::resources::Host{}}; int clockrate{get_clockrate()}; @@ -37,12 +37,24 @@ int main() } for (auto array : arrays) { - camp::resources::Resource resource{camp::resources::Cuda{}}; +#ifdef CHAI_ENABLE_CUDA + camp::resources::Resource resource{camp::resources::Cuda{}}; +#elif defined(CHAI_ENABLE_HIP) + camp::resources::Resource resource{camp::resources::Hip{}}; +#endif forall(&resource, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { - array[i] = array[i] * 2.0; + array[i] = array[i] * 2.0 + i; gpu_time_wait_for(20, clockrate); }); + } + + for (auto array : arrays) { +#ifdef CHAI_ENABLE_CUDA + camp::resources::Resource resource{camp::resources::Cuda{}}; +#elif defined(CHAI_ENABLE_HIP) + camp::resources::Resource resource{camp::resources::Hip{}}; +#endif array.move(chai::CPU, &resource); } diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index bec5f6a2..2a732ccb 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -263,14 +263,17 @@ void ArrayManager::resetTouch(PointerRecord* pointer_record) /* Not all GPU platform runtimes (notably HIP), will give you asynchronous copies to the device by default, so we leverage * umpire's API for asynchronous copies using camp resources in this method, based off of the CHAI destination space * */ -static void copy(void * dst_pointer, void * src_pointer, umpire::ResourceManager & manager, ExecutionSpace dst_space, ExecutionSpace src_space) { +static void copy(void * dst_pointer, void * src_pointer, umpire::ResourceManager & manager, ExecutionSpace dst_space, ExecutionSpace src_space, camp::resources::Resource* res = nullptr) { #ifdef CHAI_ENABLE_CUDA - camp::resources::Resource device_resource(camp::resources::Cuda::get_default()); + camp::resources::Resource device_resource = + (res) ? res->get() : camp::resources::Cuda::get_default(); #elif defined(CHAI_ENABLE_HIP) - camp::resources::Resource device_resource(camp::resources::Hip::get_default()); + camp::resources::Resource device_resource = + (res) ? res->get() : camp::resources::Hip::get_default(); #else - camp::resources::Resource device_resource(camp::resources::Host::get_default()); + camp::resources::Resource device_resource = + (res) ? res->get() : camp::resources::Host::get_default(); #endif camp::resources::Resource host_resource(camp::resources::Host::get_default()); @@ -294,7 +297,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space) void ArrayManager::move(PointerRecord* record, ExecutionSpace space, - camp::resources::Resource* ) + camp::resources::Resource* resource) { if (space == NONE) { return; @@ -340,7 +343,7 @@ void ArrayManager::move(PointerRecord* record, } else if (dst_pointer != src_pointer) { // Exclude the copy if src and dst are the same (can happen for PINNED memory) { - chai::copy(dst_pointer, src_pointer, m_resource_manager, space, prev_space); + chai::copy(dst_pointer, src_pointer, m_resource_manager, space, prev_space, resource); } callback(record, ACTION_MOVE, space); diff --git a/src/chai/ChaiMacros.hpp b/src/chai/ChaiMacros.hpp index 1d62ba84..b79b37db 100644 --- a/src/chai/ChaiMacros.hpp +++ b/src/chai/ChaiMacros.hpp @@ -31,6 +31,10 @@ #define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice #define gpuMemcpyDefault cudaMemcpyDefault +#define gpuDeviceProp_t hipDeviceProp +#define gpuGetDevice cudaGetDevice +#define gpuGetDeviceProperties cudaGetDeviceProperties + // NOTE: Cannot have if defined(__HIPCC__) in the condition below, since __HIPCC__ comes from the included header hip_runtime below. #elif defined(CHAI_ENABLE_HIP) @@ -48,6 +52,11 @@ #define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice #define gpuMemcpyDefault hipMemcpyDefault +#define gpuDeviceProp_t hipDeviceProp_t +#define gpuGetDevice hipGetDevice +#define gpuGetDeviceProperties hipGetDeviceProperties + + #else #define CHAI_HOST diff --git a/src/util/forall.hpp b/src/util/forall.hpp index 46bf745c..4f66786a 100644 --- a/src/util/forall.hpp +++ b/src/util/forall.hpp @@ -66,7 +66,6 @@ camp::resources::Event forall_host(camp::resources::Resource* dev, int begin, in rm->setExecutionSpace(chai::CPU, dev); - auto host = dev->get(); forall_kernel_cpu(begin, end, body); rm->setExecutionSpace(chai::NONE); @@ -154,12 +153,12 @@ camp::resources::Event forall_gpu(camp::resources::Resource* dev, int begin, int size_t gridSize = (end - begin + blockSize - 1) / blockSize; #if defined(CHAI_ENABLE_CUDA) -auto cuda = dev->get(); -forall_kernel_gpu<<>>(begin, end - begin, body); + auto cuda = dev->get(); + forall_kernel_gpu<<>>(begin, end - begin, body); #elif defined(CHAI_ENABLE_HIP) - hipLaunchKernelGGL(forall_kernel_gpu, dim3(gridSize), dim3(blockSize), 0,0, + auto hip = dev->get(); + hipLaunchKernelGGL(forall_kernel_gpu, dim3(gridSize), dim3(blockSize), 0,hip.get_stream(), begin, end - begin, body); - hipDeviceSynchronize(); #endif rm->setExecutionSpace(chai::NONE); diff --git a/src/util/gpu_clock.hpp b/src/util/gpu_clock.hpp index 81a7c97d..bffa8f86 100644 --- a/src/util/gpu_clock.hpp +++ b/src/util/gpu_clock.hpp @@ -17,14 +17,15 @@ gpu_time_wait_for(float time, float clockrate) { int get_clockrate() { - int cuda_device = 0; - cudaDeviceProp deviceProp; - cudaGetDevice(&cuda_device); - cudaGetDeviceProperties(&deviceProp, cuda_device); - if ((deviceProp.concurrentKernels == 0)) + //TODO: Generalize this... + int gpu_device = 0; + gpuDeviceProp_t deviceProp; + gpuGetDevice(&gpu_device); + gpuGetDeviceProperties(&deviceProp, gpu_device); + if (deviceProp.concurrentKernels == 0) { printf("> GPU does not support concurrent kernel execution\n"); - printf(" CUDA kernel runs will be serialized\n"); + printf(" gpu kernel runs will be serialized\n"); } printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); From d9523c0b3c04e1c6944c0b2be1570c6668176025 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Tue, 11 Feb 2025 13:21:46 -0800 Subject: [PATCH 117/118] Cleanup resource exmples; Executing w/ HIP. --- examples/CMakeLists.txt | 4 -- examples/resource-depends.cpp | 5 +++ examples/resource-multi-array.cpp | 69 ++++++++++++++++++++----------- examples/resource-simple.cpp | 69 ------------------------------- 4 files changed, 49 insertions(+), 98 deletions(-) delete mode 100644 examples/resource-simple.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 24a6052e..5bedea38 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -32,10 +32,6 @@ if (CHAI_ENABLE_CUDA OR CHAI_ENABLE_HIP) NAME resource-depends.exe SOURCES resource-depends.cpp DEPENDS_ON ${chai_umpire_example_depends}) - blt_add_executable( - NAME resource-simple.exe - SOURCES resource-simple.cpp - DEPENDS_ON ${chai_umpire_example_depends}) blt_add_executable( NAME resource-not-managed.exe SOURCES resource-not-managed.cpp diff --git a/examples/resource-depends.cpp b/examples/resource-depends.cpp index 4817102e..81cd9d60 100644 --- a/examples/resource-depends.cpp +++ b/examples/resource-depends.cpp @@ -19,8 +19,13 @@ int main() constexpr std::size_t ARRAY_SIZE{1000}; int clockrate{get_clockrate()}; +#ifdef CHAI_ENABLE_CUDA camp::resources::Resource dev1{camp::resources::Cuda{}}; camp::resources::Resource dev2{camp::resources::Cuda{}}; +#elif defined(CHAI_ENABLE_HIP) + camp::resources::Resource dev1{camp::resources::Hip{}}; + camp::resources::Resource dev2{camp::resources::Hip{}}; +#endif camp::resources::Resource host{camp::resources::Host{}}; chai::ManagedArray array1(ARRAY_SIZE); diff --git a/examples/resource-multi-array.cpp b/examples/resource-multi-array.cpp index 926dbfe1..b450f8c9 100644 --- a/examples/resource-multi-array.cpp +++ b/examples/resource-multi-array.cpp @@ -4,47 +4,66 @@ // // SPDX-License-Identifier: BSD-3-Clause ////////////////////////////////////////////////////////////////////////////// -#include "camp/resource.hpp" -#include "chai/ManagedArray.hpp" - #include "../src/util/forall.hpp" #include "../src/util/gpu_clock.hpp" +#include "chai/ManagedArray.hpp" +#include "camp/resource.hpp" + #include #include int main() { - constexpr std::size_t ARRAY_SIZE{1000}; - int clockrate{get_clockrate()}; - chai::ManagedArray array1(ARRAY_SIZE); + constexpr int NUM_ARRAYS = 16; + constexpr std::size_t ARRAY_SIZE{100}; - camp::resources::Resource dev1{camp::resources::Cuda{}}; - camp::resources::Resource dev2{camp::resources::Cuda{}}; + std::vector> arrays; + camp::resources::Resource host{camp::resources::Host{}}; + + + int clockrate{get_clockrate()}; + for (std::size_t i = 0; i < NUM_ARRAYS; ++i) { + arrays.push_back(chai::ManagedArray(ARRAY_SIZE)); + } - auto e1 = forall(&dev1, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { - if (i % 2 == 0) { - array1[i] = i; - gpu_time_wait_for(10, clockrate); - } - }); + for (auto array : arrays) { + forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + array[i] = i; + }); + } - auto e2 = forall(&dev2, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { - if (i % 2 == 1) { + for (auto array : arrays) { +#ifdef CHAI_ENABLE_CUDA + camp::resources::Resource resource{camp::resources::Cuda{}}; +#elif defined(CHAI_ENABLE_HIP) + camp::resources::Resource resource{camp::resources::Hip{}}; +#endif + + forall(&resource, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + array[i] = array[i] * 2.0 + i; gpu_time_wait_for(20, clockrate); - array1[i] = i; - } - }); + }); + } - array1.move(chai::CPU, &dev1); + for (auto array : arrays) { +#ifdef CHAI_ENABLE_CUDA + camp::resources::Resource resource{camp::resources::Cuda{}}; +#elif defined(CHAI_ENABLE_HIP) + camp::resources::Resource resource{camp::resources::Hip{}}; +#endif - camp::resources::Resource host{camp::resources::Host{}}; + array.move(chai::CPU, &resource); + } - forall(&host, 0, 10, [=] CHAI_HOST_DEVICE (int i) { - printf("%f ", array1[i]); - }); - printf("\n"); + for (auto array : arrays) { + forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { + if (i == 25) { + printf("array[%d] = %f \n", i, array[i]); + } + }); + } } diff --git a/examples/resource-simple.cpp b/examples/resource-simple.cpp deleted file mode 100644 index b450f8c9..00000000 --- a/examples/resource-simple.cpp +++ /dev/null @@ -1,69 +0,0 @@ -////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016-20, Lawrence Livermore National Security, LLC and CHAI -// project contributors. See the COPYRIGHT file for details. -// -// SPDX-License-Identifier: BSD-3-Clause -////////////////////////////////////////////////////////////////////////////// -#include "../src/util/forall.hpp" -#include "../src/util/gpu_clock.hpp" - -#include "chai/ManagedArray.hpp" -#include "camp/resource.hpp" - -#include -#include - - -int main() -{ - - constexpr int NUM_ARRAYS = 16; - constexpr std::size_t ARRAY_SIZE{100}; - - std::vector> arrays; - camp::resources::Resource host{camp::resources::Host{}}; - - - int clockrate{get_clockrate()}; - - for (std::size_t i = 0; i < NUM_ARRAYS; ++i) { - arrays.push_back(chai::ManagedArray(ARRAY_SIZE)); - } - - for (auto array : arrays) { - forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { - array[i] = i; - }); - } - - for (auto array : arrays) { -#ifdef CHAI_ENABLE_CUDA - camp::resources::Resource resource{camp::resources::Cuda{}}; -#elif defined(CHAI_ENABLE_HIP) - camp::resources::Resource resource{camp::resources::Hip{}}; -#endif - - forall(&resource, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { - array[i] = array[i] * 2.0 + i; - gpu_time_wait_for(20, clockrate); - }); - } - - for (auto array : arrays) { -#ifdef CHAI_ENABLE_CUDA - camp::resources::Resource resource{camp::resources::Cuda{}}; -#elif defined(CHAI_ENABLE_HIP) - camp::resources::Resource resource{camp::resources::Hip{}}; -#endif - - array.move(chai::CPU, &resource); - } - - for (auto array : arrays) { - forall(&host, 0, ARRAY_SIZE, [=] CHAI_HOST_DEVICE (int i) { - if (i == 25) { - printf("array[%d] = %f \n", i, array[i]); - } - }); - } -} From 5db50da39ebad04dfb05cbc7f05f7182d7ec313e Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Tue, 11 Feb 2025 15:20:29 -0800 Subject: [PATCH 118/118] hip->cuda --- src/chai/ChaiMacros.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chai/ChaiMacros.hpp b/src/chai/ChaiMacros.hpp index b79b37db..e25ccec7 100644 --- a/src/chai/ChaiMacros.hpp +++ b/src/chai/ChaiMacros.hpp @@ -31,7 +31,7 @@ #define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice #define gpuMemcpyDefault cudaMemcpyDefault -#define gpuDeviceProp_t hipDeviceProp +#define gpuDeviceProp_t cudaDeviceProp #define gpuGetDevice cudaGetDevice #define gpuGetDeviceProperties cudaGetDeviceProperties