forked from cms-sw/cmssw
-
Notifications
You must be signed in to change notification settings - Fork 5
Add infrastructure around cub CachingDeviceAllocator, and use it in SiPixelRawToCluster #172
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
fwyzard
merged 19 commits into
cms-patatrack:CMSSW_10_4_X_Patatrack
from
makortel:cubAllocator
Nov 27, 2018
Merged
Changes from all commits
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
2c35bb2
Add infrastructure around cub CachingDeviceAllocator for device memor…
makortel 2c515f9
Migrate raw2cluster data products to use the cub allocator
makortel e30b0f2
Migrate raw2cluster temporary buffers to use the cub allocator
makortel 1bb5d59
Really release the cached memory
makortel 8d0d330
Add CachingHostAllocator
makortel 20f125d
Rename device unique_ptrs to prepare for host unique_ptrs
makortel 21f5bcd
Use CachingHostAllocator in CUDAService
makortel e0087ce
Use unique_host_ptr for GPU->CPU buffers
makortel e7eac3f
Use unique_host_ptr for CPU->GPU transfers
makortel a2c63af
Cleanup
makortel a6b549a
Decrease allocator minBin to 1
makortel 6fd9438
Going "back" to "GPU struct of pointers to GPU"
makortel 8c371ba
Using __ldg
makortel fc83c38
Add a configuration option to CUDAService to enable debug prints in t…
makortel f9b53aa
Fix printout in CachingHostAllocator
makortel 031f07f
Add possibility to preallocate device and host buffers
makortel 15c15ab
Fix memory problem with SiPixelFedCablingMapGPUWrapper::ModulesToUnpack
makortel 91cb38a
Wrap the exception in a try-catch block to let GDB break on it
fwyzard ae51c9a
Improve allocator log information
fwyzard File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| #ifndef CUDADataFormats_Common_interface_device_unique_ptr_h | ||
| #define CUDADataFormats_Common_interface_device_unique_ptr_h | ||
|
|
||
| #include <memory> | ||
| #include <functional> | ||
|
|
||
| namespace edm { | ||
| namespace cuda { | ||
| namespace device { | ||
| template <typename T> /* Alias for std::unique_ptr<T> with a type-erased deleter: a std::function that receives the raw pointer as void*, so the release routine is chosen by whoever constructs the pointer. Presumably intended for device (GPU) allocations, going by the namespace name. */ | ||
| using unique_ptr = std::unique_ptr<T, std::function<void(void *)>>; | ||
| } /* namespace device */ | ||
| } /* namespace cuda */ | ||
| } /* namespace edm */ | ||
|
|
||
| #endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| #ifndef CUDADataFormats_Common_interface_host_unique_ptr_h | ||
| #define CUDADataFormats_Common_interface_host_unique_ptr_h | ||
|
|
||
| #include <memory> | ||
| #include <functional> | ||
|
|
||
| namespace edm { | ||
| namespace cuda { | ||
| namespace host { | ||
| template <typename T> /* Alias for std::unique_ptr<T> with a type-erased deleter: a std::function that receives the raw pointer as void*, so the release routine is chosen by whoever constructs the pointer. Presumably intended for host-side buffers, going by the namespace name. */ | ||
| using unique_ptr = std::unique_ptr<T, std::function<void(void *)>>; | ||
| } /* namespace host */ | ||
| } /* namespace cuda */ | ||
| } /* namespace edm */ | ||
|
|
||
| #endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| <use name="FWCore/ServiceRegistry"/> | ||
| <use name="HeterogeneousCore/CUDAServices"/> | ||
| <use name="cuda-api-wrappers"/> | ||
|
|
||
| <export> | ||
| <lib name="1"/> | ||
| </export> | ||
|
|
73 changes: 73 additions & 0 deletions
73
CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| #ifndef CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h | ||
| #define CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h | ||
|
|
||
| #include "CUDADataFormats/Common/interface/device_unique_ptr.h" | ||
|
|
||
| #include <cuda/api_wrappers.h> | ||
|
|
||
| class SiPixelClustersCUDA { /* Move-only owner of five integer arrays and one DeviceConstView object, each held through edm::cuda::device::unique_ptr. */ | ||
| public: | ||
| SiPixelClustersCUDA() = default; /* every owned pointer starts out null */ | ||
| explicit SiPixelClustersCUDA(size_t feds, size_t nelements, cuda::stream_t<>& stream); /* defined out of line */ | ||
| ~SiPixelClustersCUDA() = default; | ||
|
|
||
| SiPixelClustersCUDA(const SiPixelClustersCUDA&) = delete; /* copying is disabled; the members are unique_ptrs */ | ||
| SiPixelClustersCUDA& operator=(const SiPixelClustersCUDA&) = delete; | ||
| SiPixelClustersCUDA(SiPixelClustersCUDA&&) = default; | ||
| SiPixelClustersCUDA& operator=(SiPixelClustersCUDA&&) = default; | ||
|
|
||
| uint32_t *moduleStart() { return moduleStart_d.get(); } /* mutable raw-pointer accessors (non-const object only) */ | ||
| int32_t *clus() { return clus_d.get(); } | ||
| uint32_t *clusInModule() { return clusInModule_d.get(); } | ||
| uint32_t *moduleId() { return moduleId_d.get(); } | ||
| uint32_t *clusModuleStart() { return clusModuleStart_d.get(); } | ||
|
|
||
| uint32_t const *moduleStart() const { return moduleStart_d.get(); } /* const overloads: same pointers, pointee const */ | ||
| int32_t const *clus() const { return clus_d.get(); } | ||
| uint32_t const *clusInModule() const { return clusInModule_d.get(); } | ||
| uint32_t const *moduleId() const { return moduleId_d.get(); } | ||
| uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); } | ||
|
|
||
| uint32_t const *c_moduleStart() const { return moduleStart_d.get(); } /* c_* variants: always return pointer-to-const, even when called on a non-const object */ | ||
| int32_t const *c_clus() const { return clus_d.get(); } | ||
| uint32_t const *c_clusInModule() const { return clusInModule_d.get(); } | ||
| uint32_t const *c_moduleId() const { return moduleId_d.get(); } | ||
| uint32_t const *c_clusModuleStart() const { return clusModuleStart_d.get(); } | ||
|
|
||
| class DeviceConstView { /* Bundle of const raw pointers to the arrays above; only SiPixelClustersCUDA (a friend) can set them. */ | ||
| public: | ||
| DeviceConstView() = default; /* all pointers default to nullptr */ | ||
|
|
||
| #ifdef __CUDACC__ | ||
| __device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_+i); } /* element getters: device-only, read element i through __ldg, no bounds check */ | ||
| __device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_+i); } | ||
| __device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_+i); } | ||
| __device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_+i); } | ||
| __device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_+i); } | ||
| #endif | ||
|
|
||
| friend SiPixelClustersCUDA; | ||
|
|
||
| private: | ||
| uint32_t const *moduleStart_ = nullptr; | ||
| int32_t const *clus_ = nullptr; | ||
| uint32_t const *clusInModule_ = nullptr; | ||
| uint32_t const *moduleId_ = nullptr; | ||
| uint32_t const *clusModuleStart_ = nullptr; | ||
| }; | ||
|
|
||
| DeviceConstView *view() const { return view_d.get(); } /* NOTE(review): hands out a non-const pointer from a const method, whereas SiPixelDigisCUDA::view() returns pointer-to-const; confirm this is intended */ | ||
|
|
||
| private: | ||
| edm::cuda::device::unique_ptr<uint32_t[]> moduleStart_d; // index of the first pixel of each module | ||
| edm::cuda::device::unique_ptr<int32_t[]> clus_d; // cluster id of each pixel | ||
| edm::cuda::device::unique_ptr<uint32_t[]> clusInModule_d; // number of clusters found in each module | ||
| edm::cuda::device::unique_ptr<uint32_t[]> moduleId_d; // module id of each module | ||
|
|
||
| // originally from rechits | ||
| edm::cuda::device::unique_ptr<uint32_t[]> clusModuleStart_d; | ||
|
|
||
| edm::cuda::device::unique_ptr<DeviceConstView> view_d; // "me" pointer: the DeviceConstView object returned by view() | ||
| }; | ||
|
|
||
| #endif | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" | ||
|
|
||
| #include "FWCore/ServiceRegistry/interface/Service.h" | ||
| #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" | ||
|
|
||
| // Allocates all buffers through CUDAService on the given stream and copies a | ||
| // DeviceConstView holding their addresses into view_d. | ||
| // | ||
| // feds      - number of int32_t elements allocated for clus_d | ||
| // nelements - number of elements for clusInModule_d and moduleId_d; | ||
| //             moduleStart_d and clusModuleStart_d get nelements+1 | ||
| // stream    - CUDA stream handed to every allocation and used for the copy | ||
| // | ||
| // Throws through cuda::throw_if_error when the copy cannot be enqueued. | ||
| SiPixelClustersCUDA::SiPixelClustersCUDA(size_t feds, size_t nelements, cuda::stream_t<>& stream) { | ||
| edm::Service<CUDAService> cs; | ||
|
|
||
| moduleStart_d = cs->make_device_unique<uint32_t[]>(nelements+1, stream); | ||
| clus_d = cs->make_device_unique< int32_t[]>(feds, stream); | ||
| clusInModule_d = cs->make_device_unique<uint32_t[]>(nelements, stream); | ||
| moduleId_d = cs->make_device_unique<uint32_t[]>(nelements, stream); | ||
| clusModuleStart_d = cs->make_device_unique<uint32_t[]>(nelements+1, stream); | ||
|
|
||
| // Fill the view in a host staging buffer first, then ship it to the device. | ||
| auto view = cs->make_host_unique<DeviceConstView>(stream); | ||
| view->moduleStart_ = moduleStart_d.get(); | ||
| view->clus_ = clus_d.get(); | ||
| view->clusInModule_ = clusInModule_d.get(); | ||
| view->moduleId_ = moduleId_d.get(); | ||
| view->clusModuleStart_ = clusModuleStart_d.get(); | ||
|
|
||
| view_d = cs->make_device_unique<DeviceConstView>(stream); | ||
| // The status used to be discarded; a failed enqueue would leave view_d | ||
| // uninitialised for every kernel that dereferences it, so report it here. | ||
| // NOTE(review): the staging buffer is released when this constructor returns, | ||
| // possibly before the copy has run; presumably the host allocator defers its | ||
| // reuse until the stream has caught up. Confirm against CUDAService. | ||
| cuda::throw_if_error(cudaMemcpyAsync(view_d.get(), view.get(), sizeof(DeviceConstView), cudaMemcpyDefault, stream.id())); | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| <use name="FWCore/ServiceRegistry"/> | ||
| <use name="HeterogeneousCore/CUDAServices"/> | ||
| <use name="cuda-api-wrappers"/> | ||
|
|
||
| <export> | ||
| <lib name="1"/> | ||
| </export> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| #ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h | ||
| #define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h | ||
|
|
||
| #include "CUDADataFormats/Common/interface/device_unique_ptr.h" | ||
| #include "FWCore/Utilities/interface/propagate_const.h" | ||
|
|
||
| #include <cuda/api_wrappers.h> | ||
|
|
||
| class SiPixelDigisCUDA { /* Move-only owner of four uint16_t arrays and one DeviceConstView object, each held through edm::cuda::device::unique_ptr. */ | ||
| public: | ||
| SiPixelDigisCUDA() = default; /* every owned pointer starts out null */ | ||
| explicit SiPixelDigisCUDA(size_t nelements, cuda::stream_t<>& stream); /* defined out of line */ | ||
| ~SiPixelDigisCUDA() = default; | ||
|
|
||
| SiPixelDigisCUDA(const SiPixelDigisCUDA&) = delete; /* copying is disabled; the members are unique_ptrs */ | ||
| SiPixelDigisCUDA& operator=(const SiPixelDigisCUDA&) = delete; | ||
| SiPixelDigisCUDA(SiPixelDigisCUDA&&) = default; | ||
| SiPixelDigisCUDA& operator=(SiPixelDigisCUDA&&) = default; | ||
|
|
||
| uint16_t * xx() { return xx_d.get(); } /* mutable raw-pointer accessors (non-const object only) */ | ||
| uint16_t * yy() { return yy_d.get(); } | ||
| uint16_t * adc() { return adc_d.get(); } | ||
| uint16_t * moduleInd() { return moduleInd_d.get(); } | ||
|
|
||
| uint16_t const *xx() const { return xx_d.get(); } /* const overloads: same pointers, pointee const */ | ||
| uint16_t const *yy() const { return yy_d.get(); } | ||
| uint16_t const *adc() const { return adc_d.get(); } | ||
| uint16_t const *moduleInd() const { return moduleInd_d.get(); } | ||
|
|
||
| uint16_t const *c_xx() const { return xx_d.get(); } /* c_* variants: always return pointer-to-const, even when called on a non-const object */ | ||
| uint16_t const *c_yy() const { return yy_d.get(); } | ||
| uint16_t const *c_adc() const { return adc_d.get(); } | ||
| uint16_t const *c_moduleInd() const { return moduleInd_d.get(); } | ||
|
|
||
| class DeviceConstView { /* Bundle of const raw pointers to the arrays above; only SiPixelDigisCUDA (a friend) can set them. */ | ||
| public: | ||
| DeviceConstView() = default; /* all pointers default to nullptr */ | ||
|
|
||
| #ifdef __CUDACC__ | ||
| __device__ __forceinline__ uint16_t xx(int i) const { return __ldg(xx_+i); } /* element getters: device-only, read element i through __ldg, no bounds check */ | ||
| __device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_+i); } | ||
| __device__ __forceinline__ uint16_t adc(int i) const { return __ldg(adc_+i); } | ||
| __device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_+i); } | ||
| #endif | ||
|
|
||
| friend class SiPixelDigisCUDA; | ||
|
|
||
| private: | ||
| uint16_t const *xx_ = nullptr; | ||
| uint16_t const *yy_ = nullptr; | ||
| uint16_t const *adc_ = nullptr; | ||
| uint16_t const *moduleInd_ = nullptr; | ||
| }; | ||
|
|
||
| const DeviceConstView *view() const { return view_d.get(); } /* raw pointer to the owned DeviceConstView; null for a default-constructed object */ | ||
|
|
||
| private: | ||
| edm::cuda::device::unique_ptr<uint16_t[]> xx_d; // local coordinates of each pixel | ||
| edm::cuda::device::unique_ptr<uint16_t[]> yy_d; // | ||
| edm::cuda::device::unique_ptr<uint16_t[]> adc_d; // ADC of each pixel | ||
| edm::cuda::device::unique_ptr<uint16_t[]> moduleInd_d; // module id of each pixel | ||
| edm::cuda::device::unique_ptr<DeviceConstView> view_d; // "me" pointer: the DeviceConstView object returned by view() | ||
| }; | ||
|
|
||
| #endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" | ||
|
|
||
| #include "FWCore/ServiceRegistry/interface/Service.h" | ||
| #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" | ||
|
|
||
| #include <cuda_runtime.h> | ||
|
|
||
| // Allocates all buffers through CUDAService on the given stream and copies a | ||
| // DeviceConstView holding their addresses into view_d. | ||
| // | ||
| // nelements - number of uint16_t elements in each of xx_d, yy_d, adc_d and | ||
| //             moduleInd_d | ||
| // stream    - CUDA stream handed to every allocation and used for the copy | ||
| // | ||
| // Throws through cuda::throw_if_error when the copy cannot be enqueued. | ||
| SiPixelDigisCUDA::SiPixelDigisCUDA(size_t nelements, cuda::stream_t<>& stream) { | ||
| edm::Service<CUDAService> cs; | ||
|
|
||
| xx_d = cs->make_device_unique<uint16_t[]>(nelements, stream); | ||
| yy_d = cs->make_device_unique<uint16_t[]>(nelements, stream); | ||
| adc_d = cs->make_device_unique<uint16_t[]>(nelements, stream); | ||
| moduleInd_d = cs->make_device_unique<uint16_t[]>(nelements, stream); | ||
|
|
||
| // Fill the view in a host staging buffer first, then ship it to the device. | ||
| auto view = cs->make_host_unique<DeviceConstView>(stream); | ||
| view->xx_ = xx_d.get(); | ||
| view->yy_ = yy_d.get(); | ||
| view->adc_ = adc_d.get(); | ||
| view->moduleInd_ = moduleInd_d.get(); | ||
|
|
||
| view_d = cs->make_device_unique<DeviceConstView>(stream); | ||
| // The status used to be discarded; a failed enqueue would leave view_d | ||
| // uninitialised for every kernel that dereferences it, so report it here. | ||
| // NOTE(review): the staging buffer is released when this constructor returns, | ||
| // possibly before the copy has run; presumably the host allocator defers its | ||
| // reuse until the stream has caught up. Confirm against CUDAService. | ||
| cuda::throw_if_error(cudaMemcpyAsync(view_d.get(), view.get(), sizeof(DeviceConstView), cudaMemcpyDefault, stream.id())); | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.