-
Notifications
You must be signed in to change notification settings - Fork 4.6k
CUDA implementation of RecoLocalTracker/SiStripCluster ClustersFromRawProducer #34618
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
35b8c17
GPU SiStripClusterizer (squashed)
829b9fd
Reorganize SiStripClusterizerConditionsGPU to avoid alignment issues
a56c8ae
This commit adds fillDescriptions to SiStripClusterizerFromRaw and the
cc83f31
rebase, MaxClusterSize corrections, CCC interface change, formatting
82d27f0
Fix logic error introduced in SiStripZeroSuppression/src/SiStripRawPr…
c22c8a2
remove the gpu rcd as redundant. Address comment to make 15000 a cons…
davidlange6 d71ba2c
address hardwired comments for saturation and masks
davidlange6 0408521
rebase to 12_6
7dbff2a
rebase and response to comments
72793a5
stdio.h -> cstdio, fix ChannelLocsView wrt class vs. struct
44b2092
rebase to CMSSW_13_1_0_pre1, remove unused CUDAService include
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| <use name="DataFormats/Common"/> | ||
| <use name="CUDADataFormats/Common"/> | ||
| <use name="HeterogeneousCore/CUDAUtilities"/> | ||
| <use name="cuda"/> | ||
| <use name="rootcore"/> | ||
|
|
||
| <export> | ||
| <lib name="1"/> | ||
| </export> | ||
|
|
59 changes: 59 additions & 0 deletions
59
CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| #ifndef CUDADataFormats_SiStripCluster_interface_SiStripClustersCUDA_h | ||
| #define CUDADataFormats_SiStripCluster_interface_SiStripClustersCUDA_h | ||
|
|
||
| #include "DataFormats/SiStripCluster/interface/SiStripClustersSOABase.h" | ||
| #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" | ||
| #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" | ||
| #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" | ||
|
|
||
| #include <cuda_runtime.h> | ||
|
|
||
| class SiStripClustersCUDADevice : public SiStripClustersSOABase<cms::cuda::device::unique_ptr> { | ||
| public: | ||
| SiStripClustersCUDADevice() = default; | ||
| explicit SiStripClustersCUDADevice(uint32_t maxClusters, uint32_t maxStripsPerCluster, cudaStream_t stream); | ||
| ~SiStripClustersCUDADevice() override = default; | ||
|
|
||
| SiStripClustersCUDADevice(const SiStripClustersCUDADevice &) = delete; | ||
| SiStripClustersCUDADevice &operator=(const SiStripClustersCUDADevice &) = delete; | ||
| SiStripClustersCUDADevice(SiStripClustersCUDADevice &&) = default; | ||
| SiStripClustersCUDADevice &operator=(SiStripClustersCUDADevice &&) = default; | ||
|
|
||
| struct DeviceView { | ||
| uint32_t *clusterIndex_; | ||
| uint32_t *clusterSize_; | ||
| uint8_t *clusterADCs_; | ||
| stripgpu::detId_t *clusterDetId_; | ||
| stripgpu::stripId_t *firstStrip_; | ||
| bool *trueCluster_; | ||
| float *barycenter_; | ||
| float *charge_; | ||
| uint32_t nClusters_; | ||
| uint32_t maxClusterSize_; | ||
| }; | ||
|
|
||
| DeviceView *view() const { return view_d.get(); } | ||
| uint32_t nClusters() const { return nClusters_; } | ||
| uint32_t *nClustersPtr() { return &nClusters_; } | ||
| uint32_t maxClusterSize() const { return maxClusterSize_; } | ||
| uint32_t *maxClusterSizePtr() { return &maxClusterSize_; } | ||
|
|
||
| private: | ||
| cms::cuda::device::unique_ptr<DeviceView> view_d; // "me" pointer | ||
| uint32_t nClusters_; | ||
| uint32_t maxClusterSize_; | ||
| }; | ||
|
|
||
| class SiStripClustersCUDAHost : public SiStripClustersSOABase<cms::cuda::host::unique_ptr> { | ||
| public: | ||
| SiStripClustersCUDAHost() = default; | ||
| explicit SiStripClustersCUDAHost(const SiStripClustersCUDADevice &clusters_d, cudaStream_t stream); | ||
| ~SiStripClustersCUDAHost() override = default; | ||
|
|
||
| SiStripClustersCUDAHost(const SiStripClustersCUDAHost &) = delete; | ||
| SiStripClustersCUDAHost &operator=(const SiStripClustersCUDAHost &) = delete; | ||
| SiStripClustersCUDAHost(SiStripClustersCUDAHost &&) = default; | ||
| SiStripClustersCUDAHost &operator=(SiStripClustersCUDAHost &&) = default; | ||
| }; | ||
|
|
||
| #endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| #include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h" | ||
| #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" | ||
|
|
||
| SiStripClustersCUDADevice::SiStripClustersCUDADevice(uint32_t maxClusters, | ||
| uint32_t maxStripsPerCluster, | ||
| cudaStream_t stream) { | ||
| maxClusterSize_ = maxStripsPerCluster; | ||
|
|
||
| clusterIndex_ = cms::cuda::make_device_unique<uint32_t[]>(maxClusters, stream); | ||
| clusterSize_ = cms::cuda::make_device_unique<uint32_t[]>(maxClusters, stream); | ||
| clusterADCs_ = cms::cuda::make_device_unique<uint8_t[]>(maxClusters * maxStripsPerCluster, stream); | ||
| clusterDetId_ = cms::cuda::make_device_unique<stripgpu::detId_t[]>(maxClusters, stream); | ||
| firstStrip_ = cms::cuda::make_device_unique<stripgpu::stripId_t[]>(maxClusters, stream); | ||
| trueCluster_ = cms::cuda::make_device_unique<bool[]>(maxClusters, stream); | ||
| barycenter_ = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
| charge_ = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
|
|
||
| auto view = cms::cuda::make_host_unique<DeviceView>(stream); | ||
| view->clusterIndex_ = clusterIndex_.get(); | ||
| view->clusterSize_ = clusterSize_.get(); | ||
| view->clusterADCs_ = clusterADCs_.get(); | ||
| view->clusterDetId_ = clusterDetId_.get(); | ||
| view->firstStrip_ = firstStrip_.get(); | ||
| view->trueCluster_ = trueCluster_.get(); | ||
| view->barycenter_ = barycenter_.get(); | ||
| view->charge_ = charge_.get(); | ||
| view->maxClusterSize_ = maxStripsPerCluster; | ||
|
|
||
| view_d = cms::cuda::make_device_unique<DeviceView>(stream); | ||
| cms::cuda::copyAsync(view_d, view, stream); | ||
| #ifdef GPU_CHECK | ||
| cudaCheck(cudaStreamSynchronize(stream)); | ||
| #endif | ||
| } | ||
|
|
||
| SiStripClustersCUDAHost::SiStripClustersCUDAHost(const SiStripClustersCUDADevice& clusters_d, cudaStream_t stream) { | ||
| nClusters_ = clusters_d.nClusters(); | ||
| maxClusterSize_ = clusters_d.maxClusterSize(); | ||
| clusterIndex_ = cms::cuda::make_host_unique<uint32_t[]>(nClusters_, stream); | ||
| clusterSize_ = cms::cuda::make_host_unique<uint32_t[]>(nClusters_, stream); | ||
| clusterADCs_ = cms::cuda::make_host_unique<uint8_t[]>(nClusters_ * maxClusterSize_, stream); | ||
| clusterDetId_ = cms::cuda::make_host_unique<stripgpu::detId_t[]>(nClusters_, stream); | ||
| firstStrip_ = cms::cuda::make_host_unique<stripgpu::stripId_t[]>(nClusters_, stream); | ||
| trueCluster_ = cms::cuda::make_host_unique<bool[]>(nClusters_, stream); | ||
| barycenter_ = cms::cuda::make_host_unique<float[]>(nClusters_, stream); | ||
| charge_ = cms::cuda::make_host_unique<float[]>(nClusters_, stream); | ||
|
|
||
| cms::cuda::copyAsync(clusterIndex_, clusters_d.clusterIndex(), nClusters_, stream); | ||
| cms::cuda::copyAsync(clusterSize_, clusters_d.clusterSize(), nClusters_, stream); | ||
| cms::cuda::copyAsync(clusterADCs_, clusters_d.clusterADCs(), nClusters_ * maxClusterSize_, stream); | ||
| cms::cuda::copyAsync(clusterDetId_, clusters_d.clusterDetId(), nClusters_, stream); | ||
| cms::cuda::copyAsync(firstStrip_, clusters_d.firstStrip(), nClusters_, stream); | ||
| cms::cuda::copyAsync(trueCluster_, clusters_d.trueCluster(), nClusters_, stream); | ||
| cms::cuda::copyAsync(barycenter_, clusters_d.barycenter(), nClusters_, stream); | ||
| cms::cuda::copyAsync(charge_, clusters_d.charge(), nClusters_, stream); | ||
| #ifdef GPU_CHECK | ||
| cudaCheck(cudaStreamSynchronize(stream)); | ||
| #endif | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| #ifndef CUDADataFormats_SiStripCluster_classes_h | ||
| #define CUDADataFormats_SiStripCluster_classes_h | ||
|
|
||
| #include "CUDADataFormats/Common/interface/Product.h" | ||
| #include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h" | ||
| #include "DataFormats/Common/interface/Wrapper.h" | ||
|
|
||
| #endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| <lcgdict> | ||
| <class name="cms::cuda::Product<SiStripClustersCUDADevice>" persistent="false"/> | ||
| <class name="edm::Wrapper<cms::cuda::Product<SiStripClustersCUDADevice>>" persistent="false"/> | ||
| <class name="SiStripClustersCUDAHost" persistent="false"/> | ||
| <class name="edm::Wrapper<SiStripClustersCUDAHost>" persistent="false"/> | ||
| </lcgdict> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
137 changes: 137 additions & 0 deletions
137
CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,137 @@ | ||
| #ifndef CalibFormats_SiStripObjects_SiStripClusterizerConditionsGPU_h | ||
| #define CalibFormats_SiStripObjects_SiStripClusterizerConditionsGPU_h | ||
|
|
||
| #include "DataFormats/SiStripCluster/interface/SiStripTypes.h" | ||
| #include "DataFormats/SiStripCommon/interface/ConstantsForHardwareSystems.h" | ||
|
|
||
| #include "HeterogeneousCore/CUDACore/interface/ESProduct.h" | ||
| #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" | ||
| #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" | ||
| #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" | ||
| #include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" | ||
|
|
||
| class SiStripQuality; | ||
| class SiStripGain; | ||
| class SiStripNoises; | ||
|
|
||
| namespace stripgpu { | ||
| __host__ __device__ inline fedId_t fedIndex(fedId_t fed) { return fed - sistrip::FED_ID_MIN; } | ||
| __host__ __device__ inline std::uint32_t stripIndex(fedId_t fed, fedCh_t channel, stripId_t strip) { | ||
| return fedIndex(fed) * sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH + channel * sistrip::STRIPS_PER_FEDCH + | ||
| (strip % sistrip::STRIPS_PER_FEDCH); | ||
| } | ||
| __host__ __device__ inline std::uint32_t apvIndex(fedId_t fed, fedCh_t channel, stripId_t strip) { | ||
| return fedIndex(fed) * sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED + sistrip::APVS_PER_CHAN * channel + | ||
| (strip % sistrip::STRIPS_PER_FEDCH) / sistrip::STRIPS_PER_APV; | ||
| } | ||
| __host__ __device__ inline std::uint32_t channelIndex(fedId_t fed, fedCh_t channel) { | ||
| return fedIndex(fed) * sistrip::FEDCH_PER_FED + channel; | ||
| } | ||
|
|
||
| class SiStripClusterizerConditionsGPU { | ||
| public: | ||
| class DetToFed { | ||
| public: | ||
| DetToFed(detId_t detid, apvPair_t ipair, fedId_t fedid, fedCh_t fedch) | ||
| : detid_(detid), ipair_(ipair), fedid_(fedid), fedch_(fedch) {} | ||
| detId_t detID() const { return detid_; } | ||
| apvPair_t pair() const { return ipair_; } | ||
| fedId_t fedID() const { return fedid_; } | ||
| fedCh_t fedCh() const { return fedch_; } | ||
|
|
||
| private: | ||
| detId_t detid_; | ||
| apvPair_t ipair_; | ||
| fedId_t fedid_; | ||
| fedCh_t fedch_; | ||
| }; | ||
| using DetToFeds = std::vector<DetToFed>; | ||
|
|
||
| static constexpr std::uint16_t badBit = 1 << 15; | ||
|
|
||
| class Data { | ||
| public: | ||
| struct DeviceView { | ||
| __device__ inline detId_t detID(fedId_t fed, fedCh_t channel) const { | ||
| return detID_[channelIndex(fed, channel)]; | ||
| } | ||
|
|
||
| __device__ inline apvPair_t iPair(fedId_t fed, fedCh_t channel) const { | ||
| return iPair_[channelIndex(fed, channel)]; | ||
| } | ||
|
|
||
| __device__ inline float invthick(fedId_t fed, fedCh_t channel) const { | ||
| return invthick_[channelIndex(fed, channel)]; | ||
| } | ||
|
|
||
| __device__ inline float noise(fedId_t fed, fedCh_t channel, stripId_t strip) const { | ||
| // noise is stored as 9 bits with a fixed point scale factor of 0.1 | ||
| return 0.1f * (noise_[stripIndex(fed, channel, strip)] & ~badBit); | ||
| } | ||
|
|
||
| __device__ inline float gain(fedId_t fed, fedCh_t channel, stripId_t strip) const { | ||
| return gain_[apvIndex(fed, channel, strip)]; | ||
| } | ||
|
|
||
| __device__ inline bool bad(fedId_t fed, fedCh_t channel, stripId_t strip) const { | ||
| return badBit == (noise_[stripIndex(fed, channel, strip)] & badBit); | ||
| } | ||
| const std::uint16_t* noise_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH]; | ||
| const float* invthick_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED]; | ||
| const detId_t* detID_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED]; | ||
| const apvPair_t* iPair_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED]; | ||
| const float* gain_; //[sistrip::NUMBER_OF_FEDS*sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED]; | ||
| }; | ||
|
|
||
| const DeviceView* deviceView() const { return deviceView_.get(); } | ||
|
|
||
| cms::cuda::device::unique_ptr<DeviceView> deviceView_; | ||
| cms::cuda::host::unique_ptr<DeviceView> hostView_; | ||
|
|
||
| cms::cuda::device::unique_ptr<std::uint16_t[]> | ||
| noise_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH]; | ||
| cms::cuda::device::unique_ptr<float[]> invthick_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED]; | ||
| cms::cuda::device::unique_ptr<detId_t[]> detID_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED]; | ||
| cms::cuda::device::unique_ptr<apvPair_t[]> iPair_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED]; | ||
| cms::cuda::device::unique_ptr<float[]> | ||
| gain_; //[sistrip::NUMBER_OF_FEDS*sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED]; | ||
| }; | ||
|
|
||
| SiStripClusterizerConditionsGPU(const SiStripQuality& quality, | ||
| const SiStripGain* gains, | ||
| const SiStripNoises& noises); | ||
| ~SiStripClusterizerConditionsGPU() = default; | ||
|
|
||
| // Function to return the actual payload on the memory of the current device | ||
| Data const& getGPUProductAsync(cudaStream_t stream) const; | ||
|
|
||
| const DetToFeds& detToFeds() const { return detToFeds_; } | ||
|
|
||
| private: | ||
| void setStrip(fedId_t fed, fedCh_t channel, stripId_t strip, std::uint16_t noise, float gain, bool bad) { | ||
| gain_[apvIndex(fed, channel, strip)] = gain; | ||
| noise_[stripIndex(fed, channel, strip)] = noise; | ||
| if (bad) { | ||
| noise_[stripIndex(fed, channel, strip)] |= badBit; | ||
| } | ||
| } | ||
|
|
||
| void setInvThickness(fedId_t fed, fedCh_t channel, float invthick) { | ||
| invthick_[channelIndex(fed, channel)] = invthick; | ||
| } | ||
|
|
||
| // Holds the data in pinned CPU memory | ||
| std::vector<std::uint16_t, cms::cuda::HostAllocator<std::uint16_t>> noise_; | ||
| std::vector<float, cms::cuda::HostAllocator<float>> invthick_; | ||
| std::vector<detId_t, cms::cuda::HostAllocator<detId_t>> detID_; | ||
| std::vector<apvPair_t, cms::cuda::HostAllocator<apvPair_t>> iPair_; | ||
| std::vector<float, cms::cuda::HostAllocator<float>> gain_; | ||
|
|
||
| // Helper that takes care of complexity of transferring the data to | ||
| // multiple devices | ||
| cms::cuda::ESProduct<Data> gpuData_; | ||
| DetToFeds detToFeds_; | ||
| }; | ||
| } // namespace stripgpu | ||
|
|
||
| #endif | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.