diff --git a/CUDADataFormats/SiStripCluster/BuildFile.xml b/CUDADataFormats/SiStripCluster/BuildFile.xml new file mode 100644 index 0000000000000..5e401d215c4eb --- /dev/null +++ b/CUDADataFormats/SiStripCluster/BuildFile.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h b/CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h new file mode 100644 index 0000000000000..f64b8a533d513 --- /dev/null +++ b/CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h @@ -0,0 +1,59 @@ +#ifndef CUDADataFormats_SiStripCluster_interface_SiStripClustersCUDA_h +#define CUDADataFormats_SiStripCluster_interface_SiStripClustersCUDA_h + +#include "DataFormats/SiStripCluster/interface/SiStripClustersSOABase.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" + +#include + +class SiStripClustersCUDADevice : public SiStripClustersSOABase { +public: + SiStripClustersCUDADevice() = default; + explicit SiStripClustersCUDADevice(uint32_t maxClusters, uint32_t maxStripsPerCluster, cudaStream_t stream); + ~SiStripClustersCUDADevice() override = default; + + SiStripClustersCUDADevice(const SiStripClustersCUDADevice &) = delete; + SiStripClustersCUDADevice &operator=(const SiStripClustersCUDADevice &) = delete; + SiStripClustersCUDADevice(SiStripClustersCUDADevice &&) = default; + SiStripClustersCUDADevice &operator=(SiStripClustersCUDADevice &&) = default; + + struct DeviceView { + uint32_t *clusterIndex_; + uint32_t *clusterSize_; + uint8_t *clusterADCs_; + stripgpu::detId_t *clusterDetId_; + stripgpu::stripId_t *firstStrip_; + bool *trueCluster_; + float *barycenter_; + float *charge_; + uint32_t nClusters_; + uint32_t maxClusterSize_; + }; + + DeviceView *view() const { return view_d.get(); } + uint32_t nClusters() const { return nClusters_; } + uint32_t *nClustersPtr() { return &nClusters_; } + uint32_t maxClusterSize() const { return maxClusterSize_; } + uint32_t *maxClusterSizePtr() { return &maxClusterSize_; } + +private: + cms::cuda::device::unique_ptr view_d; // "me" pointer + uint32_t nClusters_; + uint32_t maxClusterSize_; +}; + +class SiStripClustersCUDAHost : public SiStripClustersSOABase { +public: + SiStripClustersCUDAHost() = default; + explicit SiStripClustersCUDAHost(const SiStripClustersCUDADevice &clusters_d, cudaStream_t stream); + ~SiStripClustersCUDAHost() override = default; + + SiStripClustersCUDAHost(const SiStripClustersCUDAHost &) = delete; + SiStripClustersCUDAHost &operator=(const SiStripClustersCUDAHost &) = delete; + SiStripClustersCUDAHost(SiStripClustersCUDAHost &&) = default; + SiStripClustersCUDAHost &operator=(SiStripClustersCUDAHost &&) = default; +}; + +#endif diff --git a/CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc b/CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc new file mode 100644 index 0000000000000..220456760476a --- /dev/null +++ b/CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc @@ -0,0 +1,59 @@ +#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" + +SiStripClustersCUDADevice::SiStripClustersCUDADevice(uint32_t maxClusters, + uint32_t maxStripsPerCluster, + cudaStream_t stream) { + maxClusterSize_ = maxStripsPerCluster; + + clusterIndex_ = cms::cuda::make_device_unique(maxClusters, stream); + clusterSize_ = cms::cuda::make_device_unique(maxClusters, stream); + clusterADCs_ = cms::cuda::make_device_unique(maxClusters * maxStripsPerCluster, stream); + clusterDetId_ = cms::cuda::make_device_unique(maxClusters, stream); + firstStrip_ = cms::cuda::make_device_unique(maxClusters, stream); + trueCluster_ = cms::cuda::make_device_unique(maxClusters, stream); + barycenter_ = cms::cuda::make_device_unique(maxClusters, stream); + charge_ = cms::cuda::make_device_unique(maxClusters, stream); + + auto view = cms::cuda::make_host_unique(stream); + view->clusterIndex_ = clusterIndex_.get(); + view->clusterSize_ = clusterSize_.get(); + view->clusterADCs_ = clusterADCs_.get(); + view->clusterDetId_ = clusterDetId_.get(); + view->firstStrip_ = firstStrip_.get(); + view->trueCluster_ = trueCluster_.get(); + view->barycenter_ = barycenter_.get(); + view->charge_ = charge_.get(); + view->maxClusterSize_ = maxStripsPerCluster; + + view_d = cms::cuda::make_device_unique(stream); + cms::cuda::copyAsync(view_d, view, stream); +#ifdef GPU_CHECK + cudaCheck(cudaStreamSynchronize(stream)); +#endif +} + +SiStripClustersCUDAHost::SiStripClustersCUDAHost(const SiStripClustersCUDADevice& clusters_d, cudaStream_t stream) { + nClusters_ = clusters_d.nClusters(); + maxClusterSize_ = clusters_d.maxClusterSize(); + clusterIndex_ = cms::cuda::make_host_unique(nClusters_, stream); + clusterSize_ = cms::cuda::make_host_unique(nClusters_, stream); + clusterADCs_ = cms::cuda::make_host_unique(nClusters_ * maxClusterSize_, stream); + clusterDetId_ = cms::cuda::make_host_unique(nClusters_, stream); + firstStrip_ = cms::cuda::make_host_unique(nClusters_, stream); + trueCluster_ = cms::cuda::make_host_unique(nClusters_, stream); + barycenter_ = cms::cuda::make_host_unique(nClusters_, stream); + charge_ = cms::cuda::make_host_unique(nClusters_, stream); + + cms::cuda::copyAsync(clusterIndex_, clusters_d.clusterIndex(), nClusters_, stream); + cms::cuda::copyAsync(clusterSize_, clusters_d.clusterSize(), nClusters_, stream); + cms::cuda::copyAsync(clusterADCs_, clusters_d.clusterADCs(), nClusters_ * maxClusterSize_, stream); + cms::cuda::copyAsync(clusterDetId_, clusters_d.clusterDetId(), nClusters_, stream); + cms::cuda::copyAsync(firstStrip_, clusters_d.firstStrip(), nClusters_, stream); + cms::cuda::copyAsync(trueCluster_, clusters_d.trueCluster(), nClusters_, stream); + cms::cuda::copyAsync(barycenter_, clusters_d.barycenter(), nClusters_, stream); + cms::cuda::copyAsync(charge_, clusters_d.charge(), nClusters_, stream); +#ifdef GPU_CHECK + cudaCheck(cudaStreamSynchronize(stream)); +#endif +} diff --git a/CUDADataFormats/SiStripCluster/src/classes.h b/CUDADataFormats/SiStripCluster/src/classes.h new file mode 100644 index 0000000000000..b38f397dee067 --- /dev/null +++ b/CUDADataFormats/SiStripCluster/src/classes.h @@ -0,0 +1,8 @@ +#ifndef CUDADataFormats_SiStripCluster_classes_h +#define CUDADataFormats_SiStripCluster_classes_h + +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h" +#include "DataFormats/Common/interface/Wrapper.h" + +#endif diff --git a/CUDADataFormats/SiStripCluster/src/classes_def.xml b/CUDADataFormats/SiStripCluster/src/classes_def.xml new file mode 100644 index 0000000000000..3c2f3ab27c620 --- /dev/null +++ b/CUDADataFormats/SiStripCluster/src/classes_def.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/CalibFormats/SiStripObjects/BuildFile.xml b/CalibFormats/SiStripObjects/BuildFile.xml index 325f0aa1bcb9f..83c3901a34f13 100644 --- a/CalibFormats/SiStripObjects/BuildFile.xml +++ b/CalibFormats/SiStripObjects/BuildFile.xml @@ -2,6 +2,9 @@ + + + diff --git a/CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h b/CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h new file mode 100644 index 0000000000000..94f0080f88019 --- /dev/null +++ b/CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h @@ -0,0 +1,137 @@ +#ifndef CalibFormats_SiStripObjects_SiStripClusterizerConditionsGPU_h +#define CalibFormats_SiStripObjects_SiStripClusterizerConditionsGPU_h + +#include "DataFormats/SiStripCluster/interface/SiStripTypes.h" +#include "DataFormats/SiStripCommon/interface/ConstantsForHardwareSystems.h" + +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" + +class SiStripQuality; +class SiStripGain; +class SiStripNoises; + +namespace stripgpu { + __host__ __device__ inline fedId_t fedIndex(fedId_t fed) { return fed - sistrip::FED_ID_MIN; } + __host__ __device__ inline std::uint32_t stripIndex(fedId_t fed, fedCh_t channel, stripId_t strip) { + return fedIndex(fed) * sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH + channel * sistrip::STRIPS_PER_FEDCH + + (strip % sistrip::STRIPS_PER_FEDCH); + } + __host__ __device__ inline std::uint32_t apvIndex(fedId_t fed, fedCh_t channel, stripId_t strip) { + return fedIndex(fed) * sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED + sistrip::APVS_PER_CHAN * channel + + (strip % sistrip::STRIPS_PER_FEDCH) / sistrip::STRIPS_PER_APV; + } + __host__ __device__ inline std::uint32_t channelIndex(fedId_t fed, fedCh_t channel) { + return fedIndex(fed) * sistrip::FEDCH_PER_FED + channel; + } + + class SiStripClusterizerConditionsGPU { + public: + class DetToFed { + public: + DetToFed(detId_t detid, apvPair_t ipair, fedId_t fedid, fedCh_t fedch) + : detid_(detid), ipair_(ipair), fedid_(fedid), fedch_(fedch) {} + detId_t detID() const { return detid_; } + apvPair_t pair() const { return ipair_; } + fedId_t fedID() const { return fedid_; } + fedCh_t fedCh() const { return fedch_; } + + private: + detId_t detid_; + apvPair_t ipair_; + fedId_t fedid_; + fedCh_t fedch_; + }; + using DetToFeds = std::vector; + + static constexpr std::uint16_t badBit = 1 << 15; + + class Data { + public: + struct DeviceView { + __device__ inline detId_t detID(fedId_t fed, fedCh_t channel) const { + return detID_[channelIndex(fed, channel)]; + } + + __device__ inline apvPair_t iPair(fedId_t fed, fedCh_t channel) const { + return iPair_[channelIndex(fed, channel)]; + } + + __device__ inline float invthick(fedId_t fed, fedCh_t channel) const { + return invthick_[channelIndex(fed, channel)]; + } + + __device__ inline float noise(fedId_t fed, fedCh_t channel, stripId_t strip) const { + // noise is stored as 9 bits with a fixed point scale factor of 0.1 + return 0.1f * (noise_[stripIndex(fed, channel, strip)] & ~badBit); + } + + __device__ inline float gain(fedId_t fed, fedCh_t channel, stripId_t strip) const { + return gain_[apvIndex(fed, channel, strip)]; + } + + __device__ inline bool bad(fedId_t fed, fedCh_t channel, stripId_t strip) const { + return badBit == (noise_[stripIndex(fed, channel, strip)] & badBit); + } + const std::uint16_t* noise_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH]; + const float* invthick_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED]; + const detId_t* detID_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED]; + const apvPair_t* iPair_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED]; + const float* gain_; //[sistrip::NUMBER_OF_FEDS*sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED]; + }; + + const DeviceView* deviceView() const { return deviceView_.get(); } + + cms::cuda::device::unique_ptr deviceView_; + cms::cuda::host::unique_ptr hostView_; + + cms::cuda::device::unique_ptr + noise_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH]; + cms::cuda::device::unique_ptr invthick_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED]; + cms::cuda::device::unique_ptr detID_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED]; + cms::cuda::device::unique_ptr iPair_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED]; + cms::cuda::device::unique_ptr + gain_; //[sistrip::NUMBER_OF_FEDS*sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED]; + }; + + SiStripClusterizerConditionsGPU(const SiStripQuality& quality, + const SiStripGain* gains, + const SiStripNoises& noises); + ~SiStripClusterizerConditionsGPU() = default; + + // Function to return the actual payload on the memory of the current device + Data const& getGPUProductAsync(cudaStream_t stream) const; + + const DetToFeds& detToFeds() const { return detToFeds_; } + + private: + void setStrip(fedId_t fed, fedCh_t channel, stripId_t strip, std::uint16_t noise, float gain, bool bad) { + gain_[apvIndex(fed, channel, strip)] = gain; + noise_[stripIndex(fed, channel, strip)] = noise; + if (bad) { + noise_[stripIndex(fed, channel, strip)] |= badBit; + } + } + + void setInvThickness(fedId_t fed, fedCh_t channel, float invthick) { + invthick_[channelIndex(fed, channel)] = invthick; + } + + // Holds the data in pinned CPU memory + std::vector> noise_; + std::vector> invthick_; + std::vector> detID_; + std::vector> iPair_; + std::vector> gain_; + + // Helper that takes care of complexity of transferring the data to + // multiple devices + cms::cuda::ESProduct gpuData_; + DetToFeds detToFeds_; + }; +} // namespace stripgpu + +#endif diff --git a/CalibFormats/SiStripObjects/src/EventSetup_Registration.cc b/CalibFormats/SiStripObjects/src/EventSetup_Registration.cc index ddf7a0ffb914f..05530484f14c4 100644 --- a/CalibFormats/SiStripObjects/src/EventSetup_Registration.cc +++ b/CalibFormats/SiStripObjects/src/EventSetup_Registration.cc @@ -23,3 +23,6 @@ TYPELOOKUP_DATA_REG(SiStripQuality); #include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditions.h" TYPELOOKUP_DATA_REG(SiStripClusterizerConditions); + +#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h" +TYPELOOKUP_DATA_REG(stripgpu::SiStripClusterizerConditionsGPU); diff --git a/CalibFormats/SiStripObjects/src/SiStripClusterizerConditionsGPU.cc b/CalibFormats/SiStripObjects/src/SiStripClusterizerConditionsGPU.cc new file mode 100644 index 0000000000000..33d0889ff5550 --- /dev/null +++ b/CalibFormats/SiStripObjects/src/SiStripClusterizerConditionsGPU.cc @@ -0,0 +1,100 @@ +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" + +#include "CondFormats/SiStripObjects/interface/SiStripNoises.h" +#include "CalibFormats/SiStripObjects/interface/SiStripGain.h" +#include "CalibFormats/SiStripObjects/interface/SiStripDetCabling.h" +#include "CalibFormats/SiStripObjects/interface/SiStripQuality.h" +#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h" + +#include "DataFormats/SiStripCluster/interface/SiStripClusterTools.h" + +namespace stripgpu { + SiStripClusterizerConditionsGPU::SiStripClusterizerConditionsGPU(const SiStripQuality& quality, + const SiStripGain* gains, + const SiStripNoises& noises) + + : noise_(sistrip::NUMBER_OF_FEDS * sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH), + invthick_(sistrip::NUMBER_OF_FEDS * sistrip::FEDCH_PER_FED), + detID_(sistrip::NUMBER_OF_FEDS * sistrip::FEDCH_PER_FED), + iPair_(sistrip::NUMBER_OF_FEDS * sistrip::FEDCH_PER_FED), + gain_(sistrip::NUMBER_OF_FEDS * sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED) { + // connected: map> + // map of KEY=detid DATA=vector of apvs, maximum 6 APVs per detector module : + const auto& connected = quality.cabling()->connected(); + // detCabling: map + // map of KEY=detid DATA=vector + const auto& detCabling = quality.cabling()->getDetCabling(); + + for (const auto& conn : connected) { + const auto det = conn.first; + if (!quality.IsModuleBad(det)) { + const auto detConn_it = detCabling.find(det); + + if (detCabling.end() != detConn_it) { + for (const auto& chan : (*detConn_it).second) { + if (chan && chan->fedId() && chan->isConnected()) { + const auto detID = chan->detId(); + const auto fedID = chan->fedId(); + const auto fedCh = chan->fedCh(); + const auto iPair = chan->apvPairNumber(); + + detToFeds_.emplace_back(detID, iPair, fedID, fedCh); + + detID_[channelIndex(fedID, fedCh)] = detID; + iPair_[channelIndex(fedID, fedCh)] = iPair; + setInvThickness(fedID, fedCh, siStripClusterTools::sensorThicknessInverse(detID)); + + auto offset = 256 * iPair; + + for (auto strip = 0; strip < 256; ++strip) { + const auto gainRange = gains->getRange(det); + + const auto detstrip = strip + offset; + const std::uint16_t noise = SiStripNoises::getRawNoise(detstrip, noises.getRange(det)); + const auto gain = SiStripGain::getStripGain(detstrip, gainRange); + const auto bad = quality.IsStripBad(quality.getRange(det), detstrip); + + // gain is actually stored per-APV, not per-strip + setStrip(fedID, fedCh, detstrip, noise, gain, bad); + } + } + } + } + } + } + + std::sort(detToFeds_.begin(), detToFeds_.end(), [](const DetToFed& a, const DetToFed& b) { + return a.detID() < b.detID() || (a.detID() == b.detID() && a.pair() < b.pair()); + }); + } + + SiStripClusterizerConditionsGPU::Data const& SiStripClusterizerConditionsGPU::getGPUProductAsync( + cudaStream_t stream) const { + auto const& data = gpuData_.dataForCurrentDeviceAsync(stream, [this](Data& data, cudaStream_t stream) { + data.noise_ = cms::cuda::make_device_unique(noise_.size(), stream); + data.invthick_ = cms::cuda::make_device_unique(invthick_.size(), stream); + data.detID_ = cms::cuda::make_device_unique(detID_.size(), stream); + data.iPair_ = cms::cuda::make_device_unique(iPair_.size(), stream); + data.gain_ = cms::cuda::make_device_unique(gain_.size(), stream); + + cms::cuda::copyAsync(data.noise_, noise_, stream); + cms::cuda::copyAsync(data.invthick_, invthick_, stream); + cms::cuda::copyAsync(data.detID_, detID_, stream); + cms::cuda::copyAsync(data.iPair_, iPair_, stream); + cms::cuda::copyAsync(data.gain_, gain_, stream); + + data.hostView_ = cms::cuda::make_host_unique(stream); + data.hostView_->noise_ = data.noise_.get(); + data.hostView_->invthick_ = data.invthick_.get(); + data.hostView_->detID_ = data.detID_.get(); + data.hostView_->iPair_ = data.iPair_.get(); + data.hostView_->gain_ = data.gain_.get(); + + data.deviceView_ = cms::cuda::make_device_unique(stream); + cms::cuda::copyAsync(data.deviceView_, data.hostView_, stream); + }); + + return data; + } +} // namespace stripgpu diff --git a/DataFormats/SiStripCluster/interface/SiStripClustersSOA.h b/DataFormats/SiStripCluster/interface/SiStripClustersSOA.h new file mode 100644 index 0000000000000..e6c262d456289 --- /dev/null +++ b/DataFormats/SiStripCluster/interface/SiStripClustersSOA.h @@ -0,0 +1,27 @@ +#ifndef DataFormats_SiStripCluster_interface_SiStripClustersSOA_h +#define DataFormats_SiStripCluster_interface_SiStripClustersSOA_h + +#include "DataFormats/SiStripCluster/interface/SiStripClustersSOABase.h" + +#include + +namespace detail { + namespace impl { + template + using unique_ptr_default_deleter = typename std::unique_ptr; + } +} // namespace detail + +class SiStripClustersSOA : public SiStripClustersSOABase { +public: + SiStripClustersSOA() = default; + explicit SiStripClustersSOA(uint32_t maxClusters, uint32_t maxStripsPerCluster); + ~SiStripClustersSOA() override = default; + + SiStripClustersSOA(const SiStripClustersSOA &) = delete; + SiStripClustersSOA &operator=(const SiStripClustersSOA &) = delete; + SiStripClustersSOA(SiStripClustersSOA &&) = default; + SiStripClustersSOA &operator=(SiStripClustersSOA &&) = default; +}; + +#endif diff --git a/DataFormats/SiStripCluster/interface/SiStripClustersSOABase.h b/DataFormats/SiStripCluster/interface/SiStripClustersSOABase.h new file mode 100644 index 0000000000000..036ab7c3dd3e5 --- /dev/null +++ b/DataFormats/SiStripCluster/interface/SiStripClustersSOABase.h @@ -0,0 +1,59 @@ +#ifndef DataFormats_SiStripCluster_interface_SiStripClustersSOABase_ +#define DataFormats_SiStripCluster_interface_SiStripClustersSOABase_ + +#include "DataFormats/SiStripCluster/interface/SiStripTypes.h" + +#include +#include + +template