diff --git a/CUDADataFormats/SiStripCluster/BuildFile.xml b/CUDADataFormats/SiStripCluster/BuildFile.xml
deleted file mode 100644
index 5e401d215c4eb..0000000000000
--- a/CUDADataFormats/SiStripCluster/BuildFile.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
-
-
-
-
-
-
-
diff --git a/CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h b/CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h
deleted file mode 100644
index f64b8a533d513..0000000000000
--- a/CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef CUDADataFormats_SiStripCluster_interface_SiStripClustersCUDA_h
-#define CUDADataFormats_SiStripCluster_interface_SiStripClustersCUDA_h
-
-#include "DataFormats/SiStripCluster/interface/SiStripClustersSOABase.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
-
-#include
-
-class SiStripClustersCUDADevice : public SiStripClustersSOABase {
-public:
- SiStripClustersCUDADevice() = default;
- explicit SiStripClustersCUDADevice(uint32_t maxClusters, uint32_t maxStripsPerCluster, cudaStream_t stream);
- ~SiStripClustersCUDADevice() override = default;
-
- SiStripClustersCUDADevice(const SiStripClustersCUDADevice &) = delete;
- SiStripClustersCUDADevice &operator=(const SiStripClustersCUDADevice &) = delete;
- SiStripClustersCUDADevice(SiStripClustersCUDADevice &&) = default;
- SiStripClustersCUDADevice &operator=(SiStripClustersCUDADevice &&) = default;
-
- struct DeviceView {
- uint32_t *clusterIndex_;
- uint32_t *clusterSize_;
- uint8_t *clusterADCs_;
- stripgpu::detId_t *clusterDetId_;
- stripgpu::stripId_t *firstStrip_;
- bool *trueCluster_;
- float *barycenter_;
- float *charge_;
- uint32_t nClusters_;
- uint32_t maxClusterSize_;
- };
-
- DeviceView *view() const { return view_d.get(); }
- uint32_t nClusters() const { return nClusters_; }
- uint32_t *nClustersPtr() { return &nClusters_; }
- uint32_t maxClusterSize() const { return maxClusterSize_; }
- uint32_t *maxClusterSizePtr() { return &maxClusterSize_; }
-
-private:
- cms::cuda::device::unique_ptr view_d; // "me" pointer
- uint32_t nClusters_;
- uint32_t maxClusterSize_;
-};
-
-class SiStripClustersCUDAHost : public SiStripClustersSOABase {
-public:
- SiStripClustersCUDAHost() = default;
- explicit SiStripClustersCUDAHost(const SiStripClustersCUDADevice &clusters_d, cudaStream_t stream);
- ~SiStripClustersCUDAHost() override = default;
-
- SiStripClustersCUDAHost(const SiStripClustersCUDAHost &) = delete;
- SiStripClustersCUDAHost &operator=(const SiStripClustersCUDAHost &) = delete;
- SiStripClustersCUDAHost(SiStripClustersCUDAHost &&) = default;
- SiStripClustersCUDAHost &operator=(SiStripClustersCUDAHost &&) = default;
-};
-
-#endif
diff --git a/CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc b/CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc
deleted file mode 100644
index 220456760476a..0000000000000
--- a/CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
-
-SiStripClustersCUDADevice::SiStripClustersCUDADevice(uint32_t maxClusters,
- uint32_t maxStripsPerCluster,
- cudaStream_t stream) {
- maxClusterSize_ = maxStripsPerCluster;
-
- clusterIndex_ = cms::cuda::make_device_unique(maxClusters, stream);
- clusterSize_ = cms::cuda::make_device_unique(maxClusters, stream);
- clusterADCs_ = cms::cuda::make_device_unique(maxClusters * maxStripsPerCluster, stream);
- clusterDetId_ = cms::cuda::make_device_unique(maxClusters, stream);
- firstStrip_ = cms::cuda::make_device_unique(maxClusters, stream);
- trueCluster_ = cms::cuda::make_device_unique(maxClusters, stream);
- barycenter_ = cms::cuda::make_device_unique(maxClusters, stream);
- charge_ = cms::cuda::make_device_unique(maxClusters, stream);
-
- auto view = cms::cuda::make_host_unique(stream);
- view->clusterIndex_ = clusterIndex_.get();
- view->clusterSize_ = clusterSize_.get();
- view->clusterADCs_ = clusterADCs_.get();
- view->clusterDetId_ = clusterDetId_.get();
- view->firstStrip_ = firstStrip_.get();
- view->trueCluster_ = trueCluster_.get();
- view->barycenter_ = barycenter_.get();
- view->charge_ = charge_.get();
- view->maxClusterSize_ = maxStripsPerCluster;
-
- view_d = cms::cuda::make_device_unique(stream);
- cms::cuda::copyAsync(view_d, view, stream);
-#ifdef GPU_CHECK
- cudaCheck(cudaStreamSynchronize(stream));
-#endif
-}
-
-SiStripClustersCUDAHost::SiStripClustersCUDAHost(const SiStripClustersCUDADevice& clusters_d, cudaStream_t stream) {
- nClusters_ = clusters_d.nClusters();
- maxClusterSize_ = clusters_d.maxClusterSize();
- clusterIndex_ = cms::cuda::make_host_unique(nClusters_, stream);
- clusterSize_ = cms::cuda::make_host_unique(nClusters_, stream);
- clusterADCs_ = cms::cuda::make_host_unique(nClusters_ * maxClusterSize_, stream);
- clusterDetId_ = cms::cuda::make_host_unique(nClusters_, stream);
- firstStrip_ = cms::cuda::make_host_unique(nClusters_, stream);
- trueCluster_ = cms::cuda::make_host_unique(nClusters_, stream);
- barycenter_ = cms::cuda::make_host_unique(nClusters_, stream);
- charge_ = cms::cuda::make_host_unique(nClusters_, stream);
-
- cms::cuda::copyAsync(clusterIndex_, clusters_d.clusterIndex(), nClusters_, stream);
- cms::cuda::copyAsync(clusterSize_, clusters_d.clusterSize(), nClusters_, stream);
- cms::cuda::copyAsync(clusterADCs_, clusters_d.clusterADCs(), nClusters_ * maxClusterSize_, stream);
- cms::cuda::copyAsync(clusterDetId_, clusters_d.clusterDetId(), nClusters_, stream);
- cms::cuda::copyAsync(firstStrip_, clusters_d.firstStrip(), nClusters_, stream);
- cms::cuda::copyAsync(trueCluster_, clusters_d.trueCluster(), nClusters_, stream);
- cms::cuda::copyAsync(barycenter_, clusters_d.barycenter(), nClusters_, stream);
- cms::cuda::copyAsync(charge_, clusters_d.charge(), nClusters_, stream);
-#ifdef GPU_CHECK
- cudaCheck(cudaStreamSynchronize(stream));
-#endif
-}
diff --git a/CalibFormats/SiStripObjects/BuildFile.xml b/CalibFormats/SiStripObjects/BuildFile.xml
index 83c3901a34f13..325f0aa1bcb9f 100644
--- a/CalibFormats/SiStripObjects/BuildFile.xml
+++ b/CalibFormats/SiStripObjects/BuildFile.xml
@@ -2,9 +2,6 @@
-
-
-
diff --git a/CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h b/CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h
deleted file mode 100644
index 94f0080f88019..0000000000000
--- a/CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h
+++ /dev/null
@@ -1,137 +0,0 @@
-#ifndef CalibFormats_SiStripObjects_SiStripClusterizerConditionsGPU_h
-#define CalibFormats_SiStripObjects_SiStripClusterizerConditionsGPU_h
-
-#include "DataFormats/SiStripCluster/interface/SiStripTypes.h"
-#include "DataFormats/SiStripCommon/interface/ConstantsForHardwareSystems.h"
-
-#include "HeterogeneousCore/CUDACore/interface/ESProduct.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h"
-
-class SiStripQuality;
-class SiStripGain;
-class SiStripNoises;
-
-namespace stripgpu {
- __host__ __device__ inline fedId_t fedIndex(fedId_t fed) { return fed - sistrip::FED_ID_MIN; }
- __host__ __device__ inline std::uint32_t stripIndex(fedId_t fed, fedCh_t channel, stripId_t strip) {
- return fedIndex(fed) * sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH + channel * sistrip::STRIPS_PER_FEDCH +
- (strip % sistrip::STRIPS_PER_FEDCH);
- }
- __host__ __device__ inline std::uint32_t apvIndex(fedId_t fed, fedCh_t channel, stripId_t strip) {
- return fedIndex(fed) * sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED + sistrip::APVS_PER_CHAN * channel +
- (strip % sistrip::STRIPS_PER_FEDCH) / sistrip::STRIPS_PER_APV;
- }
- __host__ __device__ inline std::uint32_t channelIndex(fedId_t fed, fedCh_t channel) {
- return fedIndex(fed) * sistrip::FEDCH_PER_FED + channel;
- }
-
- class SiStripClusterizerConditionsGPU {
- public:
- class DetToFed {
- public:
- DetToFed(detId_t detid, apvPair_t ipair, fedId_t fedid, fedCh_t fedch)
- : detid_(detid), ipair_(ipair), fedid_(fedid), fedch_(fedch) {}
- detId_t detID() const { return detid_; }
- apvPair_t pair() const { return ipair_; }
- fedId_t fedID() const { return fedid_; }
- fedCh_t fedCh() const { return fedch_; }
-
- private:
- detId_t detid_;
- apvPair_t ipair_;
- fedId_t fedid_;
- fedCh_t fedch_;
- };
- using DetToFeds = std::vector;
-
- static constexpr std::uint16_t badBit = 1 << 15;
-
- class Data {
- public:
- struct DeviceView {
- __device__ inline detId_t detID(fedId_t fed, fedCh_t channel) const {
- return detID_[channelIndex(fed, channel)];
- }
-
- __device__ inline apvPair_t iPair(fedId_t fed, fedCh_t channel) const {
- return iPair_[channelIndex(fed, channel)];
- }
-
- __device__ inline float invthick(fedId_t fed, fedCh_t channel) const {
- return invthick_[channelIndex(fed, channel)];
- }
-
- __device__ inline float noise(fedId_t fed, fedCh_t channel, stripId_t strip) const {
- // noise is stored as 9 bits with a fixed point scale factor of 0.1
- return 0.1f * (noise_[stripIndex(fed, channel, strip)] & ~badBit);
- }
-
- __device__ inline float gain(fedId_t fed, fedCh_t channel, stripId_t strip) const {
- return gain_[apvIndex(fed, channel, strip)];
- }
-
- __device__ inline bool bad(fedId_t fed, fedCh_t channel, stripId_t strip) const {
- return badBit == (noise_[stripIndex(fed, channel, strip)] & badBit);
- }
- const std::uint16_t* noise_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH];
- const float* invthick_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
- const detId_t* detID_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
- const apvPair_t* iPair_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
- const float* gain_; //[sistrip::NUMBER_OF_FEDS*sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED];
- };
-
- const DeviceView* deviceView() const { return deviceView_.get(); }
-
- cms::cuda::device::unique_ptr deviceView_;
- cms::cuda::host::unique_ptr hostView_;
-
- cms::cuda::device::unique_ptr
- noise_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH];
- cms::cuda::device::unique_ptr invthick_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
- cms::cuda::device::unique_ptr detID_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
- cms::cuda::device::unique_ptr iPair_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
- cms::cuda::device::unique_ptr
- gain_; //[sistrip::NUMBER_OF_FEDS*sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED];
- };
-
- SiStripClusterizerConditionsGPU(const SiStripQuality& quality,
- const SiStripGain* gains,
- const SiStripNoises& noises);
- ~SiStripClusterizerConditionsGPU() = default;
-
- // Function to return the actual payload on the memory of the current device
- Data const& getGPUProductAsync(cudaStream_t stream) const;
-
- const DetToFeds& detToFeds() const { return detToFeds_; }
-
- private:
- void setStrip(fedId_t fed, fedCh_t channel, stripId_t strip, std::uint16_t noise, float gain, bool bad) {
- gain_[apvIndex(fed, channel, strip)] = gain;
- noise_[stripIndex(fed, channel, strip)] = noise;
- if (bad) {
- noise_[stripIndex(fed, channel, strip)] |= badBit;
- }
- }
-
- void setInvThickness(fedId_t fed, fedCh_t channel, float invthick) {
- invthick_[channelIndex(fed, channel)] = invthick;
- }
-
- // Holds the data in pinned CPU memory
- std::vector> noise_;
- std::vector> invthick_;
- std::vector> detID_;
- std::vector> iPair_;
- std::vector> gain_;
-
- // Helper that takes care of complexity of transferring the data to
- // multiple devices
- cms::cuda::ESProduct gpuData_;
- DetToFeds detToFeds_;
- };
-} // namespace stripgpu
-
-#endif
diff --git a/CalibFormats/SiStripObjects/src/EventSetup_Registration.cc b/CalibFormats/SiStripObjects/src/EventSetup_Registration.cc
index 05530484f14c4..ddf7a0ffb914f 100644
--- a/CalibFormats/SiStripObjects/src/EventSetup_Registration.cc
+++ b/CalibFormats/SiStripObjects/src/EventSetup_Registration.cc
@@ -23,6 +23,3 @@ TYPELOOKUP_DATA_REG(SiStripQuality);
#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditions.h"
TYPELOOKUP_DATA_REG(SiStripClusterizerConditions);
-
-#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
-TYPELOOKUP_DATA_REG(stripgpu::SiStripClusterizerConditionsGPU);
diff --git a/CalibFormats/SiStripObjects/src/SiStripClusterizerConditionsGPU.cc b/CalibFormats/SiStripObjects/src/SiStripClusterizerConditionsGPU.cc
deleted file mode 100644
index 33d0889ff5550..0000000000000
--- a/CalibFormats/SiStripObjects/src/SiStripClusterizerConditionsGPU.cc
+++ /dev/null
@@ -1,100 +0,0 @@
-#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
-
-#include "CondFormats/SiStripObjects/interface/SiStripNoises.h"
-#include "CalibFormats/SiStripObjects/interface/SiStripGain.h"
-#include "CalibFormats/SiStripObjects/interface/SiStripDetCabling.h"
-#include "CalibFormats/SiStripObjects/interface/SiStripQuality.h"
-#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
-
-#include "DataFormats/SiStripCluster/interface/SiStripClusterTools.h"
-
-namespace stripgpu {
- SiStripClusterizerConditionsGPU::SiStripClusterizerConditionsGPU(const SiStripQuality& quality,
- const SiStripGain* gains,
- const SiStripNoises& noises)
-
- : noise_(sistrip::NUMBER_OF_FEDS * sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH),
- invthick_(sistrip::NUMBER_OF_FEDS * sistrip::FEDCH_PER_FED),
- detID_(sistrip::NUMBER_OF_FEDS * sistrip::FEDCH_PER_FED),
- iPair_(sistrip::NUMBER_OF_FEDS * sistrip::FEDCH_PER_FED),
- gain_(sistrip::NUMBER_OF_FEDS * sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED) {
- // connected: map>
- // map of KEY=detid DATA=vector of apvs, maximum 6 APVs per detector module :
- const auto& connected = quality.cabling()->connected();
- // detCabling: map
- // map of KEY=detid DATA=vector
- const auto& detCabling = quality.cabling()->getDetCabling();
-
- for (const auto& conn : connected) {
- const auto det = conn.first;
- if (!quality.IsModuleBad(det)) {
- const auto detConn_it = detCabling.find(det);
-
- if (detCabling.end() != detConn_it) {
- for (const auto& chan : (*detConn_it).second) {
- if (chan && chan->fedId() && chan->isConnected()) {
- const auto detID = chan->detId();
- const auto fedID = chan->fedId();
- const auto fedCh = chan->fedCh();
- const auto iPair = chan->apvPairNumber();
-
- detToFeds_.emplace_back(detID, iPair, fedID, fedCh);
-
- detID_[channelIndex(fedID, fedCh)] = detID;
- iPair_[channelIndex(fedID, fedCh)] = iPair;
- setInvThickness(fedID, fedCh, siStripClusterTools::sensorThicknessInverse(detID));
-
- auto offset = 256 * iPair;
-
- for (auto strip = 0; strip < 256; ++strip) {
- const auto gainRange = gains->getRange(det);
-
- const auto detstrip = strip + offset;
- const std::uint16_t noise = SiStripNoises::getRawNoise(detstrip, noises.getRange(det));
- const auto gain = SiStripGain::getStripGain(detstrip, gainRange);
- const auto bad = quality.IsStripBad(quality.getRange(det), detstrip);
-
- // gain is actually stored per-APV, not per-strip
- setStrip(fedID, fedCh, detstrip, noise, gain, bad);
- }
- }
- }
- }
- }
- }
-
- std::sort(detToFeds_.begin(), detToFeds_.end(), [](const DetToFed& a, const DetToFed& b) {
- return a.detID() < b.detID() || (a.detID() == b.detID() && a.pair() < b.pair());
- });
- }
-
- SiStripClusterizerConditionsGPU::Data const& SiStripClusterizerConditionsGPU::getGPUProductAsync(
- cudaStream_t stream) const {
- auto const& data = gpuData_.dataForCurrentDeviceAsync(stream, [this](Data& data, cudaStream_t stream) {
- data.noise_ = cms::cuda::make_device_unique(noise_.size(), stream);
- data.invthick_ = cms::cuda::make_device_unique(invthick_.size(), stream);
- data.detID_ = cms::cuda::make_device_unique(detID_.size(), stream);
- data.iPair_ = cms::cuda::make_device_unique(iPair_.size(), stream);
- data.gain_ = cms::cuda::make_device_unique(gain_.size(), stream);
-
- cms::cuda::copyAsync(data.noise_, noise_, stream);
- cms::cuda::copyAsync(data.invthick_, invthick_, stream);
- cms::cuda::copyAsync(data.detID_, detID_, stream);
- cms::cuda::copyAsync(data.iPair_, iPair_, stream);
- cms::cuda::copyAsync(data.gain_, gain_, stream);
-
- data.hostView_ = cms::cuda::make_host_unique(stream);
- data.hostView_->noise_ = data.noise_.get();
- data.hostView_->invthick_ = data.invthick_.get();
- data.hostView_->detID_ = data.detID_.get();
- data.hostView_->iPair_ = data.iPair_.get();
- data.hostView_->gain_ = data.gain_.get();
-
- data.deviceView_ = cms::cuda::make_device_unique(stream);
- cms::cuda::copyAsync(data.deviceView_, data.hostView_, stream);
- });
-
- return data;
- }
-} // namespace stripgpu
diff --git a/CommonTools/TrackerMap/BuildFile.xml b/CommonTools/TrackerMap/BuildFile.xml
index f620aeb6e2bb1..01188ecdc9d64 100644
--- a/CommonTools/TrackerMap/BuildFile.xml
+++ b/CommonTools/TrackerMap/BuildFile.xml
@@ -1,3 +1,5 @@
+
+
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiStripClusterizer/plugins/BuildFile.xml
index 993015c581591..c0472117bc677 100644
--- a/RecoLocalTracker/SiStripClusterizer/plugins/BuildFile.xml
+++ b/RecoLocalTracker/SiStripClusterizer/plugins/BuildFile.xml
@@ -1,11 +1,7 @@
-
+
-
-
-
-
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/ChannelLocsGPU.cc b/RecoLocalTracker/SiStripClusterizer/plugins/ChannelLocsGPU.cc
deleted file mode 100644
index 5b721778284d4..0000000000000
--- a/RecoLocalTracker/SiStripClusterizer/plugins/ChannelLocsGPU.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-#include
-
-#include
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
-
-#include "ChannelLocsGPU.h"
-
-ChannelLocs::ChannelLocs(size_t size, cudaStream_t stream) : ChannelLocsBase(size) {
- if (size > 0) {
- input_ = cms::cuda::make_host_unique(size, stream);
- inoff_ = cms::cuda::make_host_unique(size, stream);
- offset_ = cms::cuda::make_host_unique(size, stream);
- length_ = cms::cuda::make_host_unique(size, stream);
- fedID_ = cms::cuda::make_host_unique(size, stream);
- fedCh_ = cms::cuda::make_host_unique(size, stream);
- detID_ = cms::cuda::make_host_unique(size, stream);
- }
-}
-
-void ChannelLocsView::fill(const ChannelLocsGPU& c) {
- input_ = c.input();
- inoff_ = c.inoff();
- offset_ = c.offset();
- length_ = c.length();
- fedID_ = c.fedID();
- fedCh_ = c.fedCh();
- detID_ = c.detID();
- size_ = c.size();
-}
-
-ChannelLocsGPU::ChannelLocsGPU(size_t size, cudaStream_t stream) : ChannelLocsBase(size) {
- if (size > 0) {
- input_ = cms::cuda::make_device_unique(size, stream);
- inoff_ = cms::cuda::make_device_unique(size, stream);
- offset_ = cms::cuda::make_device_unique(size, stream);
- length_ = cms::cuda::make_device_unique(size, stream);
- fedID_ = cms::cuda::make_device_unique(size, stream);
- fedCh_ = cms::cuda::make_device_unique(size, stream);
- detID_ = cms::cuda::make_device_unique(size, stream);
-
- auto channelLocsView = cms::cuda::make_host_unique(stream);
- channelLocsView->fill(*this);
- channelLocsViewGPU_ = cms::cuda::make_device_unique(stream);
- cms::cuda::copyAsync(channelLocsViewGPU_, channelLocsView, stream);
- }
-}
-
-void ChannelLocsGPU::setVals(const ChannelLocs* c,
- cms::cuda::host::unique_ptr inputGPU,
- cudaStream_t stream) {
- assert(c->size() == size_);
- cms::cuda::copyAsync(input_, inputGPU, size_, stream);
- cms::cuda::copyAsync(inoff_, c->inoff_, size_, stream);
- cms::cuda::copyAsync(offset_, c->offset_, size_, stream);
- cms::cuda::copyAsync(length_, c->length_, size_, stream);
- cms::cuda::copyAsync(fedID_, c->fedID_, size_, stream);
- cms::cuda::copyAsync(fedCh_, c->fedCh_, size_, stream);
- cms::cuda::copyAsync(detID_, c->detID_, size_, stream);
-}
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/ChannelLocsGPU.h b/RecoLocalTracker/SiStripClusterizer/plugins/ChannelLocsGPU.h
deleted file mode 100644
index 343d5a536f035..0000000000000
--- a/RecoLocalTracker/SiStripClusterizer/plugins/ChannelLocsGPU.h
+++ /dev/null
@@ -1,138 +0,0 @@
-#ifndef RecoLocalTracker_SiStripClusterizer_plugins_ChannelLocsGPU_h
-#define RecoLocalTracker_SiStripClusterizer_plugins_ChannelLocsGPU_h
-
-#include
-#include
-
-#include
-
-#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
-#include "DataFormats/SiStripCluster/interface/SiStripTypes.h"
-
-class ChannelLocsGPU;
-
-template class T>
-class ChannelLocsBase {
-public:
- ChannelLocsBase(size_t size) : size_(size) {}
- virtual ~ChannelLocsBase() = default;
-
- ChannelLocsBase(ChannelLocsBase&& arg)
- : input_(std::move(arg.input_)),
- inoff_(std::move(arg.inoff_)),
- offset_(std::move(arg.offset_)),
- length_(std::move(arg.length_)),
- fedID_(std::move(arg.fedID_)),
- fedCh_(std::move(arg.fedCh_)),
- detID_(std::move(arg.detID_)),
- size_(arg.size_) {}
-
- void setChannelLoc(uint32_t index,
- const uint8_t* input,
- size_t inoff,
- size_t offset,
- uint16_t length,
- stripgpu::fedId_t fedID,
- stripgpu::fedCh_t fedCh,
- stripgpu::detId_t detID) {
- input_[index] = input;
- inoff_[index] = inoff;
- offset_[index] = offset;
- length_[index] = length;
- fedID_[index] = fedID;
- fedCh_[index] = fedCh;
- detID_[index] = detID;
- }
-
- size_t size() const { return size_; }
-
- const uint8_t* input(uint32_t index) const { return input_[index]; }
- size_t inoff(uint32_t index) const { return inoff_[index]; }
- size_t offset(uint32_t index) const { return offset_[index]; }
- uint16_t length(uint32_t index) const { return length_[index]; }
- stripgpu::fedId_t fedID(uint32_t index) const { return fedID_[index]; }
- stripgpu::fedCh_t fedCh(uint32_t index) const { return fedCh_[index]; }
- stripgpu::detId_t detID(uint32_t index) const { return detID_[index]; }
-
- const uint8_t* const* input() const { return input_.get(); }
- size_t* inoff() const { return inoff_.get(); }
- size_t* offset() const { return offset_.get(); }
- uint16_t* length() const { return length_.get(); }
- stripgpu::fedId_t* fedID() const { return fedID_.get(); }
- stripgpu::fedCh_t* fedCh() const { return fedCh_.get(); }
- stripgpu::detId_t* detID() const { return detID_.get(); }
-
-protected:
- T input_; // input raw data for channel
- T inoff_; // offset in input raw data
- T offset_; // global offset in alldata
- T length_; // length of channel data
- T fedID_;
- T fedCh_;
- T detID_;
- size_t size_ = 0;
-};
-
-class ChannelLocs : public ChannelLocsBase {
- friend class ChannelLocsGPU;
-
-public:
- ChannelLocs(size_t size, cudaStream_t stream);
- ChannelLocs(ChannelLocs&& arg) : ChannelLocsBase(std::move(arg)) {}
-
- ChannelLocs(ChannelLocs&) = delete;
- ChannelLocs(const ChannelLocs&) = delete;
- ChannelLocs& operator=(const ChannelLocs&) = delete;
- ChannelLocs& operator=(ChannelLocs&&) = delete;
-
- ~ChannelLocs() override = default;
-};
-
-class ChannelLocsView {
-public:
- void fill(const ChannelLocsGPU& c);
-
- __device__ size_t size() const { return size_; }
-
- __device__ const uint8_t* input(uint32_t index) const { return input_[index]; }
- __device__ size_t inoff(uint32_t index) const { return inoff_[index]; }
- __device__ size_t offset(uint32_t index) const { return offset_[index]; }
- __device__ uint16_t length(uint32_t index) const { return length_[index]; }
- __device__ stripgpu::fedId_t fedID(uint32_t index) const { return fedID_[index]; }
- __device__ stripgpu::fedCh_t fedCh(uint32_t index) const { return fedCh_[index]; }
- __device__ stripgpu::detId_t detID(uint32_t index) const { return detID_[index]; }
-
-private:
- const uint8_t* const* input_; // input raw data for channel
- size_t* inoff_; // offset in input raw data
- size_t* offset_; // global offset in alldata
- uint16_t* length_; // length of channel data
- stripgpu::fedId_t* fedID_;
- stripgpu::fedCh_t* fedCh_;
- stripgpu::detId_t* detID_;
- size_t size_;
-};
-
-class ChannelLocsGPU : public ChannelLocsBase {
-public:
- //using Base = ChannelLocsBase;
- ChannelLocsGPU(size_t size, cudaStream_t stream);
- ChannelLocsGPU(ChannelLocsGPU&& arg)
- : ChannelLocsBase(std::move(arg)), channelLocsViewGPU_(std::move(arg.channelLocsViewGPU_)) {}
-
- ChannelLocsGPU(ChannelLocsGPU&) = delete;
- ChannelLocsGPU(const ChannelLocsGPU&) = delete;
- ChannelLocsGPU& operator=(const ChannelLocsGPU&) = delete;
- ChannelLocsGPU& operator=(ChannelLocsGPU&&) = delete;
-
- ~ChannelLocsGPU() override = default;
-
- void setVals(const ChannelLocs* c, cms::cuda::host::unique_ptr inputGPU, cudaStream_t stream);
- const ChannelLocsView* channelLocsView() const { return channelLocsViewGPU_.get(); }
-
-private:
- cms::cuda::device::unique_ptr channelLocsViewGPU_;
-};
-
-#endif
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/ClustersFromRawProducerGPU.cc b/RecoLocalTracker/SiStripClusterizer/plugins/ClustersFromRawProducerGPU.cc
deleted file mode 100644
index 2d7b4c83a4a4a..0000000000000
--- a/RecoLocalTracker/SiStripClusterizer/plugins/ClustersFromRawProducerGPU.cc
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- */
-#include "RecoLocalTracker/Records/interface/SiStripClusterizerConditionsRcd.h"
-
-#include "RecoLocalTracker/SiStripClusterizer/interface/StripClusterizerAlgorithmFactory.h"
-
-#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h"
-#include "EventFilter/SiStripRawToDigi/interface/SiStripFEDBuffer.h"
-
-#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditions.h"
-#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
-
-#include "FWCore/Framework/interface/stream/EDProducer.h"
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
-#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
-#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
-#include "FWCore/Utilities/interface/InputTag.h"
-#include "FWCore/Framework/interface/Event.h"
-#include "FWCore/Framework/interface/EventSetup.h"
-#include "FWCore/Framework/interface/ESHandle.h"
-#include "FWCore/Utilities/interface/Likely.h"
-
-#include "FWCore/MessageLogger/interface/MessageLogger.h"
-
-#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-
-#include "SiStripRawToClusterGPUKernel.h"
-#include "ChannelLocsGPU.h"
-
-//#include
-#include
-#include
-
-namespace {
- std::unique_ptr fillBuffer(int fedId, const FEDRawData& rawData) {
- std::unique_ptr buffer;
-
- // Check on FEDRawData pointer
- const auto st_buffer = sistrip::preconstructCheckFEDBuffer(rawData);
- if UNLIKELY (sistrip::FEDBufferStatusCode::SUCCESS != st_buffer) {
- LogDebug(sistrip::mlRawToCluster_) << "[ClustersFromRawProducer::" << __func__ << "]" << st_buffer
- << " for FED ID " << fedId;
- return buffer;
- }
- buffer = std::make_unique(rawData);
- const auto st_chan = buffer->findChannels();
- if UNLIKELY (sistrip::FEDBufferStatusCode::SUCCESS != st_chan) {
- LogDebug(sistrip::mlRawToCluster_) << "Exception caught when creating FEDBuffer object for FED " << fedId << ": "
- << st_chan;
- buffer.reset();
- return buffer;
- }
- if UNLIKELY (!buffer->doChecks(false)) {
- LogDebug(sistrip::mlRawToCluster_) << "Exception caught when creating FEDBuffer object for FED " << fedId
- << ": FED Buffer check fails";
- buffer.reset();
- return buffer;
- }
-
- return buffer;
- }
-} // namespace
-
-class SiStripClusterizerFromRawGPU final : public edm::stream::EDProducer {
-public:
- explicit SiStripClusterizerFromRawGPU(const edm::ParameterSet& conf)
- : buffers_(sistrip::FED_ID_MAX),
- raw_(sistrip::FED_ID_MAX),
- gpuAlgo_(conf.getParameter("Clusterizer")),
- inputToken_(consumes(conf.getParameter("ProductLabel"))),
- outputToken_(produces>()),
- conditionsToken_(esConsumes(edm::ESInputTag{"", conf.getParameter("ConditionsLabel")})),
- cpuConditionsToken_(esConsumes(edm::ESInputTag{"", conf.getParameter("ConditionsLabel")})) {}
-
- static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
-
-private:
- void acquire(edm::Event const& ev,
- edm::EventSetup const& es,
- edm::WaitingTaskWithArenaHolder waitingTaskHolder) override {
- const auto& conditions = es.getData(conditionsToken_); //these need to be GPU conditions
- const auto& cpuConditions = es.getData(cpuConditionsToken_); //CPU conditions
-
- // Sets the current device and creates a CUDA stream
- cms::cuda::ScopedContextAcquire ctx{ev.streamID(), std::move(waitingTaskHolder), ctxState_};
-
- // get raw data
- auto const& rawData = ev.get(inputToken_);
- run(rawData, cpuConditions);
-
- // Queues asynchronous data transfers and kernels to the CUDA stream
- // returned by cms::cuda::ScopedContextAcquire::stream()
- gpuAlgo_.makeAsync(raw_, buffers_, conditions, ctx.stream());
-
- // Destructor of ctx queues a callback to the CUDA stream notifying
- // waitingTaskHolder when the queued asynchronous work has finished
- }
-
- void produce(edm::Event& ev, const edm::EventSetup& es) override {
- cms::cuda::ScopedContextProduce ctx{ctxState_};
-
- // Now getResult() returns data in GPU memory that is passed to the
- // constructor of OutputData. cms::cuda::ScopedContextProduce::emplace() wraps the
- // OutputData to cms::cuda::Product. cms::cuda::Product stores also
- // the current device and the CUDA stream since those will be needed
- // in the consumer side.
- ctx.emplace(ev, outputToken_, gpuAlgo_.getResults(ctx.stream()));
-
- for (auto& buf : buffers_)
- buf.reset(nullptr);
- }
-
-private:
- void run(const FEDRawDataCollection& rawColl, const SiStripClusterizerConditions& conditions);
- void fill(uint32_t idet, const FEDRawDataCollection& rawColl, const SiStripClusterizerConditions& conditions);
-
-private:
- std::vector> buffers_;
- std::vector raw_;
- cms::cuda::ContextState ctxState_;
-
- stripgpu::SiStripRawToClusterGPUKernel gpuAlgo_;
-
- edm::EDGetTokenT inputToken_;
- edm::EDPutTokenT> outputToken_;
- edm::ESGetToken conditionsToken_;
- edm::ESGetToken cpuConditionsToken_;
-};
-
-void SiStripClusterizerFromRawGPU::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
- edm::ParameterSetDescription desc;
-
- desc.add("ProductLabel", edm::InputTag("rawDataCollector"));
- desc.add("ConditionsLabel", "");
-
- edm::ParameterSetDescription clusterizer;
- StripClusterizerAlgorithmFactory::fillDescriptions(clusterizer);
- desc.add("Clusterizer", clusterizer);
-
- descriptions.addWithDefaultLabel(desc);
-}
-
-void SiStripClusterizerFromRawGPU::run(const FEDRawDataCollection& rawColl,
- const SiStripClusterizerConditions& conditions) {
- // loop over good det in cabling
- for (auto idet : conditions.allDetIds()) {
- fill(idet, rawColl, conditions);
- } // end loop over dets
-}
-
-void SiStripClusterizerFromRawGPU::fill(uint32_t idet,
- const FEDRawDataCollection& rawColl,
- const SiStripClusterizerConditions& conditions) {
- auto const& det = conditions.findDetId(idet);
- if (!det.valid())
- return;
-
- // Loop over apv-pairs of det
- for (auto const conn : conditions.currentConnection(det)) {
- if UNLIKELY (!conn)
- continue;
-
- const uint16_t fedId = conn->fedId();
-
- // If fed id is null or connection is invalid continue
- if UNLIKELY (!fedId || !conn->isConnected()) {
- continue;
- }
-
- // If Fed hasnt already been initialised, extract data and initialise
- sistrip::FEDBuffer* buffer = buffers_[fedId].get();
- if (!buffer) {
- const FEDRawData& rawData = rawColl.FEDData(fedId);
- raw_[fedId] = &rawData;
- buffers_[fedId] = fillBuffer(fedId, rawData);
- }
- } // end loop over conn
-}
-
-#include "FWCore/Framework/interface/MakerMacros.h"
-DEFINE_FWK_MODULE(SiStripClusterizerFromRawGPU);
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClusterizerConditionsGPUESProducer.cc b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClusterizerConditionsGPUESProducer.cc
deleted file mode 100644
index 67f4f3152dd9c..0000000000000
--- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClusterizerConditionsGPUESProducer.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/**\class SiStripClusterizerConditionsGPUESProducer
- *
- * Create a GPU cache object for fast access to conditions needed by the SiStrip clusterizer
- *
- * @see SiStripClusterizerConditions
- */
-#include
-
-#include "FWCore/Framework/interface/ModuleFactory.h"
-#include "FWCore/Framework/interface/ESProducer.h"
-#include "FWCore/Framework/interface/ESHandle.h"
-#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
-#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
-
-#include "RecoLocalTracker/Records/interface/SiStripClusterizerConditionsRcd.h"
-
-#include "CalibFormats/SiStripObjects/interface/SiStripGain.h"
-#include "CalibFormats/SiStripObjects/interface/SiStripDetCabling.h"
-#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
-
-using namespace stripgpu;
-
-class SiStripClusterizerConditionsGPUESProducer : public edm::ESProducer {
-public:
- SiStripClusterizerConditionsGPUESProducer(const edm::ParameterSet&);
- ~SiStripClusterizerConditionsGPUESProducer() override {}
-
- static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
-
- using ReturnType = std::unique_ptr;
- ReturnType produce(const SiStripClusterizerConditionsRcd&);
-
-private:
- edm::ESGetToken gainToken_;
- edm::ESGetToken noisesToken_;
- edm::ESGetToken qualityToken_;
-};
-
-SiStripClusterizerConditionsGPUESProducer::SiStripClusterizerConditionsGPUESProducer(const edm::ParameterSet& iConfig) {
- auto cc = setWhatProduced(this, iConfig.getParameter("Label"));
-
- gainToken_ = cc.consumesFrom();
- noisesToken_ = cc.consumesFrom();
- qualityToken_ = cc.consumesFrom(
- edm::ESInputTag{"", iConfig.getParameter("QualityLabel")});
-}
-
-void SiStripClusterizerConditionsGPUESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
- edm::ParameterSetDescription desc;
- desc.add("QualityLabel", "");
- desc.add("Label", "");
- descriptions.add("SiStripClusterizerConditionsGPUESProducer", desc);
-}
-
-SiStripClusterizerConditionsGPUESProducer::ReturnType SiStripClusterizerConditionsGPUESProducer::produce(
- const SiStripClusterizerConditionsRcd& iRecord) {
- auto gainsH = iRecord.getTransientHandle(gainToken_);
- const auto& noises = iRecord.get(noisesToken_);
- const auto& quality = iRecord.get(qualityToken_);
-
- return std::make_unique(quality, gainsH.product(), noises);
-}
-
-DEFINE_FWK_EVENTSETUP_MODULE(SiStripClusterizerConditionsGPUESProducer);
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersFromSOA.cc b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersFromSOA.cc
deleted file mode 100644
index 8b891382085c7..0000000000000
--- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersFromSOA.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- */
-#include "DataFormats/SiStripCluster/interface/SiStripCluster.h"
-#include "DataFormats/Common/interface/DetSetVectorNew.h"
-
-#include "FWCore/Framework/interface/stream/EDProducer.h"
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
-#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
-#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
-#include "FWCore/Utilities/interface/InputTag.h"
-#include "FWCore/Framework/interface/Event.h"
-#include "FWCore/Framework/interface/EventSetup.h"
-#include "FWCore/Framework/interface/ESHandle.h"
-#include "FWCore/MessageLogger/interface/MessageLogger.h"
-
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
-
-#include
-
-class SiStripClustersFromSOA final : public edm::stream::EDProducer<> {
-public:
- explicit SiStripClustersFromSOA(const edm::ParameterSet& conf)
- : inputToken_(consumes(conf.getParameter("ProductLabel"))),
- outputToken_(produces>()) {}
-
- static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
- edm::ParameterSetDescription desc;
-
- desc.add("ProductLabel", edm::InputTag("siStripClustersSOAtoHost"));
- descriptions.addWithDefaultLabel(desc);
- }
-
-private:
- void produce(edm::Event& ev, const edm::EventSetup& es) override {
- const auto& clust_data = ev.get(inputToken_);
-
- const int nSeedStripsNC = clust_data.nClusters();
- const auto clusterSize = clust_data.clusterSize().get();
- const auto clusterADCs = clust_data.clusterADCs().get();
- const auto detIDs = clust_data.clusterDetId().get();
- const auto stripIDs = clust_data.firstStrip().get();
- const auto trueCluster = clust_data.trueCluster().get();
-
- const unsigned int initSeedStripsSize = 15000;
-
- using out_t = edmNew::DetSetVector;
- auto output{std::make_unique(edmNew::DetSetVector())};
- output->reserve(initSeedStripsSize, nSeedStripsNC);
-
- std::vector adcs;
-
- for (int i = 0; i < nSeedStripsNC;) {
- const auto detid = detIDs[i];
- out_t::FastFiller record(*output, detid);
-
- while (i < nSeedStripsNC && detIDs[i] == detid) {
- if (trueCluster[i]) {
- const auto size = clusterSize[i];
- const auto firstStrip = stripIDs[i];
-
- adcs.clear();
- adcs.reserve(size);
-
- for (uint32_t j = 0; j < size; ++j) {
- adcs.push_back(clusterADCs[i + j * nSeedStripsNC]);
- }
- record.push_back(SiStripCluster(firstStrip, std::move(adcs)));
- }
- i++;
- }
- }
-
- output->shrink_to_fit();
- ev.put(std::move(output));
- }
-
-private:
- edm::EDGetTokenT inputToken_;
- edm::EDPutTokenT> outputToken_;
-};
-
-#include "FWCore/Framework/interface/MakerMacros.h"
-DEFINE_FWK_MODULE(SiStripClustersFromSOA);
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersSOAtoHost.cc b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersSOAtoHost.cc
deleted file mode 100644
index a51bb1216d0e0..0000000000000
--- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersSOAtoHost.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- */
-#include "DataFormats/SiStripCluster/interface/SiStripCluster.h"
-
-#include "FWCore/Framework/interface/stream/EDProducer.h"
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
-#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
-#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
-#include "FWCore/Utilities/interface/InputTag.h"
-#include "FWCore/Framework/interface/Event.h"
-#include "FWCore/Framework/interface/EventSetup.h"
-#include "FWCore/Framework/interface/ESHandle.h"
-#include "FWCore/MessageLogger/interface/MessageLogger.h"
-
-#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
-
-#include
-
-class SiStripSOAtoHost {
-public:
- SiStripSOAtoHost() = default;
- void makeAsync(const SiStripClustersCUDADevice& clusters_d, cudaStream_t stream) {
- hostView_ = std::make_unique(clusters_d, stream);
- }
- std::unique_ptr getResults() { return std::move(hostView_); }
-
-private:
- std::unique_ptr hostView_;
-};
-
-class SiStripClustersSOAtoHost final : public edm::stream::EDProducer {
-public:
- explicit SiStripClustersSOAtoHost(const edm::ParameterSet& conf)
- : inputToken_(
- consumes>(conf.getParameter("ProductLabel"))),
- outputToken_(produces()) {}
-
- static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
- edm::ParameterSetDescription desc;
-
- desc.add("ProductLabel", edm::InputTag("siStripClusterizerFromRawGPU"));
- descriptions.addWithDefaultLabel(desc);
- }
-
-private:
- void acquire(edm::Event const& ev,
- edm::EventSetup const& es,
- edm::WaitingTaskWithArenaHolder waitingTaskHolder) override {
- const auto& wrapper = ev.get(inputToken_);
-
- // Sets the current device and creates a CUDA stream
- cms::cuda::ScopedContextAcquire ctx{wrapper, std::move(waitingTaskHolder)};
-
- const auto& input = ctx.get(wrapper);
-
- // Queues asynchronous data transfers and kernels to the CUDA stream
- // returned by cms::cuda::ScopedContextAcquire::stream()
- gpuAlgo_.makeAsync(input, ctx.stream());
-
- // Destructor of ctx queues a callback to the CUDA stream notifying
- // waitingTaskHolder when the queued asynchronous work has finished
- }
-
- void produce(edm::Event& ev, const edm::EventSetup& es) override { ev.put(gpuAlgo_.getResults()); }
-
-private:
- SiStripSOAtoHost gpuAlgo_;
-
- edm::EDGetTokenT> inputToken_;
- edm::EDPutTokenT outputToken_;
-};
-
-#include "FWCore/Framework/interface/MakerMacros.h"
-DEFINE_FWK_MODULE(SiStripClustersSOAtoHost);
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cc b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cc
deleted file mode 100644
index a8b9aa04a00b3..0000000000000
--- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cc
+++ /dev/null
@@ -1,185 +0,0 @@
-#include "EventFilter/SiStripRawToDigi/interface/SiStripFEDBuffer.h"
-#include "DataFormats/Common/interface/DetSetVectorNew.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
-#include "RecoLocalTracker/SiStripClusterizer/interface/ClusterChargeCut.h"
-
-#include "SiStripRawToClusterGPUKernel.h"
-
-#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
-#include "ChannelLocsGPU.h"
-#include "StripDataView.h"
-
-namespace stripgpu {
- StripDataGPU::StripDataGPU(size_t size, cudaStream_t stream) {
- alldataGPU_ = cms::cuda::make_device_unique(size, stream);
- channelGPU_ = cms::cuda::make_device_unique(size, stream);
- stripIdGPU_ = cms::cuda::make_device_unique(size, stream);
- }
-
- SiStripRawToClusterGPUKernel::SiStripRawToClusterGPUKernel(const edm::ParameterSet& conf)
- : fedIndex_(sistrip::NUMBER_OF_FEDS, stripgpu::invalidFed),
- channelThreshold_(conf.getParameter("ChannelThreshold")),
- seedThreshold_(conf.getParameter("SeedThreshold")),
- clusterThresholdSquared_(std::pow(conf.getParameter("ClusterThreshold"), 2.0f)),
- maxSequentialHoles_(conf.getParameter("MaxSequentialHoles")),
- maxSequentialBad_(conf.getParameter("MaxSequentialBad")),
- maxAdjacentBad_(conf.getParameter("MaxAdjacentBad")),
- maxClusterSize_(conf.getParameter("MaxClusterSize")),
- minGoodCharge_(clusterChargeCut(conf)) {
- fedRawDataOffsets_.reserve(sistrip::NUMBER_OF_FEDS);
- }
-
- void SiStripRawToClusterGPUKernel::makeAsync(const std::vector& rawdata,
- const std::vector>& buffers,
- const SiStripClusterizerConditionsGPU& conditions,
- cudaStream_t stream) {
- size_t totalSize{0};
- for (const auto& buff : buffers) {
- if (buff != nullptr) {
- totalSize += buff->bufferSize();
- }
- }
-
- auto fedRawDataHost = cms::cuda::make_host_unique(totalSize, stream);
- auto fedRawDataGPU = cms::cuda::make_device_unique(totalSize, stream);
-
- size_t off = 0;
- fedRawDataOffsets_.clear();
- fedIndex_.clear();
- fedIndex_.resize(sistrip::NUMBER_OF_FEDS, stripgpu::invalidFed);
-
- sistrip::FEDReadoutMode mode = sistrip::READOUT_MODE_INVALID;
-
- for (size_t fedi = 0; fedi < buffers.size(); ++fedi) {
- auto& buff = buffers[fedi];
- if (buff != nullptr) {
- const auto raw = rawdata[fedi];
- memcpy(fedRawDataHost.get() + off, raw->data(), raw->size());
- fedIndex_[stripgpu::fedIndex(fedi)] = fedRawDataOffsets_.size();
- fedRawDataOffsets_.push_back(off);
- off += raw->size();
- if (fedRawDataOffsets_.size() == 1) {
- mode = buff->readoutMode();
- } else {
- if (buff->readoutMode() != mode) {
- throw cms::Exception("[SiStripRawToClusterGPUKernel] inconsistent readout mode ")
- << buff->readoutMode() << " != " << mode;
- }
- }
- }
- }
- // send rawdata to GPU
- cms::cuda::copyAsync(fedRawDataGPU, fedRawDataHost, totalSize, stream);
-
- const auto& detmap = conditions.detToFeds();
- if ((mode != sistrip::READOUT_MODE_ZERO_SUPPRESSED) && (mode != sistrip::READOUT_MODE_ZERO_SUPPRESSED_LITE10)) {
- throw cms::Exception("[SiStripRawToClusterGPUKernel] unsupported readout mode ") << mode;
- }
- const uint16_t headerlen = mode == sistrip::READOUT_MODE_ZERO_SUPPRESSED ? 7 : 2;
- size_t offset = 0;
- auto chanlocs = std::make_unique(detmap.size(), stream);
- auto inputGPU = cms::cuda::make_host_unique(chanlocs->size(), stream);
-
- // iterate over the detector in DetID/APVPair order
- // mapping out where the data are
- for (size_t i = 0; i < detmap.size(); ++i) {
- const auto& detp = detmap[i];
- const auto fedId = detp.fedID();
- const auto fedCh = detp.fedCh();
- const auto fedi = fedIndex_[stripgpu::fedIndex(fedId)];
-
- if (fedi != invalidFed) {
- const auto buffer = buffers[fedId].get();
- const auto& channel = buffer->channel(detp.fedCh());
-
- auto len = channel.length();
- auto off = channel.offset();
-
- assert(len >= headerlen || len == 0);
-
- if (len >= headerlen) {
- len -= headerlen;
- off += headerlen;
- }
-
- chanlocs->setChannelLoc(i, channel.data(), off, offset, len, fedId, fedCh, detp.detID());
- inputGPU[i] = fedRawDataGPU.get() + fedRawDataOffsets_[fedi] + (channel.data() - rawdata[fedId]->data());
- offset += len;
-
- } else {
- chanlocs->setChannelLoc(i, nullptr, 0, 0, 0, invalidFed, 0, invalidDet);
- inputGPU[i] = nullptr;
- }
- }
-
- const auto n_strips = offset;
-
- sst_data_d_ = cms::cuda::make_host_unique(stream);
- sst_data_d_->nStrips = n_strips;
-
- chanlocsGPU_ = std::make_unique(detmap.size(), stream);
- chanlocsGPU_->setVals(chanlocs.get(), std::move(inputGPU), stream);
-
- stripdata_ = std::make_unique(n_strips, stream);
-
- const auto& condGPU = conditions.getGPUProductAsync(stream);
-
- unpackChannelsGPU(condGPU.deviceView(), stream);
-#ifdef GPU_CHECK
- cudaCheck(cudaStreamSynchronize(stream));
-#endif
-
-#ifdef EDM_ML_DEBUG
- auto outdata = cms::cuda::make_host_unique(n_strips, stream);
- cms::cuda::copyAsync(outdata, stripdata_->alldataGPU_, n_strips, stream);
- cudaCheck(cudaStreamSynchronize(stream));
-
- constexpr int xor3bits = 7;
- for (size_t i = 0; i < chanlocs->size(); ++i) {
- const auto data = chanlocs->input(i);
- const auto len = chanlocs->length(i);
-
- if (data != nullptr && len > 0) {
- auto aoff = chanlocs->offset(i);
- auto choff = chanlocs->inoff(i);
- const auto end = choff + len;
-
- while (choff < end) {
- const auto stripIndex = data[choff++ ^ xor3bits];
- const auto groupLength = data[choff++ ^ xor3bits];
- aoff += 2;
- for (auto k = 0; k < groupLength; ++k, ++choff, ++aoff) {
- if (data[choff ^ xor3bits] != outdata[aoff]) {
- LogDebug("SiStripRawToClusterGPUKernel")
- << "Strip mismatch " << stripIndex << " i:k " << i << ":" << k << " "
- << (uint32_t)data[choff ^ xor3bits] << " != " << (uint32_t)outdata[aoff] << std::endl;
- }
- }
- }
- }
- }
- outdata.reset(nullptr);
-#endif
-
- fedRawDataGPU.reset();
- allocateSSTDataGPU(n_strips, stream);
- setSeedStripsNCIndexGPU(condGPU.deviceView(), stream);
-
- clusters_d_ = SiStripClustersCUDADevice(kMaxSeedStrips, maxClusterSize_, stream);
- findClusterGPU(condGPU.deviceView(), stream);
-
- stripdata_.reset();
- }
-
- SiStripClustersCUDADevice SiStripRawToClusterGPUKernel::getResults(cudaStream_t stream) {
- reset();
-
- return std::move(clusters_d_);
- }
-
- void SiStripRawToClusterGPUKernel::reset() {
- chanlocsGPU_.reset();
- sst_data_d_.reset();
- }
-} // namespace stripgpu
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cu b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cu
deleted file mode 100644
index c16b4fed30448..0000000000000
--- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cu
+++ /dev/null
@@ -1,581 +0,0 @@
-//#define GPU_DEBUG
-#if defined(EDM_ML_DEBUG) || defined(GPU_DEBUG)
-#define GPU_CHECK
-#include
-#endif
-
-#include
-// prevent _Float16 defined by CUDA headers from hiding the ISO C type used by GCC
-#ifdef _Float16
-#undef _Float16
-#endif
-
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
-#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/allocate_device.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/allocate_host.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
-
-#include "ChannelLocsGPU.h"
-#include "SiStripRawToClusterGPUKernel.h"
-#include "StripDataView.h"
-
-using namespace stripgpu;
-using ConditionsDeviceView = SiStripClusterizerConditionsGPU::Data::DeviceView;
-
-__global__ static void unpackChannels(const ChannelLocsView *chanlocs,
- const ConditionsDeviceView *conditions,
- uint8_t *alldata,
- uint16_t *channel,
- stripId_t *stripId) {
- const int tid = threadIdx.x;
- const int bid = blockIdx.x;
- const int nthreads = blockDim.x;
-
- const auto first = nthreads * bid + tid;
- const auto stride = blockDim.x * gridDim.x;
- for (auto chan = first; chan < chanlocs->size(); chan += stride) {
- const auto fedid = chanlocs->fedID(chan);
- const auto fedch = chanlocs->fedCh(chan);
- const auto ipair = conditions->iPair(fedid, fedch);
- const auto ipoff = sistrip::STRIPS_PER_FEDCH * ipair;
-
- const auto data = chanlocs->input(chan);
- const auto len = chanlocs->length(chan);
-
- if (data != nullptr && len > 0) {
- auto aoff = chanlocs->offset(chan);
- auto choff = chanlocs->inoff(chan);
- const auto end = choff + len;
-
- while (choff < end) {
- auto stripIndex = data[(choff++) ^ 7] + ipoff;
- const auto groupLength = data[(choff++) ^ 7];
-
- for (auto i = 0; i < 2; ++i) {
- stripId[aoff] = invalidStrip;
- alldata[aoff++] = 0;
- }
-
- for (auto i = 0; i < groupLength; ++i) {
- stripId[aoff] = stripIndex++;
- channel[aoff] = chan;
- alldata[aoff++] = data[(choff++) ^ 7];
- }
- }
- } // choff < end
- } // data != nullptr && len > 0
-} // chan < chanlocs->size()
-
-__global__ static void setSeedStripsGPU(StripDataView *sst_data_d, const ConditionsDeviceView *conditions) {
- const int nStrips = sst_data_d->nStrips;
- const auto __restrict__ chanlocs = sst_data_d->chanlocs;
- const uint8_t *__restrict__ adc = sst_data_d->adc;
- const uint16_t *__restrict__ channels = sst_data_d->channel;
- const uint16_t *__restrict__ stripId = sst_data_d->stripId;
- int *__restrict__ seedStripsMask = sst_data_d->seedStripsMask;
- int *__restrict__ seedStripsNCMask = sst_data_d->seedStripsNCMask;
- const float seedThreshold = sst_data_d->seedThreshold;
-
- const int tid = threadIdx.x;
- const int bid = blockIdx.x;
- const int nthreads = blockDim.x;
- const int first = nthreads * bid + tid;
- const int stride = blockDim.x * gridDim.x;
-
- for (int i = first; i < nStrips; i += stride) {
- seedStripsMask[i] = 0;
- seedStripsNCMask[i] = 0;
- const stripId_t strip = stripId[i];
- if (strip != invalidStrip) {
- const auto chan = channels[i];
- const fedId_t fed = chanlocs->fedID(chan);
- const fedCh_t channel = chanlocs->fedCh(chan);
- const float noise_i = conditions->noise(fed, channel, strip);
- const uint8_t adc_i = adc[i];
-
- seedStripsMask[i] = (adc_i >= static_cast(noise_i * seedThreshold)) ? 1 : 0;
- seedStripsNCMask[i] = seedStripsMask[i];
- }
- }
-}
-
-__global__ static void setNCSeedStripsGPU(StripDataView *sst_data_d, const ConditionsDeviceView *conditions) {
- const int nStrips = sst_data_d->nStrips;
- const auto __restrict__ chanlocs = sst_data_d->chanlocs;
- const uint16_t *__restrict__ channels = sst_data_d->channel;
- const uint16_t *__restrict__ stripId = sst_data_d->stripId;
- const int *__restrict__ seedStripsMask = sst_data_d->seedStripsMask;
- int *__restrict__ seedStripsNCMask = sst_data_d->seedStripsNCMask;
-
- const int tid = threadIdx.x;
- const int bid = blockIdx.x;
- const int nthreads = blockDim.x;
- const int first = nthreads * bid + tid;
- const int stride = blockDim.x * gridDim.x;
-
- for (int i = first; i < nStrips; i += stride) {
- if (i > 0) {
- const auto detid = chanlocs->detID(channels[i]);
- const auto detid1 = chanlocs->detID(channels[i - 1]);
-
- if (seedStripsMask[i] && seedStripsMask[i - 1] && (stripId[i] - stripId[i - 1]) == 1 && (detid == detid1))
- seedStripsNCMask[i] = 0;
- }
- }
-}
-
-__global__ static void setStripIndexGPU(StripDataView *sst_data_d) {
- const int nStrips = sst_data_d->nStrips;
- const int *__restrict__ seedStripsNCMask = sst_data_d->seedStripsNCMask;
- const int *__restrict__ prefixSeedStripsNCMask = sst_data_d->prefixSeedStripsNCMask;
- int *__restrict__ seedStripsNCIndex = sst_data_d->seedStripsNCIndex;
-
- const int tid = threadIdx.x;
- const int bid = blockIdx.x;
- const int nthreads = blockDim.x;
- const int first = nthreads * bid + tid;
- const int stride = blockDim.x * gridDim.x;
-
- for (int i = first; i < nStrips; i += stride) {
- if (seedStripsNCMask[i] == 1) {
- const int index = prefixSeedStripsNCMask[i];
- seedStripsNCIndex[index] = i;
- }
- }
-}
-
-__global__ static void findLeftRightBoundaryGPU(const StripDataView *sst_data_d,
- const ConditionsDeviceView *conditions,
- SiStripClustersCUDADevice::DeviceView *clust_data_d) {
- const int nStrips = sst_data_d->nStrips;
- const int *__restrict__ seedStripsNCIndex = sst_data_d->seedStripsNCIndex;
- const auto __restrict__ chanlocs = sst_data_d->chanlocs;
- const uint16_t *__restrict__ stripId = sst_data_d->stripId;
- const uint16_t *__restrict__ channels = sst_data_d->channel;
- const uint8_t *__restrict__ adc = sst_data_d->adc;
- const int nSeedStripsNC = std::min(kMaxSeedStrips, *(sst_data_d->prefixSeedStripsNCMask + nStrips - 1));
- const uint8_t maxSequentialHoles = sst_data_d->maxSequentialHoles;
- const float channelThreshold = sst_data_d->channelThreshold;
- const float clusterThresholdSquared = sst_data_d->clusterThresholdSquared;
- const int clusterSizeLimit = sst_data_d->clusterSizeLimit;
-
- auto __restrict__ clusterIndexLeft = clust_data_d->clusterIndex_;
- auto __restrict__ clusterSize = clust_data_d->clusterSize_;
- auto __restrict__ clusterDetId = clust_data_d->clusterDetId_;
- auto __restrict__ firstStrip = clust_data_d->firstStrip_;
- auto __restrict__ trueCluster = clust_data_d->trueCluster_;
-
- const int tid = threadIdx.x;
- const int bid = blockIdx.x;
- const int nthreads = blockDim.x;
- const int first = nthreads * bid + tid;
- const int stride = blockDim.x * gridDim.x;
-
- for (int i = first; i < nSeedStripsNC; i += stride) {
- const auto index = seedStripsNCIndex[i];
- const auto chan = channels[index];
- const auto fed = chanlocs->fedID(chan);
- const auto channel = chanlocs->fedCh(chan);
- const auto det = chanlocs->detID(chan);
- const auto strip = stripId[index];
- const auto noise_i = conditions->noise(fed, channel, strip);
-
- auto noiseSquared_i = noise_i * noise_i;
- float adcSum_i = static_cast(adc[index]);
- auto testIndex = index - 1;
- auto size = 1;
-
- auto addtocluster = [&](int &indexLR) {
- const auto testchan = channels[testIndex];
- const auto testFed = chanlocs->fedID(testchan);
- const auto testChannel = chanlocs->fedCh(testchan);
- const auto testStrip = stripId[testIndex];
- const auto testNoise = conditions->noise(testFed, testChannel, testStrip);
- const auto testADC = adc[testIndex];
-
- if (testADC >= static_cast(testNoise * channelThreshold)) {
- ++size;
- indexLR = testIndex;
- noiseSquared_i += testNoise * testNoise;
- adcSum_i += static_cast(testADC);
- }
- };
-
- // find left boundary
- auto indexLeft = index;
-
- if (testIndex >= 0 && stripId[testIndex] == invalidStrip) {
- testIndex -= 2;
- }
-
- if (testIndex >= 0) {
- const auto testchan = channels[testIndex];
- const auto testDet = chanlocs->detID(testchan);
- auto rangeLeft = stripId[indexLeft] - stripId[testIndex] - 1;
- auto sameDetLeft = det == testDet;
-
- while (sameDetLeft && rangeLeft >= 0 && rangeLeft <= maxSequentialHoles && size < clusterSizeLimit + 1) {
- addtocluster(indexLeft);
- --testIndex;
- if (testIndex >= 0 && stripId[testIndex] == invalidStrip) {
- testIndex -= 2;
- }
- if (testIndex >= 0) {
- rangeLeft = stripId[indexLeft] - stripId[testIndex] - 1;
- const auto newchan = channels[testIndex];
- const auto newdet = chanlocs->detID(newchan);
- sameDetLeft = det == newdet;
- } else {
- sameDetLeft = false;
- }
- } // while loop
- } // testIndex >= 0
-
- // find right boundary
- auto indexRight = index;
- testIndex = index + 1;
-
- if (testIndex < nStrips && stripId[testIndex] == invalidStrip) {
- testIndex += 2;
- }
-
- if (testIndex < nStrips) {
- const auto testchan = channels[testIndex];
- const auto testDet = chanlocs->detID(testchan);
- auto rangeRight = stripId[testIndex] - stripId[indexRight] - 1;
- auto sameDetRight = det == testDet;
-
- while (sameDetRight && rangeRight >= 0 && rangeRight <= maxSequentialHoles && size < clusterSizeLimit + 1) {
- addtocluster(indexRight);
- ++testIndex;
- if (testIndex < nStrips && stripId[testIndex] == invalidStrip) {
- testIndex += 2;
- }
- if (testIndex < nStrips) {
- rangeRight = stripId[testIndex] - stripId[indexRight] - 1;
- const auto newchan = channels[testIndex];
- const auto newdet = chanlocs->detID(newchan);
- sameDetRight = det == newdet;
- } else {
- sameDetRight = false;
- }
- } // while loop
- } // testIndex < nStrips
- clusterIndexLeft[i] = indexLeft;
- clusterSize[i] = indexRight - indexLeft + 1;
- clusterDetId[i] = det;
- firstStrip[i] = stripId[indexLeft];
- trueCluster[i] =
- (noiseSquared_i * clusterThresholdSquared <= adcSum_i * adcSum_i) and (clusterSize[i] <= clusterSizeLimit);
- } // i < nSeedStripsNC
- if (first == 0) {
- clust_data_d->nClusters_ = nSeedStripsNC;
- }
-}
-
-__global__ static void checkClusterConditionGPU(StripDataView *sst_data_d,
- const ConditionsDeviceView *conditions,
- SiStripClustersCUDADevice::DeviceView *clust_data_d) {
- const uint16_t *__restrict__ stripId = sst_data_d->stripId;
- const auto __restrict__ chanlocs = sst_data_d->chanlocs;
- const uint16_t *__restrict__ channels = sst_data_d->channel;
- const uint8_t *__restrict__ adc = sst_data_d->adc;
- const float minGoodCharge = sst_data_d->minGoodCharge; //1620.0;
- const auto nSeedStripsNC = clust_data_d->nClusters_;
- const auto __restrict__ clusterIndexLeft = clust_data_d->clusterIndex_;
-
- auto __restrict__ clusterSize = clust_data_d->clusterSize_;
- auto __restrict__ clusterADCs = clust_data_d->clusterADCs_;
- auto __restrict__ trueCluster = clust_data_d->trueCluster_;
- auto __restrict__ barycenter = clust_data_d->barycenter_;
- auto __restrict__ charge = clust_data_d->charge_;
-
- constexpr uint16_t stripIndexMask = 0x7FFF;
-
- const int tid = threadIdx.x;
- const int bid = blockIdx.x;
- const int nthreads = blockDim.x;
- const int first = nthreads * bid + tid;
- const int stride = blockDim.x * gridDim.x;
-
- for (int i = first; i < nSeedStripsNC; i += stride) {
- if (trueCluster[i]) {
- const int left = clusterIndexLeft[i];
- const int size = clusterSize[i];
-
- if (i > 0 && clusterIndexLeft[i - 1] == left) {
- trueCluster[i] = 0; // ignore duplicates
- } else {
- float adcSum = 0.0f;
- int sumx = 0;
- int suma = 0;
-
- auto j = 0;
- for (int k = 0; k < size; k++) {
- const auto index = left + k;
- const auto chan = channels[index];
- const auto fed = chanlocs->fedID(chan);
- const auto channel = chanlocs->fedCh(chan);
- const auto strip = stripId[index];
-#ifdef GPU_CHECK
- if (fed == invalidFed) {
- printf("Invalid fed index %d\n", index);
- }
-#endif
- if (strip != invalidStrip) {
- const float gain_j = conditions->gain(fed, channel, strip);
-
- uint8_t adc_j = adc[index];
- const int charge = static_cast(static_cast(adc_j) / gain_j + 0.5f);
-
- constexpr uint8_t adc_low_saturation = 254;
- constexpr uint8_t adc_high_saturation = 255;
- constexpr int charge_low_saturation = 253;
- constexpr int charge_high_saturation = 1022;
- if (adc_j < adc_low_saturation) {
- adc_j =
- (charge > charge_high_saturation ? adc_high_saturation
- : (charge > charge_low_saturation ? adc_low_saturation : charge));
- }
- clusterADCs[j * nSeedStripsNC + i] = adc_j;
-
- adcSum += static_cast(adc_j);
- sumx += j * adc_j;
- suma += adc_j;
- j++;
- }
- } // loop over cluster strips
- charge[i] = adcSum;
- const auto chan = channels[left];
- const fedId_t fed = chanlocs->fedID(chan);
- const fedCh_t channel = chanlocs->fedCh(chan);
- trueCluster[i] = (adcSum * conditions->invthick(fed, channel)) > minGoodCharge;
- const auto bary_i = static_cast(sumx) / static_cast(suma);
- barycenter[i] = static_cast(stripId[left] & stripIndexMask) + bary_i + 0.5f;
- clusterSize[i] = j;
- } // not a duplicate cluster
- } // trueCluster[i] is true
- } // i < nSeedStripsNC
-}
-
-namespace stripgpu {
- void SiStripRawToClusterGPUKernel::unpackChannelsGPU(const ConditionsDeviceView *conditions, cudaStream_t stream) {
- constexpr int nthreads = 128;
- const auto channels = chanlocsGPU_->size();
- const auto nblocks = (channels + nthreads - 1) / nthreads;
-
- unpackChannels<<>>(chanlocsGPU_->channelLocsView(),
- conditions,
- stripdata_->alldataGPU_.get(),
- stripdata_->channelGPU_.get(),
- stripdata_->stripIdGPU_.get());
- }
-
- void SiStripRawToClusterGPUKernel::allocateSSTDataGPU(int max_strips, cudaStream_t stream) {
- stripdata_->seedStripsMask_ = cms::cuda::make_device_unique(2 * max_strips, stream);
- stripdata_->prefixSeedStripsNCMask_ = cms::cuda::make_device_unique(2 * max_strips, stream);
-
- sst_data_d_->chanlocs = chanlocsGPU_->channelLocsView();
- sst_data_d_->stripId = stripdata_->stripIdGPU_.get();
- sst_data_d_->channel = stripdata_->channelGPU_.get();
- sst_data_d_->adc = stripdata_->alldataGPU_.get();
- sst_data_d_->seedStripsMask = stripdata_->seedStripsMask_.get();
- sst_data_d_->prefixSeedStripsNCMask = stripdata_->prefixSeedStripsNCMask_.get();
-
- sst_data_d_->seedStripsNCMask = sst_data_d_->seedStripsMask + max_strips;
- sst_data_d_->seedStripsNCIndex = sst_data_d_->prefixSeedStripsNCMask + max_strips;
-
- sst_data_d_->channelThreshold = channelThreshold_;
- sst_data_d_->seedThreshold = seedThreshold_;
- sst_data_d_->clusterThresholdSquared = clusterThresholdSquared_;
- sst_data_d_->maxSequentialHoles = maxSequentialHoles_;
- sst_data_d_->maxSequentialBad = maxSequentialBad_;
- sst_data_d_->maxAdjacentBad = maxAdjacentBad_;
- sst_data_d_->minGoodCharge = minGoodCharge_;
- sst_data_d_->clusterSizeLimit = maxClusterSize_;
-
- pt_sst_data_d_ = cms::cuda::make_device_unique(stream);
- cms::cuda::copyAsync(pt_sst_data_d_, sst_data_d_, stream);
-#ifdef GPU_CHECK
- cudaCheck(cudaStreamSynchronize(stream));
-#endif
- }
-
- void SiStripRawToClusterGPUKernel::findClusterGPU(const ConditionsDeviceView *conditions, cudaStream_t stream) {
- const int nthreads = 128;
- const int nStrips = sst_data_d_->nStrips;
- const int nSeeds = std::min(kMaxSeedStrips, nStrips);
- const int nblocks = (nSeeds + nthreads - 1) / nthreads;
-
-#ifdef GPU_DEBUG
- auto cpu_index = cms::cuda::make_host_unique(nStrips, stream);
- auto cpu_strip = cms::cuda::make_host_unique(nStrips, stream);
- auto cpu_adc = cms::cuda::make_host_unique(nStrips, stream);
-
- cudaCheck(cudaMemcpyAsync(
- cpu_strip.get(), sst_data_d_->stripId, nStrips * sizeof(uint16_t), cudaMemcpyDeviceToHost, stream));
- cudaCheck(
- cudaMemcpyAsync(cpu_adc.get(), sst_data_d_->adc, nStrips * sizeof(uint8_t), cudaMemcpyDeviceToHost, stream));
- cudaCheck(cudaMemcpyAsync(
- cpu_index.get(), sst_data_d_->seedStripsNCIndex, nStrips * sizeof(int), cudaMemcpyDeviceToHost, stream));
- cudaCheck(cudaStreamSynchronize(stream));
-
- for (int i = 0; i < nStrips; i++) {
- std::cout << " cpu_strip " << cpu_strip[i] << " cpu_adc " << (unsigned int)cpu_adc[i] << " cpu index "
- << cpu_index[i] << std::endl;
- }
-#endif
-
- auto clust_data_d = clusters_d_.view();
- findLeftRightBoundaryGPU<<>>(pt_sst_data_d_.get(), conditions, clust_data_d);
- cudaCheck(cudaGetLastError());
-#ifdef GPU_CHECK
- cudaDeviceSynchronize();
- cudaCheck(cudaGetLastError());
-#endif
-
- cudaCheck(cudaMemcpyAsync(clusters_d_.nClustersPtr(),
- &(clust_data_d->nClusters_),
- sizeof(clust_data_d->nClusters_),
- cudaMemcpyDeviceToHost,
- stream));
-
- checkClusterConditionGPU<<>>(pt_sst_data_d_.get(), conditions, clust_data_d);
- cudaCheck(cudaGetLastError());
-
-#ifdef GPU_CHECK
- cudaDeviceSynchronize();
- cudaCheck(cudaGetLastError());
-#endif
-
-#ifdef GPU_DEBUG
- cudaStreamSynchronize(stream);
- auto clust_data = std::make_unique(clusters_d_, stream);
- cudaStreamSynchronize(stream);
-
- const auto clusterIndexLeft = clust_data->clusterIndex().get();
- const auto clusterSize = clust_data->clusterSize().get();
- const auto trueCluster = clust_data->trueCluster().get();
- const auto clusterADCs = clust_data->clusterADCs().get();
- const auto detids = clust_data->clusterDetId().get();
- const auto charge = clust_data->charge().get();
-
- const auto nSeedStripsNC = clusters_d_.nClusters();
- std::cout << "findClusterGPU nSeedStripsNC=" << nSeedStripsNC << std::endl;
-
- for (auto i = 0U; i < nSeedStripsNC; i++) {
- if (trueCluster[i]) {
- int left = clusterIndexLeft[i];
- uint32_t size = clusterSize[i];
- const auto detid = detids[i];
- std::cout << "i=" << i << " detId " << detid << " left " << left << " size " << size << " charge " << charge[i]
- << ": ";
- size = std::min(size, maxClusterSize_);
- for (uint32_t j = 0; j < size; j++) {
- std::cout << (unsigned int)clusterADCs[j * nSeedStripsNC + i] << " ";
- }
- std::cout << std::endl;
- }
- }
-#endif
- }
-
- void SiStripRawToClusterGPUKernel::setSeedStripsNCIndexGPU(const ConditionsDeviceView *conditions,
- cudaStream_t stream) {
-#ifdef GPU_DEBUG
- int nStrips = sst_data_d_->nStrips;
- auto cpu_strip = cms::cuda::make_host_unique(nStrips, stream);
- auto cpu_adc = cms::cuda::make_host_unique(nStrips, stream);
-
- cudaCheck(cudaMemcpyAsync(
- cpu_strip.get(), sst_data_d_->stripId, nStrips * sizeof(uint16_t), cudaMemcpyDeviceToHost, stream));
- cudaCheck(
- cudaMemcpyAsync(cpu_adc.get(), sst_data_d_->adc, nStrips * sizeof(uint8_t), cudaMemcpyDeviceToHost, stream));
- cudaCheck(cudaStreamSynchronize(stream));
-
- for (int i = 0; i < nStrips; i++) {
- std::cout << " cpu_strip " << cpu_strip[i] << " cpu_adc " << (unsigned int)cpu_adc[i] << std::endl;
- }
-#endif
-
- int nthreads = 256;
- int nblocks = (sst_data_d_->nStrips + nthreads - 1) / nthreads;
-
- //mark seed strips
- setSeedStripsGPU<<>>(pt_sst_data_d_.get(), conditions);
- cudaCheck(cudaGetLastError());
-#ifdef GPU_CHECK
- cudaCheck(cudaStreamSynchronize(stream));
-#endif
-
- //mark only non-consecutive seed strips (mask out consecutive seed strips)
- setNCSeedStripsGPU<<>>(pt_sst_data_d_.get(), conditions);
- cudaCheck(cudaGetLastError());
-#ifdef GPU_CHECK
- cudaCheck(cudaStreamSynchronize(stream));
-#endif
-
- std::size_t temp_storage_bytes = 0;
- cub::DeviceScan::ExclusiveSum(nullptr,
- temp_storage_bytes,
- sst_data_d_->seedStripsNCMask,
- sst_data_d_->prefixSeedStripsNCMask,
- sst_data_d_->nStrips,
- stream);
-#ifdef GPU_DEBUG
- std::cout << "temp_storage_bytes=" << temp_storage_bytes << std::endl;
-#endif
-#ifdef GPU_CHECK
- cudaCheck(cudaStreamSynchronize(stream));
-#endif
-
- {
- auto d_temp_storage = cms::cuda::make_device_unique(temp_storage_bytes, stream);
- cub::DeviceScan::ExclusiveSum(d_temp_storage.get(),
- temp_storage_bytes,
- sst_data_d_->seedStripsNCMask,
- sst_data_d_->prefixSeedStripsNCMask,
- sst_data_d_->nStrips,
- stream);
- }
-#ifdef GPU_CHECK
- cudaCheck(cudaStreamSynchronize(stream));
-#endif
-
- setStripIndexGPU<<>>(pt_sst_data_d_.get());
- cudaCheck(cudaGetLastError());
-#ifdef GPU_CHECK
- cudaCheck(cudaStreamSynchronize(stream));
-#endif
-
-#ifdef GPU_DEBUG
- auto cpu_mask = cms::cuda::make_host_unique(nStrips, stream);
- auto cpu_prefix = cms::cuda::make_host_unique(nStrips, stream);
- auto cpu_index = cms::cuda::make_host_unique(nStrips, stream);
-
- cudaCheck(cudaMemcpyAsync(&(sst_data_d_->nSeedStripsNC),
- sst_data_d_->prefixSeedStripsNCMask + sst_data_d_->nStrips - 1,
- sizeof(int),
- cudaMemcpyDeviceToHost,
- stream));
- cudaCheck(cudaMemcpyAsync(
- cpu_mask.get(), sst_data_d_->seedStripsNCMask, nStrips * sizeof(int), cudaMemcpyDeviceToHost, stream));
- cudaCheck(cudaMemcpyAsync(
- cpu_prefix.get(), sst_data_d_->prefixSeedStripsNCMask, nStrips * sizeof(int), cudaMemcpyDeviceToHost, stream));
- cudaCheck(cudaMemcpyAsync(
- cpu_index.get(), sst_data_d_->seedStripsNCIndex, nStrips * sizeof(int), cudaMemcpyDeviceToHost, stream));
- cudaCheck(cudaStreamSynchronize(stream));
-
- const int nSeedStripsNC = std::min(kMaxSeedStrips, sst_data_d_->nSeedStripsNC);
- std::cout << "nStrips=" << nStrips << " nSeedStripsNC=" << sst_data_d_->nSeedStripsNC << std::endl;
- for (int i = 0; i < nStrips; i++) {
- std::cout << " i " << i << " mask " << cpu_mask[i] << " prefix " << cpu_prefix[i] << " index " << cpu_index[i]
- << std::endl;
- }
-#endif
- }
-} // namespace stripgpu
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.h b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.h
deleted file mode 100644
index a9c88b340b623..0000000000000
--- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#ifndef RecoLocalTracker_SiStripClusterizer_plugins_SiStripRawToClusterGPUKernel_h
-#define RecoLocalTracker_SiStripClusterizer_plugins_SiStripRawToClusterGPUKernel_h
-
-#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
-
-#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
-//#include "clusterGPU.cuh"
-
-#include
-
-#include
-#include
-
-class ChannelLocs;
-class ChannelLocsGPU;
-class FEDRawData;
-
-namespace sistrip {
- class FEDBuffer;
-}
-namespace edm {
- class ParameterSet;
-}
-
-namespace stripgpu {
- struct StripDataView;
-
- class StripDataGPU {
- public:
- StripDataGPU(size_t size, cudaStream_t stream);
-
- cms::cuda::device::unique_ptr alldataGPU_;
- cms::cuda::device::unique_ptr channelGPU_;
- cms::cuda::device::unique_ptr stripIdGPU_;
- cms::cuda::device::unique_ptr seedStripsMask_;
- cms::cuda::device::unique_ptr prefixSeedStripsNCMask_;
- };
-
- class SiStripRawToClusterGPUKernel {
- public:
- SiStripRawToClusterGPUKernel(const edm::ParameterSet& conf);
- void makeAsync(const std::vector& rawdata,
- const std::vector>& buffers,
- const SiStripClusterizerConditionsGPU& conditions,
- cudaStream_t stream);
- void copyAsync(cudaStream_t stream);
- SiStripClustersCUDADevice getResults(cudaStream_t stream);
-
- private:
- using ConditionsDeviceView = SiStripClusterizerConditionsGPU::Data::DeviceView;
-
- void reset();
- void unpackChannelsGPU(const ConditionsDeviceView* conditions, cudaStream_t stream);
- void allocateSSTDataGPU(int max_strips, cudaStream_t stream);
- void freeSSTDataGPU(cudaStream_t stream);
-
- void setSeedStripsNCIndexGPU(const ConditionsDeviceView* conditions, cudaStream_t stream);
- void findClusterGPU(const ConditionsDeviceView* conditions, cudaStream_t stream);
-
- std::vector fedIndex_;
- std::vector fedRawDataOffsets_;
-
- std::unique_ptr stripdata_;
- std::unique_ptr chanlocsGPU_;
-
- cms::cuda::host::unique_ptr sst_data_d_;
- cms::cuda::device::unique_ptr pt_sst_data_d_;
-
- SiStripClustersCUDADevice clusters_d_;
- float channelThreshold_, seedThreshold_, clusterThresholdSquared_;
- uint8_t maxSequentialHoles_, maxSequentialBad_, maxAdjacentBad_;
- uint32_t maxClusterSize_;
- float minGoodCharge_;
- };
-} // namespace stripgpu
-#endif
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/StripDataView.h b/RecoLocalTracker/SiStripClusterizer/plugins/StripDataView.h
deleted file mode 100644
index 785b714376e2a..0000000000000
--- a/RecoLocalTracker/SiStripClusterizer/plugins/StripDataView.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef RecoLocalTracker_SiStripClusterizer_plugins_StripDataView_h
-#define RecoLocalTracker_SiStripClusterizer_plugins_StripDataView_h
-
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
-#include "FWCore/Utilities/interface/HostDeviceConstant.h"
-
-#include
-
-class ChannelLocsView;
-
-namespace stripgpu {
- HOST_DEVICE_CONSTANT auto kMaxSeedStrips = 200000;
-
- struct StripDataView {
- const ChannelLocsView *chanlocs;
- uint8_t *adc;
- uint16_t *channel;
- stripId_t *stripId;
- int *seedStripsNCIndex, *seedStripsMask, *seedStripsNCMask, *prefixSeedStripsNCMask;
- int nSeedStripsNC;
- int nStrips;
- float channelThreshold, seedThreshold, clusterThresholdSquared;
- uint8_t maxSequentialHoles, maxSequentialBad, maxAdjacentBad;
- float minGoodCharge;
- int clusterSizeLimit;
- };
-} // namespace stripgpu
-#endif
diff --git a/RecoLocalTracker/SiStripClusterizer/python/customizeStripClustersFromRaw.py b/RecoLocalTracker/SiStripClusterizer/python/customizeStripClustersFromRaw.py
index e2afa2d575146..a8e856dbf3353 100644
--- a/RecoLocalTracker/SiStripClusterizer/python/customizeStripClustersFromRaw.py
+++ b/RecoLocalTracker/SiStripClusterizer/python/customizeStripClustersFromRaw.py
@@ -12,17 +12,6 @@ def customizeStripClustersFromRaw(process):
return process
-def customizeHLTStripClustersFromRaw(process):
- if hasattr(process, 'hltSiStripRawToClustersFacility'):
- import RecoLocalTracker.SiStripClusterizer.SiStripClusterizerOnDemand_cfi as SiStripClusterizerOnDemand_cfi
-
- process.load("RecoLocalTracker.SiStripClusterizer.SiStripClusterizerOnDemand_cfi")
- process.hltSiStripRawToClustersFacility = SiStripClusterizerOnDemand_cfi.siStripClusters.clone()
- process.HLTDoLocalStripSequence.replace(process.hltSiStripRawToClustersFacility,
- cms.Sequence(process.hltSiStripRawToClustersFacility, process.siStripClustersTaskCUDA))
-
- return process
-
def customizeHLTStripClustersFromRaw_alpaka(process: cms.Process, MaxClusterSize:int = 768, doNotReplaceInPath = []):
if hasattr(process, 'hltSiStripRawToClustersFacility'):
from RecoLocalTracker.SiStripZeroSuppression.DefaultAlgorithms_cff import DefaultAlgorithms