diff --git a/CUDADataFormats/SiStripCluster/BuildFile.xml b/CUDADataFormats/SiStripCluster/BuildFile.xml
index 5e401d215c4eb..4231da407f300 100644
--- a/CUDADataFormats/SiStripCluster/BuildFile.xml
+++ b/CUDADataFormats/SiStripCluster/BuildFile.xml
@@ -1,6 +1,7 @@
+
diff --git a/CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h b/CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h
deleted file mode 100644
index f64b8a533d513..0000000000000
--- a/CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef CUDADataFormats_SiStripCluster_interface_SiStripClustersCUDA_h
-#define CUDADataFormats_SiStripCluster_interface_SiStripClustersCUDA_h
-
-#include "DataFormats/SiStripCluster/interface/SiStripClustersSOABase.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
-
-#include
-
-class SiStripClustersCUDADevice : public SiStripClustersSOABase {
-public:
- SiStripClustersCUDADevice() = default;
- explicit SiStripClustersCUDADevice(uint32_t maxClusters, uint32_t maxStripsPerCluster, cudaStream_t stream);
- ~SiStripClustersCUDADevice() override = default;
-
- SiStripClustersCUDADevice(const SiStripClustersCUDADevice &) = delete;
- SiStripClustersCUDADevice &operator=(const SiStripClustersCUDADevice &) = delete;
- SiStripClustersCUDADevice(SiStripClustersCUDADevice &&) = default;
- SiStripClustersCUDADevice &operator=(SiStripClustersCUDADevice &&) = default;
-
- struct DeviceView {
- uint32_t *clusterIndex_;
- uint32_t *clusterSize_;
- uint8_t *clusterADCs_;
- stripgpu::detId_t *clusterDetId_;
- stripgpu::stripId_t *firstStrip_;
- bool *trueCluster_;
- float *barycenter_;
- float *charge_;
- uint32_t nClusters_;
- uint32_t maxClusterSize_;
- };
-
- DeviceView *view() const { return view_d.get(); }
- uint32_t nClusters() const { return nClusters_; }
- uint32_t *nClustersPtr() { return &nClusters_; }
- uint32_t maxClusterSize() const { return maxClusterSize_; }
- uint32_t *maxClusterSizePtr() { return &maxClusterSize_; }
-
-private:
- cms::cuda::device::unique_ptr view_d; // "me" pointer
- uint32_t nClusters_;
- uint32_t maxClusterSize_;
-};
-
-class SiStripClustersCUDAHost : public SiStripClustersSOABase {
-public:
- SiStripClustersCUDAHost() = default;
- explicit SiStripClustersCUDAHost(const SiStripClustersCUDADevice &clusters_d, cudaStream_t stream);
- ~SiStripClustersCUDAHost() override = default;
-
- SiStripClustersCUDAHost(const SiStripClustersCUDAHost &) = delete;
- SiStripClustersCUDAHost &operator=(const SiStripClustersCUDAHost &) = delete;
- SiStripClustersCUDAHost(SiStripClustersCUDAHost &&) = default;
- SiStripClustersCUDAHost &operator=(SiStripClustersCUDAHost &&) = default;
-};
-
-#endif
diff --git a/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h b/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h
new file mode 100644
index 0000000000000..5e81448e8edfa
--- /dev/null
+++ b/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h
@@ -0,0 +1,35 @@
+#ifndef CUDADataFormats_SiStripCluster_interface_SiStripClustersSoADevice_h
+#define CUDADataFormats_SiStripCluster_interface_SiStripClustersSoADevice_h
+
+#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h"
+#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAUtilities.h"
+
+class SiStripClustersSoADevice : public cms::cuda::PortableDeviceCollection {
+public:
+ using cms::cuda::PortableDeviceCollection::view;
+ using cms::cuda::PortableDeviceCollection::const_view;
+ using cms::cuda::PortableDeviceCollection::buffer;
+ using cms::cuda::PortableDeviceCollection::bufferSize;
+
+ SiStripClustersSoADevice() = default;
+ ~SiStripClustersSoADevice() = default;
+
+ explicit SiStripClustersSoADevice(uint32_t maxClusters, cudaStream_t stream)
+ : cms::cuda::PortableDeviceCollection(maxClusters, stream), maxClusters_{maxClusters} {
+ const uint32_t maxStripsPerCluster = SiStripClustersSoA::maxStripsPerCluster; //768
+ cudaCheck(
+ cudaMemcpyAsync(&(view().maxClusterSize()), &maxStripsPerCluster, sizeof(uint32_t), cudaMemcpyDefault, stream));
+ };
+
+ SiStripClustersSoADevice(const SiStripClustersSoADevice &&) = delete;
+ SiStripClustersSoADevice &operator=(const SiStripClustersSoADevice &&) = delete;
+ SiStripClustersSoADevice(SiStripClustersSoADevice &&) = default;
+ SiStripClustersSoADevice &operator=(SiStripClustersSoADevice &&) = default;
+
+ uint32_t maxClusters() const { return maxClusters_; }
+
+private:
+ uint32_t maxClusters_;
+};
+
+#endif
diff --git a/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAHost.h b/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAHost.h
new file mode 100644
index 0000000000000..600282092d07f
--- /dev/null
+++ b/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAHost.h
@@ -0,0 +1,28 @@
+#ifndef CUDADataFormats_SiStripCluster_interface_SiStripClustersSoAHost_h
+#define CUDADataFormats_SiStripCluster_interface_SiStripClustersSoAHost_h
+
+#include "CUDADataFormats/Common/interface/PortableHostCollection.h"
+#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAUtilities.h"
+
+class SiStripClustersSoAHost : public cms::cuda::PortableHostCollection {
+public:
+ using cms::cuda::PortableHostCollection::view;
+ using cms::cuda::PortableHostCollection::const_view;
+ using cms::cuda::PortableHostCollection::buffer;
+ using cms::cuda::PortableHostCollection::bufferSize;
+
+ SiStripClustersSoAHost() = default;
+ ~SiStripClustersSoAHost() = default;
+
+ explicit SiStripClustersSoAHost(uint32_t maxClusters, cudaStream_t stream)
+ : PortableHostCollection(maxClusters, stream){};
+
+ SiStripClustersSoAHost(const SiStripClustersSoAHost &&) = delete;
+ SiStripClustersSoAHost &operator=(const SiStripClustersSoAHost &&) = delete;
+ SiStripClustersSoAHost(SiStripClustersSoAHost &&) = default;
+ SiStripClustersSoAHost &operator=(SiStripClustersSoAHost &&) = default;
+
+private:
+};
+
+#endif
diff --git a/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAUtilities.h b/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAUtilities.h
new file mode 100644
index 0000000000000..55827946d8e62
--- /dev/null
+++ b/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAUtilities.h
@@ -0,0 +1,27 @@
+#ifndef CUDADataFormats_SiStripCluster_interface_SiStripClustersSoAUtilities_h
+#define CUDADataFormats_SiStripCluster_interface_SiStripClustersSoAUtilities_h
+
+#include "DataFormats/SiStripCluster/interface/SiStripTypes.h"
+#include "DataFormats/SoATemplate/interface/SoALayout.h"
+
+struct SiStripClustersSoA {
+ const static auto maxStripsPerCluster = 768;
+ using clusterADCsColumn = std::array;
+ GENERATE_SOA_LAYOUT(SiStripClustersSoALayout,
+ SOA_COLUMN(uint32_t, clusterIndex),
+ SOA_COLUMN(uint32_t, clusterSize),
+ SOA_COLUMN(clusterADCsColumn, clusterADCs),
+ SOA_COLUMN(stripgpu::detId_t, clusterDetId),
+ SOA_COLUMN(stripgpu::stripId_t, firstStrip),
+ SOA_COLUMN(bool, trueCluster),
+ SOA_COLUMN(float, barycenter),
+ SOA_COLUMN(float, charge),
+ SOA_SCALAR(uint32_t, nClusters),
+ SOA_SCALAR(uint32_t, maxClusterSize));
+};
+
+using SiStripClustersLayout = typename SiStripClustersSoA::SiStripClustersSoALayout<>;
+using SiStripClustersView = typename SiStripClustersSoA::SiStripClustersSoALayout<>::View;
+using SiStripClustersConstView = typename SiStripClustersSoA::SiStripClustersSoALayout<>::ConstView;
+
+#endif
diff --git a/CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc b/CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc
deleted file mode 100644
index 220456760476a..0000000000000
--- a/CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
-
-SiStripClustersCUDADevice::SiStripClustersCUDADevice(uint32_t maxClusters,
- uint32_t maxStripsPerCluster,
- cudaStream_t stream) {
- maxClusterSize_ = maxStripsPerCluster;
-
- clusterIndex_ = cms::cuda::make_device_unique(maxClusters, stream);
- clusterSize_ = cms::cuda::make_device_unique(maxClusters, stream);
- clusterADCs_ = cms::cuda::make_device_unique(maxClusters * maxStripsPerCluster, stream);
- clusterDetId_ = cms::cuda::make_device_unique(maxClusters, stream);
- firstStrip_ = cms::cuda::make_device_unique(maxClusters, stream);
- trueCluster_ = cms::cuda::make_device_unique(maxClusters, stream);
- barycenter_ = cms::cuda::make_device_unique(maxClusters, stream);
- charge_ = cms::cuda::make_device_unique(maxClusters, stream);
-
- auto view = cms::cuda::make_host_unique(stream);
- view->clusterIndex_ = clusterIndex_.get();
- view->clusterSize_ = clusterSize_.get();
- view->clusterADCs_ = clusterADCs_.get();
- view->clusterDetId_ = clusterDetId_.get();
- view->firstStrip_ = firstStrip_.get();
- view->trueCluster_ = trueCluster_.get();
- view->barycenter_ = barycenter_.get();
- view->charge_ = charge_.get();
- view->maxClusterSize_ = maxStripsPerCluster;
-
- view_d = cms::cuda::make_device_unique(stream);
- cms::cuda::copyAsync(view_d, view, stream);
-#ifdef GPU_CHECK
- cudaCheck(cudaStreamSynchronize(stream));
-#endif
-}
-
-SiStripClustersCUDAHost::SiStripClustersCUDAHost(const SiStripClustersCUDADevice& clusters_d, cudaStream_t stream) {
- nClusters_ = clusters_d.nClusters();
- maxClusterSize_ = clusters_d.maxClusterSize();
- clusterIndex_ = cms::cuda::make_host_unique(nClusters_, stream);
- clusterSize_ = cms::cuda::make_host_unique(nClusters_, stream);
- clusterADCs_ = cms::cuda::make_host_unique(nClusters_ * maxClusterSize_, stream);
- clusterDetId_ = cms::cuda::make_host_unique(nClusters_, stream);
- firstStrip_ = cms::cuda::make_host_unique(nClusters_, stream);
- trueCluster_ = cms::cuda::make_host_unique(nClusters_, stream);
- barycenter_ = cms::cuda::make_host_unique(nClusters_, stream);
- charge_ = cms::cuda::make_host_unique(nClusters_, stream);
-
- cms::cuda::copyAsync(clusterIndex_, clusters_d.clusterIndex(), nClusters_, stream);
- cms::cuda::copyAsync(clusterSize_, clusters_d.clusterSize(), nClusters_, stream);
- cms::cuda::copyAsync(clusterADCs_, clusters_d.clusterADCs(), nClusters_ * maxClusterSize_, stream);
- cms::cuda::copyAsync(clusterDetId_, clusters_d.clusterDetId(), nClusters_, stream);
- cms::cuda::copyAsync(firstStrip_, clusters_d.firstStrip(), nClusters_, stream);
- cms::cuda::copyAsync(trueCluster_, clusters_d.trueCluster(), nClusters_, stream);
- cms::cuda::copyAsync(barycenter_, clusters_d.barycenter(), nClusters_, stream);
- cms::cuda::copyAsync(charge_, clusters_d.charge(), nClusters_, stream);
-#ifdef GPU_CHECK
- cudaCheck(cudaStreamSynchronize(stream));
-#endif
-}
diff --git a/CUDADataFormats/SiStripCluster/src/classes.h b/CUDADataFormats/SiStripCluster/src/classes.h
index b38f397dee067..f47527f3e0859 100644
--- a/CUDADataFormats/SiStripCluster/src/classes.h
+++ b/CUDADataFormats/SiStripCluster/src/classes.h
@@ -2,7 +2,8 @@
#define CUDADataFormats_SiStripCluster_classes_h
#include "CUDADataFormats/Common/interface/Product.h"
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
+#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h"
+#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAHost.h"
#include "DataFormats/Common/interface/Wrapper.h"
#endif
diff --git a/CUDADataFormats/SiStripCluster/src/classes_def.xml b/CUDADataFormats/SiStripCluster/src/classes_def.xml
index 3c2f3ab27c620..3ecd85707c802 100644
--- a/CUDADataFormats/SiStripCluster/src/classes_def.xml
+++ b/CUDADataFormats/SiStripCluster/src/classes_def.xml
@@ -1,6 +1,13 @@
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/ClustersFromRawProducerGPU.cc b/RecoLocalTracker/SiStripClusterizer/plugins/ClustersFromRawProducerGPU.cc
index 2d7b4c83a4a4a..31452a8a57345 100644
--- a/RecoLocalTracker/SiStripClusterizer/plugins/ClustersFromRawProducerGPU.cc
+++ b/RecoLocalTracker/SiStripClusterizer/plugins/ClustersFromRawProducerGPU.cc
@@ -68,7 +68,7 @@ class SiStripClusterizerFromRawGPU final : public edm::stream::EDProducer("Clusterizer")),
inputToken_(consumes(conf.getParameter("ProductLabel"))),
- outputToken_(produces>()),
+ outputToken_(produces>()),
conditionsToken_(esConsumes(edm::ESInputTag{"", conf.getParameter("ConditionsLabel")})),
cpuConditionsToken_(esConsumes(edm::ESInputTag{"", conf.getParameter("ConditionsLabel")})) {}
@@ -91,7 +91,6 @@ class SiStripClusterizerFromRawGPU final : public edm::stream::EDProducer. cms::cuda::Product stores also
// the current device and the CUDA stream since those will be needed
// in the consumer side.
- ctx.emplace(ev, outputToken_, gpuAlgo_.getResults(ctx.stream()));
+ ctx.emplace(ev, outputToken_, std::move(gpuAlgo_.getResults(ctx.stream())));
for (auto& buf : buffers_)
buf.reset(nullptr);
@@ -122,7 +121,7 @@ class SiStripClusterizerFromRawGPU final : public edm::stream::EDProducer inputToken_;
- edm::EDPutTokenT> outputToken_;
+ edm::EDPutTokenT> outputToken_;
edm::ESGetToken conditionsToken_;
edm::ESGetToken cpuConditionsToken_;
};
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersFromSOA.cc b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersFromSOA.cc
index 8b891382085c7..6a666c7f8ba9a 100644
--- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersFromSOA.cc
+++ b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersFromSOA.cc
@@ -13,14 +13,17 @@
#include "FWCore/Framework/interface/ESHandle.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
+#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAHost.h"
+#include "CUDADataFormats/Common/interface/Product.h"
+#include "FWCore/Framework/interface/ConsumesCollector.h"
+#include
#include
class SiStripClustersFromSOA final : public edm::stream::EDProducer<> {
public:
explicit SiStripClustersFromSOA(const edm::ParameterSet& conf)
- : inputToken_(consumes(conf.getParameter("ProductLabel"))),
+ : inputToken_(consumes(conf.getParameter("ProductLabel"))),
outputToken_(produces>()) {}
static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
@@ -34,12 +37,12 @@ class SiStripClustersFromSOA final : public edm::stream::EDProducer<> {
void produce(edm::Event& ev, const edm::EventSetup& es) override {
const auto& clust_data = ev.get(inputToken_);
- const int nSeedStripsNC = clust_data.nClusters();
- const auto clusterSize = clust_data.clusterSize().get();
- const auto clusterADCs = clust_data.clusterADCs().get();
- const auto detIDs = clust_data.clusterDetId().get();
- const auto stripIDs = clust_data.firstStrip().get();
- const auto trueCluster = clust_data.trueCluster().get();
+ const int nSeedStripsNC = clust_data->nClusters();
+ const auto clusterSize = clust_data->clusterSize();
+ const auto clusterADCs = clust_data->clusterADCs();
+ const auto detIDs = clust_data->clusterDetId();
+ const auto stripIDs = clust_data->firstStrip();
+ const auto trueCluster = clust_data->trueCluster();
const unsigned int initSeedStripsSize = 15000;
@@ -62,7 +65,7 @@ class SiStripClustersFromSOA final : public edm::stream::EDProducer<> {
adcs.reserve(size);
for (uint32_t j = 0; j < size; ++j) {
- adcs.push_back(clusterADCs[i + j * nSeedStripsNC]);
+ adcs.push_back(clusterADCs[i][j]);
}
record.push_back(SiStripCluster(firstStrip, std::move(adcs)));
}
@@ -75,7 +78,7 @@ class SiStripClustersFromSOA final : public edm::stream::EDProducer<> {
}
private:
- edm::EDGetTokenT inputToken_;
+ edm::EDGetTokenT inputToken_;
edm::EDPutTokenT> outputToken_;
};
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersSOAtoHost.cc b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersSOAtoHost.cc
index a51bb1216d0e0..e3f3c9b65873f 100644
--- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersSOAtoHost.cc
+++ b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersSOAtoHost.cc
@@ -14,28 +14,37 @@
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
+#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h"
+#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAHost.h"
#include
class SiStripSOAtoHost {
public:
SiStripSOAtoHost() = default;
- void makeAsync(const SiStripClustersCUDADevice& clusters_d, cudaStream_t stream) {
- hostView_ = std::make_unique(clusters_d, stream);
+ void makeAsync(const SiStripClustersSoADevice& clusters_d, cudaStream_t stream) {
+ maxClusters_ = clusters_d.maxClusters();
+ clusters_h_ = SiStripClustersSoAHost(maxClusters_, stream);
+ cudaCheck(cudaMemcpyAsync(clusters_h_.buffer().get(),
+ clusters_d.const_buffer().get(),
+ clusters_d.bufferSize(),
+ cudaMemcpyDeviceToHost,
+ stream)); // Copy data from Device to Host
}
- std::unique_ptr getResults() { return std::move(hostView_); }
+
+ SiStripClustersSoAHost getResults() { return std::move(clusters_h_); }
private:
- std::unique_ptr hostView_;
+ SiStripClustersSoAHost clusters_h_;
+ uint32_t maxClusters_;
};
class SiStripClustersSOAtoHost final : public edm::stream::EDProducer {
public:
explicit SiStripClustersSOAtoHost(const edm::ParameterSet& conf)
: inputToken_(
- consumes>(conf.getParameter("ProductLabel"))),
- outputToken_(produces()) {}
+ consumes>(conf.getParameter("ProductLabel"))),
+ outputToken_(produces()) {}
static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
@@ -63,13 +72,13 @@ class SiStripClustersSOAtoHost final : public edm::stream::EDProducer> inputToken_;
- edm::EDPutTokenT outputToken_;
+ edm::EDGetTokenT> inputToken_;
+ edm::EDPutTokenT outputToken_;
};
#include "FWCore/Framework/interface/MakerMacros.h"
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cc b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cc
index a8b9aa04a00b3..fbb8de0c3beb0 100644
--- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cc
+++ b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cc
@@ -6,6 +6,8 @@
#include "SiStripRawToClusterGPUKernel.h"
+#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h"
+
#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
#include "ChannelLocsGPU.h"
#include "StripDataView.h"
@@ -126,6 +128,7 @@ namespace stripgpu {
const auto& condGPU = conditions.getGPUProductAsync(stream);
unpackChannelsGPU(condGPU.deviceView(), stream);
+
#ifdef GPU_CHECK
cudaCheck(cudaStreamSynchronize(stream));
#endif
@@ -166,13 +169,13 @@ namespace stripgpu {
allocateSSTDataGPU(n_strips, stream);
setSeedStripsNCIndexGPU(condGPU.deviceView(), stream);
- clusters_d_ = SiStripClustersCUDADevice(kMaxSeedStrips, maxClusterSize_, stream);
+ clusters_d_ = SiStripClustersSoADevice(kMaxSeedStrips, stream);
findClusterGPU(condGPU.deviceView(), stream);
stripdata_.reset();
}
- SiStripClustersCUDADevice SiStripRawToClusterGPUKernel::getResults(cudaStream_t stream) {
+ SiStripClustersSoADevice SiStripRawToClusterGPUKernel::getResults(cudaStream_t stream) {
reset();
return std::move(clusters_d_);
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cu b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cu
index c88d5a60ec86b..63646ce83703f 100644
--- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cu
+++ b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cu
@@ -9,8 +9,6 @@
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
-
#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
#include "ChannelLocsGPU.h"
@@ -150,7 +148,7 @@ __global__ static void setStripIndexGPU(StripDataView *sst_data_d) {
__global__ static void findLeftRightBoundaryGPU(const StripDataView *sst_data_d,
const ConditionsDeviceView *conditions,
- SiStripClustersCUDADevice::DeviceView *clust_data_d) {
+ SiStripClustersView clust_data_d) {
const int nStrips = sst_data_d->nStrips;
const int *__restrict__ seedStripsNCIndex = sst_data_d->seedStripsNCIndex;
const auto __restrict__ chanlocs = sst_data_d->chanlocs;
@@ -163,11 +161,11 @@ __global__ static void findLeftRightBoundaryGPU(const StripDataView *sst_data_d,
const float clusterThresholdSquared = sst_data_d->clusterThresholdSquared;
const int clusterSizeLimit = sst_data_d->clusterSizeLimit;
- auto __restrict__ clusterIndexLeft = clust_data_d->clusterIndex_;
- auto __restrict__ clusterSize = clust_data_d->clusterSize_;
- auto __restrict__ clusterDetId = clust_data_d->clusterDetId_;
- auto __restrict__ firstStrip = clust_data_d->firstStrip_;
- auto __restrict__ trueCluster = clust_data_d->trueCluster_;
+ auto __restrict__ clusterIndexLeft = clust_data_d.clusterIndex();
+ auto __restrict__ clusterSize = clust_data_d.clusterSize();
+ auto __restrict__ clusterDetId = clust_data_d.clusterDetId();
+ auto __restrict__ firstStrip = clust_data_d.firstStrip();
+ auto __restrict__ trueCluster = clust_data_d.trueCluster();
const int tid = threadIdx.x;
const int bid = blockIdx.x;
@@ -273,26 +271,26 @@ __global__ static void findLeftRightBoundaryGPU(const StripDataView *sst_data_d,
(noiseSquared_i * clusterThresholdSquared <= adcSum_i * adcSum_i) and (clusterSize[i] <= clusterSizeLimit);
} // i < nSeedStripsNC
if (first == 0) {
- clust_data_d->nClusters_ = nSeedStripsNC;
+ clust_data_d.nClusters() = nSeedStripsNC;
}
}
__global__ static void checkClusterConditionGPU(StripDataView *sst_data_d,
const ConditionsDeviceView *conditions,
- SiStripClustersCUDADevice::DeviceView *clust_data_d) {
+ SiStripClustersView clust_data_d) {
const uint16_t *__restrict__ stripId = sst_data_d->stripId;
const auto __restrict__ chanlocs = sst_data_d->chanlocs;
const uint16_t *__restrict__ channels = sst_data_d->channel;
const uint8_t *__restrict__ adc = sst_data_d->adc;
const float minGoodCharge = sst_data_d->minGoodCharge; //1620.0;
- const auto nSeedStripsNC = clust_data_d->nClusters_;
- const auto __restrict__ clusterIndexLeft = clust_data_d->clusterIndex_;
+ const auto nSeedStripsNC = clust_data_d.nClusters();
+ const auto __restrict__ clusterIndexLeft = clust_data_d.clusterIndex();
- auto __restrict__ clusterSize = clust_data_d->clusterSize_;
- auto __restrict__ clusterADCs = clust_data_d->clusterADCs_;
- auto __restrict__ trueCluster = clust_data_d->trueCluster_;
- auto __restrict__ barycenter = clust_data_d->barycenter_;
- auto __restrict__ charge = clust_data_d->charge_;
+ auto __restrict__ clusterSize = clust_data_d.clusterSize();
+ auto __restrict__ clusterADCs = clust_data_d.clusterADCs();
+ auto __restrict__ trueCluster = clust_data_d.trueCluster();
+ auto __restrict__ barycenter = clust_data_d.barycenter();
+ auto __restrict__ charge = clust_data_d.charge();
constexpr uint16_t stripIndexMask = 0x7FFF;
@@ -341,7 +339,7 @@ __global__ static void checkClusterConditionGPU(StripDataView *sst_data_d,
(charge > charge_high_saturation ? adc_high_saturation
: (charge > charge_low_saturation ? adc_low_saturation : charge));
}
- clusterADCs[j * nSeedStripsNC + i] = adc_j;
+ clusterADCs[i][j] = adc_j;
adcSum += static_cast(adc_j);
sumx += j * adc_j;
@@ -431,19 +429,16 @@ namespace stripgpu {
#endif
auto clust_data_d = clusters_d_.view();
+
findLeftRightBoundaryGPU<<>>(pt_sst_data_d_.get(), conditions, clust_data_d);
+
cudaCheck(cudaGetLastError());
+
#ifdef GPU_CHECK
cudaDeviceSynchronize();
cudaCheck(cudaGetLastError());
#endif
- cudaCheck(cudaMemcpyAsync(clusters_d_.nClustersPtr(),
- &(clust_data_d->nClusters_),
- sizeof(clust_data_d->nClusters_),
- cudaMemcpyDeviceToHost,
- stream));
-
checkClusterConditionGPU<<>>(pt_sst_data_d_.get(), conditions, clust_data_d);
cudaCheck(cudaGetLastError());
@@ -451,37 +446,6 @@ namespace stripgpu {
cudaDeviceSynchronize();
cudaCheck(cudaGetLastError());
#endif
-
-#ifdef GPU_DEBUG
- cudaStreamSynchronize(stream);
- auto clust_data = std::make_unique(clusters_d_, stream);
- cudaStreamSynchronize(stream);
-
- const auto clusterIndexLeft = clust_data->clusterIndex().get();
- const auto clusterSize = clust_data->clusterSize().get();
- const auto trueCluster = clust_data->trueCluster().get();
- const auto clusterADCs = clust_data->clusterADCs().get();
- const auto detids = clust_data->clusterDetId().get();
- const auto charge = clust_data->charge().get();
-
- const auto nSeedStripsNC = clusters_d_.nClusters();
- std::cout << "findClusterGPU nSeedStripsNC=" << nSeedStripsNC << std::endl;
-
- for (auto i = 0U; i < nSeedStripsNC; i++) {
- if (trueCluster[i]) {
- int left = clusterIndexLeft[i];
- uint32_t size = clusterSize[i];
- const auto detid = detids[i];
- std::cout << "i=" << i << " detId " << detid << " left " << left << " size " << size << " charge " << charge[i]
- << ": ";
- size = std::min(size, maxClusterSize_);
- for (uint32_t j = 0; j < size; j++) {
- std::cout << (unsigned int)clusterADCs[j * nSeedStripsNC + i] << " ";
- }
- std::cout << std::endl;
- }
- }
-#endif
}
void SiStripRawToClusterGPUKernel::setSeedStripsNCIndexGPU(const ConditionsDeviceView *conditions,
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.h b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.h
index a9c88b340b623..1629c400e0f72 100644
--- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.h
+++ b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.h
@@ -2,7 +2,7 @@
#define RecoLocalTracker_SiStripClusterizer_plugins_SiStripRawToClusterGPUKernel_h
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
+#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h"
#include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
//#include "clusterGPU.cuh"
@@ -45,7 +45,7 @@ namespace stripgpu {
const SiStripClusterizerConditionsGPU& conditions,
cudaStream_t stream);
void copyAsync(cudaStream_t stream);
- SiStripClustersCUDADevice getResults(cudaStream_t stream);
+ SiStripClustersSoADevice getResults(cudaStream_t stream);
private:
using ConditionsDeviceView = SiStripClusterizerConditionsGPU::Data::DeviceView;
@@ -67,7 +67,7 @@ namespace stripgpu {
cms::cuda::host::unique_ptr sst_data_d_;
cms::cuda::device::unique_ptr pt_sst_data_d_;
- SiStripClustersCUDADevice clusters_d_;
+ SiStripClustersSoADevice clusters_d_;
float channelThreshold_, seedThreshold_, clusterThresholdSquared_;
uint8_t maxSequentialHoles_, maxSequentialBad_, maxAdjacentBad_;
uint32_t maxClusterSize_;
diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/StripDataView.h b/RecoLocalTracker/SiStripClusterizer/plugins/StripDataView.h
index 785b714376e2a..e5b9b894d1f52 100644
--- a/RecoLocalTracker/SiStripClusterizer/plugins/StripDataView.h
+++ b/RecoLocalTracker/SiStripClusterizer/plugins/StripDataView.h
@@ -1,7 +1,6 @@
#ifndef RecoLocalTracker_SiStripClusterizer_plugins_StripDataView_h
#define RecoLocalTracker_SiStripClusterizer_plugins_StripDataView_h
-#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
#include "FWCore/Utilities/interface/HostDeviceConstant.h"
#include