diff --git a/CUDADataFormats/SiStripCluster/BuildFile.xml b/CUDADataFormats/SiStripCluster/BuildFile.xml index 5e401d215c4eb..4231da407f300 100644 --- a/CUDADataFormats/SiStripCluster/BuildFile.xml +++ b/CUDADataFormats/SiStripCluster/BuildFile.xml @@ -1,6 +1,7 @@ + diff --git a/CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h b/CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h deleted file mode 100644 index f64b8a533d513..0000000000000 --- a/CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef CUDADataFormats_SiStripCluster_interface_SiStripClustersCUDA_h -#define CUDADataFormats_SiStripCluster_interface_SiStripClustersCUDA_h - -#include "DataFormats/SiStripCluster/interface/SiStripClustersSOABase.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" - -#include - -class SiStripClustersCUDADevice : public SiStripClustersSOABase { -public: - SiStripClustersCUDADevice() = default; - explicit SiStripClustersCUDADevice(uint32_t maxClusters, uint32_t maxStripsPerCluster, cudaStream_t stream); - ~SiStripClustersCUDADevice() override = default; - - SiStripClustersCUDADevice(const SiStripClustersCUDADevice &) = delete; - SiStripClustersCUDADevice &operator=(const SiStripClustersCUDADevice &) = delete; - SiStripClustersCUDADevice(SiStripClustersCUDADevice &&) = default; - SiStripClustersCUDADevice &operator=(SiStripClustersCUDADevice &&) = default; - - struct DeviceView { - uint32_t *clusterIndex_; - uint32_t *clusterSize_; - uint8_t *clusterADCs_; - stripgpu::detId_t *clusterDetId_; - stripgpu::stripId_t *firstStrip_; - bool *trueCluster_; - float *barycenter_; - float *charge_; - uint32_t nClusters_; - uint32_t maxClusterSize_; - }; - - DeviceView *view() const { return view_d.get(); } - uint32_t nClusters() const { return nClusters_; } - uint32_t *nClustersPtr() { return &nClusters_; } - uint32_t maxClusterSize() const { return maxClusterSize_; } - uint32_t *maxClusterSizePtr() { return &maxClusterSize_; } - -private: - cms::cuda::device::unique_ptr view_d; // "me" pointer - uint32_t nClusters_; - uint32_t maxClusterSize_; -}; - -class SiStripClustersCUDAHost : public SiStripClustersSOABase { -public: - SiStripClustersCUDAHost() = default; - explicit SiStripClustersCUDAHost(const SiStripClustersCUDADevice &clusters_d, cudaStream_t stream); - ~SiStripClustersCUDAHost() override = default; - - SiStripClustersCUDAHost(const SiStripClustersCUDAHost &) = delete; - SiStripClustersCUDAHost &operator=(const SiStripClustersCUDAHost &) = delete; - SiStripClustersCUDAHost(SiStripClustersCUDAHost &&) = default; - SiStripClustersCUDAHost &operator=(SiStripClustersCUDAHost &&) = default; -}; - -#endif diff --git a/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h b/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h new file mode 100644 index 0000000000000..5e81448e8edfa --- /dev/null +++ b/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h @@ -0,0 +1,35 @@ +#ifndef CUDADataFormats_SiStripCluster_interface_SiStripClustersSoADevice_h +#define CUDADataFormats_SiStripCluster_interface_SiStripClustersSoADevice_h + +#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" +#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAUtilities.h" + +class SiStripClustersSoADevice : public cms::cuda::PortableDeviceCollection { +public: + using cms::cuda::PortableDeviceCollection::view; + using cms::cuda::PortableDeviceCollection::const_view; + using cms::cuda::PortableDeviceCollection::buffer; + using cms::cuda::PortableDeviceCollection::bufferSize; + + SiStripClustersSoADevice() = default; + ~SiStripClustersSoADevice() = default; + + explicit SiStripClustersSoADevice(uint32_t maxClusters, cudaStream_t stream) + : cms::cuda::PortableDeviceCollection(maxClusters, stream), maxClusters_{maxClusters} { + const uint32_t maxStripsPerCluster = SiStripClustersSoA::maxStripsPerCluster; //768 + cudaCheck( + cudaMemcpyAsync(&(view().maxClusterSize()), &maxStripsPerCluster, sizeof(uint32_t), cudaMemcpyDefault, stream)); + }; + + SiStripClustersSoADevice(const SiStripClustersSoADevice &&) = delete; + SiStripClustersSoADevice &operator=(const SiStripClustersSoADevice &&) = delete; + SiStripClustersSoADevice(SiStripClustersSoADevice &&) = default; + SiStripClustersSoADevice &operator=(SiStripClustersSoADevice &&) = default; + + uint32_t maxClusters() const { return maxClusters_; } + +private: + uint32_t maxClusters_; +}; + +#endif diff --git a/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAHost.h b/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAHost.h new file mode 100644 index 0000000000000..600282092d07f --- /dev/null +++ b/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAHost.h @@ -0,0 +1,28 @@ +#ifndef CUDADataFormats_SiStripCluster_interface_SiStripClustersSoAHost_h +#define CUDADataFormats_SiStripCluster_interface_SiStripClustersSoAHost_h + +#include "CUDADataFormats/Common/interface/PortableHostCollection.h" +#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAUtilities.h" + +class SiStripClustersSoAHost : public cms::cuda::PortableHostCollection { +public: + using cms::cuda::PortableHostCollection::view; + using cms::cuda::PortableHostCollection::const_view; + using cms::cuda::PortableHostCollection::buffer; + using cms::cuda::PortableHostCollection::bufferSize; + + SiStripClustersSoAHost() = default; + ~SiStripClustersSoAHost() = default; + + explicit SiStripClustersSoAHost(uint32_t maxClusters, cudaStream_t stream) + : PortableHostCollection(maxClusters, stream){}; + + SiStripClustersSoAHost(const SiStripClustersSoAHost &&) = delete; + SiStripClustersSoAHost &operator=(const SiStripClustersSoAHost &&) = delete; + SiStripClustersSoAHost(SiStripClustersSoAHost &&) = default; + SiStripClustersSoAHost &operator=(SiStripClustersSoAHost &&) = default; + +private: +}; + +#endif diff --git a/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAUtilities.h b/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAUtilities.h new file mode 100644 index 0000000000000..55827946d8e62 --- /dev/null +++ b/CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAUtilities.h @@ -0,0 +1,27 @@ +#ifndef CUDADataFormats_SiStripCluster_interface_SiStripClustersSoAUtilities_h +#define CUDADataFormats_SiStripCluster_interface_SiStripClustersSoAUtilities_h + +#include "DataFormats/SiStripCluster/interface/SiStripTypes.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +struct SiStripClustersSoA { + const static auto maxStripsPerCluster = 768; + using clusterADCsColumn = std::array; + GENERATE_SOA_LAYOUT(SiStripClustersSoALayout, + SOA_COLUMN(uint32_t, clusterIndex), + SOA_COLUMN(uint32_t, clusterSize), + SOA_COLUMN(clusterADCsColumn, clusterADCs), + SOA_COLUMN(stripgpu::detId_t, clusterDetId), + SOA_COLUMN(stripgpu::stripId_t, firstStrip), + SOA_COLUMN(bool, trueCluster), + SOA_COLUMN(float, barycenter), + SOA_COLUMN(float, charge), + SOA_SCALAR(uint32_t, nClusters), + SOA_SCALAR(uint32_t, maxClusterSize)); +}; + +using SiStripClustersLayout = typename SiStripClustersSoA::SiStripClustersSoALayout<>; +using SiStripClustersView = typename SiStripClustersSoA::SiStripClustersSoALayout<>::View; +using SiStripClustersConstView = typename SiStripClustersSoA::SiStripClustersSoALayout<>::ConstView; + +#endif diff --git a/CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc b/CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc deleted file mode 100644 index 220456760476a..0000000000000 --- a/CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc +++ /dev/null @@ -1,59 +0,0 @@ -#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -SiStripClustersCUDADevice::SiStripClustersCUDADevice(uint32_t maxClusters, - uint32_t maxStripsPerCluster, - cudaStream_t stream) { - maxClusterSize_ = maxStripsPerCluster; - - clusterIndex_ = cms::cuda::make_device_unique(maxClusters, stream); - clusterSize_ = cms::cuda::make_device_unique(maxClusters, stream); - clusterADCs_ = cms::cuda::make_device_unique(maxClusters * maxStripsPerCluster, stream); - clusterDetId_ = cms::cuda::make_device_unique(maxClusters, stream); - firstStrip_ = cms::cuda::make_device_unique(maxClusters, stream); - trueCluster_ = cms::cuda::make_device_unique(maxClusters, stream); - barycenter_ = cms::cuda::make_device_unique(maxClusters, stream); - charge_ = cms::cuda::make_device_unique(maxClusters, stream); - - auto view = cms::cuda::make_host_unique(stream); - view->clusterIndex_ = clusterIndex_.get(); - view->clusterSize_ = clusterSize_.get(); - view->clusterADCs_ = clusterADCs_.get(); - view->clusterDetId_ = clusterDetId_.get(); - view->firstStrip_ = firstStrip_.get(); - view->trueCluster_ = trueCluster_.get(); - view->barycenter_ = barycenter_.get(); - view->charge_ = charge_.get(); - view->maxClusterSize_ = maxStripsPerCluster; - - view_d = cms::cuda::make_device_unique(stream); - cms::cuda::copyAsync(view_d, view, stream); -#ifdef GPU_CHECK - cudaCheck(cudaStreamSynchronize(stream)); -#endif -} - -SiStripClustersCUDAHost::SiStripClustersCUDAHost(const SiStripClustersCUDADevice& clusters_d, cudaStream_t stream) { - nClusters_ = clusters_d.nClusters(); - maxClusterSize_ = clusters_d.maxClusterSize(); - clusterIndex_ = cms::cuda::make_host_unique(nClusters_, stream); - clusterSize_ = cms::cuda::make_host_unique(nClusters_, stream); - clusterADCs_ = cms::cuda::make_host_unique(nClusters_ * maxClusterSize_, stream); - clusterDetId_ = cms::cuda::make_host_unique(nClusters_, stream); - firstStrip_ = cms::cuda::make_host_unique(nClusters_, stream); - trueCluster_ = cms::cuda::make_host_unique(nClusters_, stream); - barycenter_ = cms::cuda::make_host_unique(nClusters_, stream); - charge_ = cms::cuda::make_host_unique(nClusters_, stream); - - cms::cuda::copyAsync(clusterIndex_, clusters_d.clusterIndex(), nClusters_, stream); - cms::cuda::copyAsync(clusterSize_, clusters_d.clusterSize(), nClusters_, stream); - cms::cuda::copyAsync(clusterADCs_, clusters_d.clusterADCs(), nClusters_ * maxClusterSize_, stream); - cms::cuda::copyAsync(clusterDetId_, clusters_d.clusterDetId(), nClusters_, stream); - cms::cuda::copyAsync(firstStrip_, clusters_d.firstStrip(), nClusters_, stream); - cms::cuda::copyAsync(trueCluster_, clusters_d.trueCluster(), nClusters_, stream); - cms::cuda::copyAsync(barycenter_, clusters_d.barycenter(), nClusters_, stream); - cms::cuda::copyAsync(charge_, clusters_d.charge(), nClusters_, stream); -#ifdef GPU_CHECK - cudaCheck(cudaStreamSynchronize(stream)); -#endif -} diff --git a/CUDADataFormats/SiStripCluster/src/classes.h b/CUDADataFormats/SiStripCluster/src/classes.h index b38f397dee067..f47527f3e0859 100644 --- a/CUDADataFormats/SiStripCluster/src/classes.h +++ b/CUDADataFormats/SiStripCluster/src/classes.h @@ -2,7 +2,8 @@ #define CUDADataFormats_SiStripCluster_classes_h #include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h" +#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h" +#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAHost.h" #include "DataFormats/Common/interface/Wrapper.h" #endif diff --git a/CUDADataFormats/SiStripCluster/src/classes_def.xml b/CUDADataFormats/SiStripCluster/src/classes_def.xml index 3c2f3ab27c620..3ecd85707c802 100644 --- a/CUDADataFormats/SiStripCluster/src/classes_def.xml +++ b/CUDADataFormats/SiStripCluster/src/classes_def.xml @@ -1,6 +1,13 @@ - - - - + + + + + + + + + + + diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/ClustersFromRawProducerGPU.cc b/RecoLocalTracker/SiStripClusterizer/plugins/ClustersFromRawProducerGPU.cc index 2d7b4c83a4a4a..31452a8a57345 100644 --- a/RecoLocalTracker/SiStripClusterizer/plugins/ClustersFromRawProducerGPU.cc +++ b/RecoLocalTracker/SiStripClusterizer/plugins/ClustersFromRawProducerGPU.cc @@ -68,7 +68,7 @@ class SiStripClusterizerFromRawGPU final : public edm::stream::EDProducer("Clusterizer")), inputToken_(consumes(conf.getParameter("ProductLabel"))), - outputToken_(produces>()), + outputToken_(produces>()), conditionsToken_(esConsumes(edm::ESInputTag{"", conf.getParameter("ConditionsLabel")})), cpuConditionsToken_(esConsumes(edm::ESInputTag{"", conf.getParameter("ConditionsLabel")})) {} @@ -91,7 +91,6 @@ class SiStripClusterizerFromRawGPU final : public edm::stream::EDProducer. cms::cuda::Product stores also // the current device and the CUDA stream since those will be needed // in the consumer side. - ctx.emplace(ev, outputToken_, gpuAlgo_.getResults(ctx.stream())); + ctx.emplace(ev, outputToken_, std::move(gpuAlgo_.getResults(ctx.stream()))); for (auto& buf : buffers_) buf.reset(nullptr); @@ -122,7 +121,7 @@ class SiStripClusterizerFromRawGPU final : public edm::stream::EDProducer inputToken_; - edm::EDPutTokenT> outputToken_; + edm::EDPutTokenT> outputToken_; edm::ESGetToken conditionsToken_; edm::ESGetToken cpuConditionsToken_; }; diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersFromSOA.cc b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersFromSOA.cc index 8b891382085c7..6a666c7f8ba9a 100644 --- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersFromSOA.cc +++ b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersFromSOA.cc @@ -13,14 +13,17 @@ #include "FWCore/Framework/interface/ESHandle.h" #include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h" +#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAHost.h" +#include "CUDADataFormats/Common/interface/Product.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include #include class SiStripClustersFromSOA final : public edm::stream::EDProducer<> { public: explicit SiStripClustersFromSOA(const edm::ParameterSet& conf) - : inputToken_(consumes(conf.getParameter("ProductLabel"))), + : inputToken_(consumes(conf.getParameter("ProductLabel"))), outputToken_(produces>()) {} static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -34,12 +37,12 @@ class SiStripClustersFromSOA final : public edm::stream::EDProducer<> { void produce(edm::Event& ev, const edm::EventSetup& es) override { const auto& clust_data = ev.get(inputToken_); - const int nSeedStripsNC = clust_data.nClusters(); - const auto clusterSize = clust_data.clusterSize().get(); - const auto clusterADCs = clust_data.clusterADCs().get(); - const auto detIDs = clust_data.clusterDetId().get(); - const auto stripIDs = clust_data.firstStrip().get(); - const auto trueCluster = clust_data.trueCluster().get(); + const int nSeedStripsNC = clust_data->nClusters(); + const auto clusterSize = clust_data->clusterSize(); + const auto clusterADCs = clust_data->clusterADCs(); + const auto detIDs = clust_data->clusterDetId(); + const auto stripIDs = clust_data->firstStrip(); + const auto trueCluster = clust_data->trueCluster(); const unsigned int initSeedStripsSize = 15000; @@ -62,7 +65,7 @@ class SiStripClustersFromSOA final : public edm::stream::EDProducer<> { adcs.reserve(size); for (uint32_t j = 0; j < size; ++j) { - adcs.push_back(clusterADCs[i + j * nSeedStripsNC]); + adcs.push_back(clusterADCs[i][j]); } record.push_back(SiStripCluster(firstStrip, std::move(adcs))); } @@ -75,7 +78,7 @@ class SiStripClustersFromSOA final : public edm::stream::EDProducer<> { } private: - edm::EDGetTokenT inputToken_; + edm::EDGetTokenT inputToken_; edm::EDPutTokenT> outputToken_; }; diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersSOAtoHost.cc b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersSOAtoHost.cc index a51bb1216d0e0..e3f3c9b65873f 100644 --- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersSOAtoHost.cc +++ b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripClustersSOAtoHost.cc @@ -14,28 +14,37 @@ #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h" +#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h" +#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoAHost.h" #include class SiStripSOAtoHost { public: SiStripSOAtoHost() = default; - void makeAsync(const SiStripClustersCUDADevice& clusters_d, cudaStream_t stream) { - hostView_ = std::make_unique(clusters_d, stream); + void makeAsync(const SiStripClustersSoADevice& clusters_d, cudaStream_t stream) { + maxClusters_ = clusters_d.maxClusters(); + clusters_h_ = SiStripClustersSoAHost(maxClusters_, stream); + cudaCheck(cudaMemcpyAsync(clusters_h_.buffer().get(), + clusters_d.const_buffer().get(), + clusters_d.bufferSize(), + cudaMemcpyDeviceToHost, + stream)); // Copy data from Device to Host } - std::unique_ptr getResults() { return std::move(hostView_); } + + SiStripClustersSoAHost getResults() { return std::move(clusters_h_); } private: - std::unique_ptr hostView_; + SiStripClustersSoAHost clusters_h_; + uint32_t maxClusters_; }; class SiStripClustersSOAtoHost final : public edm::stream::EDProducer { public: explicit SiStripClustersSOAtoHost(const edm::ParameterSet& conf) : inputToken_( - consumes>(conf.getParameter("ProductLabel"))), - outputToken_(produces()) {} + consumes>(conf.getParameter("ProductLabel"))), + outputToken_(produces()) {} static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; @@ -63,13 +72,13 @@ class SiStripClustersSOAtoHost final : public edm::stream::EDProducer> inputToken_; - edm::EDPutTokenT outputToken_; + edm::EDGetTokenT> inputToken_; + edm::EDPutTokenT outputToken_; }; #include "FWCore/Framework/interface/MakerMacros.h" diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cc b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cc index a8b9aa04a00b3..fbb8de0c3beb0 100644 --- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cc +++ b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cc @@ -6,6 +6,8 @@ #include "SiStripRawToClusterGPUKernel.h" +#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h" + #include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h" #include "ChannelLocsGPU.h" #include "StripDataView.h" @@ -126,6 +128,7 @@ namespace stripgpu { const auto& condGPU = conditions.getGPUProductAsync(stream); unpackChannelsGPU(condGPU.deviceView(), stream); + #ifdef GPU_CHECK cudaCheck(cudaStreamSynchronize(stream)); #endif @@ -166,13 +169,13 @@ namespace stripgpu { allocateSSTDataGPU(n_strips, stream); setSeedStripsNCIndexGPU(condGPU.deviceView(), stream); - clusters_d_ = SiStripClustersCUDADevice(kMaxSeedStrips, maxClusterSize_, stream); + clusters_d_ = SiStripClustersSoADevice(kMaxSeedStrips, stream); findClusterGPU(condGPU.deviceView(), stream); stripdata_.reset(); } - SiStripClustersCUDADevice SiStripRawToClusterGPUKernel::getResults(cudaStream_t stream) { + SiStripClustersSoADevice SiStripRawToClusterGPUKernel::getResults(cudaStream_t stream) { reset(); return std::move(clusters_d_); diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cu b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cu index c88d5a60ec86b..63646ce83703f 100644 --- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cu @@ -9,8 +9,6 @@ #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h" - #include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h" #include "ChannelLocsGPU.h" @@ -150,7 +148,7 @@ __global__ static void setStripIndexGPU(StripDataView *sst_data_d) { __global__ static void findLeftRightBoundaryGPU(const StripDataView *sst_data_d, const ConditionsDeviceView *conditions, - SiStripClustersCUDADevice::DeviceView *clust_data_d) { + SiStripClustersView clust_data_d) { const int nStrips = sst_data_d->nStrips; const int *__restrict__ seedStripsNCIndex = sst_data_d->seedStripsNCIndex; const auto __restrict__ chanlocs = sst_data_d->chanlocs; @@ -163,11 +161,11 @@ __global__ static void findLeftRightBoundaryGPU(const StripDataView *sst_data_d, const float clusterThresholdSquared = sst_data_d->clusterThresholdSquared; const int clusterSizeLimit = sst_data_d->clusterSizeLimit; - auto __restrict__ clusterIndexLeft = clust_data_d->clusterIndex_; - auto __restrict__ clusterSize = clust_data_d->clusterSize_; - auto __restrict__ clusterDetId = clust_data_d->clusterDetId_; - auto __restrict__ firstStrip = clust_data_d->firstStrip_; - auto __restrict__ trueCluster = clust_data_d->trueCluster_; + auto __restrict__ clusterIndexLeft = clust_data_d.clusterIndex(); + auto __restrict__ clusterSize = clust_data_d.clusterSize(); + auto __restrict__ clusterDetId = clust_data_d.clusterDetId(); + auto __restrict__ firstStrip = clust_data_d.firstStrip(); + auto __restrict__ trueCluster = clust_data_d.trueCluster(); const int tid = threadIdx.x; const int bid = blockIdx.x; @@ -273,26 +271,26 @@ __global__ static void findLeftRightBoundaryGPU(const StripDataView *sst_data_d, (noiseSquared_i * clusterThresholdSquared <= adcSum_i * adcSum_i) and (clusterSize[i] <= clusterSizeLimit); } // i < nSeedStripsNC if (first == 0) { - clust_data_d->nClusters_ = nSeedStripsNC; + clust_data_d.nClusters() = nSeedStripsNC; } } __global__ static void checkClusterConditionGPU(StripDataView *sst_data_d, const ConditionsDeviceView *conditions, - SiStripClustersCUDADevice::DeviceView *clust_data_d) { + SiStripClustersView clust_data_d) { const uint16_t *__restrict__ stripId = sst_data_d->stripId; const auto __restrict__ chanlocs = sst_data_d->chanlocs; const uint16_t *__restrict__ channels = sst_data_d->channel; const uint8_t *__restrict__ adc = sst_data_d->adc; const float minGoodCharge = sst_data_d->minGoodCharge; //1620.0; - const auto nSeedStripsNC = clust_data_d->nClusters_; - const auto __restrict__ clusterIndexLeft = clust_data_d->clusterIndex_; + const auto nSeedStripsNC = clust_data_d.nClusters(); + const auto __restrict__ clusterIndexLeft = clust_data_d.clusterIndex(); - auto __restrict__ clusterSize = clust_data_d->clusterSize_; - auto __restrict__ clusterADCs = clust_data_d->clusterADCs_; - auto __restrict__ trueCluster = clust_data_d->trueCluster_; - auto __restrict__ barycenter = clust_data_d->barycenter_; - auto __restrict__ charge = clust_data_d->charge_; + auto __restrict__ clusterSize = clust_data_d.clusterSize(); + auto __restrict__ clusterADCs = clust_data_d.clusterADCs(); + auto __restrict__ trueCluster = clust_data_d.trueCluster(); + auto __restrict__ barycenter = clust_data_d.barycenter(); + auto __restrict__ charge = clust_data_d.charge(); constexpr uint16_t stripIndexMask = 0x7FFF; @@ -341,7 +339,7 @@ __global__ static void checkClusterConditionGPU(StripDataView *sst_data_d, (charge > charge_high_saturation ? adc_high_saturation : (charge > charge_low_saturation ? adc_low_saturation : charge)); } - clusterADCs[j * nSeedStripsNC + i] = adc_j; + clusterADCs[i][j] = adc_j; adcSum += static_cast(adc_j); sumx += j * adc_j; @@ -431,19 +429,16 @@ namespace stripgpu { #endif auto clust_data_d = clusters_d_.view(); + findLeftRightBoundaryGPU<<>>(pt_sst_data_d_.get(), conditions, clust_data_d); + cudaCheck(cudaGetLastError()); + #ifdef GPU_CHECK cudaDeviceSynchronize(); cudaCheck(cudaGetLastError()); #endif - cudaCheck(cudaMemcpyAsync(clusters_d_.nClustersPtr(), - &(clust_data_d->nClusters_), - sizeof(clust_data_d->nClusters_), - cudaMemcpyDeviceToHost, - stream)); - checkClusterConditionGPU<<>>(pt_sst_data_d_.get(), conditions, clust_data_d); cudaCheck(cudaGetLastError()); @@ -451,37 +446,6 @@ namespace stripgpu { cudaDeviceSynchronize(); cudaCheck(cudaGetLastError()); #endif - -#ifdef GPU_DEBUG - cudaStreamSynchronize(stream); - auto clust_data = std::make_unique(clusters_d_, stream); - cudaStreamSynchronize(stream); - - const auto clusterIndexLeft = clust_data->clusterIndex().get(); - const auto clusterSize = clust_data->clusterSize().get(); - const auto trueCluster = clust_data->trueCluster().get(); - const auto clusterADCs = clust_data->clusterADCs().get(); - const auto detids = clust_data->clusterDetId().get(); - const auto charge = clust_data->charge().get(); - - const auto nSeedStripsNC = clusters_d_.nClusters(); - std::cout << "findClusterGPU nSeedStripsNC=" << nSeedStripsNC << std::endl; - - for (auto i = 0U; i < nSeedStripsNC; i++) { - if (trueCluster[i]) { - int left = clusterIndexLeft[i]; - uint32_t size = clusterSize[i]; - const auto detid = detids[i]; - std::cout << "i=" << i << " detId " << detid << " left " << left << " size " << size << " charge " << charge[i] - << ": "; - size = std::min(size, maxClusterSize_); - for (uint32_t j = 0; j < size; j++) { - std::cout << (unsigned int)clusterADCs[j * nSeedStripsNC + i] << " "; - } - std::cout << std::endl; - } - } -#endif } void SiStripRawToClusterGPUKernel::setSeedStripsNCIndexGPU(const ConditionsDeviceView *conditions, diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.h b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.h index a9c88b340b623..1629c400e0f72 100644 --- a/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.h @@ -2,7 +2,7 @@ #define RecoLocalTracker_SiStripClusterizer_plugins_SiStripRawToClusterGPUKernel_h #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" -#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h" +#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersSoADevice.h" #include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h" //#include "clusterGPU.cuh" @@ -45,7 +45,7 @@ namespace stripgpu { const SiStripClusterizerConditionsGPU& conditions, cudaStream_t stream); void copyAsync(cudaStream_t stream); - SiStripClustersCUDADevice getResults(cudaStream_t stream); + SiStripClustersSoADevice getResults(cudaStream_t stream); private: using ConditionsDeviceView = SiStripClusterizerConditionsGPU::Data::DeviceView; @@ -67,7 +67,7 @@ namespace stripgpu { cms::cuda::host::unique_ptr sst_data_d_; cms::cuda::device::unique_ptr pt_sst_data_d_; - SiStripClustersCUDADevice clusters_d_; + SiStripClustersSoADevice clusters_d_; float channelThreshold_, seedThreshold_, clusterThresholdSquared_; uint8_t maxSequentialHoles_, maxSequentialBad_, maxAdjacentBad_; uint32_t maxClusterSize_; diff --git a/RecoLocalTracker/SiStripClusterizer/plugins/StripDataView.h b/RecoLocalTracker/SiStripClusterizer/plugins/StripDataView.h index 785b714376e2a..e5b9b894d1f52 100644 --- a/RecoLocalTracker/SiStripClusterizer/plugins/StripDataView.h +++ b/RecoLocalTracker/SiStripClusterizer/plugins/StripDataView.h @@ -1,7 +1,6 @@ #ifndef RecoLocalTracker_SiStripClusterizer_plugins_StripDataView_h #define RecoLocalTracker_SiStripClusterizer_plugins_StripDataView_h -#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h" #include "FWCore/Utilities/interface/HostDeviceConstant.h" #include