@@ -27,15 +27,14 @@ class TrackingRecHitDevice : public PortableDeviceCollection<TrackingRecHitLayou
: PortableDeviceCollection<TrackingRecHitLayout<TrackerTraits>, TDev>(nHits, queue), offsetBPIX2_{offsetBPIX2} {
const auto device = alpaka::getDev(queue);

- auto start_h = cms::alpakatools::make_host_view(hitsModuleStart, TrackerTraits::numberOfModules + 1);
+ auto start_h = cms::alpakatools::make_device_view(device, hitsModuleStart, TrackerTraits::numberOfModules + 1);
auto start_d =
cms::alpakatools::make_device_view(device, view().hitsModuleStart().data(), TrackerTraits::numberOfModules + 1);
alpaka::memcpy(queue, start_d, start_h);

- auto off_h = cms::alpakatools::make_host_view(offsetBPIX2);
+ auto off_h = cms::alpakatools::make_host_view(offsetBPIX2_);
Contributor:

I'm afraid even this is not safe (unless the underlying GPU runtime+driver synchronize on the memcpy()). The TrackingRecHitDevice object, constructed in

TrackingRecHitsSoACollection<TrackerTraits> hits_d(queue, nHits, offsetBPIX2, clusters_d->clusModuleStart());

is moved around (the return from makeHitsAsync() may get elided, but in the end the framework moves the object into the edm::Wrapper<T>), so the address of offsetBPIX2_ is not stable.
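
To illustrate, a minimal stand-alone sketch (plain C++, no alpaka or framework types; the queue and class here are made up) of why recording the address of a member for a deferred copy breaks once the object is moved:

#include <cstdint>
#include <functional>
#include <iostream>
#include <utility>
#include <vector>

std::vector<std::function<void()>> queue;  // stand-in for an asynchronous device queue

struct HitsDevice {
  int32_t offsetBPIX2_;
  int32_t onDevice_ = 0;  // stand-in for the device-side destination

  explicit HitsDevice(int32_t offsetBPIX2) : offsetBPIX2_{offsetBPIX2} {
    // enqueue a copy that records addresses of members of *this* object;
    // it only executes later, when the queue is drained
    queue.push_back([src = &offsetBPIX2_, dst = &onDevice_] { *dst = *src; });
  }
};

int main() {
  HitsDevice a{42};
  HitsDevice b = std::move(a);       // like the framework moving the object into edm::Wrapper<T>
  for (auto& op : queue) op();       // the deferred copy still dereferences addresses inside a
  std::cout << b.onDevice_ << '\n';  // prints 0, not 42: the write went to the moved-from object
}

Here the moved-from object happens to still be alive, so the stale addresses are merely wrong; if it has been destroyed before the queue drains, the same pattern is a use-after-free.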

But is offsetBPIX2 really needed on the device? The only place I found was

alpaka::exec<Acc1D>(
queue,
cms::alpakatools::make_workdiv<Acc1D>(1, 1),
[] ALPAKA_FN_ACC(Acc1D const &acc,
OuterHitOfCell *isOuterHitOfCell,
OuterHitOfCellContainer *container,
int32_t const *offset) {
// this code runs on the device
isOuterHitOfCell->container = container;
isOuterHitOfCell->offset = *offset;
},
this->isOuterHitOfCell_.data(),
this->device_isOuterHitOfCell_.data(),
&hh.offsetBPIX2());

and everywhere else the offsetBPIX2 is used on the host. In this case it would be easy to pass it by value from host (as the calling function has it already and uses it for other purposes). Or are there future developments that would benefit from having offsetBPIX2 on the device as part of TrackingRecHitsDevice?

If offsetBPIX2 is really needed on the device, I think it should be copied through a host buffer.
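
For example, a sketch of staging through a host buffer, assuming the cms::alpakatools::make_host_buffer(queue) helpers used elsewhere in this PR, whose queue-associated allocation stays alive until the work submitted to the queue has completed:

auto off_h = cms::alpakatools::make_host_buffer<int32_t>(queue);
*off_h.data() = offsetBPIX2;  // the constructor argument, copied by value
auto off_d = cms::alpakatools::make_device_view(device, view().offsetBPIX2());
alpaka::memcpy(queue, off_d, off_h);
// no wait needed here: the source buffer's lifetime is tied to the queue, not to this object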

Contributor Author:

Mhm.

The CUDA version does not have a synchronisation, either:

explicit TrackingRecHitSoADevice(uint32_t nHits,
int32_t offsetBPIX2,
ParamsOnGPU const* cpeParams,
uint32_t const* hitsModuleStart,
cudaStream_t stream)
: cms::cuda::PortableDeviceCollection<TrackingRecHitLayout<TrackerTraits>>(nHits, stream),
offsetBPIX2_(offsetBPIX2) {
cudaCheck(cudaMemcpyAsync(&(view().nHits()), &nHits, sizeof(uint32_t), cudaMemcpyDefault, stream));
// hitsModuleStart is on Device
cudaCheck(cudaMemcpyAsync(view().hitsModuleStart().data(),
hitsModuleStart,
sizeof(uint32_t) * int(TrackerTraits::numberOfModules + 1),
cudaMemcpyDefault,
stream));
cudaCheck(cudaMemcpyAsync(&(view().offsetBPIX2()), &offsetBPIX2, sizeof(int32_t), cudaMemcpyDefault, stream));
// cpeParams argument is a pointer to device memory, copy
// its contents into the Layout.
cudaCheck(cudaMemcpyAsync(&(view().cpeParams()), cpeParams, int(sizeof(ParamsOnGPU)), cudaMemcpyDefault, stream));
}

Which is not to say that I disagree with your assessment, just that the async copy seems to work anyway?

(side note: for copying a small number of values, I'm wondering whether launching a kernel might be faster than calling a cudaMemcpy variant?)
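
For instance, a sketch of that alternative (hypothetical setOffset kernel): since kernel arguments are passed by value, the scalar travels inside the launch and no host address has to remain valid:

__global__ void setOffset(int32_t* dst, int32_t value) {
  *dst = value;  // a single thread writes the by-value argument into device memory
}

// in the constructor, replacing the cudaMemcpyAsync of offsetBPIX2:
setOffset<<<1, 1, 0, stream>>>(&(view().offsetBPIX2()), offsetBPIX2);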

As to whether it's needed or not, I agree it needs to be looked into.

For the time being, I'm more interested in recovering the performance of the CUDA implementation, but after that we can also take the opportunity to clean up the code.

Contributor:

I agree this PR is not worse than what is done in CUDA, so I'm not against merging it now (with the caveat that it is at least theoretically unsafe and should eventually be addressed).

> Which is not to say that I disagree with your assessment, just that the async copy seems to work anyway?

It could be that the copy has been quick enough to complete before anything touched the memory (really along the lines of "nothing else has overwritten the memory before the CUDA runtime+driver read from the memory location"). I'd expect the problem to manifest when the GPU is at least nearly full, and even then it is unclear whether it would crash the job or lead to an incorrect value being read (and whatever that would imply downstream).

Contributor Author:

> It could be that the copy has been quick enough to complete before anything touched the memory (really along the lines of "nothing else has overwritten the memory before the CUDA runtime+driver read from the memory location").

Actually, the CUDA documentation is not very clear about what "async memcpy" means:

  • for a synchronous copy it states that

    For transfers from pageable host memory to device memory, a stream sync is performed before the copy is initiated. The function will return once the pageable buffer has been copied to the staging memory for DMA transfer to device memory, but the DMA to final destination may not have completed.

  • for an asynchronous copy it states that

    If pageable memory must first be staged to pinned memory, the driver may synchronize with the stream and stage the copy into pinned memory.

So it's possible that the CUDA runtime may first perform a synchronous copy of offsetBPIX2_ to a staging area in pinned memory, and then an asynchronous copy from there to GPU memory.

Or not 🤷🏻‍♂️.
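
In any case, the staging can be made explicit rather than left to the driver, e.g. (a sketch with a hypothetical pinned scalar; the buffer must outlive the copy):

int32_t* offset_pinned;
cudaCheck(cudaMallocHost(&offset_pinned, sizeof(int32_t)));  // pinned host memory
*offset_pinned = offsetBPIX2;
cudaCheck(cudaMemcpyAsync(
    &(view().offsetBPIX2()), offset_pinned, sizeof(int32_t), cudaMemcpyHostToDevice, stream));
// offset_pinned must not be freed until the copy has completed,
// e.g. via a stream callback or after synchronizing the stream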

Contributor:

Right, the exact behavior is confusing, and I'd expect it to depend on the runtime and driver versions and/or the actual hardware. I'd personally assume the weakest guarantees and program around that.

(and with Alpaka we should worry about the behavior of other platforms too)

auto off_d = cms::alpakatools::make_device_view(device, view().offsetBPIX2());
alpaka::memcpy(queue, off_d, off_h);
alpaka::wait(queue);
}

uint32_t nHits() const { return view().metadata().size(); }
DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc (11 changes: 7 additions & 4 deletions)
@@ -34,12 +34,15 @@ int main() {
{
uint32_t nHits = 2000;
int32_t offset = 100;
- uint32_t moduleStart[pixelTopology::Phase1::numberOfModules + 1];

+ auto moduleStartH =
+     cms::alpakatools::make_host_buffer<uint32_t[]>(queue, pixelTopology::Phase1::numberOfModules + 1);
for (size_t i = 0; i < pixelTopology::Phase1::numberOfModules + 1; ++i) {
-   moduleStart[i] = i * 2;
+   moduleStartH[i] = i * 2;
}
- TrackingRecHitsSoACollection<pixelTopology::Phase1> tkhit(queue, nHits, offset, moduleStart);
+ auto moduleStartD =
+     cms::alpakatools::make_device_buffer<uint32_t[]>(queue, pixelTopology::Phase1::numberOfModules + 1);
+ alpaka::memcpy(queue, moduleStartD, moduleStartH);
+ TrackingRecHitsSoACollection<pixelTopology::Phase1> tkhit(queue, nHits, offset, moduleStartD.data());

testTrackingRecHitSoA::runKernels<pixelTopology::Phase1>(tkhit.view(), queue);
tkhit.updateFromDevice(queue);
@@ -257,7 +257,7 @@ namespace cms::alpakatools {
nOnes,
nblocks,
ppsws,
- alpaka::getWarpSizes(alpaka::getDev(queue))[0]);
+ alpaka::getPreferredWarpSize(alpaka::getDev(queue)));
} else {
h->finalize();
}
@@ -147,7 +147,7 @@ int main() {
for (auto const& device : devices) {
std::cout << "Test prefix scan on " << alpaka::getName(device) << '\n';
auto queue = Queue(device);
- const auto warpSize = alpaka::getWarpSizes(device)[0];
+ const auto warpSize = alpaka::getPreferredWarpSize(device);
// WARP PREFIXSCAN (OBVIOUSLY GPU-ONLY)
if constexpr (!requires_single_thread_per_block_v<Acc1D>) {
std::cout << "warp level" << std::endl;
@@ -137,12 +137,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {

template <typename TrackerTraits>
void SiPixelRawToCluster<TrackerTraits>::acquire(device::Event const& iEvent, device::EventSetup const& iSetup) {
- [[maybe_unused]] auto const& hMap = iSetup.getData(mapToken_);
+ auto const& hMap = iSetup.getData(mapToken_);
auto const& dGains = iSetup.getData(gainsToken_);
auto gains = SiPixelGainCalibrationForHLTDevice(1, iEvent.queue());
- auto modulesToUnpackRegional =
-     cms::alpakatools::make_device_buffer<unsigned char[]>(iEvent.queue(), ::pixelgpudetails::MAX_SIZE);
- const unsigned char* modulesToUnpack;

// initialize cabling map or update if necessary
if (recordWatcher_.check(iSetup)) {
// cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel)
@@ -151,6 +148,10 @@
cabling_ = cablingMap_->cablingTree();
LogDebug("map version:") << cablingMap_->version();
}

+ // if used, the buffer is guaranteed to stay alive until the execution of makePhase1ClustersAsync completes
+ std::optional<cms::alpakatools::device_buffer<Device, unsigned char[]>> modulesToUnpackRegional;
+ const unsigned char* modulesToUnpack;
if (regions_) {
regions_->run(iEvent, iSetup);
LogDebug("SiPixelRawToCluster") << "region2unpack #feds: " << regions_->nFEDs();
@@ -159,7 +160,7 @@

modulesToUnpackRegional = SiPixelMappingUtilities::getModToUnpRegionalAsync(
*(regions_->modulesToUnpack()), cabling_.get(), fedIds_, iEvent.queue());
- modulesToUnpack = modulesToUnpackRegional.data();
+ modulesToUnpack = modulesToUnpackRegional->data();
} else {
modulesToUnpack = hMap->modToUnpDefault();
}
@@ -235,7 +236,7 @@
return;

// copy the FED data to a single cpu buffer
- pixelDetails::WordFedAppender wordFedAppender(nDigis_);
+ pixelDetails::WordFedAppender wordFedAppender(iEvent.queue(), nDigis_);
for (uint32_t i = 0; i < fedIds_.size(); ++i) {
wordFedAppender.initializeWordFed(fedIds_[i], index[i], start[i], words[i]);
}
@@ -122,12 +122,9 @@

class WordFedAppender {
public:
- WordFedAppender();
~WordFedAppender() = default;

- WordFedAppender(uint32_t words)
-     : word_{cms::alpakatools::make_host_buffer<unsigned int[], Platform>(words)},
-       fedId_{cms::alpakatools::make_host_buffer<unsigned char[], Platform>(words)} {};
+ WordFedAppender(Queue& queue, uint32_t words)
+     : word_{cms::alpakatools::make_host_buffer<unsigned int[]>(queue, words)},
+       fedId_{cms::alpakatools::make_host_buffer<unsigned char[]>(queue, words)} {};

void initializeWordFed(int fedId, unsigned int wordCounterGPU, const uint32_t* src, unsigned int length) {
std::memcpy(word_.data() + wordCounterGPU, src, sizeof(uint32_t) * length);