cms-patatrack · fwyzard · Dec 7, 2018 · Dec 3, 2018 · Dec 4, 2018
diff --git a/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h
@@ -28,33 +28,20 @@ class SiPixelFedCablingMapGPUWrapper {
   // returns pointer to GPU memory
   const SiPixelFedCablingMapGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
 
+  // returns pointer to GPU memory
+  const unsigned char *getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const;
+  edm::cuda::device::unique_ptr<unsigned char[]> getModToUnpRegionalAsync(std::set<unsigned int> const& modules, cuda::stream_t<>& cudaStream) const;
 
-  // Allocates host and device memory, converts data to host memory,
-  // copies host memory to device memory asynchronously. It is the
-  // caller's responsibility to have this object to live until all
-  // operations on the device memory have completed.
-  class ModulesToUnpack {
-  public:
-    ModulesToUnpack(cuda::stream_t<>& cudaStream);
-    ~ModulesToUnpack() = default;
-
-    void fillAsync(SiPixelFedCablingMap const& cablingMap, std::set<unsigned int> const& modules, cuda::stream_t<>& cudaStream);
-
-    const unsigned char *get() const { return modToUnpDevice.get(); }
-
-  private:
-    edm::cuda::device::unique_ptr<unsigned char[]> modToUnpDevice;
-    edm::cuda::host::unique_ptr<unsigned char[]> modToUnpHost;
-  };
-
 private:
+  const SiPixelFedCablingMap *cablingMap_;
   std::vector<unsigned int,  CUDAHostAllocator<unsigned int>>  fedMap;
   std::vector<unsigned int,  CUDAHostAllocator<unsigned int>>  linkMap;
   std::vector<unsigned int,  CUDAHostAllocator<unsigned int>>  rocMap;
   std::vector<unsigned int,  CUDAHostAllocator<unsigned int>>  RawId;
   std::vector<unsigned int,  CUDAHostAllocator<unsigned int>>  rocInDet;
   std::vector<unsigned int,  CUDAHostAllocator<unsigned int>>  moduleId;
   std::vector<unsigned char, CUDAHostAllocator<unsigned char>> badRocs;
+  std::vector<unsigned char, CUDAHostAllocator<unsigned char>> modToUnpDefault;
   unsigned int size;
   bool hasQuality_;
 
@@ -64,6 +51,12 @@ class SiPixelFedCablingMapGPUWrapper {
     SiPixelFedCablingMapGPU *cablingMapDevice = nullptr; // same internal pointers as above, struct itself is on GPU
   };
   CUDAESProduct<GPUData> gpuData_;
+
+  struct ModulesToUnpack {
+    ~ModulesToUnpack();
+    unsigned char *modToUnpDefault = nullptr; // pointer to GPU
+  };
+  CUDAESProduct<ModulesToUnpack> modToUnp_;
 };
 
 

diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterHeterogeneous.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterHeterogeneous.cc
@@ -132,7 +132,6 @@ std::unique_ptr<PixelUnpackingRegions> regions_;
 
   edm::ESWatcher<SiPixelFedCablingMapRcd> recordWatcher;
   edm::ESWatcher<SiPixelQualityRcd> qualityWatcher;
-  bool recordWatcherUpdatedSinceLastTransfer_ = false;
 
   bool usePilotBlade;
   bool usePhase1;
@@ -276,7 +275,6 @@ const FEDRawDataCollection *SiPixelRawToClusterHeterogeneous::initialize(const e
     fedIds   = cablingMap->fedIds();
     cabling_ = cablingMap->cablingTree();
     LogDebug("map version:")<< cabling_->version();
-    recordWatcherUpdatedSinceLastTransfer_ = true;
   }
   // initialize quality record or update if necessary
   if (qualityWatcher.check( es )&&useQuality) {
@@ -462,17 +460,6 @@ void SiPixelRawToClusterHeterogeneous::produceCPU(edm::HeterogeneousEvent& ev, c
 void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEvent& ev, const edm::EventSetup& es, cuda::stream_t<>& cudaStream) {
   const auto buffers = initialize(ev.event(), es);
 
-  auto gpuModulesToUnpack = SiPixelFedCablingMapGPUWrapper::ModulesToUnpack(cudaStream);
-  if (regions_) {
-    std::set<unsigned int> modules = *(regions_->modulesToUnpack());
-    gpuModulesToUnpack.fillAsync(*cablingMap_, modules, cudaStream);
-  }
-  else if(recordWatcherUpdatedSinceLastTransfer_) {
-    // If regions_ are disabled, it is enough to fill and transfer only if cablingMap has changed
-    gpuModulesToUnpack.fillAsync(*cablingMap_, std::set<unsigned int>(), cudaStream);
-    recordWatcherUpdatedSinceLastTransfer_ = false;
-  }
-
   edm::ESHandle<SiPixelFedCablingMapGPUWrapper> hgpuMap;
   es.get<CkfComponentsRecord>().get(hgpuMap);
   if(hgpuMap->hasQuality() != useQuality) {
@@ -481,6 +468,17 @@ void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEv
   // get the GPU product already here so that the async transfer can begin
   const auto *gpuMap = hgpuMap->getGPUProductAsync(cudaStream);
 
+  edm::cuda::device::unique_ptr<unsigned char[]> modulesToUnpackRegional;
+  const unsigned char *gpuModulesToUnpack;
+  if (regions_) {
+    modulesToUnpackRegional = hgpuMap->getModToUnpRegionalAsync(*(regions_->modulesToUnpack()), cudaStream);
+    gpuModulesToUnpack = modulesToUnpackRegional.get();
+  }
+  else {
+    gpuModulesToUnpack = hgpuMap->getModToUnpAllAsync(cudaStream);
+  }
+
+
   edm::ESHandle<SiPixelGainCalibrationForHLTGPU> hgains;
   es.get<SiPixelGainCalibrationForHLTGPURcd>().get(hgains);
 
@@ -548,7 +546,7 @@ void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEv
 
   } // end of for loop
 
-  gpuAlgo_.makeClustersAsync(gpuMap, gpuModulesToUnpack.get(), hgains->getGPUProductAsync(cudaStream),
+  gpuAlgo_.makeClustersAsync(gpuMap, gpuModulesToUnpack, hgains->getGPUProductAsync(cudaStream),
                              wordFedAppender,
                              wordCounterGPU, fedCounter, convertADCtoElectrons,
                              useQuality, includeErrors, enableTransfer_, debug, cudaStream);

diff --git a/RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc b/RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc
@@ -21,9 +21,10 @@
 SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCablingMap const& cablingMap,
                                                                TrackerGeometry const& trackerGeom,
                                                                SiPixelQuality const *badPixelInfo):
+  cablingMap_(&cablingMap),
   fedMap(pixelgpudetails::MAX_SIZE), linkMap(pixelgpudetails::MAX_SIZE), rocMap(pixelgpudetails::MAX_SIZE),
   RawId(pixelgpudetails::MAX_SIZE), rocInDet(pixelgpudetails::MAX_SIZE), moduleId(pixelgpudetails::MAX_SIZE),
-  badRocs(pixelgpudetails::MAX_SIZE),
+  badRocs(pixelgpudetails::MAX_SIZE), modToUnpDefault(pixelgpudetails::MAX_SIZE),
   hasQuality_(badPixelInfo != nullptr)
 {
   std::vector<unsigned int> const& fedIds = cablingMap.fedIds();
@@ -46,6 +47,7 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
         if (pixelRoc != nullptr) {
           RawId[index] = pixelRoc->rawId();
           rocInDet[index] = pixelRoc->idInDetUnit();
+          modToUnpDefault[index] = false;
           if (badPixelInfo != nullptr)
             badRocs[index] = badPixelInfo->IsRocBad(pixelRoc->rawId(), pixelRoc->idInDetUnit());
           else
@@ -54,6 +56,7 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
           RawId[index] = 9999;
           rocInDet[index] = 9999;
           badRocs[index] = true;
+          modToUnpDefault[index] = true;
         }
         index++;
       }
@@ -124,16 +127,21 @@ const SiPixelFedCablingMapGPU *SiPixelFedCablingMapGPUWrapper::getGPUProductAsyn
   return data.cablingMapDevice;
 }
 
-SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::ModulesToUnpack(cuda::stream_t<>& cudaStream)
-{
-  edm::Service<CUDAService> cs;
-  modToUnpDevice = cs->make_device_unique<unsigned char[]>(pixelgpudetails::MAX_SIZE, cudaStream);
-  modToUnpHost = cs->make_host_unique<unsigned char[]>(pixelgpudetails::MAX_SIZE, cudaStream);
+const unsigned char *SiPixelFedCablingMapGPUWrapper::getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const {
+  const auto& data = modToUnp_.dataForCurrentDeviceAsync(cudaStream, [this](ModulesToUnpack& data, cuda::stream_t<>& stream) {
+      cudaCheck(cudaMalloc((void**) & data.modToUnpDefault, pixelgpudetails::MAX_SIZE_BYTE_BOOL));
+      cudaCheck(cudaMemcpyAsync(data.modToUnpDefault, this->modToUnpDefault.data(), this->modToUnpDefault.size() * sizeof(unsigned char), cudaMemcpyDefault, stream.id()));
+    });
+  return data.modToUnpDefault;
 }
 
-void SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::fillAsync(SiPixelFedCablingMap const& cablingMap, std::set<unsigned int> const& modules, cuda::stream_t<>& cudaStream) {
-  std::vector<unsigned int> const& fedIds = cablingMap.fedIds();
-  std::unique_ptr<SiPixelFedCablingTree> const& cabling = cablingMap.cablingTree();
+edm::cuda::device::unique_ptr<unsigned char[]> SiPixelFedCablingMapGPUWrapper::getModToUnpRegionalAsync(std::set<unsigned int> const& modules, cuda::stream_t<>& cudaStream) const {
+  edm::Service<CUDAService> cs;
+  auto modToUnpDevice = cs->make_device_unique<unsigned char[]>(pixelgpudetails::MAX_SIZE, cudaStream);
+  auto modToUnpHost = cs->make_host_unique<unsigned char[]>(pixelgpudetails::MAX_SIZE, cudaStream);
+
+  std::vector<unsigned int> const& fedIds = cablingMap_->fedIds();
+  std::unique_ptr<SiPixelFedCablingTree> const& cabling = cablingMap_->cablingTree();
 
   unsigned int startFed = *(fedIds.begin());
   unsigned int endFed   = *(fedIds.end() - 1);
@@ -157,6 +165,7 @@ void SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::fillAsync(SiPixelFedCablin
   }
 
   cuda::memory::async::copy(modToUnpDevice.get(), modToUnpHost.get(), pixelgpudetails::MAX_SIZE * sizeof(unsigned char), cudaStream.id());
+  return modToUnpDevice;
 }
 
 
@@ -173,3 +182,7 @@ SiPixelFedCablingMapGPUWrapper::GPUData::~GPUData() {
   }
   cudaCheck(cudaFree(cablingMapDevice));
 }
+
+SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::~ModulesToUnpack() {
+  cudaCheck(cudaFree(modToUnpDefault));
+}