Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,33 +28,20 @@ class SiPixelFedCablingMapGPUWrapper {
// returns pointer to GPU memory
const SiPixelFedCablingMapGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;

// returns pointer to GPU memory
const unsigned char *getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const;
edm::cuda::device::unique_ptr<unsigned char[]> getModToUnpRegionalAsync(std::set<unsigned int> const& modules, cuda::stream_t<>& cudaStream) const;

// Allocates host and device memory, converts data to host memory,
// copies host memory to device memory asynchronously. It is the
// caller's responsibility to have this object to live until all
// operations on the device memory have completed.
class ModulesToUnpack {
public:
ModulesToUnpack(cuda::stream_t<>& cudaStream);
~ModulesToUnpack() = default;

void fillAsync(SiPixelFedCablingMap const& cablingMap, std::set<unsigned int> const& modules, cuda::stream_t<>& cudaStream);

const unsigned char *get() const { return modToUnpDevice.get(); }

private:
edm::cuda::device::unique_ptr<unsigned char[]> modToUnpDevice;
edm::cuda::host::unique_ptr<unsigned char[]> modToUnpHost;
};

private:
const SiPixelFedCablingMap *cablingMap_;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> fedMap;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> linkMap;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> rocMap;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> RawId;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> rocInDet;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> moduleId;
std::vector<unsigned char, CUDAHostAllocator<unsigned char>> badRocs;
std::vector<unsigned char, CUDAHostAllocator<unsigned char>> modToUnpDefault;
unsigned int size;
bool hasQuality_;

Expand All @@ -64,6 +51,12 @@ class SiPixelFedCablingMapGPUWrapper {
SiPixelFedCablingMapGPU *cablingMapDevice = nullptr; // same internal pointers as above, struct itself is on GPU
};
CUDAESProduct<GPUData> gpuData_;

struct ModulesToUnpack {
~ModulesToUnpack();
unsigned char *modToUnpDefault = nullptr; // pointer to GPU
};
CUDAESProduct<ModulesToUnpack> modToUnp_;
};


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ std::unique_ptr<PixelUnpackingRegions> regions_;

edm::ESWatcher<SiPixelFedCablingMapRcd> recordWatcher;
edm::ESWatcher<SiPixelQualityRcd> qualityWatcher;
bool recordWatcherUpdatedSinceLastTransfer_ = false;

bool usePilotBlade;
bool usePhase1;
Expand Down Expand Up @@ -276,7 +275,6 @@ const FEDRawDataCollection *SiPixelRawToClusterHeterogeneous::initialize(const e
fedIds = cablingMap->fedIds();
cabling_ = cablingMap->cablingTree();
LogDebug("map version:")<< cabling_->version();
recordWatcherUpdatedSinceLastTransfer_ = true;
}
// initialize quality record or update if necessary
if (qualityWatcher.check( es )&&useQuality) {
Expand Down Expand Up @@ -462,17 +460,6 @@ void SiPixelRawToClusterHeterogeneous::produceCPU(edm::HeterogeneousEvent& ev, c
void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEvent& ev, const edm::EventSetup& es, cuda::stream_t<>& cudaStream) {
const auto buffers = initialize(ev.event(), es);

auto gpuModulesToUnpack = SiPixelFedCablingMapGPUWrapper::ModulesToUnpack(cudaStream);
if (regions_) {
std::set<unsigned int> modules = *(regions_->modulesToUnpack());
gpuModulesToUnpack.fillAsync(*cablingMap_, modules, cudaStream);
}
else if(recordWatcherUpdatedSinceLastTransfer_) {
// If regions_ are disabled, it is enough to fill and transfer only if cablingMap has changed
gpuModulesToUnpack.fillAsync(*cablingMap_, std::set<unsigned int>(), cudaStream);
recordWatcherUpdatedSinceLastTransfer_ = false;
}

edm::ESHandle<SiPixelFedCablingMapGPUWrapper> hgpuMap;
es.get<CkfComponentsRecord>().get(hgpuMap);
if(hgpuMap->hasQuality() != useQuality) {
Expand All @@ -481,6 +468,17 @@ void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEv
// get the GPU product already here so that the async transfer can begin
const auto *gpuMap = hgpuMap->getGPUProductAsync(cudaStream);

edm::cuda::device::unique_ptr<unsigned char[]> modulesToUnpackRegional;
const unsigned char *gpuModulesToUnpack;
if (regions_) {
modulesToUnpackRegional = hgpuMap->getModToUnpRegionalAsync(*(regions_->modulesToUnpack()), cudaStream);
gpuModulesToUnpack = modulesToUnpackRegional.get();
}
else {
gpuModulesToUnpack = hgpuMap->getModToUnpAllAsync(cudaStream);
}


edm::ESHandle<SiPixelGainCalibrationForHLTGPU> hgains;
es.get<SiPixelGainCalibrationForHLTGPURcd>().get(hgains);

Expand Down Expand Up @@ -548,7 +546,7 @@ void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEv

} // end of for loop

gpuAlgo_.makeClustersAsync(gpuMap, gpuModulesToUnpack.get(), hgains->getGPUProductAsync(cudaStream),
gpuAlgo_.makeClustersAsync(gpuMap, gpuModulesToUnpack, hgains->getGPUProductAsync(cudaStream),
wordFedAppender,
wordCounterGPU, fedCounter, convertADCtoElectrons,
useQuality, includeErrors, enableTransfer_, debug, cudaStream);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCablingMap const& cablingMap,
TrackerGeometry const& trackerGeom,
SiPixelQuality const *badPixelInfo):
cablingMap_(&cablingMap),
fedMap(pixelgpudetails::MAX_SIZE), linkMap(pixelgpudetails::MAX_SIZE), rocMap(pixelgpudetails::MAX_SIZE),
RawId(pixelgpudetails::MAX_SIZE), rocInDet(pixelgpudetails::MAX_SIZE), moduleId(pixelgpudetails::MAX_SIZE),
badRocs(pixelgpudetails::MAX_SIZE),
badRocs(pixelgpudetails::MAX_SIZE), modToUnpDefault(pixelgpudetails::MAX_SIZE),
hasQuality_(badPixelInfo != nullptr)
{
std::vector<unsigned int> const& fedIds = cablingMap.fedIds();
Expand All @@ -46,6 +47,7 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
if (pixelRoc != nullptr) {
RawId[index] = pixelRoc->rawId();
rocInDet[index] = pixelRoc->idInDetUnit();
modToUnpDefault[index] = false;
if (badPixelInfo != nullptr)
badRocs[index] = badPixelInfo->IsRocBad(pixelRoc->rawId(), pixelRoc->idInDetUnit());
else
Expand All @@ -54,6 +56,7 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
RawId[index] = 9999;
rocInDet[index] = 9999;
badRocs[index] = true;
modToUnpDefault[index] = true;
}
index++;
}
Expand Down Expand Up @@ -124,16 +127,21 @@ const SiPixelFedCablingMapGPU *SiPixelFedCablingMapGPUWrapper::getGPUProductAsyn
return data.cablingMapDevice;
}

SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::ModulesToUnpack(cuda::stream_t<>& cudaStream)
{
edm::Service<CUDAService> cs;
modToUnpDevice = cs->make_device_unique<unsigned char[]>(pixelgpudetails::MAX_SIZE, cudaStream);
modToUnpHost = cs->make_host_unique<unsigned char[]>(pixelgpudetails::MAX_SIZE, cudaStream);
const unsigned char *SiPixelFedCablingMapGPUWrapper::getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const {
const auto& data = modToUnp_.dataForCurrentDeviceAsync(cudaStream, [this](ModulesToUnpack& data, cuda::stream_t<>& stream) {
cudaCheck(cudaMalloc((void**) & data.modToUnpDefault, pixelgpudetails::MAX_SIZE_BYTE_BOOL));
cudaCheck(cudaMemcpyAsync(data.modToUnpDefault, this->modToUnpDefault.data(), this->modToUnpDefault.size() * sizeof(unsigned char), cudaMemcpyDefault, stream.id()));
});
return data.modToUnpDefault;
}

void SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::fillAsync(SiPixelFedCablingMap const& cablingMap, std::set<unsigned int> const& modules, cuda::stream_t<>& cudaStream) {
std::vector<unsigned int> const& fedIds = cablingMap.fedIds();
std::unique_ptr<SiPixelFedCablingTree> const& cabling = cablingMap.cablingTree();
edm::cuda::device::unique_ptr<unsigned char[]> SiPixelFedCablingMapGPUWrapper::getModToUnpRegionalAsync(std::set<unsigned int> const& modules, cuda::stream_t<>& cudaStream) const {
edm::Service<CUDAService> cs;
auto modToUnpDevice = cs->make_device_unique<unsigned char[]>(pixelgpudetails::MAX_SIZE, cudaStream);
auto modToUnpHost = cs->make_host_unique<unsigned char[]>(pixelgpudetails::MAX_SIZE, cudaStream);

std::vector<unsigned int> const& fedIds = cablingMap_->fedIds();
std::unique_ptr<SiPixelFedCablingTree> const& cabling = cablingMap_->cablingTree();

unsigned int startFed = *(fedIds.begin());
unsigned int endFed = *(fedIds.end() - 1);
Expand All @@ -157,6 +165,7 @@ void SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::fillAsync(SiPixelFedCablin
}

cuda::memory::async::copy(modToUnpDevice.get(), modToUnpHost.get(), pixelgpudetails::MAX_SIZE * sizeof(unsigned char), cudaStream.id());
return modToUnpDevice;
}


Expand All @@ -173,3 +182,7 @@ SiPixelFedCablingMapGPUWrapper::GPUData::~GPUData() {
}
cudaCheck(cudaFree(cablingMapDevice));
}

SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::~ModulesToUnpack() {
cudaCheck(cudaFree(modToUnpDefault));
}