diff --git a/Geometry/CommonTopologies/interface/SimplePixelTopology.h b/Geometry/CommonTopologies/interface/SimplePixelTopology.h index b37b9636fff14..4983908d22e50 100644 --- a/Geometry/CommonTopologies/interface/SimplePixelTopology.h +++ b/Geometry/CommonTopologies/interface/SimplePixelTopology.h @@ -349,6 +349,9 @@ namespace pixelTopology { static constexpr uint16_t last_barrel_detIndex = 504; static constexpr uint32_t maxPixInModule = 6000; + static constexpr uint32_t maxPixInModuleForMorphing = 0; + static constexpr uint32_t maxIterClustering = 16; + static constexpr uint32_t maxNumClustersPerModules = phase2PixelTopology::maxNumClustersPerModules; static constexpr uint32_t maxHitsInModule = phase2PixelTopology::maxNumClustersPerModules; @@ -443,6 +446,9 @@ namespace pixelTopology { static constexpr uint16_t last_barrel_detIndex = 1184; static constexpr uint32_t maxPixInModule = 6000; + static constexpr uint32_t maxPixInModuleForMorphing = maxPixInModule * 2 / 5; + static constexpr uint32_t maxIterClustering = 24; + static constexpr uint32_t maxNumClustersPerModules = phase1PixelTopology::maxNumClustersPerModules; static constexpr uint32_t maxHitsInModule = phase1PixelTopology::maxNumClustersPerModules; @@ -554,6 +560,8 @@ namespace pixelTopology { static constexpr uint32_t maxNumberOfQuadruplets = maxNumberOfTuples; static constexpr uint32_t maxPixInModule = 10000; + static constexpr uint32_t maxPixInModuleForMorphing = maxPixInModule * 1 / 10; + static constexpr uint32_t maxIterClustering = 32; static constexpr uint32_t maxNumOfActiveDoublets = maxNumberOfDoublets / 4; // TODO need to think a better way to avoid this duplication diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h index 1169eded95d8e..33481ba71621e 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h @@ -58,7 +58,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { constexpr uint32_t pixelSizeX = pixelTopology::Phase1::numRowsInModule; // 2 x 80 = 160 constexpr uint32_t pixelSizeY = pixelTopology::Phase1::numColsInModule; // 8 x 52 = 416 - enum Status : uint32_t { kEmpty = 0x00, kFound = 0x01, kDuplicate = 0x03 }; + enum Status : uint32_t { kEmpty = 0x00, kFound = 0x01, kDuplicate = 0x03, kFake = 0x02 }; // 2-bit per pixel Status packed in 32-bit words constexpr uint32_t bits = 2; @@ -114,6 +114,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { return new_status; } + ALPAKA_FN_ACC + inline bool isMorphingModule(uint32_t moduleId, const uint32_t* morphingModules, uint32_t nMorphingModules) { + // Binary search for moduleId in sorted morphingModules + int left = 0, right = static_cast(nMorphingModules) - 1; + while (left <= right) { + int mid = left + (right - left) / 2; + uint32_t val = morphingModules[mid]; + if (val == moduleId) + return true; + if (val < moduleId) + left = mid + 1; + else + right = mid - 1; + } + return false; + } } // namespace pixelStatus template @@ -158,31 +174,44 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { template struct FindClus { // assume that we can cover the whole module with up to 16 blockDimension-wide iterations - static constexpr uint32_t maxIterGPU = 16; - // this must be larger than maxPixInModule / maxIterGPU, and should be a multiple of the warp size + // this must be larger than maxPixInModule / maxIterClustering, and should be a multiple of the warp size static constexpr uint32_t maxElementsPerBlock = - cms::alpakatools::round_up_by(TrackerTraits::maxPixInModule / maxIterGPU, 128); + cms::alpakatools::round_up_by(TrackerTraits::maxPixInModule / TrackerTraits::maxIterClustering, 64); + static constexpr uint32_t maxElementsPerBlockMorph = cms::alpakatools::round_up_by( + (TrackerTraits::maxPixInModule + TrackerTraits::maxPixInModuleForMorphing) / TrackerTraits::maxIterClustering, + 64); + static_assert(maxElementsPerBlockMorph >= maxElementsPerBlock); ALPAKA_FN_ACC void operator()(Acc1D const& acc, SiPixelDigisSoAView digi_view, + SiPixelDigisSoAView fakes_view, + bool enableDigiMorphing, + uint32_t* morphingModules, + uint32_t nMorphingModules, + uint32_t maxFakesInModule, SiPixelClustersSoAView clus_view, const unsigned int numElements) const { static_assert(TrackerTraits::numberOfModules < ::pixelClustering::maxNumModules); auto& lastPixel = alpaka::declareSharedVar(acc); + auto& fakePixels = alpaka::declareSharedVar(acc); #ifdef GPU_DEBUG auto& goodPixels = alpaka::declareSharedVar(acc); #endif const uint32_t lastModule = clus_view[0].moduleStart(); for (uint32_t module : cms::alpakatools::independent_groups(acc, lastModule)) { + auto block = alpaka::getIdx(acc)[0u]; + auto firstPixel = clus_view[1 + module].moduleStart(); uint32_t thisModuleId = digi_view[firstPixel].moduleId(); + uint32_t rawModuleId = digi_view[firstPixel].rawIdArr(); + bool applyDigiMorphing = + enableDigiMorphing && pixelStatus::isMorphingModule(rawModuleId, morphingModules, nMorphingModules); ALPAKA_ASSERT_ACC(thisModuleId < TrackerTraits::numberOfModules); #ifdef GPU_DEBUG - auto block = alpaka::getIdx(acc)[0u]; if (thisModuleId % 100 == 1) if (cms::alpakatools::once_per_block(acc)) printf("start clusterizer for module %4d in block %4d\n", thisModuleId, block); @@ -192,6 +221,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { // Note: modules are not consecutive in clus_view, so we cannot use something like // lastPixel = (module + 1 == lastModule) ? numElements : clus_view[2 + module].moduleStart(); lastPixel = numElements; + const uint32_t firstFake = maxFakesInModule * block; + fakePixels = 0; #ifdef GPU_DEBUG goodPixels = 0; #endif @@ -209,11 +240,34 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { } } - using Hist = cms::alpakatools::HistoContainer; + // clear the fake pixels + if (applyDigiMorphing) { + // Assume no more than `maxFakesInModule` fake (recovered) pixels per module. + // The fake pixels for the `module`-th module are stored in `fakes_view[...]` starting at index `firstFake`, + // equal to `maxFakesInModule * module`. + ALPAKA_ASSERT_ACC(static_cast(fakes_view.metadata().size()) >= firstFake + maxFakesInModule); + for (uint32_t i : + cms::alpakatools::independent_group_elements(acc, firstFake, firstFake + maxFakesInModule)) { + // The cluster id of the fake pixels do not need to be unique across modules, as they are transient and only + // using within each module. They can start independently at `numElements` on each module. + // This ensures that when comparing the cluser id of a valid pixel with a fake pixel (with `atomicMin`) the + // resulting cluster id will be the one of the valid pixel. + fakes_view[i] = {static_cast(numElements + i), // cluster id + 0, // pdigi + 0, // rawIdArr + 0, // adc + 0, // xx + 0, // yy + ::pixelClustering::invalidModuleId}; // moduleId + } + } + + using Hist = + cms::alpakatools::HistoContainer; constexpr int warpSize = cms::alpakatools::warpSize; auto& hist = alpaka::declareSharedVar(acc); auto& ws = alpaka::declareSharedVar(acc); @@ -273,6 +327,173 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { } } alpaka::syncBlockThreads(acc); + + // apply the digi morphing recovery algorithm + if (applyDigiMorphing) { + using namespace pixelStatus; + constexpr uint32_t rowSize = pixelSizeX / valuesPerWord; // 160 / 16 = 10 words per row + + // Mark all duplicate pixels as empty in the image, to let the morphing attempt to recover them. + for (uint32_t i : cms::alpakatools::independent_group_elements(acc, size)) { + uint32_t value = image[i]; + // Duplicate pixels are marked as kDuplicate = 0b11. + // Identify them from the high bit, and if they are found remove both bits. + uint32_t masked = value & 0b10'10'10'10'10'10'10'10'10'10'10'10'10'10'10'10; + masked |= (masked >> 1); + value &= ~masked; + image[i] = value; + } + alpaka::syncBlockThreads(acc); + + // use the status buffer as a 2-bit-per-pixel image, with 16 pixels packed in each 32-bit word + // ...... ............................................... ..... + // ....## ##.##.##.##.##.##.##.##.##.##.##.##.##.##.##.## ##... + // ....## [##.##.##.##.##.##.##.##.##.##.##.##.##.##.##.##] ##... + // ....## ##.##.##.##.##.##.##.##.##.##.##.##.##.##.##.## ##... + // ...... ............................................... ..... + + // first step: expand and mark expanded pixels as kFake + // size = pixelSizeX * pixelSizeY / valuesPerWord; // 160 x 416 / 16 = 4160 32-bit words + for (uint32_t i : cms::alpakatools::independent_group_elements(acc, size)) { + uint16_t x = i % rowSize * valuesPerWord; // 0..9 x 16 = 0, 16, 32, ..., 144 + uint16_t y = i / rowSize; // 0..4159 / 10 = 0..415 + uint32_t value = image[i]; + uint64_t buffer = static_cast(value) << 2; + if (y > 0) { + // merge the word above + buffer |= static_cast(image[i - rowSize]) << 2; + } + if (y < pixelSizeY - 1) { + // merge the word below + buffer |= static_cast(image[i + rowSize]) << 2; + } + if (x > 0) { + // extract the pixels from the previous column, and merge them in the buffer + buffer |= static_cast(image[i - 1]) >> 30 & mask; + if (y > 0) + buffer |= static_cast(image[i - rowSize - 1]) >> 30 & mask; + if (y < pixelSizeY - 1) + buffer |= static_cast(image[i + rowSize - 1]) >> 30 & mask; + } + if (x < pixelSizeX - valuesPerWord) { + // extract the pixels from the following column, and merge them in the buffer + buffer |= static_cast(image[i + 1] & mask) << 34; + if (y > 0) + buffer |= static_cast(image[i - rowSize + 1] & mask) << 34; + if (y < pixelSizeY - 1) + buffer |= static_cast(image[i + rowSize + 1] & mask) << 34; + } + // mark kEmpty pixels as kFake if any neighbour is non-empty (kFound or kDuplicate) + for (uint32_t j = 0; j < valuesPerWord; ++j) { + uint32_t shift = j * 2; + // skip non-empty pixels + if (Status{(value >> shift) & mask} != kEmpty) { + continue; + } + // extract the kFound or kDuplicate status of the three columns of pixels in the buffer + if (((buffer >> shift) & 0b010101) != 0) { + // set the status of the non-edge pixel in the word to kFake + value |= kFake << shift; + } + } + // store the result back into the buffer + image[i] = value; + } + alpaka::syncBlockThreads(acc); + + // second step: erode, and create new fake pixels for the remaining ones + for (uint32_t i : cms::alpakatools::independent_group_elements(acc, size)) { + uint16_t x = i % rowSize * valuesPerWord; // 0..9 x 16 = 0, 16, 32, ..., 144 + uint16_t y = i / rowSize; // 0..4159 / 10 = 0..415 + uint32_t value = image[i]; + uint32_t above = (y > 0) ? (image[i - rowSize]) : 0; + uint32_t below = (y < pixelSizeY - 1) ? image[i + rowSize] : 0; + // First pixel (j = 0) + { + // shift = 0 + // Process only fake (recovered) pixels. + if ((value & mask) == kFake) { + // If there are no pixels on the edge, pretend it is a fake (recovered) one. + Status edge = (x > 0) ? Status{image[i - 1] >> ((valuesPerWord - 1) * bits) & mask} : kFake; + // Check that the pixels to the left, above, below, and to the right are not empty. + if (edge != kEmpty and (above & mask) != kEmpty and (below & mask) != kEmpty and + (value >> bits & mask) != kEmpty) { + // Create a fake pixel, up to maxFakesInModule pixels per module. + unsigned int index = + alpaka::atomicInc(acc, &fakePixels, 0xffffffff, alpaka::hierarchy::Threads{}); + if (index < maxFakesInModule) { + auto fake = fakes_view[firstFake + index]; + ALPAKA_ASSERT_ACC(fake.clus() == static_cast(numElements + firstFake + index)); + fake.xx() = x; + fake.yy() = y; + fake.moduleId() = thisModuleId; + } else { + printf("Too many pixels recovered by digi morphing in module %u: %u > %u\n", + thisModuleId, + index, + maxFakesInModule); + } + } + } + } + // Non-edge pixels (j = 1..14) + for (uint32_t j = 1; j < valuesPerWord - 1; ++j) { + uint32_t shift = j * bits; + // Process only fake (recovered) pixels. + if ((value >> shift & mask) == kFake) { + // Check that the pixels to the left, above, below, and to the right are not empty. + if ((value >> (shift - bits) & mask) != kEmpty and (above >> shift & mask) != kEmpty and + (below >> shift & mask) != kEmpty and (value >> (shift + bits) & mask) != kEmpty) { + // Create a fake pixel, up to maxFakesInModule pixels per module. + unsigned int index = + alpaka::atomicInc(acc, &fakePixels, 0xffffffff, alpaka::hierarchy::Threads{}); + if (index < maxFakesInModule) { + auto fake = fakes_view[firstFake + index]; + ALPAKA_ASSERT_ACC(fake.clus() == static_cast(numElements + firstFake + index)); + fake.xx() = x + j; + fake.yy() = y; + fake.moduleId() = thisModuleId; + } else { + printf("Too many pixels recovered by digi morphing in module %u: %u > %u\n", + thisModuleId, + index, + maxFakesInModule); + } + } + } + } + // Last pixel (j = 15) + { + uint32_t shift = ((valuesPerWord - 1) * bits); + // Process only fake (recovered) pixels. + if ((value >> shift & mask) == kFake) { + // If there are no pixels on the edge, pretend it is a fake (recovered) one. + Status edge = (x < pixelSizeX - valuesPerWord) ? Status{image[i + 1] & mask} : kFake; + // Check that the pixels to the left, above, below, and to the right are not empty. + if ((value >> (shift - bits) & mask) != kEmpty and (above >> shift & mask) != kEmpty and + (below >> shift & mask) != kEmpty and edge != kEmpty) { + // Create a fake pixel, up to maxFakesInModule pixels per module. + unsigned int index = + alpaka::atomicInc(acc, &fakePixels, 0xffffffff, alpaka::hierarchy::Threads{}); + if (index < maxFakesInModule) { + auto fake = fakes_view[firstFake + index]; + ALPAKA_ASSERT_ACC(fake.clus() == static_cast(numElements + firstFake + index)); + fake.xx() = x + valuesPerWord - 1; + fake.yy() = y; + fake.moduleId() = thisModuleId; + } else { + printf("Too many pixels recovered by digi morphing in module %u: %u > %u\n", + thisModuleId, + index, + maxFakesInModule); + } + } + } + } + } + alpaka::syncBlockThreads(acc); + + } // if (applyDigiMorphing) } // if (lastPixel > 1) } // if constexpr (not isPhase2) @@ -286,6 +507,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { #endif } } + if (applyDigiMorphing) { + for (uint32_t i : cms::alpakatools::independent_group_elements(acc, firstFake, firstFake + fakePixels)) { + hist.count(acc, fakes_view[i].yy()); + } + } for (uint32_t i : cms::alpakatools::independent_group_elements(acc, warpSize)) { ws[i] = 0; // used by prefix scan... } @@ -294,9 +520,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { alpaka::syncBlockThreads(acc); #ifdef GPU_DEBUG - ALPAKA_ASSERT_ACC(hist.size() == goodPixels); + ALPAKA_ASSERT_ACC(hist.size() == goodPixels + fakePixels); if (thisModuleId % 100 == 1) { if (cms::alpakatools::once_per_block(acc)) { + printf( + "module %d has %d good pixels and recovered %d pixels by morphing\n", module, goodPixels, fakePixels); printf("histo size %d\n", hist.size()); } } @@ -309,6 +537,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { hist.fill(acc, digi_view[i].yy(), i - firstPixel); } } + if (applyDigiMorphing) { + // For fake pixels, the id used in the histogram is `i - firstFake + TrackerTraits::maxPixInModule`, so + // in the range `[TrackerTraits::maxPixInModule, `TrackerTraits::maxPixInModule + maxFakesInModule`). + // This ensures that the fake pixels have different ids from the valid pixels. + for (uint32_t i : cms::alpakatools::independent_group_elements(acc, firstFake, firstFake + fakePixels)) { + hist.fill(acc, fakes_view[i].yy(), i - firstFake + TrackerTraits::maxPixInModule); + } + } #ifdef GPU_DEBUG // look for anomalous high occupancy @@ -336,15 +572,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { #endif [[maybe_unused]] const uint32_t blockDimension = alpaka::getWorkDiv(acc)[0u]; - // assume that we can cover the whole module with up to maxIterGPU blockDimension-wide iterations - ALPAKA_ASSERT_ACC((hist.size() / blockDimension) < maxIterGPU); + // assume that we can cover the whole module with up to maxIterClustering blockDimension-wide iterations + ALPAKA_ASSERT_ACC((hist.size() / blockDimension) < TrackerTraits::maxIterClustering); // number of elements per thread - constexpr uint32_t maxElements = - cms::alpakatools::requires_single_thread_per_block_v ? maxElementsPerBlock : 1; + const uint32_t maxElements = cms::alpakatools::requires_single_thread_per_block_v + ? (enableDigiMorphing ? maxElementsPerBlockMorph : maxElementsPerBlock) + : 1; + +#ifdef GPU_DEBUG + const auto nElementsPerThread = alpaka::getWorkDiv(acc)[0u]; + if (nElementsPerThread > maxElements) + printf("This is WRONG: nElementsPerThread > maxElements : %d > %d\n", nElementsPerThread, maxElements); + else if (thisModuleId % 500 == 1) + printf("This is OK: nElementsPerThread <= maxElements : %d <= %d\n", nElementsPerThread, maxElements); +#endif + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0u] <= maxElements)); - constexpr unsigned int maxIter = maxIterGPU * maxElements; + const unsigned int maxIter = TrackerTraits::maxIterClustering * maxElements; // nearest neighbours (nn) constexpr int maxNeighbours = 8; @@ -361,19 +607,52 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { for (uint32_t j : cms::alpakatools::independent_group_elements(acc, hist.size())) { ALPAKA_ASSERT_ACC(k < maxIter); auto p = hist.begin() + j; - auto i = *p + firstPixel; - ALPAKA_ASSERT_ACC(digi_view[i].moduleId() != ::pixelClustering::invalidModuleId); - ALPAKA_ASSERT_ACC(digi_view[i].moduleId() == thisModuleId); // same module - auto bin = Hist::bin(digi_view[i].yy() + 1); - auto end = hist.end(bin); + bool isFake = (*p >= TrackerTraits::maxPixInModule); + uint32_t i; + uint16_t ix, iy; + const uint16_t* end; + if (applyDigiMorphing and isFake) { + // recovered "fake" pixel + i = *p - TrackerTraits::maxPixInModule + firstFake; + auto pixel = fakes_view[i]; + ALPAKA_ASSERT_ACC(pixel.moduleId() != ::pixelClustering::invalidModuleId); + ALPAKA_ASSERT_ACC(pixel.moduleId() == thisModuleId); // same module + ix = pixel.xx(); + iy = pixel.yy(); + auto bin = Hist::bin(iy + 1); + end = hist.end(bin); + } else { + // real pixel + i = *p + firstPixel; + auto pixel = digi_view[i]; + ALPAKA_ASSERT_ACC(pixel.moduleId() != ::pixelClustering::invalidModuleId); + ALPAKA_ASSERT_ACC(pixel.moduleId() == thisModuleId); // same module + ix = pixel.xx(); + iy = pixel.yy(); + auto bin = Hist::bin(iy + 1); + end = hist.end(bin); + } ++p; ALPAKA_ASSERT_ACC(0 == nnn[k]); for (; p < end; ++p) { - auto m = *p + firstPixel; - ALPAKA_ASSERT_ACC(m != i); - ALPAKA_ASSERT_ACC(int(digi_view[m].yy()) - int(digi_view[i].yy()) >= 0); - ALPAKA_ASSERT_ACC(int(digi_view[m].yy()) - int(digi_view[i].yy()) <= 1); - if (std::abs(int(digi_view[m].xx()) - int(digi_view[i].xx())) <= 1) { + bool otherIsFake = (*p >= TrackerTraits::maxPixInModule); + uint32_t m; + uint16_t mx, my; + if (applyDigiMorphing and otherIsFake) { + m = *p - TrackerTraits::maxPixInModule + firstFake; + auto pixel = fakes_view[m]; + mx = pixel.xx(); + my = pixel.yy(); + } else { + m = *p + firstPixel; + auto pixel = digi_view[m]; + mx = pixel.xx(); + my = pixel.yy(); + } + ALPAKA_ASSERT_ACC(m != i or otherIsFake != isFake); + ALPAKA_ASSERT_ACC(int(my) - int(iy) >= 0); + ALPAKA_ASSERT_ACC(int(my) - int(iy) <= 1); + if (std::abs(int(mx) - int(ix)) <= 1) { auto l = nnn[k]++; ALPAKA_ASSERT_ACC(l < maxNeighbours); nn[k][l] = *p; @@ -393,31 +672,76 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { for (uint32_t j : cms::alpakatools::independent_group_elements(acc, hist.size())) { ALPAKA_ASSERT_ACC(k < maxIter); auto p = hist.begin() + j; - auto i = *p + firstPixel; + bool isFake = (*p >= TrackerTraits::maxPixInModule); + uint32_t i; + int32_t* iclus; + if (applyDigiMorphing and isFake) { + // recovered "fake" pixel + i = *p - TrackerTraits::maxPixInModule + firstFake; + iclus = &fakes_view[i].clus(); + } else { + // real pixel + i = *p + firstPixel; + iclus = &digi_view[i].clus(); + } for (int kk = 0; kk < nnn[k]; ++kk) { auto l = nn[k][kk]; - auto m = l + firstPixel; - ALPAKA_ASSERT_ACC(m != i); - // The algorithm processes one module per block, so the atomic operation's scope - // is "Threads" (all threads in the current block) - auto old = - alpaka::atomicMin(acc, &digi_view[m].clus(), digi_view[i].clus(), alpaka::hierarchy::Threads{}); - if (old != digi_view[i].clus()) { + bool otherIsFake = (l >= TrackerTraits::maxPixInModule); + uint32_t m; + int32_t* mclus; + if (applyDigiMorphing and otherIsFake) { + // recovered "fake" pixel + m = l - TrackerTraits::maxPixInModule + firstFake; + mclus = &fakes_view[m].clus(); + } else { + // real pixel + m = l + firstPixel; + mclus = &digi_view[m].clus(); + } + ALPAKA_ASSERT_ACC(m != i or otherIsFake != isFake); + // the algorithm processes one module per block, so the atomic operation's scope is "Threads" (all threads in the current block) + auto old = alpaka::atomicMin(acc, mclus, *iclus, alpaka::hierarchy::Threads{}); + if (old != *iclus) { // end the loop only if no changes were applied done = false; } - alpaka::atomicMin(acc, &digi_view[i].clus(), old, alpaka::hierarchy::Threads{}); + alpaka::atomicMin(acc, iclus, old, alpaka::hierarchy::Threads{}); } // neighbours loop ++k; } // pixel loop alpaka::syncBlockThreads(acc); for (uint32_t j : cms::alpakatools::independent_group_elements(acc, hist.size())) { auto p = hist.begin() + j; - auto i = *p + firstPixel; - auto m = digi_view[i].clus(); - while (m != digi_view[m].clus()) - m = digi_view[m].clus(); - digi_view[i].clus() = m; + bool isFake = (*p >= TrackerTraits::maxPixInModule); + uint32_t i, m; + if (applyDigiMorphing and isFake) { + // recovered "fake" pixel + i = *p - TrackerTraits::maxPixInModule + firstFake; + m = fakes_view[i].clus(); + } else { + // real pixel + i = *p + firstPixel; + m = digi_view[i].clus(); + } + + while (true) { + uint32_t n; + if (m < numElements) { + n = digi_view[m].clus(); + } else { + n = fakes_view[m - numElements].clus(); + } + if (m == n) { + break; + } + m = n; + } + + if (isFake) { + fakes_view[i].clus() = m; + } else { + digi_view[i].clus() = m; + } } } // end while diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelMorphingConfig.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelMorphingConfig.h new file mode 100644 index 0000000000000..0259c9d44f353 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelMorphingConfig.h @@ -0,0 +1,12 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_SiPixelMorphingConfig_h +#define RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_SiPixelMorphingConfig_h + +#include + +struct SiPixelMorphingConfig { + bool applyDigiMorphing; + uint32_t maxFakesInModule; + uint32_t numMorphingModules; +}; + +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_SiPixelMorphingConfig_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc index 2b027bd595383..a9b88769f2242 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc @@ -4,6 +4,9 @@ #include #include #include +#include + +#include #include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h" #include "CalibTracker/Records/interface/SiPixelMappingSoARecord.h" @@ -36,6 +39,12 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" +#include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "DataFormats/DetId/interface/DetId.h" + +#include "SiPixelMorphingConfig.h" #include "SiPixelRawToClusterKernel.h" namespace ALPAKA_ACCELERATOR_NAMESPACE { @@ -49,6 +58,90 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); using Algo = pixelDetails::SiPixelRawToClusterKernel; + typedef std::pair range; + typedef std::vector region; + + static std::vector parseRegions(const std::vector& regionStrings, size_t size) { + std::vector regions; + for (auto const& str : regionStrings) { + region reg; + std::vector ranges; + boost::split(ranges, str, boost::is_any_of(",")); + if (ranges.size() != size) { + throw cms::Exception("Configuration") << "[SiPixelDigiMorphing]:" + << " invalid number of coordinates provided in " << str << " (" << size + << " expected, " << ranges.size() << " provided)\n"; + } + for (auto const& r : ranges) { + std::vector limits; + boost::split(limits, r, boost::is_any_of("-")); + try { + if (limits.size() == 2) { + reg.push_back(std::make_pair(std::stoi(limits.at(0)), std::stoi(limits.at(1)))); + } else if (limits.size() == 1) { + reg.push_back(std::make_pair(std::stoi(limits.at(0)), std::stoi(limits.at(0)))); + } else { + throw cms::Exception("Configuration") + << "[SiPixelDigiMorphing]:" + << " invalid range format in '" << r << "' (expected 'A' or 'A-B')\n"; + } + } catch (cms::Exception&) { + throw; + } catch (...) { + throw cms::Exception("Configuration") << "[SiPixelDigiMorphing]:" + << " invalid coordinate value provided in " << str << "\n"; + } + } + regions.push_back(reg); + } + return regions; + } + + static bool skipDetId(const TrackerTopology* tTopo, + const DetId& detId, + const std::vector& theBarrelRegions, + const std::vector& theEndcapRegions) { + if (detId.subdetId() == static_cast(PixelSubdetector::PixelBarrel)) { + if (theBarrelRegions.empty()) { + return true; + } else { + uint32_t layer = tTopo->pxbLayer(detId.rawId()); + uint32_t ladder = tTopo->pxbLadder(detId.rawId()); + uint32_t module = tTopo->pxbModule(detId.rawId()); + bool inRegion = false; + for (auto const& reg : theBarrelRegions) { + if ((layer >= reg.at(0).first && layer <= reg.at(0).second) && + (ladder >= reg.at(1).first && ladder <= reg.at(1).second) && + (module >= reg.at(2).first && module <= reg.at(2).second)) { + inRegion = true; + break; + } + } + return !inRegion; + } + } else { + if (theEndcapRegions.empty()) { + return true; + } else { + uint32_t disk = tTopo->pxfDisk(detId.rawId()); + uint32_t blade = tTopo->pxfBlade(detId.rawId()); + uint32_t side = tTopo->pxfSide(detId.rawId()); + uint32_t panel = tTopo->pxfPanel(detId.rawId()); + bool inRegion = false; + for (auto const& reg : theEndcapRegions) { + if ((disk >= reg.at(0).first && disk <= reg.at(0).second) && + (blade >= reg.at(1).first && blade <= reg.at(1).second) && + (side >= reg.at(2).first && side <= reg.at(2).second) && + (panel >= reg.at(3).first && panel <= reg.at(3).second)) { + inRegion = true; + break; + } + } + return !inRegion; + } + } + } + private: void acquire(device::Event const& iEvent, device::EventSetup const& iSetup) override; void produce(device::Event& iEvent, device::EventSetup const& iSetup) override; @@ -60,9 +153,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { device::EDPutToken clusterPutToken_; edm::ESWatcher recordWatcher_; + edm::ESWatcher trackerTopologyWatcher_; const device::ESGetToken mapToken_; const device::ESGetToken gainsToken_; const edm::ESGetToken cablingMapToken_; + const edm::ESGetToken cablingMapTokenBeginRun_; + const edm::ESGetToken trackerTopologyToken_; + SiPixelMorphingConfig digiMorphingConfig_; + std::optional> morphingModulesDevice_; std::unique_ptr cabling_; std::vector fedIds_; @@ -76,6 +174,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const bool useQuality_; uint32_t nDigis_; const SiPixelClusterThresholds clusterThresholds_; + const std::vector theBarrelRegions_; + const std::vector theEndcapRegions_; }; template @@ -88,6 +188,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { gainsToken_(esConsumes()), cablingMapToken_(esConsumes( edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), + trackerTopologyToken_(esConsumes()), includeErrors_(iConfig.getParameter("IncludeErrors")), useQuality_(iConfig.getParameter("UseQualityInfo")), clusterThresholds_{iConfig.getParameter("clusterThreshold_layer1"), @@ -95,11 +196,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { static_cast(iConfig.getParameter("VCaltoElectronGain")), static_cast(iConfig.getParameter("VCaltoElectronGain_L1")), static_cast(iConfig.getParameter("VCaltoElectronOffset")), - static_cast(iConfig.getParameter("VCaltoElectronOffset_L1"))} { + static_cast(iConfig.getParameter("VCaltoElectronOffset_L1"))}, + theBarrelRegions_( + SiPixelRawToCluster::parseRegions(iConfig.getParameter>("barrelRegions"), 3)), + theEndcapRegions_( + SiPixelRawToCluster::parseRegions(iConfig.getParameter>("endcapRegions"), 4)) { if (includeErrors_) { digiErrorPutToken_ = produces(); fmtErrorToken_ = produces(); } + digiMorphingConfig_.applyDigiMorphing = iConfig.getParameter("DoDigiMorphing"); + digiMorphingConfig_.maxFakesInModule = iConfig.getParameter("MaxFakesInModule"); + + if (digiMorphingConfig_.maxFakesInModule > TrackerTraits::maxPixInModuleForMorphing) { + throw cms::Exception("Configuration") + << "[SiPixelDigiMorphing]:" + << " maxFakesInModule should be <= " << TrackerTraits::maxPixInModuleForMorphing + << " (TrackerTraits::maxPixInModuleForMorphing)" + << " while " << digiMorphingConfig_.maxFakesInModule << " was provided at config level.\n"; + } // regions if (!iConfig.getParameter("Regions").getParameterNames().empty()) { @@ -122,6 +237,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { desc.add("VCaltoElectronGain_L1", 50.f); desc.add("VCaltoElectronOffset", -60.f); desc.add("VCaltoElectronOffset_L1", -670.f); + desc.add("DoDigiMorphing", false); + desc.add("MaxFakesInModule", TrackerTraits::maxPixInModuleForMorphing); desc.add("InputLabel", edm::InputTag("rawDataCollector")); { @@ -133,6 +250,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { desc.add("Regions", psd0) ->setComment("## Empty Regions PSet means complete unpacking"); } + // LAYER,LADDER,MODULE (coordinates can also be specified as a range FIRST-LAST where appropriate) + desc.add>("barrelRegions", {"1,1-12,1-2", "1,1-12,7-8", "2,1-28,1", "2,1-28,8"}); + // DISK,BLADE,SIDE,PANEL (coordinates can also be specified as a range FIRST-LAST where appropriate) + desc.add>("endcapRegions", {}); + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); //Tav descriptions.addWithDefaultLabel(desc); } @@ -143,12 +265,39 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto const& dGains = iSetup.getData(gainsToken_); // initialize cabling map or update if necessary - if (recordWatcher_.check(iSetup)) { + if (recordWatcher_.check(iSetup) || trackerTopologyWatcher_.check(iSetup)) { // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) cablingMap_ = &iSetup.getData(cablingMapToken_); fedIds_ = cablingMap_->fedIds(); cabling_ = cablingMap_->cablingTree(); LogDebug("map version:") << cablingMap_->version(); + const TrackerTopology* tTopo_ = &iSetup.getData(trackerTopologyToken_); + // collect morphing module ids on host, then copy once to device + std::vector morphingModulesHost; + for (const auto& connection : cablingMap_->det2fedMap()) { + auto rawId = connection.first; + if (rawId == 0) + continue; + DetId detId(rawId); + if (!SiPixelRawToCluster::skipDetId(tTopo_, detId, theBarrelRegions_, theEndcapRegions_)) { + morphingModulesHost.push_back(rawId); + } + } + + // Sort once on CPU for efficient binary search on device later + std::sort(morphingModulesHost.begin(), morphingModulesHost.end()); + + // update count in config and copy module ids to device buffer once + digiMorphingConfig_.numMorphingModules = morphingModulesHost.size(); + if (!morphingModulesHost.empty()) { + morphingModulesDevice_ = + cms::alpakatools::make_device_buffer(iEvent.queue(), morphingModulesHost.size()); + auto morphingModules_h = + cms::alpakatools::make_host_view(morphingModulesHost.data(), morphingModulesHost.size()); + alpaka::memcpy(iEvent.queue(), *morphingModulesDevice_, morphingModules_h); + } else { + morphingModulesDevice_ = cms::alpakatools::make_device_buffer(iEvent.queue(), 0); + } } // if used, the buffer is guaranteed to stay alive until the after the execution of makePhase1ClustersAsync completes @@ -252,6 +401,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { fedCounter, useQuality_, includeErrors_, + digiMorphingConfig_, + morphingModulesDevice_->data(), edm::MessageDrop::instance()->debugEnabled); } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc index e03881559fca8..492a5b79170a1 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc @@ -35,6 +35,7 @@ #include "ClusterChargeCut.h" #include "PixelClustering.h" #include "SiPixelRawToClusterKernel.h" +#include "SiPixelMorphingConfig.h" //#define GPU_DEBUG @@ -509,6 +510,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const uint32_t fedCounter, bool useQualityInfo, bool includeErrors, + SiPixelMorphingConfig digiMorphingConfig, + uint32_t *morphingModulesDevice, bool debug) { nDigis = wordCounter; @@ -623,15 +626,32 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { { const int blocks = 64; - const auto elementsPerBlockFindClus = FindClus::maxElementsPerBlock; - const auto workDivMaxNumModules = cms::alpakatools::make_workdiv(blocks, elementsPerBlockFindClus); + const auto elementsPerBlockFindClus = digiMorphingConfig.applyDigiMorphing + ? FindClus::maxElementsPerBlockMorph + : FindClus::maxElementsPerBlock; + const auto workDivFindClus = cms::alpakatools::make_workdiv(blocks, elementsPerBlockFindClus); + + // allocate a transient collection for the fake pixels recovered by the digi morphing algorithm + auto fakes_d = SiPixelDigisSoACollection(blocks * digiMorphingConfig.maxFakesInModule, queue); #ifdef GPU_DEBUG - std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus + alpaka::wait(queue); + std::cout << "FindClus kernel launch with " << blocks << " blocks of " << elementsPerBlockFindClus << " threadsPerBlockOrElementsPerThread\n"; #endif - alpaka::exec( - queue, workDivMaxNumModules, FindClus{}, digis_d->view(), clusters_d->view(), wordCounter); + + // Use device buffer created by producer and the module count stored in digiMorphingConfig + alpaka::exec(queue, + workDivFindClus, + FindClus{}, + digis_d->view(), + fakes_d.view(), + digiMorphingConfig.applyDigiMorphing, + morphingModulesDevice, + digiMorphingConfig.numMorphingModules, + digiMorphingConfig.maxFakesInModule, + clusters_d->view(), + wordCounter); #ifdef GPU_DEBUG alpaka::wait(queue); #endif @@ -681,7 +701,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // end clusterizer scope } - template void SiPixelRawToClusterKernel::makePhase2ClustersAsync( Queue &queue, @@ -720,8 +739,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus << " threadsPerBlockOrElementsPerThread\n"; #endif - alpaka::exec( - queue, workDivMaxNumModules, FindClus{}, digis_view, clusters_d->view(), numDigis); + auto unused = SiPixelDigisSoACollection(0, queue); + + alpaka::exec(queue, + workDivMaxNumModules, + FindClus{}, + digis_view, + unused.view(), + false, + static_cast(nullptr), + static_cast(0), + static_cast(0), + clusters_d->view(), + numDigis); #ifdef GPU_DEBUG alpaka::wait(queue); #endif diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h index aed00c9a48ed2..05cf94a18fac7 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h @@ -23,6 +23,7 @@ #include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" #include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" #include "DataFormats/SiPixelDetId/interface/PixelChannelIdentifier.h" +#include "SiPixelMorphingConfig.h" namespace pixelDetails { @@ -160,6 +161,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const uint32_t fedCounter, bool useQualityInfo, bool includeErrors, + SiPixelMorphingConfig digiMorphingConfig, + uint32_t* morphingModulesDevice, bool debug); void makePhase2ClustersAsync(Queue& queue,