diff --git a/CUDADataFormats/TrackingRecHit/interface/SiPixelStatus.h b/CUDADataFormats/TrackingRecHit/interface/SiPixelStatus.h new file mode 100644 index 0000000000000..75e4468a811b6 --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/interface/SiPixelStatus.h @@ -0,0 +1,12 @@ +#ifndef CUDADataFormats_TrackingRecHit_interface_SiPixelStatus_H +#define CUDADataFormats_TrackingRecHit_interface_SiPixelStatus_H + +struct SiPixelStatus { + uint8_t isBigX : 1; + uint8_t isOneX : 1; + uint8_t isBigY : 1; + uint8_t isOneY : 1; + uint8_t qBin : 3; +}; + +#endif diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index 73a6daaa4e387..3a1d90b251ee2 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -14,10 +14,12 @@ class TrackingRecHit2DHeterogeneous { TrackingRecHit2DHeterogeneous() = default; - explicit TrackingRecHit2DHeterogeneous(uint32_t nHits, - pixelCPEforGPU::ParamsOnGPU const* cpeParams, - uint32_t const* hitsModuleStart, - cudaStream_t stream); + explicit TrackingRecHit2DHeterogeneous( + uint32_t nHits, + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + uint32_t const* hitsModuleStart, + cudaStream_t stream, + TrackingRecHit2DHeterogeneous const* input = nullptr); ~TrackingRecHit2DHeterogeneous() = default; @@ -41,6 +43,9 @@ class TrackingRecHit2DHeterogeneous { cms::cuda::host::unique_ptr detIndexToHostAsync(cudaStream_t stream) const; cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; + // needs specialization for Host + void copyFromGPU(TrackingRecHit2DHeterogeneous const* input, cudaStream_t stream); + private: static constexpr uint32_t n16 = 4; static constexpr uint32_t n32 = 9; @@ -64,26 +69,33 @@ class TrackingRecHit2DHeterogeneous { int16_t* m_iphi; }; +using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous; + #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" template -TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nHits, - pixelCPEforGPU::ParamsOnGPU const* cpeParams, - uint32_t const* hitsModuleStart, - cudaStream_t stream) +TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous( + uint32_t nHits, + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + uint32_t const* hitsModuleStart, + cudaStream_t stream, + TrackingRecHit2DHeterogeneous const* input) : m_nHits(nHits), m_hitsModuleStart(hitsModuleStart) { auto view = Traits::template make_host_unique(stream); view->m_nHits = nHits; - m_view = Traits::template make_device_unique(stream); - m_AverageGeometryStore = Traits::template make_device_unique(stream); + m_view = Traits::template make_unique(stream); // leave it on host and pass it by value? + m_AverageGeometryStore = Traits::template make_unique(stream); view->m_averageGeometry = m_AverageGeometryStore.get(); view->m_cpeParams = cpeParams; view->m_hitsModuleStart = hitsModuleStart; // if empy do not bother - if (0 == nHits) { + if (0 == nHits) if constexpr (std::is_same::value) { cms::cuda::copyAsync(m_view, view, stream); } else { @@ -97,38 +109,57 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH // if ordering is relevant they may have to be stored phi-ordered by layer or so // this will break 1to1 correspondence with cluster and module locality // so unless proven VERY inefficient we keep it ordered as generated - m_store16 = Traits::template make_device_unique(nHits * n16, stream); - m_store32 = Traits::template make_device_unique(nHits * n32 + 11, stream); - m_HistStore = Traits::template make_device_unique(stream); - auto get16 = [&](int i) { return m_store16.get() + i * nHits; }; + // host copy is "reduced" (to be reviewed at some point) + if +#ifdef __cpp_if_constexpr + constexpr +#endif + (std::is_same::value) { + // it has to compile for ALL cases + copyFromGPU(input, stream); + } else { + assert(input == nullptr); + m_store16 = Traits::template make_unique(nHits * n16, stream); + m_store32 = Traits::template make_unique(nHits * n32 + 11, stream); + m_HistStore = Traits::template make_unique(stream); + } + auto get32 = [&](int i) { return m_store32.get() + i * nHits; }; // copy all the pointers - m_hist = view->m_hist = m_HistStore.get(); view->m_xl = get32(0); view->m_yl = get32(1); view->m_xerr = get32(2); view->m_yerr = get32(3); + view->m_chargeAndStatus = reinterpret_cast(get32(4)); - view->m_xg = get32(4); - view->m_yg = get32(5); - view->m_zg = get32(6); - view->m_rg = get32(7); - - m_iphi = view->m_iphi = reinterpret_cast(get16(0)); - - view->m_charge = reinterpret_cast(get32(8)); - view->m_xsize = reinterpret_cast(get16(2)); - view->m_ysize = reinterpret_cast(get16(3)); - view->m_detInd = get16(1); - - m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast(get32(n32)); + if +#ifdef __cpp_if_constexpr + constexpr +#endif + (!std::is_same::value) { + assert(input == nullptr); + view->m_xg = get32(5); + view->m_yg = get32(6); + view->m_zg = get32(7); + view->m_rg = get32(8); + + auto get16 = [&](int i) { return m_store16.get() + i * nHits; }; + m_iphi = view->m_iphi = reinterpret_cast(get16(1)); + + view->m_xsize = reinterpret_cast(get16(2)); + view->m_ysize = reinterpret_cast(get16(3)); + view->m_detInd = get16(0); + + m_hist = view->m_hist = m_HistStore.get(); + m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast(get32(n32)); + } // transfer view if -#ifndef __CUDACC__ +#ifdef __cpp_if_constexpr constexpr #endif (std::is_same::value) { @@ -138,9 +169,4 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH } } -using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous; -using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous; -using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous; -using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous; - #endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DReduced.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DReduced.h new file mode 100644 index 0000000000000..263a0dee56dd5 --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DReduced.h @@ -0,0 +1,52 @@ +#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DReduced_h +#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DReduced_h + +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h" +#include "CUDADataFormats/Common/interface/HostProduct.h" + +// a reduced (in content and therefore in size) version to be used on CPU for Legacy reconstruction +class TrackingRecHit2DReduced { +public: + using HLPstorage = HostProduct; + using HIDstorage = HostProduct; + + template + TrackingRecHit2DReduced(UP32&& istore32, UP16&& istore16, int nhits) + : m_store32(std::move(istore32)), m_store16(std::move(istore16)), m_nHits(nhits) { + auto get32 = [&](int i) { return const_cast(m_store32.get()) + i * nhits; }; + + // copy all the pointers (better be in sync with the producer store) + + m_view.m_xl = get32(0); + m_view.m_yl = get32(1); + m_view.m_xerr = get32(2); + m_view.m_yerr = get32(3); + m_view.m_chargeAndStatus = reinterpret_cast(get32(4)); + m_view.m_detInd = const_cast(m_store16.get()); + } + + // view only! + TrackingRecHit2DReduced(TrackingRecHit2DSOAView const& iview, int nhits) : m_view(iview), m_nHits(nhits) {} + + TrackingRecHit2DReduced() = default; + ~TrackingRecHit2DReduced() = default; + + TrackingRecHit2DReduced(const TrackingRecHit2DReduced&) = delete; + TrackingRecHit2DReduced& operator=(const TrackingRecHit2DReduced&) = delete; + TrackingRecHit2DReduced(TrackingRecHit2DReduced&&) = default; + TrackingRecHit2DReduced& operator=(TrackingRecHit2DReduced&&) = default; + + TrackingRecHit2DSOAView& view() { return m_view; } + TrackingRecHit2DSOAView const& view() const { return m_view; } + + auto nHits() const { return m_nHits; } + + TrackingRecHit2DSOAView m_view; + + HLPstorage m_store32; + HIDstorage m_store16; + + int m_nHits; +}; + +#endif diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h index 808feb2a4218f..52bc64dc66ad9 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h @@ -7,6 +7,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" +#include "CUDADataFormats/TrackingRecHit/interface/SiPixelStatus.h" namespace pixelCPEforGPU { struct ParamsOnGPU; @@ -14,6 +15,9 @@ namespace pixelCPEforGPU { class TrackingRecHit2DSOAView { public: + using Status = SiPixelStatus; + static_assert(sizeof(Status) == sizeof(uint8_t)); + static constexpr uint32_t maxHits() { return gpuClustering::MaxNumClusters; } using hindex_type = uint16_t; // if above is <=2^16 @@ -24,6 +28,7 @@ class TrackingRecHit2DSOAView { template friend class TrackingRecHit2DHeterogeneous; + friend class TrackingRecHit2DReduced; __device__ __forceinline__ uint32_t nHits() const { return m_nHits; } @@ -49,8 +54,19 @@ class TrackingRecHit2DSOAView { __device__ __forceinline__ int16_t& iphi(int i) { return m_iphi[i]; } __device__ __forceinline__ int16_t iphi(int i) const { return __ldg(m_iphi + i); } - __device__ __forceinline__ int32_t& charge(int i) { return m_charge[i]; } - __device__ __forceinline__ int32_t charge(int i) const { return __ldg(m_charge + i); } + __device__ __forceinline__ void setChargeAndStatus(int i, uint32_t ich, Status is) { + // static_assert(0xffffff == chargeMask()); + ich = std::min(ich, chargeMask()); + uint32_t w = *reinterpret_cast(&is); + ich |= (w << 24); + m_chargeAndStatus[i] = ich; + } + + __device__ __forceinline__ Status status(int i) const { + uint8_t w = __ldg(m_chargeAndStatus + i) >> 24; + return *reinterpret_cast(&w); + } + __device__ __forceinline__ uint32_t charge(int i) const { return __ldg(m_chargeAndStatus + i) & chargeMask(); } __device__ __forceinline__ int16_t& clusterSizeX(int i) { return m_xsize[i]; } __device__ __forceinline__ int16_t clusterSizeX(int i) const { return __ldg(m_xsize + i); } __device__ __forceinline__ int16_t& clusterSizeY(int i) { return m_ysize[i]; } @@ -81,7 +97,8 @@ class TrackingRecHit2DSOAView { int16_t* m_iphi; // cluster properties - int32_t* m_charge; + static constexpr uint32_t chargeMask() { return (1 << 24) - 1; } + uint32_t* m_chargeAndStatus; int16_t* m_xsize; int16_t* m_ysize; uint16_t* m_detInd; diff --git a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DCUDA.cc b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DCUDA.cc index 7b04ed2d530a0..6d994c9cf74c5 100644 --- a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DCUDA.cc +++ b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DCUDA.cc @@ -1,19 +1,33 @@ -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" template <> -cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::localCoordToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(4 * nHits(), stream); - cms::cuda::copyAsync(ret, m_store32, 4 * nHits(), stream); +cms::cuda::host::unique_ptr TrackingRecHit2DGPU::localCoordToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(5 * nHits(), stream); + cms::cuda::copyAsync(ret, m_store32, 5 * nHits(), stream); return ret; } template <> -cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::hitsModuleStartToHostAsync(cudaStream_t stream) const { +cms::cuda::host::unique_ptr TrackingRecHit2DGPU::detIndexToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nHits(), stream); + cms::cuda::copyAsync(ret, m_store16, nHits(), stream); + return ret; +} + +template <> +cms::cuda::host::unique_ptr TrackingRecHit2DGPU::hitsModuleStartToHostAsync(cudaStream_t stream) const { auto ret = cms::cuda::make_host_unique(2001, stream); cudaCheck(cudaMemcpyAsync(ret.get(), m_hitsModuleStart, 4 * 2001, cudaMemcpyDefault, stream)); return ret; } + +// the only specialization needed +template <> +void TrackingRecHit2DHost::copyFromGPU(TrackingRecHit2DGPU const* input, cudaStream_t stream) { + assert(input); + m_store32 = input->localCoordToHostAsync(stream); +} diff --git a/CUDADataFormats/TrackingRecHit/src/classes.h b/CUDADataFormats/TrackingRecHit/src/classes.h index 3d40821493c5b..7de2d7621ef32 100644 --- a/CUDADataFormats/TrackingRecHit/src/classes.h +++ b/CUDADataFormats/TrackingRecHit/src/classes.h @@ -3,6 +3,7 @@ #include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DReduced.h" #include "DataFormats/Common/interface/Wrapper.h" #endif // CUDADataFormats_SiPixelCluster_src_classes_h diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp index 32af6c181ae68..faf3cf887cb4e 100644 --- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp @@ -15,14 +15,18 @@ int main() { cudaStream_t stream; cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); + uint32_t nHits = 200; // inner scope to deallocate memory before destroying the stream { - auto nHits = 200; TrackingRecHit2DCUDA tkhit(nHits, nullptr, nullptr, stream); testTrackingRecHit2D::runKernels(tkhit.view()); - } + TrackingRecHit2DHost tkhitH(nHits, nullptr, nullptr, stream, &tkhit); + cudaStreamSynchronize(stream); + assert(tkhitH.view()); + assert(tkhitH.view()->nHits() == nHits); + } cudaCheck(cudaStreamDestroy(stream)); return 0; diff --git a/RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu b/RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu index a1fc79b41eca6..f4bba39e6b971 100644 --- a/RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu +++ b/RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu @@ -2,11 +2,6 @@ #include "DataFormats/CaloRecHit/interface/MultifitComputations.h" -// needed to compile with USER_CXXFLAGS="-DCOMPUTE_TDC_TIME" -#include "DataFormats/HcalRecHit/interface/HcalSpecialTimes.h" -// TODO reuse some of the HCAL constats from -//#include "RecoLocalCalo/HcalRecAlgos/interface/HcalConstants.h" -// ? #include "SimpleAlgoGPU.h" #include "KernelHelpers.h" diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index 681211b82e1af..f306d421bc0e6 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -11,8 +11,11 @@ #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" +#include "CUDADataFormats/TrackingRecHit/interface/SiPixelStatus.h" + namespace pixelCPEforGPU { + using Status = SiPixelStatus; using Frame = SOAFrame; using Rotation = SOARotation; @@ -39,7 +42,11 @@ namespace pixelCPEforGPU { float x0, y0, z0; // the vertex in the local coord of the detector - float sx[3], sy[3]; // the errors... + float apeX, apeY; // ape^2 + uint8_t sx2, sy1, sy2; + uint8_t sigmax[16], sigmax1[16], sigmay[16]; // in micron + float xfact[5], yfact[5]; + int minCh[5]; Frame frame; }; @@ -94,8 +101,10 @@ namespace pixelCPEforGPU { float xerr[N]; float yerr[N]; - int16_t xsize[N]; // clipped at 127 if negative is edge.... + int16_t xsize[N]; // (*8) clipped at 127 if negative is edge.... int16_t ysize[N]; + + Status status[N]; }; constexpr int32_t MaxHitsInIter = gpuClustering::maxHitsInIter(); @@ -205,8 +214,8 @@ namespace pixelCPEforGPU { if (phase1PixelTopology::isBigPixY(cp.maxCol[ic])) ++ysize; - int unbalanceX = 8. * std::abs(float(cp.Q_f_X[ic] - cp.Q_l_X[ic])) / float(cp.Q_f_X[ic] + cp.Q_l_X[ic]); - int unbalanceY = 8. * std::abs(float(cp.Q_f_Y[ic] - cp.Q_l_Y[ic])) / float(cp.Q_f_Y[ic] + cp.Q_l_Y[ic]); + int unbalanceX = 8.f * std::abs(float(cp.Q_f_X[ic] - cp.Q_l_X[ic])) / float(cp.Q_f_X[ic] + cp.Q_l_X[ic]); + int unbalanceY = 8.f * std::abs(float(cp.Q_f_Y[ic] - cp.Q_l_Y[ic])) / float(cp.Q_f_Y[ic] + cp.Q_l_Y[ic]); xsize = 8 * xsize - unbalanceX; ysize = 8 * ysize - unbalanceY; @@ -284,8 +293,8 @@ namespace pixelCPEforGPU { auto sy = cp.maxCol[ic] - cp.minCol[ic]; // is edgy ? - bool isEdgeX = cp.minRow[ic] == 0 or cp.maxRow[ic] == phase1PixelTopology::lastRowInModule; - bool isEdgeY = cp.minCol[ic] == 0 or cp.maxCol[ic] == phase1PixelTopology::lastColInModule; + bool isEdgeX = cp.xsize[ic] < 1; + bool isEdgeY = cp.ysize[ic] < 1; // is one and big? bool isBig1X = (0 == sx) && phase1PixelTopology::isBigPixX(cp.minRow[ic]); bool isBig1Y = (0 == sy) && phase1PixelTopology::isBigPixY(cp.minCol[ic]); @@ -322,19 +331,42 @@ namespace pixelCPEforGPU { auto sx = cp.maxRow[ic] - cp.minRow[ic]; auto sy = cp.maxCol[ic] - cp.minCol[ic]; - // is edgy ? - bool isEdgeX = cp.minRow[ic] == 0 or cp.maxRow[ic] == phase1PixelTopology::lastRowInModule; - bool isEdgeY = cp.minCol[ic] == 0 or cp.maxCol[ic] == phase1PixelTopology::lastColInModule; + // is edgy ? (size is set negative: see above) + bool isEdgeX = cp.xsize[ic] < 1; + bool isEdgeY = cp.ysize[ic] < 1; + // is one and big? - uint32_t ix = (0 == sx); - uint32_t iy = (0 == sy); - ix += (0 == sx) && phase1PixelTopology::isBigPixX(cp.minRow[ic]); - iy += (0 == sy) && phase1PixelTopology::isBigPixY(cp.minCol[ic]); + bool isOneX = (0 == sx); + bool isOneY = (0 == sy); + bool isBigX = phase1PixelTopology::isBigPixX(cp.minRow[ic]); + bool isBigY = phase1PixelTopology::isBigPixY(cp.minCol[ic]); + + auto ch = cp.charge[ic]; + auto bin = 0; + for (; bin < 4; ++bin) + if (ch < detParams.minCh[bin + 1]) + break; + assert(bin < 5); + + cp.status[ic].qBin = 4 - bin; + cp.status[ic].isOneX = isOneX; + cp.status[ic].isBigX = (isOneX & isBigX) | isEdgeX; + cp.status[ic].isOneY = isOneY; + cp.status[ic].isBigY = (isOneY & isBigY) | isEdgeY; + + auto xoff = 81.f * comParams.thePitchX; + int jx = std::min(15, std::max(0, int(16.f * (cp.xpos[ic] + xoff) / (162.f * comParams.thePitchX)))); + + auto toCM = [](uint8_t x) { return float(x) * 1.e-4; }; if (not isEdgeX) - cp.xerr[ic] = detParams.sx[ix]; + cp.xerr[ic] = isOneX ? toCM(isBigX ? detParams.sx2 : detParams.sigmax1[jx]) + : detParams.xfact[bin] * toCM(detParams.sigmax[jx]); + + auto ey = cp.ysize[ic] > 8 ? detParams.sigmay[std::min(cp.ysize[ic] - 9, 15)] : detParams.sy1; + if (not isEdgeY) - cp.yerr[ic] = detParams.sy[iy]; + cp.yerr[ic] = isOneY ? toCM(isBigY ? detParams.sy2 : detParams.sy1) : detParams.yfact[bin] * toCM(ey); } } // namespace pixelCPEforGPU diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 17cd5aad4db52..689a324982104 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -149,8 +149,9 @@ namespace gpuPixelRecHits { assert(cl < MaxHitsInIter); auto x = digis.xx(i); auto y = digis.yy(i); - auto ch = std::min(digis.adc(i), pixmx); + auto ch = digis.adc(i); atomicAdd(&clusParams.charge[cl], ch); + ch = std::min(ch, pixmx); if (clusParams.minRow[cl] == x) atomicAdd(&clusParams.Q_f_X[cl], ch); if (clusParams.maxRow[cl] == x) @@ -181,7 +182,9 @@ namespace gpuPixelRecHits { // store it - hits.charge(h) = clusParams.charge[ic]; + hits.setChargeAndStatus(h, clusParams.charge[ic], clusParams.status[ic]); + + // printf("charge,status %d %d %d %d %d\n",hits.charge(h),hits.status(h).qBin,hits.chargeAndStatus(h),clusParams.charge[ic],clusParams.status[ic].qBin); hits.detectorIndex(h) = me; @@ -192,8 +195,9 @@ namespace gpuPixelRecHits { hits.clusterSizeX(h) = clusParams.xsize[ic]; hits.clusterSizeY(h) = clusParams.ysize[ic]; - hits.xerrLocal(h) = clusParams.xerr[ic] * clusParams.xerr[ic]; - hits.yerrLocal(h) = clusParams.yerr[ic] * clusParams.yerr[ic]; + hits.xerrLocal(h) = clusParams.xerr[ic] * clusParams.xerr[ic] + cpeParams->detParams(me).apeX; + hits.yerrLocal(h) = clusParams.yerr[ic] * clusParams.yerr[ic] + cpeParams->detParams(me).apeY; + ; // keep it local for computations float xg, yg, zg; diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index f3b3f308fa9d3..6ba4c743a8b40 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -112,7 +112,8 @@ void PixelCPEFast::fillParamsForGpu() { m_commonParamsGPU.thePitchX = m_DetParams[0].thePitchX; m_commonParamsGPU.thePitchY = m_DetParams[0].thePitchY; - // std::cout << "pitch & thickness " << m_commonParamsGPU.thePitchX << ' ' << m_commonParamsGPU.thePitchY << " " << m_commonParamsGPU.theThicknessB << ' ' << m_commonParamsGPU.theThicknessE << std::endl; + // std::cout << "pitch & thickness " << m_commonParamsGPU.thePitchX << ' ' << m_commonParamsGPU.thePitchY << " " + // << m_commonParamsGPU.theThicknessB << ' ' << m_commonParamsGPU.theThicknessE << std::endl; // zero average geometry memset(&m_averageGeometry, 0, sizeof(pixelCPEforGPU::AverageGeometry)); @@ -184,77 +185,136 @@ void PixelCPEFast::fillParamsForGpu() { pl += vv.phi(); // (not obvious) // errors ..... - ClusterParamGeneric cp; - auto gvx = p.theOrigin.x() + 40.f * m_commonParamsGPU.thePitchX; - auto gvy = p.theOrigin.y(); - auto gvz = 1.f / p.theOrigin.z(); - //--- Note that the normalization is not required as only the ratio used - // calculate angles - cp.cotalpha = gvx * gvz; - cp.cotbeta = gvy * gvz; + ClusterParamGeneric cp; cp.with_track_angle = false; auto lape = p.theDet->localAlignmentError(); if (lape.invalid()) lape = LocalError(); // zero.... - + g.apeX = lape.xx(); + g.apeY = lape.yy(); + + auto toMicron = [&](float x) { return std::min(511, int(x * 1.e4f + 0.5f)); }; + + { + // average angle + auto gvx = p.theOrigin.x() + 40.f * m_commonParamsGPU.thePitchX; + auto gvy = p.theOrigin.y(); + auto gvz = 1.f / p.theOrigin.z(); + cp.cotalpha = gvx * gvz; + cp.cotbeta = gvy * gvz; + errorFromTemplates(p, cp, 20000.); + } #ifdef DUMP_ERRORS auto m = 10000.f; - for (float qclus = 15000; qclus < 35000; qclus += 15000) { - errorFromTemplates(p, cp, qclus); + std::cout << i << ' ' << (g.isBarrel ? 'B' : 'E') << " ape " << m * std::sqrt(lape.xx()) << ' ' + << m * std::sqrt(lape.yy()) << std::endl; + std::cout << i << ' ' << g.rawId << ' ' << cp.cotalpha << ' ' << 20000 << ' ' << cp.qBin_ << ' ' << cp.pixmx << ' ' + << m * cp.sigmax << ' ' << m * cp.sx1 << ' ' << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' << m * cp.sy1 + << ' ' << m * cp.sy2 << std::endl; +#endif + + g.pixmx = std::max(0, cp.pixmx); + g.sx2 = toMicron(cp.sx2); + g.sy1 = std::max(21, toMicron(cp.sy1)); // for some angles sy1 is very small + g.sy2 = std::max(55, toMicron(cp.sy2)); // sometimes sy2 is smaller than others (due to angle?) + + // sample xerr as function of position + auto xoff = -81.f * m_commonParamsGPU.thePitchX; + + for (int ix = 0; ix < 16; ++ix) { + auto x = xoff + (0.5f + float(ix)) * 162.f * m_commonParamsGPU.thePitchX / 16.f; + auto gvx = p.theOrigin.x() - x; + auto gvy = p.theOrigin.y(); + auto gvz = 1.f / p.theOrigin.z(); + cp.cotbeta = gvy * gvz; + cp.cotalpha = gvx * gvz; + errorFromTemplates(p, cp, 20000.f); + g.sigmax[ix] = toMicron(cp.sigmax); + g.sigmax1[ix] = toMicron(cp.sx1); +#ifdef DUMP_ERRORS + std::cout << "sigmax vs x " << i << ' ' << x << ' ' << cp.cotalpha << ' ' << int(g.sigmax[ix]) << ' ' + << int(g.sigmax1[ix]) << ' ' << 10000.f * cp.sigmay << std::endl; +#endif + } - std::cout << i << ' ' << qclus << ' ' << cp.pixmx << ' ' << m * cp.sigmax << ' ' << m * cp.sx1 << ' ' - << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' << m * cp.sy1 << ' ' << m * cp.sy2 << std::endl; +#ifdef DUMP_ERRORS + // sample yerr as function of position + auto yoff = -54 * 4.f * m_commonParamsGPU.thePitchY; + for (int ix = 0; ix < 16; ++ix) { + auto y = yoff + (0.5f + float(ix)) * 432.f * m_commonParamsGPU.thePitchY / 16.f; + auto gvx = p.theOrigin.x() + 40.f * m_commonParamsGPU.thePitchY; + auto gvy = p.theOrigin.y() - y; + auto gvz = 1.f / p.theOrigin.z(); + cp.cotbeta = gvy * gvz; + cp.cotalpha = gvx * gvz; + errorFromTemplates(p, cp, 20000.f); + std::cout << "sigmay vs y " << i << ' ' << y << ' ' << cp.cotbeta << ' ' << 10000.f * cp.sigmay << std::endl; } - std::cout << i << ' ' << m * std::sqrt(lape.xx()) << ' ' << m * std::sqrt(lape.yy()) << std::endl; #endif - errorFromTemplates(p, cp, 20000.f); - g.pixmx = std::max(0, cp.pixmx); - g.sx[0] = cp.sigmax; - g.sx[1] = cp.sx1; - g.sx[2] = cp.sx2; - - g.sy[0] = cp.sigmay; - g.sy[1] = cp.sy1; - g.sy[2] = cp.sy2; - - /* - // from run1?? - if (i<96) { - g.sx[0] = 0.00120; - g.sx[1] = 0.00115; - g.sx[2] = 0.0050; - - g.sy[0] = 0.00210; - g.sy[1] = 0.00375; - g.sy[2] = 0.0085; - } else if (g.isBarrel) { - g.sx[0] = 0.00120; - g.sx[1] = 0.00115; - g.sx[2] = 0.0050; - - g.sy[0] = 0.00210; - g.sy[1] = 0.00375; - g.sy[2] = 0.0085; - } else { - g.sx[0] = 0.0020; - g.sx[1] = 0.0020; - g.sx[2] = 0.0050; - - g.sy[0] = 0.0021; - g.sy[1] = 0.0021; - g.sy[2] = 0.0085; - } - */ - - for (int i = 0; i < 3; ++i) { - g.sx[i] = std::sqrt(g.sx[i] * g.sx[i] + lape.xx()); - g.sy[i] = std::sqrt(g.sy[i] * g.sy[i] + lape.yy()); + // average angle + auto gvx = p.theOrigin.x() + 40.f * m_commonParamsGPU.thePitchX; + auto gvy = p.theOrigin.y(); + auto gvz = 1.f / p.theOrigin.z(); + //--- Note that the normalization is not required as only the ratio used + + // calculate angles + cp.cotalpha = gvx * gvz; + cp.cotbeta = gvy * gvz; + auto aveCB = cp.cotbeta; + + // sample x by charge + int qbin = 5; // low charge + int k = 0; + for (int qclus = 1000; qclus < 200000; qclus += 1000) { + errorFromTemplates(p, cp, qclus); + if (cp.qBin_ == qbin) + continue; + qbin = cp.qBin_; + g.xfact[k] = cp.sigmax; + g.yfact[k] = cp.sigmay; + g.minCh[k++] = qclus; +#ifdef DUMP_ERRORS + std::cout << i << ' ' << g.rawId << ' ' << cp.cotalpha << ' ' << qclus << ' ' << cp.qBin_ << ' ' << cp.pixmx + << ' ' << m * cp.sigmax << ' ' << m * cp.sx1 << ' ' << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' + << m * cp.sy1 << ' ' << m * cp.sy2 << std::endl; +#endif + } + assert(k <= 5); + // fill the rest (sometimes bin 4 is missing) + for (int kk = k; kk < 5; ++kk) { + g.xfact[kk] = g.xfact[k - 1]; + g.yfact[kk] = g.yfact[k - 1]; + g.minCh[kk] = g.minCh[k - 1]; + } + auto detx = 1.f / g.xfact[0]; + auto dety = 1.f / g.yfact[0]; + for (int kk = 0; kk < 5; ++kk) { + g.xfact[kk] *= detx; + g.yfact[kk] *= dety; } - } + + // sample y in angle + float ys = 8.f - 4.f; // apperent bias of half pixel (see plot) + // sample yerr as function of "size" + for (int iy = 0; iy < 16; ++iy) { + ys += 1.f; // first bin 0 is for size 9 (and size is in fixed point 2^3) + if (15 == iy) + ys += 8.f; // last bin for "overflow" + // cp.cotalpha = ys*100.f/(8.f*285.f); + cp.cotbeta = std::copysign(ys * 150.f / (8.f * 285.f), aveCB); + errorFromTemplates(p, cp, 20000.f); + g.sigmay[iy] = toMicron(cp.sigmay); +#ifdef DUMP_ERRORS + std::cout << "sigmax/sigmay " << i << ' ' << (ys + 4.f) / 8.f << ' ' << cp.cotalpha << '/' << cp.cotbeta << ' ' + << 10000.f * cp.sigmax << '/' << int(g.sigmay[iy]) << std::endl; +#endif + } + + } // loop over det // compute ladder baricenter (only in global z) for the barrel auto& aveGeom = m_averageGeometry; @@ -406,13 +466,20 @@ LocalPoint PixelCPEFast::localPosition(DetParam const& theDetParam, ClusterParam cp.Q_f_Y[0] = Q_f_Y; cp.Q_l_Y[0] = Q_l_Y; + cp.charge[0] = theClusterParam.theCluster->charge(); + auto ind = theDetParam.theDet->index(); pixelCPEforGPU::position(m_commonParamsGPU, m_detParamsGPU[ind], cp, 0); auto xPos = cp.xpos[0]; auto yPos = cp.ypos[0]; - // std::cout<<" in PixelCPEFast:localPosition - pos = "<(theClusterParamBase); - // Default errors are the maximum error used for edge clusters. - // These are determined by looking at residuals for edge clusters - float xerr = EdgeClusterErrorX_ * micronsToCm; - float yerr = EdgeClusterErrorY_ * micronsToCm; - - // Find if cluster is at the module edge. - int maxPixelCol = theClusterParam.theCluster->maxPixelCol(); - int maxPixelRow = theClusterParam.theCluster->maxPixelRow(); - int minPixelCol = theClusterParam.theCluster->minPixelCol(); - int minPixelRow = theClusterParam.theCluster->minPixelRow(); - - bool edgex = phase1PixelTopology::isEdgeX(minPixelRow) | phase1PixelTopology::isEdgeX(maxPixelRow); - bool edgey = phase1PixelTopology::isEdgeY(minPixelCol) | phase1PixelTopology::isEdgeY(maxPixelCol); - - unsigned int sizex = theClusterParam.theCluster->sizeX(); - unsigned int sizey = theClusterParam.theCluster->sizeY(); - - // Find if cluster contains double (big) pixels. - bool bigInX = theDetParam.theRecTopol->containsBigPixelInX(minPixelRow, maxPixelRow); - bool bigInY = theDetParam.theRecTopol->containsBigPixelInY(minPixelCol, maxPixelCol); - - if (UseErrorsFromTemplates_) { - // - // Use template errors - - if (!edgex) { // Only use this for non-edge clusters - if (sizex == 1) { - if (!bigInX) { - xerr = theClusterParam.sx1; - } else { - xerr = theClusterParam.sx2; - } - } else { - xerr = theClusterParam.sigmax; - } - } - - if (!edgey) { // Only use for non-edge clusters - if (sizey == 1) { - if (!bigInY) { - yerr = theClusterParam.sy1; - } else { - yerr = theClusterParam.sy2; - } - } else { - yerr = theClusterParam.sigmay; - } - } - - } else { // simple errors - - // This are the simple errors, hardcoded in the code - //cout << "Track angles are not known " << endl; - //cout << "Default angle estimation which assumes track from PV (0,0,0) does not work." << endl; - - if (GeomDetEnumerators::isTrackerPixel(theDetParam.thePart)) { - if (GeomDetEnumerators::isBarrel(theDetParam.thePart)) { - DetId id = (theDetParam.theDet->geographicalId()); - int layer = ttopo_.layer(id); - if (layer == 1) { - if (!edgex) { - if (sizex <= xerr_barrel_l1_.size()) - xerr = xerr_barrel_l1_[sizex - 1]; - else - xerr = xerr_barrel_l1_def_; - } - - if (!edgey) { - if (sizey <= yerr_barrel_l1_.size()) - yerr = yerr_barrel_l1_[sizey - 1]; - else - yerr = yerr_barrel_l1_def_; - } - } else { // layer 2,3 - if (!edgex) { - if (sizex <= xerr_barrel_ln_.size()) - xerr = xerr_barrel_ln_[sizex - 1]; - else - xerr = xerr_barrel_ln_def_; - } - - if (!edgey) { - if (sizey <= yerr_barrel_ln_.size()) - yerr = yerr_barrel_ln_[sizey - 1]; - else - yerr = yerr_barrel_ln_def_; - } - } - - } else { // EndCap - - if (!edgex) { - if (sizex <= xerr_endcap_.size()) - xerr = xerr_endcap_[sizex - 1]; - else - xerr = xerr_endcap_def_; - } - - if (!edgey) { - if (sizey <= yerr_endcap_.size()) - yerr = yerr_endcap_[sizey - 1]; - else - yerr = yerr_endcap_def_; - } - } // end endcap - } - - } // end + auto xerr = theClusterParam.sigmax; + auto yerr = theClusterParam.sigmay; - // std::cout<<" errors "<begin(tkid); +// #define YERR_FROM_DC +#ifdef YERR_FROM_DC + // try to compute more precise error in y + auto dx = hhp->xGlobal(hitId[hitsInFit - 1]) - hhp->xGlobal(hitId[0]); + auto dy = hhp->yGlobal(hitId[hitsInFit - 1]) - hhp->yGlobal(hitId[0]); + auto dz = hhp->zGlobal(hitId[hitsInFit - 1]) - hhp->zGlobal(hitId[0]); + float ux, uy, uz; +#endif for (unsigned int i = 0; i < hitsInFit; ++i) { auto hit = hitId[i]; float ge[6]; +#ifdef YERR_FROM_DC + auto const &dp = hhp->cpeParams().detParams(hhp->detectorIndex(hit)); + auto status = hhp->status(hit); + int qbin = 4 - status.qBin; + assert(qbin >= 0 && qbin < 5); + bool nok = (status.isBigY | status.isOneY); + // compute cotanbeta and use it to recompute error + dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz); + auto cb = std::abs(uy / uz); + int bin = int(cb * (285.f / 150.f) * 8.f) - 4; + bin = std::max(0, std::min(15, bin)); + float yerr = dp.sigmay[bin] * 1.e-4f; + yerr *= dp.yfact[qbin]; // inflate + yerr *= yerr; + yerr += dp.apeY; + yerr = nok ? hhp->yerrLocal(hit) : yerr; + dp.frame.toGlobal(hhp->xerrLocal(hit), 0, yerr, ge); +#else hhp->cpeParams() .detParams(hhp->detectorIndex(hit)) .frame.toGlobal(hhp->xerrLocal(hit), 0, hhp->yerrLocal(hit), ge); +#endif + #ifdef BL_DUMP_HITS if (dump) { printf("Hit global: %d: %d hits.col(%d) << %f,%f,%f\n", diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h index 691395887dddb..9d51acd9e4d76 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h @@ -375,16 +375,13 @@ __global__ void kernel_classifyTracks(HitContainer const *__restrict__ tuples, } // compute a pT-dependent chi2 cut - // default parameters: - // - chi2MaxPt = 10 GeV - // - chi2Coeff = { 0.68177776, 0.74609577, -0.08035491, 0.00315399 } - // - chi2Scale = 30 for broken line fit, 45 for Riemann fit + // at the moment just a flat cut... // (see CAHitNtupletGeneratorGPU.cc) - float pt = std::min(tracks->pt(it), cuts.chi2MaxPt); - float chi2Cut = cuts.chi2Scale * - (cuts.chi2Coeff[0] + pt * (cuts.chi2Coeff[1] + pt * (cuts.chi2Coeff[2] + pt * cuts.chi2Coeff[3]))); - // above number were for Quads not normalized so for the time being just multiple by ndof for Quads (triplets to be understood) - if (3.f * tracks->chi2(it) >= chi2Cut) { + // float pt = std::min(tracks->pt(it), cuts.chi2MaxPt); + // float chi2Cut = cuts.chi2Scale * + // (cuts.chi2Coeff[0] + pt * (cuts.chi2Coeff[1] + pt * (cuts.chi2Coeff[2] + pt * cuts.chi2Coeff[3]))); + float chi2Cut = cuts.chi2Scale; + if (tracks->chi2(it) >= chi2Cut) { #ifdef NTUPLE_DEBUG printf("Bad fit %d size %d pt %f eta %f chi2 %f\n", it, diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc index 464744594e9a6..aa5fbb94b21eb 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc @@ -150,11 +150,11 @@ void CAHitNtupletGeneratorOnGPU::fillDescriptions(edm::ParameterSetDescription& edm::ParameterSetDescription trackQualityCuts; trackQualityCuts.add("chi2MaxPt", 10.)->setComment("max pT used to determine the pT-dependent chi2 cut"); - trackQualityCuts.add>("chi2Coeff", {0.68177776, 0.74609577, -0.08035491, 0.00315399}) + trackQualityCuts.add>("chi2Coeff", {1., 0., 0., 0.}) ->setComment("Polynomial coefficients to derive the pT-dependent chi2 cut"); - trackQualityCuts.add("chi2Scale", 30.) + trackQualityCuts.add("chi2Scale", 25.) ->setComment( - "Factor to multiply the pT-dependent chi2 cut (currently: 30 for the broken line fit, 45 for the Riemann " + "Factor to multiply the pT-dependent chi2 cut (currently: 25 for the broken line fit, ?? for the Riemann " "fit)"); trackQualityCuts.add("tripletMinPt", 0.5)->setComment("Min pT for triplets, in GeV"); trackQualityCuts.add("tripletMaxTip", 0.3)->setComment("Max |Tip| for triplets, in cm");