diff --git a/DataFormats/CaloRecHit/interface/MultifitComputations.h b/DataFormats/CaloRecHit/interface/MultifitComputations.h index 65dee2cc40f8c..019f86d11e92b 100644 --- a/DataFormats/CaloRecHit/interface/MultifitComputations.h +++ b/DataFormats/CaloRecHit/interface/MultifitComputations.h @@ -283,6 +283,98 @@ namespace calo { } } + template + EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void calculateChiSq(MatrixType1 const& matrixL, + MatrixType2 const& pulseMatrixView, + MatrixType3 const& resultAmplitudesVector, + MatrixType4 const& inputAmplitudesView, + float& chi2) { + // FIXME: this assumes pulses are on columns and samples on rows + constexpr auto NPULSES = MatrixType2::ColsAtCompileTime; + constexpr auto NSAMPLES = MatrixType2::RowsAtCompileTime; + + // replace pulseMatrixView * resultAmplitudesVector - inputAmplitudesView + // NOTE: + float accum[NSAMPLES]; + { + float results[NPULSES]; + + // preload results and permute according to the pulse offsets /////////////// ??? this is not done in ECAL +#pragma unroll + for (int counter = 0; counter < NPULSES; counter++) { + results[counter] = resultAmplitudesVector[counter]; + } + + // load accum +#pragma unroll + for (int counter = 0; counter < NSAMPLES; counter++) + accum[counter] = -inputAmplitudesView(counter); + + // iterate + for (int icol = 0; icol < NPULSES; icol++) { + float pm_col[NSAMPLES]; + + // preload a column of pulse matrix +#pragma unroll + for (int counter = 0; counter < NSAMPLES; counter++) +#ifdef __CUDA_ARCH__ + pm_col[counter] = __ldg(&pulseMatrixView.coeffRef(counter, icol)); +#else + pm_col[counter] = pulseMatrixView.coeffRef(counter, icol); +#endif + + // accum +#pragma unroll + for (int counter = 0; counter < NSAMPLES; counter++) + accum[counter] += results[icol] * pm_col[counter]; + } + } + + // compute chi2 and check that there is no rotation + // chi2 = matrixDecomposition + // .matrixL() + // . solve(mapAccum) + // .solve(pulseMatrixView * resultAmplitudesVector - inputAmplitudesView) + // .squaredNorm(); + + { + float reg_L[NSAMPLES]; + float accumSum = 0; + + // preload a column and load column 0 of cholesky +#pragma unroll + for (int i = 0; i < NSAMPLES; i++) { + reg_L[i] = matrixL(i, 0); + } + + // compute x0 and store it + auto x_prev = accum[0] / reg_L[0]; + accumSum += x_prev * x_prev; + + // iterate +#pragma unroll + for (int iL = 1; iL < NSAMPLES; iL++) { + // update accum +#pragma unroll + for (int counter = iL; counter < NSAMPLES; counter++) + accum[counter] -= x_prev * reg_L[counter]; + + // load the next column of cholesky +#pragma unroll + for (int counter = iL; counter < NSAMPLES; counter++) + reg_L[counter] = matrixL(counter, iL); + + // compute the next x for M(iL, icol) + x_prev = accum[iL] / reg_L[iL]; + + // store the result value + accumSum += x_prev * x_prev; + } + + chi2 = accumSum; + } + } + // TODO: add active bxs template EIGEN_DEVICE_FUNC void fnnls(MatrixType const& AtA,