@@ -1140,11 +1140,10 @@ int FINUFFT_PLAN_T<TF>::execute_internal(TC *cj, TC *fk, bool adjoint, int ntran
1140
1140
// STEP 0: pre-phase (possibly) the c_j input strengths into c'_j batch...
1141
1141
timer.restart ();
1142
1142
#pragma omp parallel for num_threads(opts.nthreads) // or batchSize?
1143
- for (int i = 0 ; i < thisBatchSize; i++) {
1144
- BIGINT ioff = i * nj;
1145
- for (BIGINT j = 0 ; j < nj; ++j) {
1146
- CpBatch[ioff + j] = prephase[j] * cjb[ioff + j];
1147
- }
1143
+ for (BIGINT j = 0 ; j < nj; ++j) {
1144
+ auto phase = prephase[j];
1145
+ for (int i = 0 ; i < thisBatchSize; i++)
1146
+ CpBatch[i * nj + j] = phase * cjb[i * nj + j];
1148
1147
}
1149
1148
t_phase += timer.elapsedsec ();
1150
1149
@@ -1164,21 +1163,17 @@ int FINUFFT_PLAN_T<TF>::execute_internal(TC *cj, TC *fk, bool adjoint, int ntran
1164
1163
// STEP 3: apply deconvolve (precomputed 1/phiHat(targ_k), phasing too)...
1165
1164
timer.restart ();
1166
1165
#pragma omp parallel for num_threads(opts.nthreads)
1167
- for (int i = 0 ; i < thisBatchSize; i++) {
1168
- BIGINT ioff = i * nk;
1169
- for (BIGINT k = 0 ; k < nk; ++k) fkb[ioff + k] *= deconv[k];
1170
- }
1166
+ for (BIGINT k = 0 ; k < nk; ++k)
1167
+ for (int i = 0 ; i < thisBatchSize; i++) fkb[i * nk + k] *= deconv[k];
1171
1168
t_deconv += timer.elapsedsec ();
1172
1169
} else { // adjoint mode
1173
1170
// STEP 0: apply deconvolve (precomputed 1/phiHat(targ_k), conjugate phasing
1174
1171
// too)... write output into CpBatch
1175
1172
timer.restart ();
1176
1173
#pragma omp parallel for num_threads(opts.nthreads)
1177
- for (int i = 0 ; i < thisBatchSize; i++) {
1178
- BIGINT ioff = i * nk;
1179
- for (BIGINT k = 0 ; k < nk; ++k)
1180
- CpBatch[ioff + k] = fkb[ioff + k] * conj (deconv[k]);
1181
- }
1174
+ for (BIGINT k = 0 ; k < nk; ++k)
1175
+ for (int i = 0 ; i < thisBatchSize; i++)
1176
+ CpBatch[i * nk + k] = fkb[i * nk + k] * conj (deconv[k]);
1182
1177
t_deconv += timer.elapsedsec ();
1183
1178
// STEP 1: adjoint type 2 (i.e. type 1) NUFFT from CpBatch to fwBatch...
1184
1179
timer.restart ();
@@ -1194,11 +1189,9 @@ int FINUFFT_PLAN_T<TF>::execute_internal(TC *cj, TC *fk, bool adjoint, int ntran
1194
1189
// STEP 3: post-phase (possibly) the c_j output strengths (in place) ...
1195
1190
timer.restart ();
1196
1191
#pragma omp parallel for num_threads(opts.nthreads) // or batchSize?
1197
- for (int i = 0 ; i < thisBatchSize; i++) {
1198
- BIGINT ioff = i * nj;
1199
- for (BIGINT j = 0 ; j < nj; ++j) {
1200
- cjb[ioff + j] *= conj (prephase[j]); // FIXME
1201
- }
1192
+ for (BIGINT j = 0 ; j < nj; ++j) {
1193
+ auto phase = conj (prephase[j]);
1194
+ for (int i = 0 ; i < thisBatchSize; i++) cjb[i * nj + j] *= phase;
1202
1195
}
1203
1196
t_phase += timer.elapsedsec ();
1204
1197
}
0 commit comments