Skip to content

Commit cffb6c3

Browse files
committed
Added hint nj
1 parent 84622f7 commit cffb6c3

File tree

10 files changed

+559
-131
lines changed

10 files changed

+559
-131
lines changed

docs/opts.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,13 @@ for only two settings, as follows. Otherwise, setting it to zero chooses a good
205205
* ``upsampfac=1.25`` : low-upsampling option, with lower RAM, smaller FFTs, but wider spreading kernel. This option can be much faster than the standard one when the number of nonuniform points is similar to or smaller than the number of modes, and/or if low accuracy is required. It is especially fast (2 to 3 times faster) for type 3 transforms. However, the kernel widths :math:`w` are about 50% larger in each dimension, which can lead to slower spreading (it can also be faster due to the smaller size of the fine grid). Because the kernel width is currently limited to 16, only 9-digit accuracy can be reached when using ``upsampfac=1.25``.
206206

207207

208+
**hint_nj**: Estimated number of nonuniform points available at plan time.
209+
If nonzero, ``makeplan`` uses this estimate to choose ``upsampfac``.
210+
Each ``setpts`` call recomputes the factor using the actual ``nj`` and
211+
re-initializes the plan if it changes. A value of ``0`` defers the choice
212+
until ``setpts``. If ``upsampfac`` is set explicitly, ``hint_nj`` is ignored.
213+
214+
208215
**spread_thread**: in the case of multiple transforms per call (``ntr>1``, or the "many" interfaces), controls how multithreading is used to spread/interpolate each batch of data.
209216

210217
* ``spread_thread=0`` : makes an automatic choice between the below. Recommended.

include/finufft.fh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ c alg performance opts...
1616
integer nthreads, fftw, spread_sort, spread_kerevalmeth
1717
integer spread_kerpad, spread_simd
1818
real*8 upsampfac
19+
integer*8 hint_nj
1920
integer spread_thread, maxbatchsize, spread_nthr_atomic
2021
integer spread_max_sp_size
2122
integer fftw_lock_fun, fftw_unlock_fun, fftw_lock_data

include/finufft/finufft_core.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,10 @@ template<typename TF> struct FINUFFT_PLAN_T { // the main plan class, fully C++
121121
finufft_opts opts; // this and spopts could be made ptrs
122122
finufft_spread_opts spopts;
123123

124+
bool upsamp_locked;
125+
126+
int init_spreader_and_fft();
127+
124128
// Remaining actions (not create/delete) in guru interface are now methods...
125129
int setpts(BIGINT nj, TF *xj, TF *yj, TF *zj, BIGINT nk, TF *s, TF *t, TF *u);
126130
int execute_internal(TC *cj, TC *fk, bool adjoint = false, int ntrans_actual = -1,

include/finufft/heuristics.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ double bestUpsamplingFactor(const int nthreads, const double density, const int
332332
}
333333

334334
// 2) Special-case for nufftType == 3
335-
// TODO: maybe use the bandwidth here?
335+
// TODO: use the bandwidth to populate the table.
336336
if (nufftType == 3) {
337337
return 1.25;
338338
}

include/finufft_mod.f90

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ module finufft_mod
1717
integer(kind=C_INT) :: nthreads,fftw,spread_sort,spread_kerevalmeth
1818
integer(kind=C_INT) :: spread_kerpad, spread_simd
1919
real(kind=C_DOUBLE) :: upsampfac
20+
integer(kind=C_SIZE_T) :: hint_nj
2021
integer(kind=C_INT) :: spread_thread, maxbatchsize
2122
integer(kind=C_INT) :: spread_nthr_atomic, spread_max_sp_size
2223
integer(kind=C_SIZE_T) :: fftw_lock_fun, fftw_unlock_fun, fftw_lock_data

include/finufft_opts.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
#ifndef FINUFFT_OPTS_H
66
#define FINUFFT_OPTS_H
77

8+
#include <stddef.h>
9+
810
typedef struct finufft_opts { // defaults see finufft_core.cpp:finufft_default_opts_t()
911
// sphinx tag (don't remove): @opts_start
1012
// FINUFFT options:
@@ -27,6 +29,7 @@ typedef struct finufft_opts { // defaults see finufft_core.cpp:finufft_default_o
2729
int spread_kerpad; // (exp(sqrt()) only): 0 don't pad kernel to 4n, 1 do
2830
int spread_simd; // 0 auto(=2), 1 scalar, 2 manual vectorization
2931
double upsampfac; // upsampling ratio sigma: 2.0 std, 1.25 small FFT, 0.0 auto
32+
size_t hint_nj; // estimated nj at plan time; 0 means unknown
3033
int spread_thread; // (vectorized ntr>1 only): 0 auto, 1 seq multithreaded,
3134
// 2 parallel single-thread spread
3235
int maxbatchsize; // (vectorized ntr>1 only): max transform batch, 0 auto

python/finufft/finufft/_finufft.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from ctypes import c_float
1313
from ctypes import c_int
1414
from ctypes import c_longlong
15+
from ctypes import c_size_t
1516
from ctypes import c_void_p
1617

1718
import numpy as np
@@ -83,6 +84,7 @@ class FinufftOpts(ctypes.Structure):
8384
('spread_kerpad', c_int),
8485
('spread_simd', c_int),
8586
('upsampfac', c_double),
87+
('hint_nj', c_size_t),
8688
('spread_thread', c_int),
8789
('maxbatchsize', c_int),
8890
('spread_nthr_atomic', c_int),

0 commit comments

Comments
 (0)