Added hint nj

DiamonDinoia · DiamonDinoia · commit cffb6c3702d6 · 2025-08-22T08:38:13.000-04:00
diff --git a/docs/opts.rst b/docs/opts.rst
@@ -205,6 +205,13 @@ for only two settings, as follows. Otherwise, setting it to zero chooses a good
 * ``upsampfac=1.25`` : low-upsampling option, with lower RAM, smaller FFTs, but wider spreading kernel. The latter can be much faster than the standard when the number of nonuniform points is similar to or smaller than the number of modes, and/or if low accuracy is required. It is especially fast (2 to 3 times faster) for type 3 transforms. However, the kernel widths :math:`w` are about 50% larger in each dimension, which can lead to slower spreading (it can also be faster due to the smaller size of the fine grid). Because the kernel width is currently limited to 16, only 9-digit accuracy can be reached when using ``upsampfac=1.25``.
 
 
+**hint_nj**: Estimated number of nonuniform points available at plan time.
+If nonzero, ``makeplan`` uses this estimate to choose ``upsampfac``.
+Each ``setpts`` call recomputes the factor using the actual ``nj`` and
+re-initializes the plan if the factor changes.  A value of ``0`` defers the choice
+until ``setpts``.  If ``upsampfac`` is set explicitly, ``hint_nj`` is ignored.
+
+
 **spread_thread**: in the case of multiple transforms per call (``ntr>1``, or the "many" interfaces), controls how multithreading is used to spread/interpolate each batch of data.
 
 * ``spread_thread=0`` : makes an automatic choice between the below. Recommended.
diff --git a/include/finufft.fh b/include/finufft.fh
@@ -16,6 +16,7 @@ c     alg performance opts...
       integer nthreads, fftw, spread_sort, spread_kerevalmeth
       integer spread_kerpad, spread_simd
       real*8 upsampfac
+      integer*8 hint_nj
       integer spread_thread, maxbatchsize, spread_nthr_atomic
       integer spread_max_sp_size
       integer fftw_lock_fun, fftw_unlock_fun, fftw_lock_data
diff --git a/include/finufft/finufft_core.h b/include/finufft/finufft_core.h
@@ -121,6 +121,10 @@ template<typename TF> struct FINUFFT_PLAN_T { // the main plan class, fully C++
   finufft_opts opts; // this and spopts could be made ptrs
   finufft_spread_opts spopts;
 
+  bool upsamp_locked;
+
+  int init_spreader_and_fft();
+
   // Remaining actions (not create/delete) in guru interface are now methods...
   int setpts(BIGINT nj, TF *xj, TF *yj, TF *zj, BIGINT nk, TF *s, TF *t, TF *u);
   int execute_internal(TC *cj, TC *fk, bool adjoint = false, int ntrans_actual = -1,
diff --git a/include/finufft/heuristics.hpp b/include/finufft/heuristics.hpp
@@ -332,7 +332,7 @@ double bestUpsamplingFactor(const int nthreads, const double density, const int
   }
 
   // 2) Special-case for nufftType == 3
-  //    TODO: maybe use the bandwidth here?
+  //    TODO: use the bandwidth to populate the table.
   if (nufftType == 3) {
     return 1.25;
   }
diff --git a/include/finufft_mod.f90 b/include/finufft_mod.f90
@@ -17,6 +17,7 @@ module finufft_mod
    integer(kind=C_INT) :: nthreads,fftw,spread_sort,spread_kerevalmeth
    integer(kind=C_INT) :: spread_kerpad, spread_simd
    real(kind=C_DOUBLE) :: upsampfac
+   integer(kind=C_SIZE_T) :: hint_nj
    integer(kind=C_INT) :: spread_thread, maxbatchsize
    integer(kind=C_INT) :: spread_nthr_atomic, spread_max_sp_size
    integer(kind=C_SIZE_T) :: fftw_lock_fun, fftw_unlock_fun, fftw_lock_data
diff --git a/include/finufft_opts.h b/include/finufft_opts.h
@@ -5,6 +5,8 @@
 #ifndef FINUFFT_OPTS_H
 #define FINUFFT_OPTS_H
 
+#include <stddef.h>
+
 typedef struct finufft_opts { // defaults see finufft_core.cpp:finufft_default_opts_t()
   // sphinx tag (don't remove): @opts_start
   // FINUFFT options:
@@ -27,6 +29,7 @@ typedef struct finufft_opts { // defaults see finufft_core.cpp:finufft_default_o
   int spread_kerpad;      // (exp(sqrt()) only): 0 don't pad kernel to 4n, 1 do
   int spread_simd;        // 0 auto(=2), 1 scalar, 2 manual vectorization
   double upsampfac;       // upsampling ratio sigma: 2.0 std, 1.25 small FFT, 0.0 auto
+  size_t hint_nj;         // estimated nj at plan time; 0 means unknown
   int spread_thread;      // (vectorized ntr>1 only): 0 auto, 1 seq multithreaded,
                           //                          2 parallel single-thread spread
   int maxbatchsize;       // (vectorized ntr>1 only): max transform batch, 0 auto
diff --git a/python/finufft/finufft/_finufft.py b/python/finufft/finufft/_finufft.py
@@ -12,6 +12,7 @@
 from ctypes import c_float
 from ctypes import c_int
 from ctypes import c_longlong
+from ctypes import c_size_t
 from ctypes import c_void_p
 
 import numpy as np
@@ -83,6 +84,7 @@ class FinufftOpts(ctypes.Structure):
                       ('spread_kerpad', c_int),
                       ('spread_simd', c_int),
                       ('upsampfac', c_double),
+                      ('hint_nj', c_size_t),
                       ('spread_thread', c_int),
                       ('maxbatchsize', c_int),
                       ('spread_nthr_atomic', c_int),
diff --git a/src/finufft_core.cpp b/src/finufft_core.cpp
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
diff --git a/test/hint_nj_test.cpp b/test/hint_nj_test.cpp

Original file line number	Diff line number	Diff line change
`@@ -332,7 +332,7 @@ double bestUpsamplingFactor(const int nthreads, const double density, const int`
`332`	`332`	`}`
`333`	`333`
`334`	`334`	`// 2) Special-case for nufftType == 3`
`335`		`- // TODO: maybe use the bandwidth here?`
	`335`	`+ // TODO: use the bandwidth to populate the table.`
`336`	`336`	`if (nufftType == 3) {`
`337`	`337`	`return 1.25;`
`338`	`338`	`}`