@@ -4,25 +4,21 @@
// octave (mkoctfile) needs this otherwise it doesn't know what int64_t is!
#include <complex>

- #include <cuComplex.h>
#include <cufinufft/types.h>

#include <cuda_runtime.h>
#include <thrust/extrema.h>
+ #include <tuple>
#include <type_traits>
#include <utility> // for std::forward

- #include <finufft_errors.h>
+ #include <common/common.h>

#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES
#endif
#include <cmath>

- #ifndef M_PI
- #define M_PI 3.14159265358979323846
- #endif
-
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)
#else
__inline__ __device__ double atomicAdd(double *address, double val) {
@@ -72,6 +68,8 @@ template<typename T> __forceinline__ __device__ auto interval(const int ns, cons
namespace cufinufft {
namespace utils {

+ using namespace finufft::common;
+
class WithCudaDevice {
public:
  explicit WithCudaDevice(const int device) : orig_device_{get_orig_device()} {
@@ -90,10 +88,8 @@ class WithCudaDevice {
  }
};

- // math helpers whose source is in src/cuda/utils.cpp
- CUFINUFFT_BIGINT next235beven(CUFINUFFT_BIGINT n, CUFINUFFT_BIGINT b);
- void gaussquad(int n, double *xgl, double *wgl);
- std::tuple<double, double> leg_eval(int n, double x);
+ // math helpers whose source is in src/utils.cpp
+ long next235beven(long n, long b);

template<typename T> T infnorm(int n, std::complex<T> *a) {
  T nrm = 0.0;
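
(Aside: the hunk above narrows the math-helper declarations to next235beven alone, with its definition moved from src/cuda/utils.cpp to src/utils.cpp. For readers unfamiliar with its contract, here is a self-contained sketch: the smallest even number >= n that is a multiple of b and has no prime factors other than 2, 3, and 5. This is an illustrative re-implementation, not the library's code, and it assumes b is itself 2,3,5-smooth; cufinufft calls it with b = 1.)

#include <cstdio>

long next235beven_sketch(long n, long b) {
  if (n <= 2) n = 2;
  if (n % 2 == 1) n += 1;                  // start from an even candidate
  long nplus  = n - 2;                     // cancels the += 2 on the first pass
  long numdiv = 2;                         // any value > 1 enters the loop
  while (numdiv > 1 || (b > 1 && nplus % b != 0)) {
    nplus += 2;                            // candidates stay even
    numdiv = nplus;
    while (numdiv % 2 == 0) numdiv /= 2;   // strip factors of 2
    while (numdiv % 3 == 0) numdiv /= 3;   // strip factors of 3
    while (numdiv % 5 == 0) numdiv /= 5;   // strip factors of 5
  }                                        // numdiv == 1 means 2,3,5-smooth
  return nplus;
}

int main() {
  std::printf("%ld\n", next235beven_sketch(1000, 1)); // 1000 (= 2^3 * 5^3)
  std::printf("%ld\n", next235beven_sketch(1001, 1)); // 1024 (= 2^10)
  return 0;
}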
@@ -124,8 +120,8 @@ static __forceinline__ __device__ void atomicAddComplexShared(
 * on shared memory are supported so we leverage them
 */
template<typename T>
- static __forceinline__ __device__ void atomicAddComplexGlobal(
-     cuda_complex<T> *address, cuda_complex<T> res) {
+ static __forceinline__ __device__ void atomicAddComplexGlobal(cuda_complex<T> *address,
+                                                               cuda_complex<T> res) {
  if constexpr (
      std::is_same_v<cuda_complex<T>, float2> && COMPUTE_CAPABILITY_90_OR_HIGHER) {
    atomicAdd(address, res);
@@ -150,7 +146,7 @@ template<typename T> auto arrayrange(int n, T *a, cudaStream_t stream) {

// Writes out w = half-width and c = center of an interval enclosing all a[n]'s
// Only chooses a nonzero center if this increases w by less than fraction
- // ARRAYWIDCEN_GROWFRAC defined in defs.h.
+ // ARRAYWIDCEN_GROWFRAC defined in common/constants.h.
// This prevents rephasings which don't grow nf by much. 6/8/17
// If n==0, w and c are not finite.
template<typename T> auto arraywidcen(int n, T *a, cudaStream_t stream) {
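
(Aside: the centering rule documented above can be illustrated with a host-side sketch mirroring the CPU finufft version of arraywidcen; the GPU function in this hunk instead does its min/max reduction with thrust on the given stream. The default growfrac of 0.1 and the function names below are assumptions for illustration, and n is assumed positive.)

#include <algorithm>
#include <cmath>
#include <utility>

template<typename T>
std::pair<T, T> arraywidcen_sketch(int n, const T *a, T growfrac = T(0.1)) {
  const auto [lo, hi] = std::minmax_element(a, a + n);
  T w = (*hi - *lo) / 2;            // half-width of the enclosing interval
  T c = (*hi + *lo) / 2;            // its center
  if (std::abs(c) < growfrac * w) { // center too small to justify a rephasing:
    w += std::abs(c);               // grow the width slightly instead...
    c = 0;                          // ...and keep the grid uncentered
  }
  return {w, c};
}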
@@ -180,41 +176,27 @@ auto set_nhg_type3(T S, T X, const cufinufft_opts &opts,
  else
    Ssafe = std::max(Ssafe, T(1) / X);
  // use the safe X and S...
- T nfd = 2.0 * opts.upsampfac * Ssafe * Xsafe / M_PI + nss;
+ T nfd = 2.0 * opts.upsampfac * Ssafe * Xsafe / PI + nss;
  if (!std::isfinite(nfd)) nfd = 0.0; // use FLT to catch inf
  auto nf = (int)nfd;
  // printf("initial nf=%lld, ns=%d\n",*nf,spopts.nspread);
  // catch too small nf, and nan or +-inf, otherwise spread fails...
  if (nf < 2 * spopts.nspread) nf = 2 * spopts.nspread;
- if (nf < MAX_NF) // otherwise will fail anyway
-   nf = utils::next235beven(nf, 1); // expensive at huge nf
+ if (nf < MAX_NF)            // otherwise will fail anyway
+   nf = next235beven(nf, 1); // expensive at huge nf
  // Note: b is 1 because type 3 uses a type 2 plan, so it should not need the extra
  // condition that seems to be used by Block Gather as type 2 are only GM-sort
- auto h = 2 * T(M_PI) / nf; // upsampled grid spacing
+ auto h = 2 * T(PI) / nf; // upsampled grid spacing
  auto gam = T(nf) / (2.0 * opts.upsampfac * Ssafe); // x scale fac to x'
  return std::make_tuple(nf, h, gam);
}

- // Generalized dispatcher for any function requiring ns-based dispatch
- template<typename Func, typename T, int ns, typename... Args>
- int dispatch_ns(Func &&func, int target_ns, Args &&...args) {
-   if constexpr (ns > MAX_NSPREAD) {
-     return FINUFFT_ERR_METHOD_NOTVALID; // Stop recursion
-   } else {
-     if (target_ns == ns) {
-       return std::forward<Func>(func).template operator()<ns>(
-           std::forward<Args>(args)...);
-     }
-     return dispatch_ns<Func, T, ns + 1>(std::forward<Func>(func), target_ns,
-                                         std::forward<Args>(args)...);
-   }
- }
-
- // Wrapper function that starts the dispatch recursion
+ // Wrapper around the generic dispatcher for nspread-based dispatch
template<typename Func, typename T, typename... Args>
- int launch_dispatch_ns(Func &&func, int target_ns, Args &&...args) {
-   return dispatch_ns<Func, T, MIN_NSPREAD>(std::forward<Func>(func), target_ns,
-                                            std::forward<Args>(args)...);
+ auto launch_dispatch_ns(Func &&func, int target_ns, Args &&...args) {
+   using NsSeq = make_range<MIN_NSPREAD, MAX_NSPREAD>;
+   auto params = std::make_tuple(DispatchParam<NsSeq>{target_ns});
+   return dispatch(std::forward<Func>(func), params, std::forward<Args>(args)...);
}

/**
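
(Aside: the new launch_dispatch_ns delegates to a generic dispatch()/make_range/DispatchParam facility from <common/common.h>, which this diff does not show. The sketch below is a minimal, self-contained illustration of how such a facility can turn a runtime kernel width into a compile-time template argument; every name and signature in it is an illustrative stand-in, not the finufft::common implementation. A plausible motivation for the change, suggested by the tuple of parameters, is that one dispatcher can then serve several runtime-to-compile-time parameters, not just ns.)

#include <cstdio>
#include <tuple>
#include <utility>

template<int Low, int High> struct make_range { // assumed: closed range [Low, High]
  static constexpr int low = Low, high = High;
};

template<typename Range> struct DispatchParam { // assumed: runtime value + its range
  int value;
};

// Try each N in [Range::low, Range::high]; on a match, call func with N baked in
// as a template argument, so the callee can unroll loops over N at compile time.
template<int N, typename Range, typename Func, typename... Args>
int dispatch_impl(Func &&func, int target, Args &&...args) {
  if constexpr (N > Range::high) {
    return -1; // no match: the real code returns an error code such as
               // FINUFFT_ERR_METHOD_NOTVALID
  } else {
    if (target == N)
      return std::forward<Func>(func).template operator()<N>(
          std::forward<Args>(args)...);
    return dispatch_impl<N + 1, Range>(std::forward<Func>(func), target,
                                       std::forward<Args>(args)...);
  }
}

template<typename Func, typename Range, typename... Args>
int dispatch(Func &&func, std::tuple<DispatchParam<Range>> params, Args &&...args) {
  return dispatch_impl<Range::low, Range>(
      std::forward<Func>(func), std::get<0>(params).value,
      std::forward<Args>(args)...);
}

struct PrintNs { // example functor: ns is a constant expression inside operator()
  template<int ns> int operator()(const char *tag) const {
    std::printf("%s: instantiated with ns = %d\n", tag, ns);
    return 0;
  }
};

int main() {
  using NsSeq = make_range<2, 16>; // stand-ins for MIN_NSPREAD..MAX_NSPREAD
  auto params = std::make_tuple(DispatchParam<NsSeq>{7});
  return dispatch(PrintNs{}, params, "demo"); // prints "demo: instantiated with ns = 7"
}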