Skip to content

Commit 98574aa

Browse files
committed
cleaner code
1 parent bf6e7e5 commit 98574aa

23 files changed

+235
-527
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,7 @@ if(FINUFFT_USE_CPU)
262262
src/finufft_core.cpp
263263
src/c_interface.cpp
264264
src/finufft_utils.cpp
265+
src/utils.cpp
265266
)
266267

267268
if(FINUFFT_BUILD_FORTRAN)

include/cufinufft/defs.h

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,8 @@
11
#ifndef CUFINUFFT_DEFS_H
22
#define CUFINUFFT_DEFS_H
33

4+
#include <common/common.h>
45
#include <limits>
5-
// constants needed within common
6-
// upper bound on w, ie nspread, even when padded (see evaluate_kernel_vector); also for
7-
// common
8-
#define MAX_NSPREAD 16
9-
#define MIN_NSPREAD 2
10-
11-
// max number of positive quadr nodes
12-
#define MAX_NQUAD 100
13-
14-
// Fraction growth cut-off in utils:arraywidcen, sets when translate in type-3
15-
#define ARRAYWIDCEN_GROWFRAC 0.1
166

177
// FIXME: If cufft ever takes N > INT_MAX...
188
constexpr int32_t MAX_NF = std::numeric_limits<int32_t>::max();

include/cufinufft/impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran
7272
Marco Barbone 07/26/24. Using SM when shared memory available is enough.
7373
*/
7474
using namespace cufinufft::common;
75+
using namespace finufft::common;
7576
int ier;
7677
if (type < 1 || type > 3) {
7778
fprintf(stderr, "[%s] Invalid type (%d): should be 1, 2, or 3.\n", __func__, type);

include/cufinufft/utils.h

Lines changed: 18 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,21 @@
44
// octave (mkoctfile) needs this otherwise it doesn't know what int64_t is!
55
#include <complex>
66

7-
#include <cuComplex.h>
87
#include <cufinufft/types.h>
98

109
#include <cuda_runtime.h>
1110
#include <thrust/extrema.h>
11+
#include <tuple>
1212
#include <type_traits>
1313
#include <utility> // for std::forward
1414

15-
#include <finufft_errors.h>
15+
#include <common/common.h>
1616

1717
#ifndef _USE_MATH_DEFINES
1818
#define _USE_MATH_DEFINES
1919
#endif
2020
#include <cmath>
2121

22-
#ifndef M_PI
23-
#define M_PI 3.14159265358979323846
24-
#endif
25-
2622
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)
2723
#else
2824
__inline__ __device__ double atomicAdd(double *address, double val) {
@@ -72,6 +68,8 @@ template<typename T> __forceinline__ __device__ auto interval(const int ns, cons
7268
namespace cufinufft {
7369
namespace utils {
7470

71+
using namespace finufft::common;
72+
7573
class WithCudaDevice {
7674
public:
7775
explicit WithCudaDevice(const int device) : orig_device_{get_orig_device()} {
@@ -90,10 +88,8 @@ class WithCudaDevice {
9088
}
9189
};
9290

93-
// math helpers whose source is in src/cuda/utils.cpp
94-
CUFINUFFT_BIGINT next235beven(CUFINUFFT_BIGINT n, CUFINUFFT_BIGINT b);
95-
void gaussquad(int n, double *xgl, double *wgl);
96-
std::tuple<double, double> leg_eval(int n, double x);
91+
// math helpers whose source is in src/utils.cpp
92+
long next235beven(long n, long b);
9793

9894
template<typename T> T infnorm(int n, std::complex<T> *a) {
9995
T nrm = 0.0;
@@ -124,8 +120,8 @@ static __forceinline__ __device__ void atomicAddComplexShared(
124120
* on shared memory are supported so we leverage them
125121
*/
126122
template<typename T>
127-
static __forceinline__ __device__ void atomicAddComplexGlobal(
128-
cuda_complex<T> *address, cuda_complex<T> res) {
123+
static __forceinline__ __device__ void atomicAddComplexGlobal(cuda_complex<T> *address,
124+
cuda_complex<T> res) {
129125
if constexpr (
130126
std::is_same_v<cuda_complex<T>, float2> && COMPUTE_CAPABILITY_90_OR_HIGHER) {
131127
atomicAdd(address, res);
@@ -150,7 +146,7 @@ template<typename T> auto arrayrange(int n, T *a, cudaStream_t stream) {
150146

151147
// Writes out w = half-width and c = center of an interval enclosing all a[n]'s
152148
// Only chooses a nonzero center if this increases w by less than fraction
153-
// ARRAYWIDCEN_GROWFRAC defined in defs.h.
149+
// ARRAYWIDCEN_GROWFRAC defined in common/constants.h.
154150
// This prevents rephasings which don't grow nf by much. 6/8/17
155151
// If n==0, w and c are not finite.
156152
template<typename T> auto arraywidcen(int n, T *a, cudaStream_t stream) {
@@ -180,41 +176,27 @@ auto set_nhg_type3(T S, T X, const cufinufft_opts &opts,
180176
else
181177
Ssafe = std::max(Ssafe, T(1) / X);
182178
// use the safe X and S...
183-
T nfd = 2.0 * opts.upsampfac * Ssafe * Xsafe / M_PI + nss;
179+
T nfd = 2.0 * opts.upsampfac * Ssafe * Xsafe / PI + nss;
184180
if (!std::isfinite(nfd)) nfd = 0.0; // use FLT to catch inf
185181
auto nf = (int)nfd;
186182
// printf("initial nf=%lld, ns=%d\n",*nf,spopts.nspread);
187183
// catch too small nf, and nan or +-inf, otherwise spread fails...
188184
if (nf < 2 * spopts.nspread) nf = 2 * spopts.nspread;
189-
if (nf < MAX_NF) // otherwise will fail anyway
190-
nf = utils::next235beven(nf, 1); // expensive at huge nf
185+
if (nf < MAX_NF) // otherwise will fail anyway
186+
nf = next235beven(nf, 1); // expensive at huge nf
191187
// Note: b is 1 because type 3 uses a type 2 plan, so it should not need the extra
192188
// condition that seems to be used by Block Gather as type 2 are only GM-sort
193-
auto h = 2 * T(M_PI) / nf; // upsampled grid spacing
189+
auto h = 2 * T(PI) / nf; // upsampled grid spacing
194190
auto gam = T(nf) / (2.0 * opts.upsampfac * Ssafe); // x scale fac to x'
195191
return std::make_tuple(nf, h, gam);
196192
}
197193

198-
// Generalized dispatcher for any function requiring ns-based dispatch
199-
template<typename Func, typename T, int ns, typename... Args>
200-
int dispatch_ns(Func &&func, int target_ns, Args &&...args) {
201-
if constexpr (ns > MAX_NSPREAD) {
202-
return FINUFFT_ERR_METHOD_NOTVALID; // Stop recursion
203-
} else {
204-
if (target_ns == ns) {
205-
return std::forward<Func>(func).template operator()<ns>(
206-
std::forward<Args>(args)...);
207-
}
208-
return dispatch_ns<Func, T, ns + 1>(std::forward<Func>(func), target_ns,
209-
std::forward<Args>(args)...);
210-
}
211-
}
212-
213-
// Wrapper function that starts the dispatch recursion
194+
// Wrapper around the generic dispatcher for nspread-based dispatch
214195
template<typename Func, typename T, typename... Args>
215-
int launch_dispatch_ns(Func &&func, int target_ns, Args &&...args) {
216-
return dispatch_ns<Func, T, MIN_NSPREAD>(std::forward<Func>(func), target_ns,
217-
std::forward<Args>(args)...);
196+
auto launch_dispatch_ns(Func &&func, int target_ns, Args &&...args) {
197+
using NsSeq = make_range<MIN_NSPREAD, MAX_NSPREAD>;
198+
auto params = std::make_tuple(DispatchParam<NsSeq>{target_ns});
199+
return dispatch(std::forward<Func>(func), params, std::forward<Args>(args)...);
218200
}
219201

220202
/**

include/finufft/finufft_core.h

Lines changed: 1 addition & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -4,67 +4,10 @@
44
#include <xsimd/xsimd.hpp>
55

66
#include <array>
7+
#include <common/common.h>
78
#include <finufft_errors.h>
89
#include <memory>
910

10-
/* IMPORTANT: for Windows compilers, you should add a line
11-
#define FINUFFT_DLL
12-
here if you are compiling/using FINUFFT as a DLL,
13-
in order to do the proper importing/exporting, or
14-
alternatively compile with -DFINUFFT_DLL or the equivalent
15-
command-line flag. This is not necessary under MinGW/Cygwin, where
16-
libtool does the imports/exports automatically.
17-
Alternatively use include(GenerateExportHeader) and
18-
generate_export_header(finufft) to auto-generate a header containing
19-
these defines. The main reason is that if msvc changes the way it deals
20-
with it in the future we just need to update cmake for it to work
21-
instead of having a check on the msvc version. */
22-
#if defined(FINUFFT_DLL) && (defined(_WIN32) || defined(__WIN32__))
23-
#if defined(dll_EXPORTS)
24-
#define FINUFFT_EXPORT __declspec(dllexport)
25-
#else
26-
#define FINUFFT_EXPORT __declspec(dllimport)
27-
#endif
28-
#else
29-
#define FINUFFT_EXPORT
30-
#endif
31-
32-
/* specify calling convention (Windows only)
33-
The cdecl calling convention is actually not the default in all but a very
34-
few C/C++ compilers.
35-
If the user code changes the default compiler calling convention, may need
36-
this when generating DLL. */
37-
#if defined(_WIN32) || defined(__WIN32__)
38-
#define FINUFFT_CDECL __cdecl
39-
#else
40-
#define FINUFFT_CDECL
41-
#endif
42-
43-
// inline macro, to force inlining of small functions
44-
// this avoids the use of macros to implement functions
45-
#if defined(_MSC_VER)
46-
#define FINUFFT_ALWAYS_INLINE __forceinline inline
47-
#define FINUFFT_NEVER_INLINE __declspec(noinline)
48-
#define FINUFFT_RESTRICT __restrict
49-
#define FINUFFT_UNREACHABLE __assume(0)
50-
#define FINUFFT_UNLIKELY(x) (x)
51-
#define FINUFFT_LIKELY(x) (x)
52-
#elif defined(__GNUC__) || defined(__clang__)
53-
#define FINUFFT_ALWAYS_INLINE __attribute__((always_inline)) inline
54-
#define FINUFFT_NEVER_INLINE __attribute__((noinline))
55-
#define FINUFFT_RESTRICT __restrict__
56-
#define FINUFFT_UNREACHABLE __builtin_unreachable()
57-
#define FINUFFT_UNLIKELY(x) __builtin_expect(!!(x), 0)
58-
#define FINUFFT_LIKELY(x) __builtin_expect(!!(x), 1)
59-
#else
60-
#define FINUFFT_ALWAYS_INLINE inline
61-
#define FINUFFT_NEVER_INLINE
62-
#define FINUFFT_RESTRICT
63-
#define FINUFFT_UNREACHABLE
64-
#define FINUFFT_UNLIKELY(x) (x)
65-
#define FINUFFT_LIKELY(x) (x)
66-
#endif
67-
6811
// All indexing in library that potentially can exceed 2^31 uses 64-bit signed.
6912
// This includes all calling arguments (eg M,N) that could be huge someday.
7013
using BIGINT = int64_t;
@@ -75,20 +18,6 @@ using UBIGINT = uint64_t;
7518
// Library version (is a string)
7619
#define FINUFFT_VER "2.5.0dev"
7720

78-
// Smallest possible kernel spread width per dimension, in fine grid points
79-
// (used only in spreadinterp.cpp)
80-
inline constexpr int MIN_NSPREAD = 2;
81-
82-
// Largest possible kernel spread width per dimension, in fine grid points
83-
// (used only in spreadinterp.cpp)
84-
inline constexpr int MAX_NSPREAD = 16;
85-
86-
// Fraction growth cut-off in utils:arraywidcen, sets when translate in type-3
87-
inline constexpr double ARRAYWIDCEN_GROWFRAC = 0.1;
88-
89-
// Max number of positive quadr nodes for kernel FT (used only in common.cpp)
90-
inline constexpr int MAX_NQUAD = 100;
91-
9221
// Internal (nf1 etc) array allocation size that immediately raises error.
9322
// (Note: next235 takes 1s for 1e11, so it is also to prevent hang here.)
9423
// Increase this if you need >10TB (!) RAM...
@@ -98,10 +27,6 @@ inline constexpr BIGINT MAX_NF = BIGINT(1e12);
9827
// values for M = nj (also nk in type 3)...
9928
inline constexpr BIGINT MAX_NU_PTS = BIGINT(1e14);
10029

101-
// We define our own PI here because M_PI is not actually part of standard C++
102-
inline constexpr double PI = 3.14159265358979329;
103-
inline constexpr double INV_2PI = 0.159154943091895336;
104-
10530
// ----- OpenMP macros which also work when omp not present -----
10631
// Allows compile-time switch off of openmp, so compilation without any openmp
10732
// is done (Note: _OPENMP is automatically set by -fopenmp compile flag)

include/finufft/finufft_utils.hpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,9 @@
33

44
#pragma once
55

6-
#include <cmath>
7-
// for CNTime...
8-
// (use chrono since the interface is portable between linux and windows)
9-
#include <chrono>
10-
116
#include "finufft_core.h"
7+
#include <cmath>
8+
#include <common/common.h>
129

1310
namespace finufft::utils {
1411

@@ -31,24 +28,22 @@ FINUFFT_EXPORT FINUFFT_ALWAYS_INLINE void FINUFFT_CDECL arraywidcen(BIGINT n, co
3128
T *w, T *c)
3229
// Writes out w = half-width and c = center of an interval enclosing all a[n]'s
3330
// Only chooses a nonzero center if this increases w by less than fraction
34-
// ARRAYWIDCEN_GROWFRAC defined in finufft_core.h.
31+
// ARRAYWIDCEN_GROWFRAC defined in common/constants.h.
3532
// This prevents rephasings which don't grow nf by much. 6/8/17
3633
// If n==0, w and c are not finite.
3734
{
3835
T lo, hi;
3936
arrayrange(n, a, &lo, &hi);
4037
*w = (hi - lo) / 2;
4138
*c = (hi + lo) / 2;
42-
if (std::abs(*c) < ARRAYWIDCEN_GROWFRAC * (*w)) {
39+
if (std::abs(*c) < common::ARRAYWIDCEN_GROWFRAC * (*w)) {
4340
*w += std::abs(*c);
4441
*c = 0.0;
4542
}
4643
}
4744

4845
// routines in finufft_utils.cpp ...
4946
FINUFFT_EXPORT BIGINT next235even(BIGINT n);
50-
FINUFFT_EXPORT void gaussquad(int n, double *xgl, double *wgl);
51-
FINUFFT_EXPORT std::tuple<double, double> leg_eval(int n, double x);
5247

5348
// jfm's timer class
5449
class FINUFFT_EXPORT CNTime {

include/finufft/test_defs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <finufft.h>
1616

1717
// convenient private finufft internals that tests need
18+
#include <common/common.h>
1819
#include <finufft/finufft_core.h>
1920
#include <finufft/finufft_utils.hpp>
2021
#include <memory>
@@ -36,6 +37,7 @@ using CPX = std::complex<FLT>;
3637

3738
// -------------- Math consts (not in math.h) and useful math macros ----------
3839
#include <cmath>
40+
using ::finufft::common::PI;
3941

4042
// either-precision unit imaginary number...
4143
#define IMA (CPX(0.0, 1.0))

makefile

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ STATICLIB = lib-static/$(LIBNAME).a
136136
ABSDYNLIB = $(FINUFFT)$(DYNLIB)
137137

138138
# spreader objs
139-
SOBJS = src/finufft_utils.o src/spreadinterp.o
139+
SOBJS = src/finufft_utils.o src/utils.o src/spreadinterp.o
140140

141141
# all lib dual-precision objs (note DUCC_OBJS empty if unused)
142142
OBJS = $(SOBJS) src/fft.o src/finufft_core.o src/c_interface.o fortran/finufftfort.o $(DUCC_OBJS)
@@ -174,7 +174,7 @@ usage:
174174
@echo "Also see docs/install.rst and docs/README"
175175

176176
# collect headers for implicit depends (we don't separate public from private)
177-
HEADERS = $(wildcard include/*.h include/finufft/*.h)
177+
HEADERS = $(wildcard include/*.h include/finufft/*.h include/common/*.h)
178178

179179
# implicit rules for objects (note -o ensures writes to correct dir)
180180
%.o: %.cpp $(HEADERS)
@@ -262,10 +262,10 @@ test/%: test/%.cpp $(DYNLIB)
262262
test/%f: test/%.cpp $(DYNLIB)
263263
$(CXX) $(CXXFLAGS) ${LDFLAGS} -DSINGLE $< $(ABSDYNLIB) $(LIBSFFT) -o $@
264264
# low-level tests that are cleaner if depend on only specific objects...
265-
test/testutils: test/testutils.cpp src/finufft_utils.o
266-
$(CXX) $(CXXFLAGS) ${LDFLAGS} test/testutils.cpp src/finufft_utils.o $(LIBS) -o test/testutils
267-
test/testutilsf: test/testutils.cpp src/finufft_utils.o
268-
$(CXX) $(CXXFLAGS) ${LDFLAGS} -DSINGLE test/testutils.cpp src/finufft_utils.o $(LIBS) -o test/testutilsf
265+
test/testutils: test/testutils.cpp src/finufft_utils.o src/utils.o
266+
$(CXX) $(CXXFLAGS) ${LDFLAGS} test/testutils.cpp src/finufft_utils.o src/utils.o $(LIBS) -o test/testutils
267+
test/testutilsf: test/testutils.cpp src/finufft_utils.o src/utils.o
268+
$(CXX) $(CXXFLAGS) ${LDFLAGS} -DSINGLE test/testutils.cpp src/finufft_utils.o src/utils.o $(LIBS) -o test/testutilsf
269269

270270
# make sure all double-prec test executables ready for testing
271271
TESTS := $(basename $(wildcard test/*.cpp))

src/cuda/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
set(PRECISION_INDEPENDENT_SRC precision_independent.cu utils.cpp)
1+
set(PRECISION_INDEPENDENT_SRC precision_independent.cu ../utils.cpp)
22

33
set(PRECISION_DEPENDENT_SRC
44
spreadinterp.cpp

0 commit comments

Comments
 (0)