Skip to content

Commit 3b854c0

Browse files
committed
Merge remote-tracking branch 'cufinufft' into cuda
2 parents 40b6316 + 8697c1a commit 3b854c0

File tree

120 files changed

+17267
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

120 files changed

+17267
-0
lines changed

.clang-format

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
2+
Language: Cpp
3+
BasedOnStyle: LLVM
4+
TabWidth: 4
5+
ColumnLimit: 120
6+
IndentWidth: 4
7+
AlwaysBreakTemplateDeclarations: true
8+
...

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,7 @@ docs/_build
3333

3434
build/
3535
.vscode/
36+
37+
cufinufft/python/cufinufft/docs/_build
38+
cufinufft/python/cufinufft/docs/_static
39+
cufinufft/python/cufinufft/docs/_templates

.gitmodules

Whitespace-only changes.

cufinufft/.bumpversion.cfg

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
[bumpversion]
2+
current_version = 1.2
3+
parse = (?P<major>\d+)\.(?P<minor>\d+)
4+
serialize =
5+
{major}.{minor}
6+
commit = True
7+
tag = True
8+
9+
[bumpversion:file:setup.py]
10+
search = version='{current_version}'
11+
replace = version='{new_version}'
12+
13+
[bumpversion:file:README.md]
14+
search = v{current_version}
15+
replace = v{new_version}
16+
17+
[bumpversion:file:python/cufinufft/README.md]
18+
search = v{current_version}
19+
replace = v{new_version}
20+
21+
[bumpversion:file:python/cufinufft/docs/conf.py]
22+
search = release = '{current_version}'
23+
replace = release = '{new_version}'
24+
25+
[bumpversion:file:python/cufinufft/__init__.py]
26+
search = __version__ = '{current_version}'
27+
replace = __version__ = '{new_version}'
28+
29+
[bumpversion:file:ci/distribution_helper.sh]
30+
search = cufinufft_version={current_version}
31+
replace = cufinufft_version={new_version}

cufinufft/CHANGELOG

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
List of features / changes made / release notes, in reverse chronological order
2+
3+
* Move second half of onedim_fseries_kernel() to GPU (with a simple heuristic
4+
based on nf1 to switch between the CPU and the GPU version).
5+
* Melody fixed bug in MAX_NF being 0 due to typecasting 1e11 to int (thanks
6+
Elliot Slaughter for catching that).
7+
* Melody fixed kernel eval so it is done w*d, not w^d, times; speeds up 2d a little, 3d
8+
quite a lot! (PR#130)
9+
* Melody added 1D support for both type 1 (GM-sort and SM methods) and type 2 (GM-sort),
10+
in C++/CUDA and their test executables (but not Python interface).
11+
12+
v 1.2 (02/17/21)
13+
14+
* Warning: The following are Python interface changes -- not backwards compatible
15+
with v 1.1 (See examples/example2d1,2many.py for updated usage)
16+
17+
- Made opts a kwarg dict instead of an object:
18+
def __init__(self, ... , opts=None, dtype=np.float32)
19+
=> def __init__(self, ... , dtype=np.float32, **kwargs)
20+
- Renamed arguments in plan creation `__init__`:
21+
ntransforms => n_trans, tol => eps
22+
- Changed order of arguments in plan creation `__init__`:
23+
def __init__(self, ... ,isign, eps, ntransforms, opts, dtype)
24+
=> def __init__(self, ... ,ntransforms, eps, isign, opts, dtype)
25+
- Removed M in `set_pts` arguments:
26+
def set_pts(self, M, kx, ky=None, kz=None)
27+
=> def set_pts(self, kx, ky=None, kz=None)
28+
29+
* Python: added multi-gpu support (in beta)
30+
* Python: added more unit tests (wrong input, kwarg args, multi-gpu)
31+
* Fixed various memory leaks
32+
* Added index bound check in 2D spread kernels (Spread_2d_Subprob(_Horner))
33+
* Added spread/interp tests to `make check`
34+
* Fixed user request tolerance (eps) to kernel width (w) calculation
35+
* Default kernel evaluation method set to 0, ie exp(sqrt()), since faster
36+
* Removed outdated benchmark codes, cleaner spread/interp tests
37+
38+
v 1.1 (09/22/20)
39+
40+
* Python: extended the mode tuple to 3D and reordered from C/python
41+
ndarray.shape style input (nZ, nY, nX) to the (F) order expected by the
42+
low level library (nX, nY, nZ).
43+
* Added bound checking on the bin size
44+
* Dual-precision support of spread/interp tests
45+
* Improved documentation of spread/interp tests
46+
* Added dummy call of cuFFTPlan1d to avoid timing the constant cost of cuFFT
47+
library.
48+
* Added heuristic decision of maximum batch size (number of vectors with the
49+
same nupts to transform at the same time)
50+
* Reported execution throughput in the test codes
51+
* Fixed timing in the test codes
52+
* Professionalized handling of too-small-eps (requested tolerance)
53+
* Rewrote README.md and added cuFINUFFT logo.
54+
* Support of advanced Makefile usage, e.g. make -site=olcf_summit
55+
* Removed FFTW dependency
56+
57+
v 1.0 (07/29/20)

cufinufft/CMakeLists.txt

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
cmake_minimum_required(VERSION 3.18)
2+
3+
project(
4+
cufinufft
5+
LANGUAGES C CXX CUDA
6+
)
7+
8+
find_package(CUDAToolkit REQUIRED)
9+
10+
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
11+
set(CMAKE_CUDA_ARCHITECTURES 60;61;70;75)
12+
endif()
13+
14+
set(CMAKE_CXX_STANDARD 14)
15+
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
16+
17+
set(PRECISION_INDEPENDENT_SRC src/precision_independent.cu src/profile.cu contrib/legendre_rule_fast.c contrib/utils.cpp)
18+
set(PRECISION_DEPENDENT_SRC
19+
src/spreadinterp.cpp
20+
src/1d/spreadinterp1d.cu src/1d/cufinufft1d.cu
21+
src/1d/spread1d_wrapper.cu src/1d/interp1d_wrapper.cu
22+
src/2d/spreadinterp2d.cu src/2d/cufinufft2d.cu
23+
src/2d/spread2d_wrapper.cu src/2d/spread2d_wrapper_paul.cu
24+
src/2d/interp2d_wrapper.cu src/memtransfer_wrapper.cu
25+
src/deconvolve_wrapper.cu src/cufinufft.cu src/common.cu
26+
src/3d/spreadinterp3d.cu src/3d/spread3d_wrapper.cu
27+
src/3d/interp3d_wrapper.cu src/3d/cufinufft3d.cu
28+
)
29+
30+
set(CUFINUFFT_INCLUDE_DIRS
31+
${PROJECT_SOURCE_DIR}/include
32+
${PROJECT_SOURCE_DIR}/include/cufinufft/contrib/cuda_samples
33+
)
34+
35+
add_library(cufinufft_common_objects OBJECT ${PRECISION_INDEPENDENT_SRC})
36+
target_include_directories(cufinufft_common_objects PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
37+
set_property(TARGET cufinufft_common_objects PROPERTY POSITION_INDEPENDENT_CODE ON)
38+
39+
add_library(cufinufft_32_objects OBJECT ${PRECISION_DEPENDENT_SRC})
40+
target_include_directories(cufinufft_32_objects PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
41+
target_compile_definitions(cufinufft_32_objects PUBLIC CUFINUFFT_SINGLE)
42+
set_property(TARGET cufinufft_32_objects PROPERTY POSITION_INDEPENDENT_CODE ON)
43+
44+
add_library(cufinufft_64_objects OBJECT ${PRECISION_DEPENDENT_SRC})
45+
target_include_directories(cufinufft_64_objects PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
46+
set_property(TARGET cufinufft_64_objects PROPERTY POSITION_INDEPENDENT_CODE ON)
47+
48+
add_library(cufinufft SHARED
49+
$<TARGET_OBJECTS:cufinufft_common_objects>
50+
$<TARGET_OBJECTS:cufinufft_32_objects>
51+
$<TARGET_OBJECTS:cufinufft_64_objects>
52+
)
53+
target_link_libraries(cufinufft CUDA::cudart CUDA::cufft CUDA::nvToolsExt)
54+
add_library(cufinufft_static STATIC
55+
$<TARGET_OBJECTS:cufinufft_common_objects>
56+
$<TARGET_OBJECTS:cufinufft_32_objects>
57+
$<TARGET_OBJECTS:cufinufft_64_objects>
58+
)
59+
target_link_libraries(cufinufft_static CUDA::cudart_static CUDA::cufft_static CUDA::nvToolsExt)
60+
set_property(TARGET cufinufft_static PROPERTY OUTPUT_NAME cufinufft)
61+
62+
add_subdirectory(test)
63+
add_subdirectory(examples)

cufinufft/Jenkinsfile

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
pipeline {
2+
agent none
3+
options {
4+
disableConcurrentBuilds()
5+
buildDiscarder(logRotator(numToKeepStr: '8', daysToKeepStr: '20'))
6+
timeout(time: 1, unit: 'HOURS')
7+
}
8+
stages {
9+
stage('main') {
10+
agent {
11+
dockerfile {
12+
filename 'ci/docker/cuda10.1/Dockerfile-x86_64'
13+
args '--gpus 1'
14+
}
15+
}
16+
environment {
17+
HOME = "$WORKSPACE/build"
18+
PYBIN = "/opt/python/cp38-cp38/bin"
19+
}
20+
steps {
21+
sh '${PYBIN}/python3 -m venv $HOME'
22+
sh '''#!/bin/bash -ex
23+
source $HOME/bin/activate
24+
LIBRARY_PATH=/io/lib python3 -m pip install -e .
25+
python3 -m pip install pytest
26+
python3 -m pytest
27+
'''
28+
sh 'make check'
29+
}
30+
}
31+
}
32+
post {
33+
failure {
34+
emailext subject: '$PROJECT_NAME - Build #$BUILD_NUMBER - $BUILD_STATUS',
35+
body: '''$PROJECT_NAME - Build #$BUILD_NUMBER - $BUILD_STATUS
36+
37+
Check console output at $BUILD_URL to view full results.
38+
39+
Building $BRANCH_NAME for $CAUSE
40+
$JOB_DESCRIPTION
41+
42+
Changes:
43+
$CHANGES
44+
45+
End of build log:
46+
${BUILD_LOG,maxLines=200}
47+
''',
48+
recipientProviders: [
49+
[$class: 'DevelopersRecipientProvider'],
50+
],
51+
replyTo: '$DEFAULT_REPLYTO',
52+
53+
}
54+
}
55+
}

cufinufft/LICENSE

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
Copyright (C) 2018-2021 The Simons Foundation, Inc. - All Rights Reserved.
2+
3+
Lead developer: Yu-Hsuan Melody Shih (New York University).
4+
5+
Other developers: (see github site for full list)
6+
7+
Garrett Wright (Princeton)
8+
Joakim Anden (KTH)
9+
Johannes Blaschke (LBNL)
10+
Alex Barnett (CCM, Flatiron Institute)
11+
12+
This project came out of Melody's 2018 and 2019 summer internships at
13+
the Flatiron Institute, advised by Alex Barnett.
14+
15+
------
16+
17+
cuFINUFFT is licensed under the Apache License, Version 2.0 (the
18+
"License"); you may not use this file except in compliance with the
19+
License. You may obtain a copy of the License at
20+
21+
http://www.apache.org/licenses/LICENSE-2.0
22+
23+
Unless required by applicable law or agreed to in writing, software
24+
distributed under the License is distributed on an "AS IS" BASIS,
25+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26+
See the License for the specific language governing permissions and
27+
limitations under the License.
28+
29+
------
30+
31+
Certain parts of this repository are contributed by others.
32+
For their license info, see:
33+
34+
contrib/legendre_rule_fast.license
35+
fortran/cmcl_license.txt
36+
37+
------
38+
39+
If you find this library useful, or it helps you in creating software
40+
or publications, please let us know, and acknowledge that fact by citing our
41+
repository:
42+
43+
https://github.com/flatironinstitute/cufinufft
44+
45+
and the publication:
46+
47+
cuFINUFFT: a load-balanced GPU library for general-purpose nonuniform FFTs, Yu-hsuan Shih, Garrett Wright, Joakim Andén, Johannes Blaschke, Alex H. Barnett. PDSEC2021 conference (best paper prize). https://arxiv.org/abs/2102.08463

cufinufft/MANIFEST.in

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
include python/cufinufft/README.md
2+
include python/cufinufft/requirements.txt

0 commit comments

Comments
 (0)