flatironinstitute
diff --git a/‎.clang-format
+8 b/‎.clang-format
+8
diff --git a/‎.gitignore
+4 b/‎.gitignore
+4
diff --git a/‎.gitmodules b/‎.gitmodules
diff --git a/‎cufinufft/.bumpversion.cfg
+31 b/‎cufinufft/.bumpversion.cfg
+31
diff --git a/‎cufinufft/CHANGELOG
+57 b/‎cufinufft/CHANGELOG
+57
diff --git a/‎cufinufft/CMakeLists.txt
+63 b/‎cufinufft/CMakeLists.txt
+63
diff --git a/‎cufinufft/Jenkinsfile
+55 b/‎cufinufft/Jenkinsfile
+55
diff --git a/‎cufinufft/LICENSE
+47 b/‎cufinufft/LICENSE
+47
diff --git a/‎cufinufft/MANIFEST.in
+2 b/‎cufinufft/MANIFEST.in
+2
@@ -0,0 +1,8 @@
+---
+Language:        Cpp
+BasedOnStyle:  LLVM
+TabWidth:        4
+ColumnLimit:    120
+IndentWidth: 4
+AlwaysBreakTemplateDeclarations: true
+...
@@ -33,3 +33,7 @@ docs/_build
 
 build/
 .vscode/
+
+cufinufft/python/cufinufft/docs/_build
+cufinufft/python/cufinufft/docs/_static
+cufinufft/python/cufinufft/docs/_templates
@@ -0,0 +1,31 @@
+[bumpversion]
+current_version = 1.2
+parse = (?P<major>\d+)\.(?P<minor>\d+)
+serialize = 
+	{major}.{minor}
+commit = True
+tag = True
+
+[bumpversion:file:setup.py]
+search = version='{current_version}'
+replace = version='{new_version}'
+
+[bumpversion:file:README.md]
+search = v{current_version}
+replace = v{new_version}
+
+[bumpversion:file:python/cufinufft/README.md]
+search = v{current_version}
+replace = v{new_version}
+
+[bumpversion:file:python/cufinufft/docs/conf.py]
+search = release = '{current_version}'
+replace = release = '{new_version}'
+
+[bumpversion:file:python/cufinufft/__init__.py]
+search = __version__ = '{current_version}'
+replace = __version__ = '{new_version}'
+
+[bumpversion:file:ci/distribution_helper.sh]
+search = cufinufft_version={current_version}
+replace = cufinufft_version={new_version}
@@ -0,0 +1,57 @@
+List of features / changes made / release notes, in reverse chronological order
+
+* Moved second half of onedim_fseries_kernel() to GPU (with a simple heuristic
+  based on nf1 to switch between the CPU and the GPU versions).
+* Melody fixed bug in MAX_NF being 0 due to typecasting 1e11 to int (thanks
+  Elliot Slaughter for catching that).
+* Melody fixed kernel evaluation so it is done w*d rather than w^d times, which
+  speeds up 2d a little and 3d quite a lot! (PR#130)
+* Melody added 1D support for both types 1 (GM-sort and SM methods) and 2
+  (GM-sort), in C++/CUDA and their test executables (but not Python interface).
+
+v 1.2 (02/17/21)
+
+* Warning: Following are Python interface changes -- not backwards compatible
+  with v 1.1 (See examples/example2d1,2many.py for updated usage)
+
+    - Made opts a kwarg dict instead of an object:
+         def __init__(self, ... , opts=None, dtype=np.float32)
+      => def __init__(self, ... , dtype=np.float32, **kwargs)
+    - Renamed arguments in plan creation `__init__`:
+         ntransforms => n_trans, tol => eps
+    - Changed order of arguments in plan creation `__init__`:
+         def __init__(self, ... ,isign, eps, ntransforms, opts, dtype)
+      => def __init__(self, ... ,ntransforms, eps, isign, opts, dtype)
+    - Removed M in `set_pts` arguments:
+         def set_pts(self, M, kx, ky=None, kz=None)
+      => def set_pts(self, kx, ky=None, kz=None)
+
+* Python: added multi-gpu support (in beta)
+* Python: added more unit tests (wrong input, kwarg args, multi-gpu)
+* Fixed various memory leaks
+* Added index bound check in 2D spread kernels (Spread_2d_Subprob(_Horner))
+* Added spread/interp tests to `make check`
+* Fixed user request tolerance (eps) to kernel width (w) calculation
+* Default kernel evaluation method set to 0, ie exp(sqrt()), since faster
+* Removed outdated benchmark codes, cleaner spread/interp tests
+
+v 1.1 (09/22/20)
+
+* Python: extended the mode tuple to 3D and reordered from C/python
+  ndarray.shape style input (nZ, nY, nX) to the (F) order expected by the
+  low level library (nX, nY, nZ).
+* Added bound checking on the bin size
+* Dual-precision support of spread/interp tests
+* Improved documentation of spread/interp tests
+* Added dummy call of cuFFTPlan1d to avoid timing the constant cost of cuFFT
+  library.
+* Added heuristic decision of maximum batch size (number of vectors with the
+  same nupts to transform at the same time)
+* Reported execution throughput in the test codes
+* Fixed timing in the test codes
+* Professionalized handling of too-small-eps (requested tolerance)
+* Rewrote README.md and added cuFINUFFT logo.
+* Support of advanced Makefile usage, e.g. make -site=olcf_summit
+* Removed FFTW dependency
+
+v 1.0 (07/29/20)
@@ -0,0 +1,63 @@
+cmake_minimum_required(VERSION 3.18)
+
+# Default GPU architectures to compile for.
+#
+# This has to be decided BEFORE project() enables the CUDA language: enabling
+# CUDA initializes CMAKE_CUDA_ARCHITECTURES itself (policy CMP0104), so a
+# "NOT DEFINED" check placed after project() never fires and the list below
+# would be silently ignored in favour of the compiler's default architecture.
+# An explicit -DCMAKE_CUDA_ARCHITECTURES=... or a CUDAARCHS environment
+# variable still takes precedence over this default.
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS})
+  set(CMAKE_CUDA_ARCHITECTURES 60 61 70 75)
+endif()
+
+project(
+  cufinufft
+  LANGUAGES C CXX CUDA
+)
+
+find_package(CUDAToolkit REQUIRED)
+
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
+
+# Sources compiled once and shared by both precisions.
+set(PRECISION_INDEPENDENT_SRC
+  src/precision_independent.cu
+  src/profile.cu
+  contrib/legendre_rule_fast.c
+  contrib/utils.cpp
+)
+
+# Sources compiled twice: once with CUFINUFFT_SINGLE defined and once without.
+set(PRECISION_DEPENDENT_SRC
+  src/spreadinterp.cpp
+  src/1d/spreadinterp1d.cu src/1d/cufinufft1d.cu
+  src/1d/spread1d_wrapper.cu src/1d/interp1d_wrapper.cu
+  src/2d/spreadinterp2d.cu src/2d/cufinufft2d.cu
+  src/2d/spread2d_wrapper.cu src/2d/spread2d_wrapper_paul.cu
+  src/2d/interp2d_wrapper.cu src/memtransfer_wrapper.cu
+  src/deconvolve_wrapper.cu src/cufinufft.cu src/common.cu
+  src/3d/spreadinterp3d.cu src/3d/spread3d_wrapper.cu
+  src/3d/interp3d_wrapper.cu src/3d/cufinufft3d.cu
+)
+
+set(CUFINUFFT_INCLUDE_DIRS
+  ${PROJECT_SOURCE_DIR}/include
+  ${PROJECT_SOURCE_DIR}/include/cufinufft/contrib/cuda_samples
+)
+
+# Object libraries: every source is compiled as position-independent code so
+# the same objects can be placed in both the shared and the static library.
+add_library(cufinufft_common_objects OBJECT ${PRECISION_INDEPENDENT_SRC})
+target_include_directories(cufinufft_common_objects PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
+set_property(TARGET cufinufft_common_objects PROPERTY POSITION_INDEPENDENT_CODE ON)
+
+# Build of the precision-dependent sources with CUFINUFFT_SINGLE defined.
+add_library(cufinufft_32_objects OBJECT ${PRECISION_DEPENDENT_SRC})
+target_include_directories(cufinufft_32_objects PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
+target_compile_definitions(cufinufft_32_objects PUBLIC CUFINUFFT_SINGLE)
+set_property(TARGET cufinufft_32_objects PROPERTY POSITION_INDEPENDENT_CODE ON)
+
+# Build of the same sources without CUFINUFFT_SINGLE (presumably the
+# double-precision variant -- confirm against the headers).
+add_library(cufinufft_64_objects OBJECT ${PRECISION_DEPENDENT_SRC})
+target_include_directories(cufinufft_64_objects PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
+set_property(TARGET cufinufft_64_objects PROPERTY POSITION_INDEPENDENT_CODE ON)
+
+# Shared library assembled from the three object libraries.
+add_library(cufinufft SHARED
+  $<TARGET_OBJECTS:cufinufft_common_objects>
+  $<TARGET_OBJECTS:cufinufft_32_objects>
+  $<TARGET_OBJECTS:cufinufft_64_objects>
+)
+# $<TARGET_OBJECTS:...> only contributes object files; it does not carry the
+# object libraries' usage requirements. Attach the include directories here so
+# targets linking against cufinufft can find the public headers.
+target_include_directories(cufinufft PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
+# PUBLIC keeps the transitive linking of the former keyword-less signature.
+target_link_libraries(cufinufft PUBLIC CUDA::cudart CUDA::cufft CUDA::nvToolsExt)
+
+# Static library built from the same objects; it shares the output name
+# "cufinufft" with the shared library.
+add_library(cufinufft_static STATIC
+  $<TARGET_OBJECTS:cufinufft_common_objects>
+  $<TARGET_OBJECTS:cufinufft_32_objects>
+  $<TARGET_OBJECTS:cufinufft_64_objects>
+)
+target_include_directories(cufinufft_static PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
+target_link_libraries(cufinufft_static PUBLIC CUDA::cudart_static CUDA::cufft_static CUDA::nvToolsExt)
+set_property(TARGET cufinufft_static PROPERTY OUTPUT_NAME cufinufft)
+
+add_subdirectory(test)
+add_subdirectory(examples)
@@ -0,0 +1,55 @@
+// Declarative Jenkins pipeline: runs the Python test suite and `make check`
+// inside a Docker container started with one GPU, and e-mails on failure.
+pipeline {
+  // No global agent; the stage below declares its own.
+  agent none
+  options {
+    disableConcurrentBuilds()
+    // Keep at most 8 builds, and none older than 20 days.
+    buildDiscarder(logRotator(numToKeepStr: '8', daysToKeepStr: '20'))
+    timeout(time: 1, unit: 'HOURS')
+  }
+  stages {
+    stage('main') {
+      agent {
+         // Container image is built from the Dockerfile in the repository.
+         dockerfile {
+            filename 'ci/docker/cuda10.1/Dockerfile-x86_64'
+            args '--gpus 1'
+         }
+      }
+      environment {
+    // HOME doubles as the location of the Python virtual environment.
+    HOME = "$WORKSPACE/build"
+    PYBIN = "/opt/python/cp38-cp38/bin"
+      }
+      steps {
+    // Single-quoted: the variables are expanded by the shell, not by Groovy.
+    sh '${PYBIN}/python3 -m venv $HOME'
+    // Install the package in editable mode and run the Python tests.
+    sh '''#!/bin/bash -ex
+      source $HOME/bin/activate
+      LIBRARY_PATH=/io/lib python3 -m pip install -e .
+      python3 -m pip install pytest
+      python3 -m pytest
+    '''
+    sh 'make check'
+      }
+    }
+  }
+  post {
+    failure {
+      // The $NAME / ${...} tokens are inside single quotes, so Groovy leaves
+      // them untouched for the e-mail plugin to substitute.
+      emailext subject: '$PROJECT_NAME - Build #$BUILD_NUMBER - $BUILD_STATUS',
+           body: '''$PROJECT_NAME - Build #$BUILD_NUMBER - $BUILD_STATUS
+
+Check console output at $BUILD_URL to view full results.
+
+Building $BRANCH_NAME for $CAUSE
+$JOB_DESCRIPTION
+
+Changes:
+$CHANGES
+
+End of build log:
+${BUILD_LOG,maxLines=200}
+''',
+           recipientProviders: [
+         [$class: 'DevelopersRecipientProvider'],
+           ],
+           replyTo: '$DEFAULT_REPLYTO',
+           // NOTE(review): the recipient below looks like a redacted
+           // placeholder -- confirm the real address.
+           to: '[email protected]'
+    }
+  }
+}
@@ -0,0 +1,47 @@
+Copyright (C) 2018-2021 The Simons Foundation, Inc. - All Rights Reserved.
+
+Lead developer: Yu-Hsuan Melody Shih (New York University).
+
+Other developers: (see github site for full list)
+
+Garrett Wright (Princeton)
+Joakim Anden (KTH)
+Johannes Blaschke (LBNL)
+Alex Barnett (CCM, Flatiron Institute)
+
+This project came out of Melody's 2018 and 2019 summer internships at
+the Flatiron Institute, advised by Alex Barnett.
+
+------
+
+cuFINUFFT is licensed under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance with the
+License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+------
+
+Certain parts of this repository are contributed by others.
+For their license info, see:
+
+contrib/legendre_rule_fast.license
+fortran/cmcl_license.txt
+
+------
+
+If you find this library useful, or it helps you in creating software
+or publications, please let us know, and acknowledge that fact by citing our
+repository:
+
+  https://github.com/flatironinstitute/cufinufft
+
+and the publication:
+
+cuFINUFFT: a load-balanced GPU library for general-purpose nonuniform FFTs, Yu-hsuan Shih, Garrett Wright, Joakim Andén, Johannes Blaschke, Alex H. Barnett. PDSEC2021 conference (best paper prize). https://arxiv.org/abs/2102.08463
@@ -0,0 +1,2 @@
+include python/cufinufft/README.md
+include python/cufinufft/requirements.txt
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+include python/cufinufft/README.md`
	`2`	`+include python/cufinufft/requirements.txt`