BUG: Accuracy Issues (#80)
* Squash a number of bugs

* Fix up tests

* Add new config

* Add test data

* Add test docs

* Remove unused backend

* Remove unused imports

* Remove references to cuda
skailasa authored Jun 9, 2021
1 parent e14c712 commit f6caf9c
Showing 16 changed files with 216 additions and 535 deletions.
23 changes: 9 additions & 14 deletions README.md
@@ -4,7 +4,7 @@ PyExaFMM

[![Anaconda-Server Badge](https://img.shields.io/conda/v/skailasa/pyexafmm.svg)](https://anaconda.org/skailasa/pyexafmm) [![Anaconda-Server Badge](https://anaconda.org/skailasa/pyexafmm/badges/latest_release_date.svg)](https://anaconda.org/skailasa/pyexafmm) [![Anaconda-Server Badge](https://anaconda.org/skailasa/pyexafmm/badges/platforms.svg)](https://anaconda.org/skailasa/pyexafmm)

-PyExaFMM is an adaptive particle kernel-independent FMM based on [1], written in pure Python with some extensions. Representing a compromise between portability, ease of use, and performance. Optimisations are currently implemented using Numba, Numpy, and CUDA acceleration.
+PyExaFMM is an adaptive particle kernel-independent FMM based on [1], written in pure Python with some extensions. Representing a compromise between portability, ease of use, and performance. Optimisations are currently implemented using Numba.

The goal of PyExaFMM is to develop a highly performant implementation of the adaptive particle FMM written in Python, as the utility of FMM algorithms is hindered by their relatively complex implementation, especially when aiming for high performance.

@@ -14,13 +14,8 @@ PyExaFMM compares favourably on realistic problem sizes with naive direct evaluation

<img src="static/performance.png" alt="Laplace" width="800">


Here we use an order 5 multipole expansion, an order 6 local expansion, and constrain leaf nodes to contain a maximum of 100 particles. The target rank in our M2L operator compression is kept at 1. The benchmark was run on an Intel i7-9750H processor.

-## System Requirements
-
-An NVidia GPU is required, as PyExaFMM is accelerated with CUDA.

## Install

Download from Anaconda cloud into a conda/mini-conda environment:
@@ -60,19 +55,19 @@ This is done via a `config.json` file, PyExaFMM will look for this in your **current working directory**

```json
 {
-    "experiment": "fmm",
-    "npoints": 100000,
+    "experiment": "test",
+    "npoints": 10000,
     "data_type": "random",
     "order_equivalent": 5,
-    "order_check": 6,
+    "order_check": 5,
     "kernel": "laplace",
     "backend": "numba",
     "alpha_inner": 1.05,
-    "alpha_outer": 2.95,
+    "alpha_outer": 2.9,
     "max_level": 10,
-    "max_points": 100,
-    "target_rank": 1,
-    "cond": 1e-16
+    "max_points": 150,
+    "target_rank": 10,
+    "tol": 1e-4
 }
```

@@ -89,7 +84,7 @@ This is done via a `config.json` file, PyExaFMM will look for this in your **current working directory**
| `alpha_outer` | Relative size of outer surface's radius. |
| `max_level` | Depth of octree to use in simulations. |
| `target_rank` | Target rank in low-rank compression of M2L matrix. |
-| `cond` | Threshold under which to ignore singular values in randomised SVD. |
+| `tol` | Threshold under which to ignore singular values in the SVD taken to invert check-to-equivalent matrices. |

PyExaFMM provides some simple test-data generation functions, which can be configured via the config file. However, to use your own data, simply create an HDF5 file with the same name as `experiment` in your configuration file, with the following group hierarchy,
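
The hierarchy itself is collapsed in this diff view. As a rough sketch of the idea, and assuming hypothetical group and dataset names, such a file could be written with `h5py` as follows:

```python
import h5py
import numpy as np

# Hypothetical layout: the exact group/dataset names are collapsed in this
# diff view, so the names below are illustrative only.
npoints = 10000
rng = np.random.default_rng(42)

# The file name must match the "experiment" key in config.json, e.g. "test".
with h5py.File("test.hdf5", "w") as f:
    grp = f.create_group("particle_data")
    grp.create_dataset("sources", data=rng.random((npoints, 3)))
    grp.create_dataset("targets", data=rng.random((npoints, 3)))
    grp.create_dataset("source_densities", data=rng.random(npoints))
```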

12 changes: 0 additions & 12 deletions fmm/backend/api.py
@@ -2,7 +2,6 @@
Interface for compute backends.
"""
import fmm.backend.numba as numba_backend
-import fmm.backend.openmp as openmp_backend

BACKEND = {
"numba": {
@@ -15,16 +14,5 @@
"m2t": numba_backend.m2t,
"near_field_u_list": numba_backend.near_field_u_list,
"near_field_node": numba_backend.near_field_node,
},
"openmp": {
"p2m": openmp_backend.p2m,
"m2m": openmp_backend.m2m,
"m2l": openmp_backend.m2l,
"l2l": openmp_backend.l2l,
"s2l": openmp_backend.s2l,
"l2t": openmp_backend.l2t,
"m2t": openmp_backend.m2t,
"near_field_u_list": openmp_backend.near_field,
"near_field_node": openmp_backend.near_field,
}
}
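
With the OpenMP entries removed, `numba` is the only remaining dispatch target. As a minimal sketch of how a dispatch table like this is typically consumed (the `config` dict and error handling below are illustrative, not PyExaFMM's exact API):

```python
# Sketch only: select the implementation named by config.json's "backend" key.
config = {"backend": "numba"}

try:
    backend = BACKEND[config["backend"]]
except KeyError as err:
    raise ValueError(f"Unsupported backend: {config['backend']}") from err

# Individual operators are then invoked through the table.
p2m = backend["p2m"]
```

The indirection keeps the rest of the library backend-agnostic: adding a new backend means adding one dictionary entry rather than touching call sites.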
40 changes: 21 additions & 19 deletions fmm/backend/numba.py
@@ -319,7 +319,6 @@ def m2l_core(
scale : np.float32
Precomputed kernel scale for this level.
"""

nv_list = len(v_list)

# Index of local expansion
@@ -328,16 +327,16 @@
for i in range(nv_list):

source = v_list[i]
-midx = key_to_index[source]*nequivalent_points

transfer_vector = morton.find_transfer_vector(target, source)
-u_idx = hash_to_index[transfer_vector]
-u_lidx = u_idx*ncheck_points

-u_sub = u[u_lidx:u_lidx+ncheck_points]
+v_idx = hash_to_index[transfer_vector]
+v_lidx = v_idx*ncheck_points
+v_ridx = v_lidx+ncheck_points
+vt_sub = np.copy(vt[:, v_lidx:v_ridx])

-multipole_expansion = multipole_expansions[midx:midx+nequivalent_points]
-local_expansions[lidx:lidx+nequivalent_points] += scale*(dc2e_inv @ (u_sub @ (s @ ( vt @ multipole_expansion))))
+m_lidx = key_to_index[source]*nequivalent_points
+m_ridx = m_lidx+nequivalent_points
+local_expansions[lidx:lidx+nequivalent_points] += (scale*dc2e_inv) @ (u @ (s @ (vt_sub @ multipole_expansions[m_lidx:m_ridx])))
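
The fix changes which SVD factor is sliced per transfer vector: the left factor `u` is shared by all transfer vectors at a level, while each transfer vector owns a column block of `vt`. A standalone NumPy sketch of the patched application, with illustrative shapes rather than PyExaFMM's actual sizes:

```python
import numpy as np

# Illustrative sizes only.
ncheck, nequiv, rank, ntransfer = 20, 20, 10, 316

rng = np.random.default_rng(0)
u = rng.random((ncheck, rank), dtype=np.float32)   # shared left factor
s = np.diag(rng.random(rank, dtype=np.float32))    # compressed singular values
vt = rng.random((rank, ntransfer * nequiv), dtype=np.float32)
dc2e_inv = rng.random((nequiv, ncheck), dtype=np.float32)
multipole = rng.random(nequiv, dtype=np.float32)
scale = np.float32(2.0)

# Each transfer vector owns one column block of vt.
v_idx = 42
vt_sub = vt[:, v_idx * nequiv:(v_idx + 1) * nequiv]

# Apply the compressed operator right-to-left, as in the patched m2l_core.
local = (scale * dc2e_inv) @ (u @ (s @ (vt_sub @ multipole)))
```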


@numba.njit(cache=True, parallel=True)
@@ -367,7 +366,7 @@ def m2l(
u : np.array(np.float32)
Compressed left singular vectors of SVD of M2L Gram matrix for nodes at this level.
s : np.array(np.float32)
Compressed singular values of SVD of M2L Gram matrix for nodes at this level.
vt : np.array(np.float32)
Compressed right singular vectors of SVD of M2L Gram matrix for nodes at this level.
dc2e_inv : np.array(shape=(n_equivalent, n_check), dtype=np.float64)
@@ -443,13 +442,13 @@ def l2l(
parent = morton.find_parent(key)
parent_idx = key_to_index[parent]
parent_lidx = parent_idx*nequivalent_points
-parent_ridx = (parent_idx+1)*nequivalent_points
+parent_ridx = parent_lidx+nequivalent_points
parent_equivalent_density = local_expansions[parent_lidx:parent_ridx]

# Compute expansion index
child_idx = key_to_index[key]
child_lidx = child_idx*nequivalent_points
-child_ridx = (child_idx+1)*nequivalent_points
+child_ridx = child_lidx+nequivalent_points

# Compute operator index
operator_idx = key == morton.find_siblings(key)
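
The `(idx+1)*n` → `lidx+n` rewrites in this commit are all equivalent forms of the same flattened-buffer convention: expansions for every node live in one flat array, one fixed-size block per key. A tiny sketch (names hypothetical):

```python
import numpy as np

nequivalent_points = 6
nkeys = 4

# One flat buffer holding a block of coefficients per key.
local_expansions = np.zeros(nkeys * nequivalent_points, dtype=np.float32)

# Block edges for the key at index 2; both right-edge forms are identical,
# the patched one just derives the right edge from the left.
idx = 2
lidx = idx * nequivalent_points
ridx = lidx + nequivalent_points   # == (idx + 1) * nequivalent_points
local_expansions[lidx:ridx] += 1.0
```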
@@ -515,9 +514,10 @@ def s2l(
p2p_function : function
Function handle for kernel P2P.
"""
-level = np.int32(morton.find_level(key))
-scale = np.int32(scale_function(level))
-center = morton.find_physical_center_from_key(key, x0, r0).astype(np.float32)

+level = morton.find_level(key)
+scale = scale_function(level)
+center = morton.find_physical_center_from_key(key, x0, r0)

key_idx = key_to_index[key]
key_lidx = key_idx*nequivalent_points
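
Dropping the casts here matters for accuracy: forcing a fractional kernel scale through `np.int32`, or a `float64` centre through `np.float32`, silently discards information. A minimal illustration (the `1/2**level` scale is only an example of a fractional scale function, not necessarily PyExaFMM's):

```python
import numpy as np

level = 3
scale = 1 / 2**level                # an illustrative fractional scale

# An integer cast zeroes out any fractional scale entirely.
print(np.int32(scale))              # 0
print(scale)                        # 0.125

# Down-casting a float64 centre coordinate loses digits silently.
center = np.array([1 / 3])
print(center.astype(np.float32) - center)   # ~1e-08 rounding error
```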
@@ -601,10 +601,10 @@ def m2t(

source_idx = key_to_index[source]
source_lidx = source_idx*nequivalent_points
-source_ridx = (source_idx+1)*nequivalent_points
+source_ridx = source_lidx+nequivalent_points

-source_level = np.int32(morton.find_level(source))
-source_center = morton.find_physical_center_from_key(source, x0, r0).astype(np.float32)
+source_level = morton.find_level(source)
+source_center = morton.find_physical_center_from_key(source, x0, r0)

upward_equivalent_surface = surface.scale_surface(
surf=equivalent_surface,
@@ -673,8 +673,8 @@ def l2t(
source_lidx = (source_idx)*nequivalent_points
source_ridx = source_lidx+nequivalent_points

-level = np.int32(morton.find_level(key))
-center = morton.find_physical_center_from_key(key, x0, r0).astype(np.float32)
+level = morton.find_level(key)
+center = morton.find_physical_center_from_key(key, x0, r0)

downward_equivalent_surface = surface.scale_surface(
equivalent_surface,
@@ -878,6 +878,8 @@ def near_field_node(
p2p_function
):
"""
+Evaluate all near field particles for source particles within a given
+target node.
Parameters:
-----------
35 changes: 0 additions & 35 deletions fmm/backend/openmp.py

This file was deleted.

4 changes: 1 addition & 3 deletions fmm/fmm.py
@@ -180,7 +180,7 @@ def downward_pass(self):

# Keys at this level
keys = self.complete[self.complete_levels == level]
-scale = self.scale_function(level)
+scale = np.float32(self.scale_function(level))

# V List interactions
# M2L operator stored in terms of its SVD components for each level
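
Conversely, `downward_pass` now casts `scale` to `np.float32` before handing it to the jitted kernels: Numba types a plain Python float as `float64`, which would promote `float32` array arithmetic inside the kernel (and compile a separate specialization per argument signature). A small sketch of the effect, with an illustrative kernel:

```python
import numba
import numpy as np

@numba.njit(cache=True)
def scale_block(block, scale):
    # With float32 inputs throughout, arithmetic stays in float32;
    # a float64 scale silently promotes the result.
    return scale * block

block = np.ones(4, dtype=np.float32)
print(scale_block(block, np.float32(0.5)).dtype)  # float32
print(scale_block(block, 0.5).dtype)              # float64 (promoted)
```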
@@ -218,8 +218,6 @@

for key in keys:

-idx = self.key_to_index[key]

# Translate local expansion from the node's parent
self.backend['l2l'](
key=key,
