tests passing

creamy-seas · Jul 20, 2020 · a42f8fd · a42f8fd
1 parent 18e5333
commit a42f8fd
Show file tree

Hide file tree

Showing 10 changed files with 398 additions and 337 deletions.
diff --git a/app_builder.py b/app_builder.py
diff --git a/entrypoint.py b/entrypoint.py
@@ -1,4 +1,5 @@
 import math
+import itertools
 
 pi = math.pi
 
@@ -8,10 +9,15 @@
 
 from functions.potential import potential_function_cuda
 from kernels.potential_evaluator import PotentialEvaluator
+from utils.array_stacker import ArrayStacker
 
 # Parameters for simulation ###################################################
 NUMBER_OF_PHI_POINTS = 100
-NUMBER_OF_FIELD_POINTS = 50
+NUMBER_OF_FIELD_POINTS = 40
+NUMBER_OF_FIELD_POINTS_PER_RUN = 20
+NUMBER_OF_FIELD_RUNS = (
+    NUMBER_OF_FIELD_POINTS - 1
+) // NUMBER_OF_FIELD_POINTS_PER_RUN + 1
 ALPHA = 1
 LOWER = -0.5
 UPPER = 1.5
@@ -20,68 +26,46 @@
 phixx_array = np.linspace(-pi, pi, NUMBER_OF_PHI_POINTS)
 
 # Kernels #####################################################################
-potential_evaluator = PotentialEvaluator(
-    NUMBER_OF_FIELD_POINTS, NUMBER_OF_PHI_POINTS, potential_function_cuda
-)
-THREADS_PER_BLOCK = potential_evaluator.allocate_max_threads()
-BLOCKS_PER_GRID = (NUMBER_OF_FIELD_POINTS, NUMBER_OF_FIELD_POINTS)
+potential_evaluator = PotentialEvaluator(NUMBER_OF_PHI_POINTS, potential_function_cuda)
+THREADS_PER_BLOCK = potential_evaluator.allocate_max_threads(8)
+BLOCKS_PER_GRID = (NUMBER_OF_FIELD_POINTS_PER_RUN, NUMBER_OF_FIELD_POINTS_PER_RUN)
 potential_evaluator.verify_blocks_per_grid(BLOCKS_PER_GRID)
+
 # Execution ###################################################################
 DEVICE_lr_array = cuda.to_device(lr_array)
 DEVICE_phixx_array = cuda.to_device(phixx_array)
 DEVICE_potential_array = cuda.device_array(
     shape=(
-        NUMBER_OF_FIELD_POINTS,
-        NUMBER_OF_FIELD_POINTS,
+        NUMBER_OF_FIELD_POINTS_PER_RUN,
+        NUMBER_OF_FIELD_POINTS_PER_RUN,
         NUMBER_OF_PHI_POINTS,
         NUMBER_OF_PHI_POINTS,
         NUMBER_OF_PHI_POINTS,
     ),
     dtype=np.float32,
 )
 
-# potential_evaluator.kernel[BLOCKS_PER_GRID, THREADS_PER_BLOCK](
-#     DEVICE_phixx_array, DEVICE_lr_array, ALPHA, DEVICE_potential_array
-# )
-# print(DEVICE_potential_array.copy_to_host().shape)
-
-@cuda.jit(device=True)
-def dummy_potential_function(phi_array, L: float, R: float, alpha: float):
-    return (
-        alpha * 10 ** 5
-        + phi_array[0] * 10 ** 4
-        + phi_array[1] * 10 ** 3
-        + phi_array[2] * 10 ** 2
-        + L * 10 ** 1
-        + R * 10 ** 0
-    )
-
-@cuda.jit
-def kernel_wrapped_func(
-        phi01: float,
-        phi02: float,
-        phi03: float,
-        L: float,
-        R: float,
-        alpha: float,
-        store_value,
+# Go through each of the field sections and evaluate ##########################
+FIELD_SECTIONS = [[None] * NUMBER_OF_FIELD_RUNS for i in range(0, NUMBER_OF_FIELD_RUNS)]
+for (L_RUN, R_RUN) in itertools.product(
+    range(0, NUMBER_OF_FIELD_RUNS), range(0, NUMBER_OF_FIELD_RUNS)
 ):
-    store_value[0] = dummy_potential_function(
-        (phi01, phi02, phi03), L, R, alpha
+    print(
+        f"🦑 Running (L={L_RUN}/{NUMBER_OF_FIELD_RUNS - 1}), (R={R_RUN}/{NUMBER_OF_FIELD_RUNS - 1})"
+    )
+    L_OFFSET = int(L_RUN * NUMBER_OF_FIELD_POINTS_PER_RUN)
+    R_OFFSET = int(R_RUN * NUMBER_OF_FIELD_POINTS_PER_RUN)
+    potential_evaluator.kernel[BLOCKS_PER_GRID, THREADS_PER_BLOCK](
+        DEVICE_phixx_array,
+        DEVICE_lr_array,
+        L_OFFSET,
+        R_OFFSET,
+        ALPHA,
+        DEVICE_potential_array,
     )
 
-import numpy as np
-store_result = cuda.device_array(shape=(1), dtype=np.float32)
-kernel_wrapped_func[1,1](1, 2, 3, 4, 5, 6, store_result)
-
+    FIELD_SECTIONS[L_RUN][R_RUN] = DEVICE_potential_array.copy_to_host()
 
-assert store_result.copy_to_host()[0] == 612345
+TOTAL_FIELD = ArrayStacker.stack_into_square(FIELD_SECTIONS)
 
-#     kernel[NUMBER_OF_FIELD_POINTS, THREADS_PER_BLOCK](
-#         , DEVICE_potential_array
-#     )
-#     # potential_array =
-#     # DEVICE_potential_array_to_minimize = DEVICE_potential_array.copy_to_host()
-#     print("Original")
-#
-#     print(DEVICE_potential_array.copy_to_host()[0][0][1])
+print(TOTAL_FIELD)
diff --git a/flycheck_entrypoint.py b/flycheck_entrypoint.py
diff --git a/kernels/potential_evaluator.py b/kernels/potential_evaluator.py
@@ -1,4 +1,4 @@
-from typing import List, Callable, Tuple
+from typing import List, Callable, Tuple, Optional
 
 from numba import cuda
 from numba.cuda.cudadrv.devicearray import DeviceNDArray
@@ -8,30 +8,30 @@
 
 class PotentialEvaluator:
     def __init__(
-        self,
-        number_of_field_points: int,
-        number_of_phi_points: int,
-        potential_function_cuda: Callable,
+        self, number_of_phi_points: int, potential_function_cuda: Callable,
     ):
         self.NUMBER_OF_PHI_POINTS = number_of_phi_points
-        self.NUMBER_OF_FIELD_POINTS = number_of_field_points
         self.potential_function_cuda = potential_function_cuda
 
         self.kernel = self.kernel_wrapper()
         self.gpu_info = gpu_check()
 
-    def allocate_max_threads(self) -> Tuple[int, int, int]:
+    def allocate_max_threads(
+        self, user_defined_number: Optional[int]=None
+    ) -> Tuple[int, int, int]:
         print(
             f"""Thread parameters:
         > Max threads per block: {self.gpu_info['max_threads_per_block']}
         > Max threads in x: {self.gpu_info['max_block_dim_x']}
         > Max threads in y: {self.gpu_info['max_block_dim_y']}
         > Max threads in z: {self.gpu_info['max_block_dim_z']}"""
         )
-
         max_threads_approximation = int(
             self.gpu_info["max_threads_per_block"] ** (1 / 3)
         )
+        if user_defined_number is not None:
+            max_threads_approximation = user_defined_number
+
         max_thread_allocation = (
             min(max_threads_approximation, self.gpu_info["max_block_dim_x"]),
             min(max_threads_approximation, self.gpu_info["max_block_dim_y"]),
@@ -63,20 +63,24 @@ def verify_blocks_per_grid(self, blocks_per_grid: Tuple) -> bool:
         return True
 
     def kernel_wrapper(self):
-        NUMBER_OF_FIELD_POINTS = self.NUMBER_OF_FIELD_POINTS
         NUMBER_OF_PHI_POINTS = self.NUMBER_OF_PHI_POINTS
         potential_function_cuda = self.potential_function_cuda
 
         @cuda.jit
         def kernel(
             phixx_array: List[float],
             lr_array: List[float],
+            L_offset: int,
+            R_offset: int,
             alpha: float,
             array_out: DeviceNDArray,
         ):
             """
             phixx_array:        array of the values that phi01, phi02, phi03
             lr_array:           array of the values for phil and phir
+            L_offset, R_offset: because of finite memory on device, we will launch this
+                                function multiple times but with different offsets to cover the whole
+                                lr_array
             alpha:              variable parameter
             array_out:          allocate either with cuda.device_array or passing in a numpy array
 
@@ -88,19 +92,25 @@ def kernel(
             phi03_idx = cuda.threadIdx.z
             L = cuda.blockIdx.x
             R = cuda.blockIdx.y
+            L_offset = int(L + L_offset)
+            R_offset = int(R + R_offset)
 
             # Traverse over the full grid
             while phi01_idx < NUMBER_OF_PHI_POINTS:
                 while phi02_idx < NUMBER_OF_PHI_POINTS:
                     while phi03_idx < NUMBER_OF_PHI_POINTS:
-                        array_out[L][R][phi01_idx][phi02_idx][phi03_idx] = potential_function_cuda(
+                        L_FIELD = lr_array[L_offset]
+                        R_FIELD = lr_array[R_offset]
+                        array_out[L][R][phi01_idx][phi02_idx][
+                            phi03_idx
+                        ] = potential_function_cuda(
                             (
                                 phixx_array[phi01_idx],
                                 phixx_array[phi02_idx],
                                 phixx_array[phi03_idx],
                             ),
-                            lr_array[L],
-                            lr_array[R],
+                            L_FIELD,
+                            R_FIELD,
                             alpha,
                         )