first simulations - there is a shift which causes problems

creamy-seas · Jul 20, 2020 · 80190d7 · 80190d7
1 parent a42f8fd
commit 80190d7
Show file tree

Hide file tree

Showing 10 changed files with 427 additions and 53 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
 __pycache__/
+.ipynb_checkpoints/
diff --git a/Makefile b/Makefile
@@ -1,5 +1,12 @@
-run:
+jupyter:
+	jupyter notebook --no-browser --port 8888 --allow-root --NotebookApp.token='' --NotebookApp.password=''
+cuda:
 	@@time(\
-		time python potential_minimum_gridsearch.py \
+		time python entrypoint.py \
+	)
+	@echo -e "\n🐬 Finished\n"
+cpu:
+	@@time(\
+		time python potential_python.py \
 	)
 	@echo -e "\n🐬 Finished\n"
diff --git a/entrypoint.py b/entrypoint.py
@@ -1,20 +1,23 @@
 import math
 import itertools
+from collections import defaultdict
 
 pi = math.pi
 
 import numpy as np
 from numba import cuda
 from numba.cuda.cudadrv.devicearray import DeviceNDArray
+import matplotlib.pyplot as plt
 
 from functions.potential import potential_function_cuda
 from kernels.potential_evaluator import PotentialEvaluator
+from kernels.potential_minimum_searcher import PotentialMinimumSearcher
 from utils.array_stacker import ArrayStacker
 
 # Parameters for simulation ###################################################
-NUMBER_OF_PHI_POINTS = 100
-NUMBER_OF_FIELD_POINTS = 40
-NUMBER_OF_FIELD_POINTS_PER_RUN = 20
+NUMBER_OF_PHI_POINTS = 10
+NUMBER_OF_FIELD_POINTS = 10
+NUMBER_OF_FIELD_POINTS_PER_RUN = 10
 NUMBER_OF_FIELD_RUNS = (
     NUMBER_OF_FIELD_POINTS - 1
 ) // NUMBER_OF_FIELD_POINTS_PER_RUN + 1
@@ -26,10 +29,13 @@
 phixx_array = np.linspace(-pi, pi, NUMBER_OF_PHI_POINTS)
 
 # Kernels #####################################################################
-potential_evaluator = PotentialEvaluator(NUMBER_OF_PHI_POINTS, potential_function_cuda)
-THREADS_PER_BLOCK = potential_evaluator.allocate_max_threads(8)
 BLOCKS_PER_GRID = (NUMBER_OF_FIELD_POINTS_PER_RUN, NUMBER_OF_FIELD_POINTS_PER_RUN)
-potential_evaluator.verify_blocks_per_grid(BLOCKS_PER_GRID)
+
+potential_evaluator = PotentialEvaluator(NUMBER_OF_PHI_POINTS, potential_function_cuda)
+THREADS_PER_BLOCK_potential_evaluation = potential_evaluator.allocate_max_threads(8)
+
+potential_minimum_searcher = PotentialMinimumSearcher(NUMBER_OF_PHI_POINTS)
+THREADS_PER_BLOCK_potential_search = potential_minimum_searcher.allocate_max_threads()
 
 # Execution ###################################################################
 DEVICE_lr_array = cuda.to_device(lr_array)
@@ -44,9 +50,14 @@
     ),
     dtype=np.float32,
 )
+DEVICE_grid_search_result_array = cuda.device_array(
+    shape=(NUMBER_OF_FIELD_POINTS_PER_RUN, NUMBER_OF_FIELD_POINTS_PER_RUN, 4),
+    dtype=np.float32,
+)
 
 # Go through teach of the field section and evaluate ##########################
-FIELD_SECTIONS = [[None] * NUMBER_OF_FIELD_RUNS for i in range(0, NUMBER_OF_FIELD_RUNS)]
+quadrants = defaultdict(lambda: [[None] * NUMBER_OF_FIELD_RUNS for i in range(0, NUMBER_OF_FIELD_RUNS)])
+
 for (L_RUN, R_RUN) in itertools.product(
     range(0, NUMBER_OF_FIELD_RUNS), range(0, NUMBER_OF_FIELD_RUNS)
 ):
@@ -55,17 +66,45 @@
     )
     L_OFFSET = int(L_RUN * NUMBER_OF_FIELD_POINTS_PER_RUN)
     R_OFFSET = int(R_RUN * NUMBER_OF_FIELD_POINTS_PER_RUN)
-    potential_evaluator.kernel[BLOCKS_PER_GRID, THREADS_PER_BLOCK](
+    potential_evaluator.kernel[BLOCKS_PER_GRID, THREADS_PER_BLOCK_potential_evaluation](
         DEVICE_phixx_array,
         DEVICE_lr_array,
         L_OFFSET,
         R_OFFSET,
         ALPHA,
         DEVICE_potential_array,
     )
+    potential_minimum_searcher.kernel[
+        BLOCKS_PER_GRID, THREADS_PER_BLOCK_potential_search
+    ](DEVICE_potential_array, DEVICE_grid_search_result_array)
+
+
+    grid_search_result_array = DEVICE_grid_search_result_array.copy_to_host()
+    quadrants["potential"][L_RUN][R_RUN] = grid_search_result_array[:,:,0]
+    quadrants["phi01"][L_RUN][R_RUN] = phixx_array[grid_search_result_array[:,:,1].astype(int)]
+    quadrants["phi02"][L_RUN][R_RUN] = phixx_array[grid_search_result_array[:,:,2].astype(int)]
+    quadrants["phi03"][L_RUN][R_RUN] = phixx_array[grid_search_result_array[:,:,3].astype(int)]
 
-    FIELD_SECTIONS[L_RUN][R_RUN] = DEVICE_potential_array.copy_to_host()
+result = {}
+for key, value in quadrants.items():
+    print(f"🦑--------------------{key}--------------------")
+    result[key] = ArrayStacker.stack_into_square(value)
+    print(result[key])
 
-TOTAL_FIELD = ArrayStacker.stack_into_square(FIELD_SECTIONS)
 
-print(TOTAL_FIELD)
+###############################################################################
+#   sudo yum install python36-tkinter and do ssh -X to print on own computer  #
+###############################################################################
+fig, ax = plt.subplots(1, 1, sharex=True, sharey=True, figsize=(5, 5))
+
+im = ax.imshow(
+    # result["potential"],
+    result["phi01"],
+    extent = [LOWER, UPPER, LOWER, UPPER],
+    origin= 'lower',
+    cmap='cividis',
+    # cmap='YlGnBu'
+    # interpolation='spline36'
+)
+cbar = fig.colorbar(im)
+plt.show()
diff --git a/entrypoint_notebook.ipynb b/entrypoint_notebook.ipynb
diff --git a/kernels/potential_evaluator.py b/kernels/potential_evaluator.py
@@ -17,15 +17,17 @@ def __init__(
         self.gpu_info = gpu_check()
 
     def allocate_max_threads(
-        self, user_defined_number: Optional[int]=None
+            self, user_defined_number: Optional[int] = None,
+            verbose=False
     ) -> Tuple[int, int, int]:
-        print(
-            f"""Thread parameters:
+        if verbose:
+            print(
+                f"""Thread parameters:
         > Max threads per block: {self.gpu_info['max_threads_per_block']}
         > Max threads in x: {self.gpu_info['max_block_dim_x']}
         > Max threads in y: {self.gpu_info['max_block_dim_y']}
         > Max threads in z: {self.gpu_info['max_block_dim_z']}"""
-        )
+            )
         max_threads_approximation = int(
             self.gpu_info["max_threads_per_block"] ** (1 / 3)
         )
@@ -37,17 +39,18 @@ def allocate_max_threads(
             min(max_threads_approximation, self.gpu_info["max_block_dim_y"]),
             min(max_threads_approximation, self.gpu_info["max_block_dim_z"]),
         )
-        print(f"🐳 Allocating (THREADS_PER_BLOCK = {max_thread_allocation})")
+        print(f"🐳 {'Allocating':<20} THREADS_PER_BLOCK = {max_thread_allocation}")
 
         return max_thread_allocation
 
-    def verify_blocks_per_grid(self, blocks_per_grid: Tuple) -> bool:
-        print(
-            f"""Block parameters:
+    def verify_blocks_per_grid(self, blocks_per_grid: Tuple, verbose=False) -> bool:
+        if verbose:
+            print(
+                f"""Block parameters:
         > Max blocks in x: {self.gpu_info['max_grid_dim_x']}
         > Max blocks in y: {self.gpu_info['max_grid_dim_y']}
         > Max blocks in z: {self.gpu_info['max_grid_dim_z']}"""
-        )
+            )
         for (block_dim, max_dim) in zip(
             blocks_per_grid,
             [
@@ -59,7 +62,7 @@ def verify_blocks_per_grid(self, blocks_per_grid: Tuple) -> bool:
             if block_dim > max_dim:
                 print("🦑 Allocating too many blocks")
                 return False
-        print(f"🐳 Verified BLOCKS_PER_GRID={blocks_per_grid}")
+        print(f"🐳 {'Verified':<20} BLOCKS_PER_GRID={blocks_per_grid}")
         return True
 
     def kernel_wrapper(self):
@@ -78,9 +81,11 @@ def kernel(
             """
             phixx_array:        array of the values that phi01, phi02, phi03
             lr_array:           array of the values for phil and phir
-            L_offset, R_offset: because of finite memory on device, we will launch this
-                                function multiple times but with different offsets to cover the whole
-                                lr_array
+            L_offset, R_offset: because of finite memory on device, grid search is performed
+                                on separate qudrants of the field.
+                                In order to a global lr_array, this offset
+                                if introduced to access elements for the different quadrants
+
             alpha:              variables parametr
             array_out:          allocate either with cuda.device_array or passing in a numpy array
 

diff --git a/kernels/potential_minimum_search.py → kernels/potential_minimum_searcher.py b/kernels/potential_minimum_search.py → kernels/potential_minimum_searcher.py
@@ -16,15 +16,17 @@ def __init__(self, number_of_phi_points: int):
         self.gpu_info = gpu_check()
 
     def allocate_max_threads(
-        self, user_defined_number: Optional[int] = None
+            self, user_defined_number: Optional[int] = None,
+            verbose=False
     ) -> Tuple[int, int]:
-        print(
-            f"""Thread parameters:
+        if verbose:
+            print(
+                f"""Thread parameters:
         > Max threads per block: {self.gpu_info['max_threads_per_block']}
         > Max threads in x: {self.gpu_info['max_block_dim_x']}
         > Max threads in y: {self.gpu_info['max_block_dim_y']}
         > Max threads in z: {self.gpu_info['max_block_dim_z']}"""
-        )
+            )
         max_threads_approximation = int(
             self.gpu_info["max_threads_per_block"] ** (1 / 2)
         )
@@ -35,7 +37,7 @@ def allocate_max_threads(
             min(max_threads_approximation, self.gpu_info["max_block_dim_x"]),
             min(max_threads_approximation, self.gpu_info["max_block_dim_x"]),
         )
-        print(f"🐳 Allocating (THREADS_PER_BLOCK = {max_thread_allocation})")
+        print(f"🐳 {'Allocating':<20} THREADS_PER_BLOCK = {max_thread_allocation}")
 
         return max_thread_allocation
 
@@ -44,18 +46,15 @@ def kernel_wrapper(self):
 
         @cuda.jit
         def kernel(
-            potential_grid: DeviceNDArray,
-            # L_offset: int,
-            # R_offset: int,
-            array_out: DeviceNDArray,
+            potential_grid: DeviceNDArray, array_out: DeviceNDArray,
         ):
             """Take a 5D grid loaded into memory and find the minimum for each L-R point
 
-            L_offset, R_offset: because of finite memory on device, we might need an offset
-            potential_grid:     array allocated on the device that stores the results of
-                                previous potential evaluation. L, R index the field that
-                                it was evaluated at
+            potential_grid:     array with the evaluated potential values at each L,R point
+            array_out:          array with a [min_potential, min_phi01, min_phi02, min_phi03] for
+                                each L,R point
             """
+
             L = cuda.blockIdx.x
             R = cuda.blockIdx.y
 
@@ -99,7 +98,7 @@ def kernel(
                 phi01_idx += cuda.blockDim.x
             cuda.syncthreads()
 
-            # # Project from line to points (go across the line) ################
+            # Project from line to points (go across the line) ################
             for (phi01_idx, potential) in enumerate(potential_grid[L][R][:, 0, 0]):
                 array_out[L][R][0] = potential_grid[L][R][0][0][0]
                 if potential < potential_grid[L][R][0][0][0]:

diff --git a/potential_python.py b/potential_python.py
@@ -5,7 +5,6 @@
 import numpy as np
 from scipy import optimize
 
-NUMBA_TUPLE_TYPE = nb.typeof((0.0, 0.0, 0.0, 0.0))
 FLUX = float
 FLUX_NUMBER = int
 cos = np.cos
@@ -61,8 +60,8 @@ def minimize(L: FLUX, R: FLUX, alpha: float, number_of_phi_points: int = 100):
 ###############################################################################
 #                              Common parameters                              #
 ###############################################################################
-NUMBER_OF_PHI_POINTS = 50
-NROWSCOLS = 2
+NUMBER_OF_PHI_POINTS = 10
+NROWSCOLS = 10
 ALPHA = 1
 LOWER = -0.5
 UPPER = 1.5

diff --git a/tests/test_potential_minimum_search.py → tests/test_potential_minimum_searcher.py b/tests/test_potential_minimum_search.py → tests/test_potential_minimum_searcher.py
@@ -2,12 +2,12 @@
 from unittest.mock import Mock
 from unittest.mock import patch
 
-from kernels.potential_minimum_search import PotentialMinimumSearcher
+from kernels.potential_minimum_searcher import PotentialMinimumSearcher
 import numpy as np
 from numba import cuda
 
 
-class TestPotentialEvaluator(unittest.TestCase):
+class TestPotentialMinimumSearcher(unittest.TestCase):
     def setUp(self):
 
         self.NUMBER_OF_PHI_POINTS = 2

diff --git a/utils/array_stacker.py b/utils/array_stacker.py
@@ -4,6 +4,12 @@
 
 
 class ArrayStacker:
+    @staticmethod
+    def extract_ith_entry(arrays_to_extract: List[List[np.ndarray]]):
+        """Extract the ith entry of an array"""
+
+        pass
+
     @classmethod
     def stack_into_square(cls, arrays_to_stack: List[List[np.ndarray]]) -> np.ndarray:
         cls.square_check(arrays_to_stack)

diff --git a/utils/info.py b/utils/info.py
@@ -15,14 +15,14 @@ def gpu_check():
             "max_grid_dim_y": device.MAX_GRID_DIM_Y,
             "max_grid_dim_z": device.MAX_GRID_DIM_Z,
         }
-        print(f"🦑 Found device {str(device)}")
-        print(parameters)
+        # print(f"🦑 Found device {str(device)}")
+        # print(parameters)
 
-        print(
-            f"🦑 Max shared memory cells for int16: {device.MAX_SHARED_MEMORY_PER_BLOCK / nb.int16.bitwidth}"
-        )
-        print(
-            f"🦑 Max shared memory cells for float32: {device.MAX_SHARED_MEMORY_PER_BLOCK / nb.float32.bitwidth}"
-        )
+        # print(
+        # f"🦑 Max shared memory cells for int16: {device.MAX_SHARED_MEMORY_PER_BLOCK / nb.int16.bitwidth}"
+        # )
+        # print(
+        # f"🦑 Max shared memory cells for float32: {device.MAX_SHARED_MEMORY_PER_BLOCK / nb.float32.bitwidth}"
+        # )
         return parameters
     raise RuntimeError("Missing GPU")