Skip to content

Commit

Permalink
first simulations - there is a shift which causes problems
Browse files Browse the repository at this point in the history
  • Loading branch information
Ilya Antonov committed Jul 20, 2020
1 parent a42f8fd commit 80190d7
Show file tree
Hide file tree
Showing 10 changed files with 427 additions and 53 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
__pycache__/
.ipynb_checkpoints/
11 changes: 9 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
run:
jupyter:
jupyter notebook --no-browser --port 8888 --allow-root --NotebookApp.token='' --NotebookApp.password=''
cuda:
@@time(\
time python potential_minimum_gridsearch.py \
time python entrypoint.py \
)
@echo -e "\n🐬 Finished\n"
cpu:
@@time(\
time python potential_python.py \
)
@echo -e "\n🐬 Finished\n"
61 changes: 50 additions & 11 deletions entrypoint.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
import math
import itertools
from collections import defaultdict

pi = math.pi

import numpy as np
from numba import cuda
from numba.cuda.cudadrv.devicearray import DeviceNDArray
import matplotlib.pyplot as plt

from functions.potential import potential_function_cuda
from kernels.potential_evaluator import PotentialEvaluator
from kernels.potential_minimum_searcher import PotentialMinimumSearcher
from utils.array_stacker import ArrayStacker

# Parameters for simulation ###################################################
NUMBER_OF_PHI_POINTS = 100
NUMBER_OF_FIELD_POINTS = 40
NUMBER_OF_FIELD_POINTS_PER_RUN = 20
NUMBER_OF_PHI_POINTS = 10
NUMBER_OF_FIELD_POINTS = 10
NUMBER_OF_FIELD_POINTS_PER_RUN = 10
NUMBER_OF_FIELD_RUNS = (
NUMBER_OF_FIELD_POINTS - 1
) // NUMBER_OF_FIELD_POINTS_PER_RUN + 1
Expand All @@ -26,10 +29,13 @@
phixx_array = np.linspace(-pi, pi, NUMBER_OF_PHI_POINTS)

# Kernels #####################################################################
potential_evaluator = PotentialEvaluator(NUMBER_OF_PHI_POINTS, potential_function_cuda)
THREADS_PER_BLOCK = potential_evaluator.allocate_max_threads(8)
BLOCKS_PER_GRID = (NUMBER_OF_FIELD_POINTS_PER_RUN, NUMBER_OF_FIELD_POINTS_PER_RUN)
potential_evaluator.verify_blocks_per_grid(BLOCKS_PER_GRID)

potential_evaluator = PotentialEvaluator(NUMBER_OF_PHI_POINTS, potential_function_cuda)
THREADS_PER_BLOCK_potential_evaluation = potential_evaluator.allocate_max_threads(8)

potential_minimum_searcher = PotentialMinimumSearcher(NUMBER_OF_PHI_POINTS)
THREADS_PER_BLOCK_potential_search = potential_minimum_searcher.allocate_max_threads()

# Execution ###################################################################
DEVICE_lr_array = cuda.to_device(lr_array)
Expand All @@ -44,9 +50,14 @@
),
dtype=np.float32,
)
DEVICE_grid_search_result_array = cuda.device_array(
shape=(NUMBER_OF_FIELD_POINTS_PER_RUN, NUMBER_OF_FIELD_POINTS_PER_RUN, 4),
dtype=np.float32,
)

# Go through teach of the field section and evaluate ##########################
FIELD_SECTIONS = [[None] * NUMBER_OF_FIELD_RUNS for i in range(0, NUMBER_OF_FIELD_RUNS)]
quadrants = defaultdict(lambda: [[None] * NUMBER_OF_FIELD_RUNS for i in range(0, NUMBER_OF_FIELD_RUNS)])

for (L_RUN, R_RUN) in itertools.product(
range(0, NUMBER_OF_FIELD_RUNS), range(0, NUMBER_OF_FIELD_RUNS)
):
Expand All @@ -55,17 +66,45 @@
)
L_OFFSET = int(L_RUN * NUMBER_OF_FIELD_POINTS_PER_RUN)
R_OFFSET = int(R_RUN * NUMBER_OF_FIELD_POINTS_PER_RUN)
potential_evaluator.kernel[BLOCKS_PER_GRID, THREADS_PER_BLOCK](
potential_evaluator.kernel[BLOCKS_PER_GRID, THREADS_PER_BLOCK_potential_evaluation](
DEVICE_phixx_array,
DEVICE_lr_array,
L_OFFSET,
R_OFFSET,
ALPHA,
DEVICE_potential_array,
)
potential_minimum_searcher.kernel[
BLOCKS_PER_GRID, THREADS_PER_BLOCK_potential_search
](DEVICE_potential_array, DEVICE_grid_search_result_array)


grid_search_result_array = DEVICE_grid_search_result_array.copy_to_host()
quadrants["potential"][L_RUN][R_RUN] = grid_search_result_array[:,:,0]
quadrants["phi01"][L_RUN][R_RUN] = phixx_array[grid_search_result_array[:,:,1].astype(int)]
quadrants["phi02"][L_RUN][R_RUN] = phixx_array[grid_search_result_array[:,:,2].astype(int)]
quadrants["phi03"][L_RUN][R_RUN] = phixx_array[grid_search_result_array[:,:,3].astype(int)]

FIELD_SECTIONS[L_RUN][R_RUN] = DEVICE_potential_array.copy_to_host()
result = {}
for key, value in quadrants.items():
print(f"🦑--------------------{key}--------------------")
result[key] = ArrayStacker.stack_into_square(value)
print(result[key])

TOTAL_FIELD = ArrayStacker.stack_into_square(FIELD_SECTIONS)

print(TOTAL_FIELD)
###############################################################################
# sudo yum install python36-tkinter and do ssh -X to print on own computer #
###############################################################################
fig, ax = plt.subplots(1, 1, sharex=True, sharey=True, figsize=(5, 5))

im = ax.imshow(
# result["potential"],
result["phi01"],
extent = [LOWER, UPPER, LOWER, UPPER],
origin= 'lower',
cmap='cividis',
# cmap='YlGnBu'
# interpolation='spline36'
)
cbar = fig.colorbar(im)
plt.show()
318 changes: 318 additions & 0 deletions entrypoint_notebook.ipynb

Large diffs are not rendered by default.

31 changes: 18 additions & 13 deletions kernels/potential_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,17 @@ def __init__(
self.gpu_info = gpu_check()

def allocate_max_threads(
self, user_defined_number: Optional[int]=None
self, user_defined_number: Optional[int] = None,
verbose=False
) -> Tuple[int, int, int]:
print(
f"""Thread parameters:
if verbose:
print(
f"""Thread parameters:
> Max threads per block: {self.gpu_info['max_threads_per_block']}
> Max threads in x: {self.gpu_info['max_block_dim_x']}
> Max threads in y: {self.gpu_info['max_block_dim_y']}
> Max threads in z: {self.gpu_info['max_block_dim_z']}"""
)
)
max_threads_approximation = int(
self.gpu_info["max_threads_per_block"] ** (1 / 3)
)
Expand All @@ -37,17 +39,18 @@ def allocate_max_threads(
min(max_threads_approximation, self.gpu_info["max_block_dim_y"]),
min(max_threads_approximation, self.gpu_info["max_block_dim_z"]),
)
print(f"🐳 Allocating (THREADS_PER_BLOCK = {max_thread_allocation})")
print(f"🐳 {'Allocating':<20} THREADS_PER_BLOCK = {max_thread_allocation}")

return max_thread_allocation

def verify_blocks_per_grid(self, blocks_per_grid: Tuple) -> bool:
print(
f"""Block parameters:
def verify_blocks_per_grid(self, blocks_per_grid: Tuple, verbose=False) -> bool:
if verbose:
print(
f"""Block parameters:
> Max blocks in x: {self.gpu_info['max_grid_dim_x']}
> Max blocks in y: {self.gpu_info['max_grid_dim_y']}
> Max blocks in z: {self.gpu_info['max_grid_dim_z']}"""
)
)
for (block_dim, max_dim) in zip(
blocks_per_grid,
[
Expand All @@ -59,7 +62,7 @@ def verify_blocks_per_grid(self, blocks_per_grid: Tuple) -> bool:
if block_dim > max_dim:
print("🦑 Allocating too many blocks")
return False
print(f"🐳 Verified BLOCKS_PER_GRID={blocks_per_grid}")
print(f"🐳 {'Verified':<20} BLOCKS_PER_GRID={blocks_per_grid}")
return True

def kernel_wrapper(self):
Expand All @@ -78,9 +81,11 @@ def kernel(
"""
phixx_array: array of the values that phi01, phi02, phi03
lr_array: array of the values for phil and phir
L_offset, R_offset: because of finite memory on device, we will launch this
function multiple times but with different offsets to cover the whole
lr_array
L_offset, R_offset: because of finite memory on device, grid search is performed
on separate qudrants of the field.
In order to a global lr_array, this offset
if introduced to access elements for the different quadrants
alpha: variables parametr
array_out: allocate either with cuda.device_array or passing in a numpy array
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,17 @@ def __init__(self, number_of_phi_points: int):
self.gpu_info = gpu_check()

def allocate_max_threads(
self, user_defined_number: Optional[int] = None
self, user_defined_number: Optional[int] = None,
verbose=False
) -> Tuple[int, int]:
print(
f"""Thread parameters:
if verbose:
print(
f"""Thread parameters:
> Max threads per block: {self.gpu_info['max_threads_per_block']}
> Max threads in x: {self.gpu_info['max_block_dim_x']}
> Max threads in y: {self.gpu_info['max_block_dim_y']}
> Max threads in z: {self.gpu_info['max_block_dim_z']}"""
)
)
max_threads_approximation = int(
self.gpu_info["max_threads_per_block"] ** (1 / 2)
)
Expand All @@ -35,7 +37,7 @@ def allocate_max_threads(
min(max_threads_approximation, self.gpu_info["max_block_dim_x"]),
min(max_threads_approximation, self.gpu_info["max_block_dim_x"]),
)
print(f"🐳 Allocating (THREADS_PER_BLOCK = {max_thread_allocation})")
print(f"🐳 {'Allocating':<20} THREADS_PER_BLOCK = {max_thread_allocation}")

return max_thread_allocation

Expand All @@ -44,18 +46,15 @@ def kernel_wrapper(self):

@cuda.jit
def kernel(
potential_grid: DeviceNDArray,
# L_offset: int,
# R_offset: int,
array_out: DeviceNDArray,
potential_grid: DeviceNDArray, array_out: DeviceNDArray,
):
"""Take a 5D grid loaded into memory and find the minimum for each L-R point
L_offset, R_offset: because of finite memory on device, we might need an offset
potential_grid: array allocated on the device that stores the results of
previous potential evaluation. L, R index the field that
it was evaluated at
potential_grid: array with the evaluated potential values at each L,R point
array_out: array with a [min_potential, min_phi01, min_phi02, min_phi03] for
each L,R point
"""

L = cuda.blockIdx.x
R = cuda.blockIdx.y

Expand Down Expand Up @@ -99,7 +98,7 @@ def kernel(
phi01_idx += cuda.blockDim.x
cuda.syncthreads()

# # Project from line to points (go across the line) ################
# Project from line to points (go across the line) ################
for (phi01_idx, potential) in enumerate(potential_grid[L][R][:, 0, 0]):
array_out[L][R][0] = potential_grid[L][R][0][0][0]
if potential < potential_grid[L][R][0][0][0]:
Expand Down
5 changes: 2 additions & 3 deletions potential_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import numpy as np
from scipy import optimize

NUMBA_TUPLE_TYPE = nb.typeof((0.0, 0.0, 0.0, 0.0))
FLUX = float
FLUX_NUMBER = int
cos = np.cos
Expand Down Expand Up @@ -61,8 +60,8 @@ def minimize(L: FLUX, R: FLUX, alpha: float, number_of_phi_points: int = 100):
###############################################################################
# Common parameters #
###############################################################################
NUMBER_OF_PHI_POINTS = 50
NROWSCOLS = 2
NUMBER_OF_PHI_POINTS = 10
NROWSCOLS = 10
ALPHA = 1
LOWER = -0.5
UPPER = 1.5
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
from unittest.mock import Mock
from unittest.mock import patch

from kernels.potential_minimum_search import PotentialMinimumSearcher
from kernels.potential_minimum_searcher import PotentialMinimumSearcher
import numpy as np
from numba import cuda


class TestPotentialEvaluator(unittest.TestCase):
class TestPotentialMinimumSearcher(unittest.TestCase):
def setUp(self):

self.NUMBER_OF_PHI_POINTS = 2
Expand Down
6 changes: 6 additions & 0 deletions utils/array_stacker.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@


class ArrayStacker:
@staticmethod
def extract_ith_entry(arrays_to_extract: List[List[np.ndarray]]):
"""Extract the ith entry of an array"""

pass

@classmethod
def stack_into_square(cls, arrays_to_stack: List[List[np.ndarray]]) -> np.ndarray:
cls.square_check(arrays_to_stack)
Expand Down
16 changes: 8 additions & 8 deletions utils/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ def gpu_check():
"max_grid_dim_y": device.MAX_GRID_DIM_Y,
"max_grid_dim_z": device.MAX_GRID_DIM_Z,
}
print(f"🦑 Found device {str(device)}")
print(parameters)
# print(f"🦑 Found device {str(device)}")
# print(parameters)

print(
f"🦑 Max shared memory cells for int16: {device.MAX_SHARED_MEMORY_PER_BLOCK / nb.int16.bitwidth}"
)
print(
f"🦑 Max shared memory cells for float32: {device.MAX_SHARED_MEMORY_PER_BLOCK / nb.float32.bitwidth}"
)
# print(
# f"🦑 Max shared memory cells for int16: {device.MAX_SHARED_MEMORY_PER_BLOCK / nb.int16.bitwidth}"
# )
# print(
# f"🦑 Max shared memory cells for float32: {device.MAX_SHARED_MEMORY_PER_BLOCK / nb.float32.bitwidth}"
# )
return parameters
raise RuntimeError("Missing GPU")

0 comments on commit 80190d7

Please sign in to comment.