Skip to content

Commit

Permalink
tests passing
Browse files Browse the repository at this point in the history
  • Loading branch information
Ilya Antonov committed Jul 20, 2020
1 parent 18e5333 commit a42f8fd
Show file tree
Hide file tree
Showing 10 changed files with 398 additions and 337 deletions.
27 changes: 0 additions & 27 deletions app_builder.py

This file was deleted.

80 changes: 32 additions & 48 deletions entrypoint.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import math
import itertools

pi = math.pi

Expand All @@ -8,10 +9,15 @@

from functions.potential import potential_function_cuda
from kernels.potential_evaluator import PotentialEvaluator
from utils.array_stacker import ArrayStacker

# Parameters for simulation ###################################################
NUMBER_OF_PHI_POINTS = 100
NUMBER_OF_FIELD_POINTS = 50
NUMBER_OF_FIELD_POINTS = 40
NUMBER_OF_FIELD_POINTS_PER_RUN = 20
NUMBER_OF_FIELD_RUNS = (
NUMBER_OF_FIELD_POINTS - 1
) // NUMBER_OF_FIELD_POINTS_PER_RUN + 1
ALPHA = 1
LOWER = -0.5
UPPER = 1.5
Expand All @@ -20,68 +26,46 @@
phixx_array = np.linspace(-pi, pi, NUMBER_OF_PHI_POINTS)

# Kernels #####################################################################
potential_evaluator = PotentialEvaluator(
NUMBER_OF_FIELD_POINTS, NUMBER_OF_PHI_POINTS, potential_function_cuda
)
THREADS_PER_BLOCK = potential_evaluator.allocate_max_threads()
BLOCKS_PER_GRID = (NUMBER_OF_FIELD_POINTS, NUMBER_OF_FIELD_POINTS)
potential_evaluator = PotentialEvaluator(NUMBER_OF_PHI_POINTS, potential_function_cuda)
THREADS_PER_BLOCK = potential_evaluator.allocate_max_threads(8)
BLOCKS_PER_GRID = (NUMBER_OF_FIELD_POINTS_PER_RUN, NUMBER_OF_FIELD_POINTS_PER_RUN)
potential_evaluator.verify_blocks_per_grid(BLOCKS_PER_GRID)

# Execution ###################################################################
DEVICE_lr_array = cuda.to_device(lr_array)
DEVICE_phixx_array = cuda.to_device(phixx_array)
DEVICE_potential_array = cuda.device_array(
shape=(
NUMBER_OF_FIELD_POINTS,
NUMBER_OF_FIELD_POINTS,
NUMBER_OF_FIELD_POINTS_PER_RUN,
NUMBER_OF_FIELD_POINTS_PER_RUN,
NUMBER_OF_PHI_POINTS,
NUMBER_OF_PHI_POINTS,
NUMBER_OF_PHI_POINTS,
),
dtype=np.float32,
)

# potential_evaluator.kernel[BLOCKS_PER_GRID, THREADS_PER_BLOCK](
# DEVICE_phixx_array, DEVICE_lr_array, ALPHA, DEVICE_potential_array
# )
# print(DEVICE_potential_array.copy_to_host().shape)

@cuda.jit(device=True)
def dummy_potential_function(phi_array, L: float, R: float, alpha: float):
return (
alpha * 10 ** 5
+ phi_array[0] * 10 ** 4
+ phi_array[1] * 10 ** 3
+ phi_array[2] * 10 ** 2
+ L * 10 ** 1
+ R * 10 ** 0
)

@cuda.jit
def kernel_wrapped_func(
phi01: float,
phi02: float,
phi03: float,
L: float,
R: float,
alpha: float,
store_value,
# Go through each of the field sections and evaluate ##########################
FIELD_SECTIONS = [[None] * NUMBER_OF_FIELD_RUNS for i in range(0, NUMBER_OF_FIELD_RUNS)]
for (L_RUN, R_RUN) in itertools.product(
range(0, NUMBER_OF_FIELD_RUNS), range(0, NUMBER_OF_FIELD_RUNS)
):
store_value[0] = dummy_potential_function(
(phi01, phi02, phi03), L, R, alpha
print(
f"🦑 Running (L={L_RUN}/{NUMBER_OF_FIELD_RUNS - 1}), (R={R_RUN}/{NUMBER_OF_FIELD_RUNS - 1})"
)
L_OFFSET = int(L_RUN * NUMBER_OF_FIELD_POINTS_PER_RUN)
R_OFFSET = int(R_RUN * NUMBER_OF_FIELD_POINTS_PER_RUN)
potential_evaluator.kernel[BLOCKS_PER_GRID, THREADS_PER_BLOCK](
DEVICE_phixx_array,
DEVICE_lr_array,
L_OFFSET,
R_OFFSET,
ALPHA,
DEVICE_potential_array,
)

import numpy as np
store_result = cuda.device_array(shape=(1), dtype=np.float32)
kernel_wrapped_func[1,1](1, 2, 3, 4, 5, 6, store_result)

FIELD_SECTIONS[L_RUN][R_RUN] = DEVICE_potential_array.copy_to_host()

assert store_result.copy_to_host()[0] == 612345
TOTAL_FIELD = ArrayStacker.stack_into_square(FIELD_SECTIONS)

# kernel[NUMBER_OF_FIELD_POINTS, THREADS_PER_BLOCK](
# , DEVICE_potential_array
# )
# # potential_array =
# # DEVICE_potential_array_to_minimize = DEVICE_potential_array.copy_to_host()
# print("Original")
#
# print(DEVICE_potential_array.copy_to_host()[0][0][1])
print(TOTAL_FIELD)
87 changes: 0 additions & 87 deletions flycheck_entrypoint.py

This file was deleted.

34 changes: 22 additions & 12 deletions kernels/potential_evaluator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Callable, Tuple
from typing import List, Callable, Tuple, Optional

from numba import cuda
from numba.cuda.cudadrv.devicearray import DeviceNDArray
Expand All @@ -8,30 +8,30 @@

class PotentialEvaluator:
def __init__(
self,
number_of_field_points: int,
number_of_phi_points: int,
potential_function_cuda: Callable,
self, number_of_phi_points: int, potential_function_cuda: Callable,
):
self.NUMBER_OF_PHI_POINTS = number_of_phi_points
self.NUMBER_OF_FIELD_POINTS = number_of_field_points
self.potential_function_cuda = potential_function_cuda

self.kernel = self.kernel_wrapper()
self.gpu_info = gpu_check()

def allocate_max_threads(self) -> Tuple[int, int, int]:
def allocate_max_threads(
self, user_defined_number: Optional[int]=None
) -> Tuple[int, int, int]:
print(
f"""Thread parameters:
> Max threads per block: {self.gpu_info['max_threads_per_block']}
> Max threads in x: {self.gpu_info['max_block_dim_x']}
> Max threads in y: {self.gpu_info['max_block_dim_y']}
> Max threads in z: {self.gpu_info['max_block_dim_z']}"""
)

max_threads_approximation = int(
self.gpu_info["max_threads_per_block"] ** (1 / 3)
)
if user_defined_number is not None:
max_threads_approximation = user_defined_number

max_thread_allocation = (
min(max_threads_approximation, self.gpu_info["max_block_dim_x"]),
min(max_threads_approximation, self.gpu_info["max_block_dim_y"]),
Expand Down Expand Up @@ -63,20 +63,24 @@ def verify_blocks_per_grid(self, blocks_per_grid: Tuple) -> bool:
return True

def kernel_wrapper(self):
NUMBER_OF_FIELD_POINTS = self.NUMBER_OF_FIELD_POINTS
NUMBER_OF_PHI_POINTS = self.NUMBER_OF_PHI_POINTS
potential_function_cuda = self.potential_function_cuda

@cuda.jit
def kernel(
phixx_array: List[float],
lr_array: List[float],
L_offset: int,
R_offset: int,
alpha: float,
array_out: DeviceNDArray,
):
"""
phixx_array: array of the values that phi01, phi02, phi03 can take
lr_array: array of the values for phil and phir
L_offset, R_offset: because of finite memory on device, we will launch this
function multiple times but with different offsets to cover the whole
lr_array
alpha: variable parameter
array_out: allocate either with cuda.device_array or passing in a numpy array
Expand All @@ -88,19 +92,25 @@ def kernel(
phi03_idx = cuda.threadIdx.z
L = cuda.blockIdx.x
R = cuda.blockIdx.y
L_offset = int(L + L_offset)
R_offset = int(R + R_offset)

# Traverse over the full grid
while phi01_idx < NUMBER_OF_PHI_POINTS:
while phi02_idx < NUMBER_OF_PHI_POINTS:
while phi03_idx < NUMBER_OF_PHI_POINTS:
array_out[L][R][phi01_idx][phi02_idx][phi03_idx] = potential_function_cuda(
L_FIELD = lr_array[L_offset]
R_FIELD = lr_array[R_offset]
array_out[L][R][phi01_idx][phi02_idx][
phi03_idx
] = potential_function_cuda(
(
phixx_array[phi01_idx],
phixx_array[phi02_idx],
phixx_array[phi03_idx],
),
lr_array[L],
lr_array[R],
L_FIELD,
R_FIELD,
alpha,
)

Expand Down
Loading

0 comments on commit a42f8fd

Please sign in to comment.