More Robustly Typing Constants / Letting Numba Parallelize For PERFT …

…Speedups - More robust typing of some constant values. Most notably, the literal 1 on line 205 of batch_first/__init__.py wasn't explicitly typed as np.uint64, which caused test failures when running under my Windows boot (now resolved). - Turned on Numba's explicit loop parallelization (parallel=True with prange) in a few places, specifically to improve PERFT scores (on devices like my i7-6850K running at stock settings).
SamRagusa · Aug 14, 2020 · ef6db6a · ef6db6a
1 parent f95dc47
commit ef6db6a
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 10 deletions.
diff --git a/batch_first/__init__.py b/batch_first/__init__.py
@@ -202,7 +202,7 @@ def generate_move_filter_table():
     BB_A6, BB_B6, BB_C6, BB_D6, BB_E6, BB_F6, BB_G6, BB_H6,
     BB_A7, BB_B7, BB_C7, BB_D7, BB_E7, BB_F7, BB_G7, BB_H7,
     BB_A8, BB_B8, BB_C8, BB_D8, BB_E8, BB_F8, BB_G8, BB_H8
-] = np.array([1 << sq for sq in SQUARES], dtype=np.uint64)
+] = np.array([np.uint64(1) << sq for sq in SQUARES], dtype=np.uint64)
 
 BB_CORNERS = BB_A1 | BB_H1 | BB_A8 | BB_H8
 
@@ -219,7 +219,7 @@ def generate_move_filter_table():
     BB_FILE_F,
     BB_FILE_G,
     BB_FILE_H
-] = np.array([0x0101010101010101 << i for i in range(8)], dtype=np.uint64)
+] = np.array([np.uint64(0x0101010101010101) << np.uint8(i) for i in range(8)], dtype=np.uint64)
 
 BB_RANKS = [
     BB_RANK_1,
@@ -230,7 +230,7 @@ def generate_move_filter_table():
     BB_RANK_6,
     BB_RANK_7,
     BB_RANK_8
-] = np.array([0xff << (8 * i) for i in range(8)], dtype=np.uint64)
+] = np.array([np.uint64(0xff) << np.uint8(8 * i) for i in range(8)], dtype=np.uint64)
 
 
 BB_BACKRANKS = BB_RANK_1 | BB_RANK_8

diff --git a/batch_first/numba_board.py b/batch_first/numba_board.py
@@ -239,7 +239,7 @@ def _to_chess960_tuple(board_state, move):
     return move[0], move[1], move[2]
 
 
-@njit
+@njit(parallel=True)
 def push_moves(struct_array, move_array):
     """
     Pushes the given moves for the given boards (makes the moves), while doing this it also incrementally updates
@@ -263,7 +263,7 @@ def push_moves(struct_array, move_array):
     when some refactoring is done this may happen automatically (things like storing occupied_w and occupied_b
     as an array so it can be indexed with turn).
     """
-    for j in range(len(struct_array)):
+    for j in nb.prange(len(struct_array)):
         move_from_square, move_to_square, move_promotion = _to_chess960_tuple(struct_array[j], move_array[j])
 
         # Reset ep square.
@@ -1150,9 +1150,9 @@ def is_legal_move(board_scalar, move):
     return is_pseudo_legal_move(board_scalar, move) and not is_into_check(board_scalar, move[0], move[1])
 
 
-@njit
+@njit(parallel=True)
 def perft_test_move_gen_helper(struct_array):
-    for j in range(len(struct_array)):
+    for j in nb.prange(len(struct_array)):
         king = msb(struct_array[j]['kings'] & struct_array[j]['occupied_co'][struct_array[j]['turn']])
 
         blockers = _slider_blockers(struct_array[j], king)
@@ -1169,8 +1169,8 @@ def perft_test_move_gen_helper(struct_array):
                 struct_array[j]['unexplored_moves'][legal_move_index] = struct_array[j]['unexplored_moves'][i]
                 legal_move_index += 1
 
-        struct_array[j]['unexplored_moves'][legal_move_index:struct_array[j]['children_left'], :] = 255
-        struct_array[j]['children_left'] = legal_move_index
+        struct_array[j]['unexplored_moves'][legal_move_index:struct_array[j]['children_left']] = 255
+        struct_array['children_left'][j] = legal_move_index
 
 
 def perft_test(struct_array, depth, print_info=False):
@@ -1199,4 +1199,4 @@ def perft_test(struct_array, depth, print_info=False):
 
     push_moves(repeated_struct_array, legal_moves)
 
-    return perft_test(repeated_struct_array, depth - 1)
+    return perft_test(repeated_struct_array, depth - 1, print_info)