Skip to content

Commit b010d4a

Browse files
committed
implemented 2d performance tests
1 parent 1e80f60 commit b010d4a

File tree

3 files changed

+322
-1
lines changed

3 files changed

+322
-1
lines changed
File renamed without changes.

doc/articles/first_true_2d.py

Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
2+
3+
4+
import os
5+
import sys
6+
import timeit
7+
import typing as tp
8+
from itertools import repeat
9+
10+
from arraykit import first_true_2d
11+
import arraykit as ak
12+
13+
import matplotlib.pyplot as plt
14+
import numpy as np
15+
import pandas as pd
16+
17+
sys.path.append(os.getcwd())
18+
19+
20+
21+
class ArrayProcessor:
    """Base class for a timed operation over a single array.

    Subclasses override ``NAME`` (label shown in the plot legend), ``SORT``
    (key used to order bars within a subplot) and implement ``__call__``
    to perform the operation being measured.
    """

    NAME = ''
    SORT = -1

    def __init__(self, array: np.ndarray):
        # The fixture is stored once so that only the operation is timed.
        self.array = array
27+
28+
#-------------------------------------------------------------------------------
29+
class AKFirstTrueAxis0Forward(ArrayProcessor):
    """Time ``first_true_2d`` called with ``forward=True`` and ``axis=0``."""

    NAME = 'ak.first_true_2d(forward=True, axis=0)'
    SORT = 0

    def __call__(self):
        # The result is discarded; only the call itself is of interest.
        first_true_2d(self.array, axis=0, forward=True)
35+
36+
class AKFirstTrueAxis1Forward(ArrayProcessor):
    """Time ``first_true_2d`` called with ``forward=True`` and ``axis=1``."""

    NAME = 'ak.first_true_2d(forward=True, axis=1)'
    SORT = 0

    def __call__(self):
        # The result is discarded; only the call itself is of interest.
        first_true_2d(self.array, axis=1, forward=True)
42+
43+
class AKFirstTrueAxis0Reverse(ArrayProcessor):
    """Time ``first_true_2d`` called with ``forward=False`` and ``axis=0``."""

    NAME = 'ak.first_true_2d(forward=False, axis=0)'
    SORT = 1

    def __call__(self):
        # The result is discarded; only the call itself is of interest.
        first_true_2d(self.array, axis=0, forward=False)
49+
50+
class AKFirstTrueAxis1Reverse(ArrayProcessor):
    """Time ``first_true_2d`` called with ``forward=False`` and ``axis=1``."""

    NAME = 'ak.first_true_2d(forward=False, axis=1)'
    SORT = 1

    def __call__(self):
        # The result is discarded; only the call itself is of interest.
        first_true_2d(self.array, axis=1, forward=False)
56+
57+
58+
class NPNonZero(ArrayProcessor):
    """Time ``np.nonzero`` over the stored array as a NumPy reference point."""

    NAME = 'np.nonzero()'
    SORT = 3

    def __call__(self):
        # Unpacking into two names requires np.nonzero to return exactly two
        # index arrays. Iterating the coordinate pairs is deliberately left
        # out of the measurement.
        rows, cols = np.nonzero(self.array)
65+
66+
67+
class NPArgMaxAxis0(ArrayProcessor):
    """Time the NumPy ``any`` + ``argmax`` pair along ``axis=0``."""

    NAME = 'np.any(axis=0), np.argmax(axis=0)'
    SORT = 4

    def __call__(self):
        # Both results are discarded; the inverted ``any`` mask is computed
        # alongside ``argmax`` so that both calls are included in the timing.
        nothing_set = ~np.any(self.array, axis=0)
        first_index = np.argmax(self.array, axis=0)
74+
75+
class NPArgMaxAxis1(ArrayProcessor):
    """Time the NumPy ``any`` + ``argmax`` pair along ``axis=1``."""

    NAME = 'np.any(axis=1), np.argmax(axis=1)'
    SORT = 4

    def __call__(self):
        # Both results are discarded; the inverted ``any`` mask is computed
        # alongside ``argmax`` so that both calls are included in the timing.
        nothing_set = ~np.any(self.array, axis=1)
        first_index = np.argmax(self.array, axis=1)
82+
83+
84+
85+
#-------------------------------------------------------------------------------
86+
# Number of executions per timeit measurement.
NUMBER = 100


def seconds_to_display(seconds: float) -> str:
    """Format a total timing as a per-iteration duration with a unit.

    ``seconds`` is the total for ``NUMBER`` iterations; it is divided by
    ``NUMBER`` and rendered with one decimal place in microseconds,
    milliseconds or seconds depending on magnitude.
    """
    per_call = seconds / NUMBER
    # (exclusive upper bound in seconds, multiplier, unit label)
    for limit, scale, unit in ((1e-4, 1e6, 'µs'), (1e-1, 1e3, 'ms')):
        if per_call < limit:
            return f'{per_call * scale: .1f} ({unit})'
    return f'{per_call: .1f} (s)'
95+
96+
97+
def plot_performance(frame):
    """Render a grid of bar charts from benchmark results and open the image.

    ``frame`` is read through the columns ``'size'``, ``'fixture'``,
    ``'cls_processor'`` and ``'time'``. One subplot is drawn per
    (size, fixture) pair: rows are sizes, columns are fixtures, and each bar
    is one processor class. The figure is saved to ``/tmp/first_true.png``
    and then opened with ``eog`` on Linux or ``open`` elsewhere.
    """
    fixture_total = len(frame['fixture'].unique())
    cat_total = len(frame['size'].unique())
    processor_total = len(frame['cls_processor'].unique())
    # squeeze=False keeps ``axes`` two-dimensional even when there is only
    # one size or one fixture, so the [row][column] indexing below is valid.
    fig, axes = plt.subplots(cat_total, fixture_total, squeeze=False)

    cmap = plt.get_cmap('plasma')
    color = cmap(np.arange(processor_total) / processor_total)

    # category is the size of the array
    for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')):
        for fixture_count, (fixture_label, fixture) in enumerate(
                cat.groupby('fixture')):
            ax = axes[cat_count][fixture_count]

            # Set bar order. ``assign`` works on a copy, so the group slice is
            # not written to. A stable sort keeps processors that share a
            # SORT value in the same relative order in every subplot, which
            # the single shared legend below depends on.
            fixture = fixture.assign(
                    sort=[f.SORT for f in fixture['cls_processor']],
                    ).sort_values('sort', kind='mergesort')

            results = fixture['time'].values.tolist()
            names_display = [cls.NAME for cls in fixture['cls_processor']]
            post = ax.bar(names_display, results, color=color)

            density, position = fixture_label.split('-')
            # cat_label is the size of the array
            title = f'{cat_label:.0e}\n{FixtureFactory.DENSITY_TO_DISPLAY[density]}\n{FixtureFactory.POSITION_TO_DISPLAY[position]}'

            ax.set_title(title, fontsize=6)
            # box aspect is height / width, so 0.75 is wider than tall
            ax.set_box_aspect(0.75)
            time_max = fixture['time'].max()
            ax.set_yticks([0, time_max * 0.5, time_max])
            ax.set_yticklabels(['',
                    seconds_to_display(time_max * .5),
                    seconds_to_display(time_max),
                    ], fontsize=6)
            # Bars are identified by the legend, so x ticks/labels are hidden.
            ax.tick_params(
                    axis='x',
                    which='both',
                    bottom=False,
                    top=False,
                    labelbottom=False,
                    )

    fig.set_size_inches(9, 3.5) # width, height
    # The legend uses the bars and names of the last subplot drawn.
    fig.legend(post, names_display, loc='center right', fontsize=6)
    # horizontal, vertical
    fig.text(.05, .96, f'first_true_2d() Performance: {NUMBER} Iterations', fontsize=10)
    fig.text(.05, .90, get_versions(), fontsize=6)

    fp = '/tmp/first_true.png'
    plt.subplots_adjust(
            left=0.075,
            bottom=0.05,
            right=0.75,
            top=0.85,
            wspace=1, # width
            hspace=0.1,
            )
    plt.savefig(fp, dpi=300)

    if sys.platform.startswith('linux'):
        os.system(f'eog {fp}&')
    else:
        os.system(f'open {fp}')
167+
168+
169+
#-------------------------------------------------------------------------------
170+
171+
class FixtureFactory:
    """Base class for builders of 1D Boolean benchmark arrays.

    Subclasses set ``NAME`` to a ``'<density>-<position>'`` label whose two
    parts are keys of ``DENSITY_TO_DISPLAY`` and ``POSITION_TO_DISPLAY``,
    and override ``get_array`` to place ``True`` values.
    """

    NAME = ''

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        """Return a 1D Boolean array of length ``size`` filled with False."""
        return np.full(size, False, dtype=bool)

    @staticmethod
    def _get_array_filled(
            size: int,
            start_third: int, # 1 or 2
            density: float, # less than 1
            ) -> np.ndarray:
        """Return an all-False array with evenly stepped True values.

        True values are written from ``start_third``/3 of the way through the
        array to its end, at a step chosen so that roughly ``size * density``
        positions are set.

        Declared as a staticmethod (it takes neither ``self`` nor ``cls``) so
        it can be reached through an instance as well as through the class.
        """
        a = FixtureFactory.get_array(size)
        count = size * density
        start = int(len(a) * (start_third/3))
        length = len(a) - start
        if count <= 0 or length <= 0:
            # Nothing to fill; also avoids dividing by a zero count.
            return a
        # int() truncation gives 0 when more values are requested than
        # positions remain; clamp to 1 so np.arange gets a valid step.
        step = max(int(length / count), 1)
        fill = np.arange(start, len(a), step)
        a[fill] = True
        return a

    @classmethod
    def get_label_array(cls, size: int) -> tp.Tuple[str, np.ndarray]:
        """Return ``(cls.NAME, cls.get_array(size))``."""
        array = cls.get_array(size)
        return cls.NAME, array

    # Maps the density part of NAME to the text shown in subplot titles.
    DENSITY_TO_DISPLAY = {
        'single': '1 True',
        'tenth': '10% True',
        'third': '33% True',
    }

    # Maps the position part of NAME to the text shown in subplot titles.
    POSITION_TO_DISPLAY = {
        'first_third': 'Fill 1/3 to End',
        'second_third': 'Fill 2/3 to End',
    }
207+
208+
209+
class FFSingleFirstThird(FixtureFactory):
    """Fixture with exactly one True, located one third of the way in."""

    NAME = 'single-first_third'

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        flags = FixtureFactory.get_array(size)
        position = int(len(flags) * (1/3))
        flags[position] = True
        return flags
217+
218+
class FFSingleSecondThird(FixtureFactory):
    """Fixture with exactly one True, located two thirds of the way in."""

    NAME = 'single-second_third'

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        flags = FixtureFactory.get_array(size)
        position = int(len(flags) * (2/3))
        flags[position] = True
        return flags
226+
227+
228+
class FFTenthPostFirstThird(FixtureFactory):
    """Fixture filled at density 0.1 from the first third to the end."""

    NAME = 'tenth-first_third'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        filled = cls._get_array_filled(size, density=.1, start_third=1)
        return filled
234+
235+
236+
class FFTenthPostSecondThird(FixtureFactory):
    """Fixture filled at density 0.1 from the second third to the end."""

    NAME = 'tenth-second_third'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        filled = cls._get_array_filled(size, density=.1, start_third=2)
        return filled
242+
243+
244+
class FFThirdPostFirstThird(FixtureFactory):
    """Fixture filled at density 1/3 from the first third to the end."""

    NAME = 'third-first_third'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        filled = cls._get_array_filled(size, density=1/3, start_third=1)
        return filled
250+
251+
252+
class FFThirdPostSecondThird(FixtureFactory):
    """Fixture filled at density 1/3 from the second third to the end."""

    NAME = 'third-second_third'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        filled = cls._get_array_filled(size, density=1/3, start_third=2)
        return filled
258+
259+
260+
def get_versions() -> str:
    """Return a one-line summary of OS, ArrayKit and NumPy versions.

    The string ends with a newline.
    """
    import platform

    parts = (
        f'OS: {platform.system()}',
        f'ArrayKit: {ak.__version__}',
        f'NumPy: {np.__version__}',
    )
    return ' / '.join(parts) + '\n'
263+
264+
265+
# Processor classes to time against every fixture, in execution order.
CLS_PROCESSOR = (
    AKFirstTrueAxis0Forward,
    AKFirstTrueAxis1Forward,
    AKFirstTrueAxis0Reverse,
    AKFirstTrueAxis1Reverse,
    NPNonZero,
    NPArgMaxAxis0,
    NPArgMaxAxis1,
)
274+
275+
# Fixture factories used to build the input array for each measurement.
CLS_FF = (
    FFSingleFirstThird,
    FFSingleSecondThird,
    FFTenthPostFirstThird,
    FFTenthPostSecondThird,
    FFThirdPostFirstThird,
    FFThirdPostSecondThird,
)
283+
284+
285+
def run_test():
    """Time every processor on every fixture and size, then plot the results.

    For each size and each fixture factory in ``CLS_FF``, the 1D fixture is
    reshaped to 10 columns and each class in ``CLS_PROCESSOR`` is timed with
    ``timeit`` for ``NUMBER`` executions. The records are collected into a
    DataFrame, printed, and passed to ``plot_performance``.
    """
    records = []
    for size in (100_000, 1_000_000, 10_000_000):
        for ff in CLS_FF:
            fixture_label, fixture = ff.get_label_array(size)
            # TEMP: view the 1D fixture as a 2D array with 10 columns
            fixture = fixture.reshape(size // 10, 10)

            for cls in CLS_PROCESSOR:
                runner = cls(fixture)

                record = [cls, NUMBER, fixture_label, size]
                print(record)
                try:
                    # Expose only the callable under test to the timed
                    # statement rather than the whole local namespace.
                    result = timeit.timeit(
                            'runner()',
                            globals={'runner': runner},
                            number=NUMBER)
                except OSError:
                    # Record a failed measurement as missing.
                    result = np.nan
                record.append(result)
                records.append(record)

    f = pd.DataFrame.from_records(records,
            columns=('cls_processor', 'number', 'fixture', 'size', 'time')
            )
    print(f)
    plot_performance(f)
315+
316+
if __name__ == '__main__':
    # Run the benchmark and plot only when executed as a script.
    run_test()
319+
320+
321+

src/_arraykit.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3454,7 +3454,7 @@ first_true_2d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs)
34543454
return NULL;
34553455
}
34563456
if (PyArray_NDIM(array) != 2) {
3457-
PyErr_SetString(PyExc_ValueError, "Array must be 1-dimensional");
3457+
PyErr_SetString(PyExc_ValueError, "Array must be 2-dimensional");
34583458
return NULL;
34593459
}
34603460
if (PyArray_TYPE(array) != NPY_BOOL) {

0 commit comments

Comments
 (0)