Skip to content

Commit 433160d

Browse files
authored
Merge pull request #84 from static-frame/81/first-true
`first_true_1d`
2 parents 69e9d3f + b010d4a commit 433160d

File tree

7 files changed

+1118
-2
lines changed

7 files changed

+1118
-2
lines changed

doc/articles/first_true.png

146 KB
Loading

doc/articles/first_true_1d.py

Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
2+
3+
4+
import os
5+
import sys
6+
import timeit
7+
import typing as tp
8+
from itertools import repeat
9+
10+
from arraykit import first_true_1d
11+
import arraykit as ak
12+
13+
import matplotlib.pyplot as plt
14+
import numpy as np
15+
import pandas as pd
16+
17+
# Add the current working directory to the module search path.
sys.path.append(os.getcwd())
18+
19+
20+
21+
class ArrayProcessor:
    """Base class for a timed routine bound to a single array.

    Subclasses provide ``NAME`` (a display label), ``SORT`` (an ordering
    key) and a ``__call__`` that performs the work on ``self.array``.
    """
    NAME = ''
    SORT = -1

    def __init__(self, array: np.ndarray) -> None:
        # Keep a reference only; the array is not copied.
        self.array = array
27+
28+
#-------------------------------------------------------------------------------
29+
class AKFirstTrue(ArrayProcessor):
    """Timed case: ``first_true_1d`` called with ``forward=True``."""
    NAME = 'ak.first_true_1d()'
    SORT = 0

    def __call__(self):
        # The result is discarded; only the call itself is of interest.
        first_true_1d(self.array, forward=True)
35+
36+
class PYLoop(ArrayProcessor):
    """Timed case: plain Python iteration that stops at the first True."""
    NAME = 'Python Loop'
    SORT = 0

    def __call__(self):
        # The explicit comparison is kept as written: this loop is the
        # workload being timed, so its form is part of the measurement.
        for index, element in enumerate(self.array):
            if element == True:
                break
44+
45+
46+
class NPNonZero(ArrayProcessor):
    """Timed case: first entry of the first index array from ``np.nonzero``."""
    NAME = 'np.nonzero()'
    SORT = 3

    def __call__(self):
        positions = np.nonzero(self.array)[0]
        _ = positions[0]
52+
53+
class NPArgMax(ArrayProcessor):
    """Timed case: a single ``np.argmax`` call on the array."""
    NAME = 'np.argmax()'
    SORT = 1

    def __call__(self):
        # Result intentionally unused.
        np.argmax(self.array)
59+
60+
class NPNotAnyArgMax(ArrayProcessor):
    """Timed case: an ``np.any`` check followed by ``np.argmax``."""
    NAME = 'np.any(), np.argmax()'
    SORT = 2

    def __call__(self):
        data = self.array
        # Both results are discarded; the two calls together are the workload.
        _ = not np.any(data)
        _ = np.argmax(data)
67+
68+
#-------------------------------------------------------------------------------
69+
NUMBER = 200


def seconds_to_display(seconds: float) -> str:
    """Format a total duration as a per-iteration time with a unit.

    ``seconds`` is first divided by ``NUMBER``. The quotient is rendered
    with one decimal place in microseconds when below 1e-4, in milliseconds
    when below 1e-1, and in seconds otherwise.
    """
    per_call = seconds / NUMBER
    # (exclusive upper bound, multiplier, unit label), checked in order
    for limit, scale, unit in ((1e-4, 1e6, 'µs'), (1e-1, 1e3, 'ms')):
        if per_call < limit:
            return f'{per_call * scale: .1f} ({unit})'
    return f'{per_call: .1f} (s)'
78+
79+
80+
def plot_performance(frame):
    """Draw a grid of bar charts of processor timings, save it and open it.

    One row of subplots is created per distinct ``size`` value and one
    column per distinct ``fixture`` value. Each subplot holds one bar per
    row of that group, ordered by the processor class's ``SORT`` attribute
    and labelled with its ``NAME``. The figure is saved to
    ``/tmp/first_true.png`` and then opened with ``eog`` on Linux or
    ``open`` on other platforms.

    Args:
        frame: DataFrame with the columns ``cls_processor``, ``fixture``,
            ``size`` and ``time``. ``fixture`` values must have the form
            ``'<density>-<position>'`` using keys of
            ``FixtureFactory.DENSITY_TO_DISPLAY`` and
            ``FixtureFactory.POSITION_TO_DISPLAY``.
    """
    fixture_total = len(frame['fixture'].unique())
    cat_total = len(frame['size'].unique())
    processor_total = len(frame['cls_processor'].unique())
    # squeeze=False keeps `axes` two-dimensional even with a single row or
    # column, so the axes[row][col] lookup below always works.
    fig, axes = plt.subplots(cat_total, fixture_total, squeeze=False)

    cmap = plt.get_cmap('plasma')
    # one evenly spaced colormap sample per processor
    color = cmap(np.arange(processor_total) / processor_total)

    # category is the size of the array
    for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')):
        for fixture_count, (fixture_label, fixture) in enumerate(
                cat.groupby('fixture')):
            ax = axes[cat_count][fixture_count]

            # set order; assign() returns a new frame instead of writing a
            # column into the group object handed out by groupby
            fixture = fixture.assign(
                    sort=[f.SORT for f in fixture['cls_processor']],
                    ).sort_values('sort')

            results = fixture['time'].values.tolist()
            names_display = [cls.NAME for cls in fixture['cls_processor']]
            post = ax.bar(names_display, results, color=color)

            density, position = fixture_label.split('-')
            # cat_label is the size of the array
            title = f'{cat_label:.0e}\n{FixtureFactory.DENSITY_TO_DISPLAY[density]}\n{FixtureFactory.POSITION_TO_DISPLAY[position]}'

            ax.set_title(title, fontsize=6)
            ax.set_box_aspect(0.75) # height / width: wider than tall
            time_max = fixture['time'].max()
            ax.set_yticks([0, time_max * 0.5, time_max])
            ax.set_yticklabels(['',
                    seconds_to_display(time_max * .5),
                    seconds_to_display(time_max),
                    ], fontsize=6)
            # bar names are shown once in the figure legend, not per axis
            ax.tick_params(
                    axis='x',
                    which='both',
                    bottom=False,
                    top=False,
                    labelbottom=False,
                    )

    fig.set_size_inches(9, 3.5) # width, height
    # legend handles and labels come from the last subplot drawn
    fig.legend(post, names_display, loc='center right', fontsize=8)
    # horizontal, vertical
    fig.text(.05, .96, f'first_true_1d() Performance: {NUMBER} Iterations', fontsize=10)
    fig.text(.05, .90, get_versions(), fontsize=6)

    fp = '/tmp/first_true.png'
    plt.subplots_adjust(
            left=0.075,
            bottom=0.05,
            right=0.80,
            top=0.85,
            wspace=1, # width
            hspace=0.1,
            )
    plt.savefig(fp, dpi=300)

    if sys.platform.startswith('linux'):
        os.system(f'eog {fp}&')
    else:
        os.system(f'open {fp}')
150+
151+
152+
#-------------------------------------------------------------------------------
153+
154+
class FixtureFactory:
    """Base factory for the boolean arrays used as fixtures.

    ``get_array`` on this base class returns an all-False array;
    ``get_label_array`` pairs ``NAME`` with whatever ``get_array`` returns
    for the class it is called on.
    """
    NAME = ''

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        """Return a boolean array of length ``size`` filled with False."""
        return np.full(size, False, dtype=bool)

    @staticmethod
    def _get_array_filled(
            size: int,
            start_third: int, # 1 or 2
            density: float, # less than 1
            ) -> np.ndarray:
        """Return an array with evenly spaced True values in its tail.

        True values begin at index ``int(size * start_third / 3)`` and
        repeat at a fixed step chosen so that about ``size * density`` of
        them fit before the end of the array.

        Declared as a staticmethod so it can be called through the class,
        a subclass or an instance alike. If there is nothing to fill
        (zero ``size``, non-positive ``density``, or a start at or past the
        end), the all-False array is returned unchanged.
        """
        a = FixtureFactory.get_array(size)
        count = size * density
        start = int(len(a) * (start_third/3))
        length = len(a) - start
        if count <= 0 or length <= 0:
            return a
        # Clamp to 1: a step of 0 (count larger than length) makes
        # np.arange raise.
        step = max(1, int(length / count))
        fill = np.arange(start, len(a), step)
        a[fill] = True
        return a

    @classmethod
    def get_label_array(cls, size: int) -> tp.Tuple[str, np.ndarray]:
        """Return ``(cls.NAME, cls.get_array(size))``."""
        array = cls.get_array(size)
        return cls.NAME, array

    # Display text keyed by the part of NAME before the '-'.
    DENSITY_TO_DISPLAY = {
        'single': '1 True',
        'tenth': '10% True',
        'third': '33% True',
    }

    # Display text keyed by the part of NAME after the '-'.
    POSITION_TO_DISPLAY = {
        'first_third': 'Fill 1/3 to End',
        'second_third': 'Fill 2/3 to End',
    }
190+
191+
192+
class FFSingleFirstThird(FixtureFactory):
    """Fixture with a single True at index ``int(size * (1/3))``."""
    NAME = 'single-first_third'

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        array = FixtureFactory.get_array(size)
        position = int(len(array) * (1/3))
        array[position] = True
        return array
200+
201+
class FFSingleSecondThird(FixtureFactory):
    """Fixture with a single True at index ``int(size * (2/3))``."""
    NAME = 'single-second_third'

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        array = FixtureFactory.get_array(size)
        position = int(len(array) * (2/3))
        array[position] = True
        return array
209+
210+
211+
class FFTenthPostFirstThird(FixtureFactory):
    """Fixture filled from the one-third mark to the end at density 0.1."""
    NAME = 'tenth-first_third'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        filled = cls._get_array_filled(size, density=.1, start_third=1)
        return filled
217+
218+
219+
class FFTenthPostSecondThird(FixtureFactory):
    """Fixture filled from the two-thirds mark to the end at density 0.1."""
    NAME = 'tenth-second_third'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        filled = cls._get_array_filled(size, density=.1, start_third=2)
        return filled
225+
226+
227+
class FFThirdPostFirstThird(FixtureFactory):
    """Fixture filled from the one-third mark to the end at density 1/3."""
    NAME = 'third-first_third'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        filled = cls._get_array_filled(size, density=1/3, start_third=1)
        return filled
233+
234+
235+
class FFThirdPostSecondThird(FixtureFactory):
    """Fixture filled from the two-thirds mark to the end at density 1/3."""
    NAME = 'third-second_third'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        filled = cls._get_array_filled(size, density=1/3, start_third=2)
        return filled
241+
242+
243+
def get_versions() -> str:
    """Return a newline-terminated line naming the OS, ArrayKit and NumPy versions."""
    import platform
    parts = (
        f'OS: {platform.system()}',
        f'ArrayKit: {ak.__version__}',
        f'NumPy: {np.__version__}',
    )
    return ' / '.join(parts) + '\n'
246+
247+
248+
# Processor classes that run_test() times against every fixture.
CLS_PROCESSOR = (
    AKFirstTrue,
    NPNonZero,
    NPArgMax,
    NPNotAnyArgMax,
    # PYLoop,
    )

# Fixture factories that run_test() builds an array from at each size.
CLS_FF = (
    FFSingleFirstThird,
    FFSingleSecondThird,
    FFTenthPostFirstThird,
    FFTenthPostSecondThird,
    FFThirdPostFirstThird,
    FFThirdPostSecondThird,
)
264+
265+
266+
def run_test():
    """Time every processor on every fixture at each size, then plot.

    For each size in (100_000, 1_000_000, 10_000_000) and each factory in
    ``CLS_FF``, one array is built and handed to an instance of every class
    in ``CLS_PROCESSOR``. Each instance is called ``NUMBER`` times under
    ``timeit``; the total is recorded, or NaN if the call raised
    ``OSError``. The records are collected into a DataFrame, printed, and
    passed to ``plot_performance``.
    """
    records = []
    for size in (100_000, 1_000_000, 10_000_000):
        for ff in CLS_FF:
            fixture_label, fixture = ff.get_label_array(size)
            for cls in CLS_PROCESSOR:
                runner = cls(fixture)

                record = [cls, NUMBER, fixture_label, size]
                print(record)
                try:
                    # Expose only the name the timed statement needs.
                    result = timeit.timeit(
                            'runner()',
                            globals={'runner': runner},
                            number=NUMBER)
                except OSError:
                    result = np.nan
                record.append(result)
                records.append(record)

    f = pd.DataFrame.from_records(records,
            columns=('cls_processor', 'number', 'fixture', 'size', 'time')
            )
    print(f)
    plot_performance(f)
293+
294+
# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':

    run_test()
297+
298+
299+

0 commit comments

Comments
 (0)