Skip to content

Commit 111484b

Browse files
authored
Merge pull request #159 from static-frame/158/trimap-cast
TriMap improvements
2 parents 66e7316 + 6968560 commit 111484b

File tree

8 files changed

+1364
-313
lines changed

8 files changed

+1364
-313
lines changed

doc/articles/first_true.png

-146 KB
Binary file not shown.

doc/articles/nonzero_1d.py

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
2+
3+
4+
import os
5+
import sys
6+
import timeit
7+
import typing as tp
8+
9+
from arraykit import nonzero_1d
10+
import arraykit as ak
11+
12+
import matplotlib.pyplot as plt
13+
import numpy as np
14+
import pandas as pd
15+
16+
# Add the current working directory to the module search path so that modules
# located there can be imported when this script is run.
sys.path.append(os.getcwd())
17+
18+
19+
20+
class ArrayProcessor:
    """Base class for one benchmark case bound to a single input array.

    Subclasses override ``NAME`` and ``SORT`` and implement ``__call__`` to
    run the operation being timed against ``self.array``.
    """
    # Display label for this processor (used for bar labels and the legend).
    NAME = ''
    # Key used to order processors within a plot; lower values come first.
    SORT = -1

    def __init__(self, array: np.ndarray) -> None:
        """Store the array that the timed call will operate on."""
        self.array = array
26+
27+
#-------------------------------------------------------------------------------
28+
class AKNonZero(ArrayProcessor):
    """Benchmark case that calls arraykit's ``nonzero_1d`` on the stored array."""
    NAME = 'ak.nonzero_1d()'
    SORT = 0

    def __call__(self) -> None:
        # The result is discarded; only the run time of the call is of interest.
        _ = nonzero_1d(self.array)
34+
35+
class NPNonZero(ArrayProcessor):
    """Benchmark case that calls ``np.nonzero`` and takes its first index array."""
    NAME = 'np.nonzero()'
    SORT = 1

    def __call__(self) -> None:
        # np.nonzero returns a tuple of index arrays; element 0 is selected.
        # The result is discarded; only the run time of the call is of interest.
        _ = np.nonzero(self.array)[0]
41+
42+
class NPNonZeroInt64Convert(ArrayProcessor):
    """Benchmark case: ``np.nonzero`` followed by a conversion to ``np.int64``."""
    NAME = 'np.nonzero().astype(np.int64)'
    SORT = 3

    def __call__(self) -> None:
        # Same as the plain np.nonzero case, plus the cost of the astype() call.
        # The result is discarded; only the run time of the call is of interest.
        _ = np.nonzero(self.array)[0].astype(np.int64)
48+
49+
50+
#-------------------------------------------------------------------------------
51+
# Number of iterations per timing run; measured totals are divided by this
# value to obtain a per-iteration duration.
NUMBER = 400


def seconds_to_display(seconds: float) -> str:
    """Format a total elapsed time as a per-iteration duration label.

    Args:
        seconds: Total elapsed seconds for ``NUMBER`` iterations.

    Returns:
        The per-iteration time with one decimal place, expressed in
        microseconds (below 1e-4 s), milliseconds (below 1e-1 s) or seconds.
    """
    per_call = seconds / NUMBER
    # Thresholds are checked in ascending order with `<`, so a NaN input
    # matches neither and falls through to the plain-seconds format.
    for limit, scale, unit in ((1e-4, 1e6, 'µs'), (1e-1, 1e3, 'ms')):
        if per_call < limit:
            return f'{per_call * scale: .1f} ({unit})'
    return f'{per_call: .1f} (s)'
60+
61+
62+
def plot_performance(frame):
    """Draw the benchmark results as a grid of bar charts, save and open the image.

    The grid has one row of subplots per distinct ``size`` and one column per
    distinct ``fixture``; each subplot holds one bar per processor class,
    ordered by the class's ``SORT`` attribute. The figure is written to
    ``/tmp/nonzero.png`` and then opened with an external viewer.

    Args:
        frame: DataFrame with the columns ``cls_processor``, ``fixture``,
            ``size`` and ``time``.
    """
    fixture_total = len(frame['fixture'].unique())
    cat_total = len(frame['size'].unique())
    processor_total = len(frame['cls_processor'].unique())
    # squeeze=False keeps `axes` two-dimensional even when there is only one
    # row or one column, so the axes[row][col] lookup below always works.
    fig, axes = plt.subplots(cat_total, fixture_total, squeeze=False)

    # cmap = plt.get_cmap('terrain')
    cmap = plt.get_cmap('plasma')

    color = cmap(np.arange(processor_total) / max(processor_total, 3))

    # category is the size of the array
    for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')):
        # each fixture is a collection of tests for one display
        for fixture_count, (fixture_label, fixture) in enumerate(cat.groupby('fixture')):
            ax = axes[cat_count][fixture_count]

            # set order; assign() builds a new frame instead of writing into
            # the groupby slice, which avoids pandas' SettingWithCopyWarning
            fixture = fixture.assign(
                    sort=[f.SORT for f in fixture['cls_processor']],
                    ).sort_values('sort')

            results = fixture['time'].values.tolist()
            names = [cls.NAME for cls in fixture['cls_processor']]
            # x = np.arange(len(results))
            names_display = names
            post = ax.bar(names_display, results, color=color)

            # density, position = fixture_label.split('-')
            # cat_label is the size of the array
            title = f'{cat_label:.0e}\n{FixtureFactory.DENSITY_TO_DISPLAY[fixture_label]}'

            ax.set_title(title, fontsize=6)
            ax.set_box_aspect(0.75) # makes taller than wide
            time_max = fixture['time'].max()
            ax.set_yticks([0, time_max * 0.5, time_max])
            ax.set_yticklabels(['',
                    seconds_to_display(time_max * .5),
                    seconds_to_display(time_max),
                    ], fontsize=6)
            # ax.set_xticks(x, names_display, rotation='vertical')
            # bar names are shown in the shared legend, not under each bar
            ax.tick_params(
                    axis='x',
                    which='both',
                    bottom=False,
                    top=False,
                    labelbottom=False,
                    )

    fig.set_size_inches(9, 4) # width, height
    # the legend is built from the bars and names of the last subplot drawn
    fig.legend(post, names_display, loc='center right', fontsize=6)
    # horizontal, vertical
    fig.text(.05, .96, f'nonzero_1d() Performance: {NUMBER} Iterations', fontsize=10)
    fig.text(.05, .90, get_versions(), fontsize=6)

    fp = '/tmp/nonzero.png'
    plt.subplots_adjust(
            left=0.075,
            bottom=0.05,
            right=0.80,
            top=0.85,
            wspace=0.9, # width
            hspace=0.2,
            )
    # plt.rcParams.update({'font.size': 22})
    plt.savefig(fp, dpi=300)

    # open the saved image with a platform-appropriate viewer command
    if sys.platform.startswith('linux'):
        os.system(f'eog {fp}&')
    else:
        os.system(f'open {fp}')
133+
134+
135+
#-------------------------------------------------------------------------------
136+
137+
class FixtureFactory:
    """Base factory for the Boolean arrays used as benchmark fixtures.

    Subclasses set ``NAME`` (a key of ``DENSITY_TO_DISPLAY``) and override
    ``get_array`` to control how many elements are True.
    """
    # Fixture label; returned alongside the array by get_label_array().
    NAME = ''

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        """Return a Boolean array of length ``size`` with every element False."""
        return np.full(size, False, dtype=bool)

    @staticmethod
    def _get_array_filled(
            size: int,
            start_third: int,  # 0, 1 or 2
            density: float,  # fraction of `size` to set True, at most 1
            ) -> np.ndarray:
        """Return a Boolean array with evenly spaced True values.

        Args:
            size: Length of the array.
            start_third: Which third of the array the True values start in
                (0 starts at index 0).
            density: Target fraction of ``size`` to set True. The step between
                True values is truncated to an integer, so the resulting
                count is approximate when the range does not divide evenly.

        Returns:
            The filled array; all False when ``size * density`` is not
            positive (avoids dividing by zero when computing the step).
        """
        a = FixtureFactory.get_array(size)
        count = size * density
        if count <= 0:
            # Nothing to fill, and the step computation would divide by zero.
            return a
        start = int(len(a) * (start_third / 3))
        length = len(a) - start
        # Never step by less than one index, even when count exceeds length.
        step = max(int(length / count), 1)
        fill = np.arange(start, len(a), step)
        a[fill] = True
        return a

    @classmethod
    def get_label_array(cls, size: int) -> tp.Tuple[str, np.ndarray]:
        """Return ``(cls.NAME, cls.get_array(size))`` for this factory."""
        array = cls.get_array(size)
        return cls.NAME, array

    # Maps each fixture NAME to the text shown in plot titles.
    DENSITY_TO_DISPLAY = {
        'single': '1 True',
        'quarter': '25% True',
        'half': '50% True',
        'full': '100% True',
    }

    # POSITION_TO_DISPLAY = {
    #     'first_third': 'Fill 1/3 to End',
    #     'second_third': 'Fill 2/3 to End',
    # }
174+
175+
176+
class FFSingle(FixtureFactory):
    """Fixture with exactly one True value, placed at the middle index."""
    NAME = 'single'

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        """Return an all-False array of length ``size`` with the midpoint set True."""
        array = FixtureFactory.get_array(size)
        midpoint = len(array) // 2
        array[midpoint] = True
        return array
184+
185+
class FFQuarter(FixtureFactory):
    """Fixture filled from index 0 at a density of 0.25."""
    NAME = 'quarter'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        """Return an array of length ``size`` filled via ``_get_array_filled``."""
        filled = cls._get_array_filled(size, start_third=0, density=0.25)
        return filled
191+
192+
class FFHalf(FixtureFactory):
    """Fixture filled from index 0 at a density of 0.5."""
    NAME = 'half'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        """Return an array of length ``size`` filled via ``_get_array_filled``."""
        filled = cls._get_array_filled(size, start_third=0, density=0.5)
        return filled
198+
199+
200+
class FFFull(FixtureFactory):
    """Fixture filled from index 0 at a density of 1."""
    NAME = 'full'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        """Return an array of length ``size`` filled via ``_get_array_filled``."""
        filled = cls._get_array_filled(size, start_third=0, density=1)
        return filled
206+
207+
208+
def get_versions() -> str:
    """Return a one-line summary of the OS name and ArrayKit / NumPy versions.

    The returned string ends with a newline.
    """
    import platform
    os_name = platform.system()
    ak_version = ak.__version__
    np_version = np.__version__
    return f'OS: {os_name} / ArrayKit: {ak_version} / NumPy: {np_version}\n'
211+
212+
213+
# Processor classes to benchmark; run_test() times each one on every fixture.
CLS_PROCESSOR = (
    AKNonZero,
    NPNonZero,
    NPNonZeroInt64Convert,
    )

# Fixture factories; run_test() builds one array per factory for each size.
CLS_FF = (
    FFSingle,
    FFQuarter,
    FFHalf,
    FFFull,
    )
225+
226+
227+
def run_test():
    """Time every processor on every fixture and array size, then plot the results.

    For each size and each factory in ``CLS_FF`` one array is built, and each
    class in ``CLS_PROCESSOR`` is timed on it for ``NUMBER`` iterations. The
    collected records are printed as a DataFrame and passed to
    ``plot_performance``.
    """
    records = []
    for size in (100_000, 1_000_000, 10_000_000):
        for ff in CLS_FF:
            fixture_label, fixture = ff.get_label_array(size)
            for cls in CLS_PROCESSOR:
                runner = cls(fixture)

                record = [cls, NUMBER, fixture_label, size]
                # printed before timing, so progress is visible during long runs
                print(record)
                try:
                    # expose only `runner` to the timed statement
                    result = timeit.timeit(
                            'runner()',
                            globals={'runner': runner},
                            number=NUMBER)
                except OSError:
                    # NOTE(review): record NaN rather than abort the whole run;
                    # it is not clear from this file which call raises OSError.
                    result = np.nan
                record.append(result)
                records.append(record)

    f = pd.DataFrame.from_records(records,
            columns=('cls_processor', 'number', 'fixture', 'size', 'time')
            )
    print(f)
    plot_performance(f)
254+
255+
# Run the benchmark only when executed as a script, not when imported.
if __name__ == '__main__':

    run_test()
258+
259+
260+

src/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,4 @@
2828
from ._arraykit import first_true_1d as first_true_1d
2929
from ._arraykit import first_true_2d as first_true_2d
3030
from ._arraykit import slice_to_ascending_slice as slice_to_ascending_slice
31+
from ._arraykit import nonzero_1d as nonzero_1d

src/__init__.pyi

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class TriMap:
5050
def register_one(self, src_from: int, dst_from: int) -> None: ...
5151
def register_unmatched_dst(self) -> None: ...
5252
def register_many(self, src_from: int, dst_from: np.ndarray) -> None: ...
53+
def finalize(self) -> None: ...
5354
def is_many(self) -> bool: ...
5455
def src_no_fill(self) -> bool: ...
5556
def dst_no_fill(self) -> bool: ...
@@ -158,4 +159,5 @@ def get_new_indexers_and_screen(indexers: np.ndarray, positions: np.ndarray) ->
158159

159160
def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ...
160161
def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ...
162+
def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ...
161163
def slice_to_ascending_slice(__slice: slice, __size: int) -> slice: ...

0 commit comments

Comments
 (0)