Skip to content

Commit 37ea968

Browse files
authored
Merge pull request #160 from gettodaze/22/add_array2d_to_1d
22/add array2d to 1d
2 parents 1264dce + 74b478d commit 37ea968

File tree

6 files changed

+764
-1
lines changed

6 files changed

+764
-1
lines changed

doc/articles/array2d_to_1d.py

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
import os
2+
import sys
3+
import timeit
4+
import typing as tp
5+
6+
from arraykit import array2d_to_array1d
7+
import arraykit as ak
8+
9+
import matplotlib.pyplot as plt
10+
import numpy as np
11+
import pandas as pd
12+
13+
sys.path.append(os.getcwd())
14+
15+
class ArrayProcessor:
    """Base class for a benchmarked callable bound to a single fixture array.

    Subclasses in this file override ``NAME`` and ``SORT`` and define
    ``__call__`` with the operation to be timed.
    """
    # Ordering key; the plot sorts processors by this value.
    SORT = -1
    # Label for the processor; the plot uses it for bar and legend names.
    NAME = ''

    def __init__(self, array: np.ndarray):
        # Hold the fixture so the instance can be invoked with no arguments.
        self.array = array
21+
22+
#-------------------------------------------------------------------------------
23+
class AKArray2D1D(ArrayProcessor):
    """Processor that calls arraykit's ``array2d_to_array1d`` on the fixture array."""
    SORT = 0
    NAME = 'ak.array2d_to_array1d()'

    def __call__(self):
        # The return value is discarded; only the call is of interest here.
        array2d_to_array1d(self.array)
29+
30+
class PyArray2D1D(ArrayProcessor):
    """Processor that builds a 1D object array of row tuples with a Python loop."""
    SORT = 1
    NAME = 'Python construction'

    def __call__(self):
        source = self.array
        # One object slot per row of the 2D fixture.
        result = np.empty(source.shape[0], dtype=object)
        for index, row in enumerate(source):
            result[index] = tuple(row)
        # Mark the finished array read-only.
        result.flags.writeable = False
39+
40+
#-------------------------------------------------------------------------------
41+
# Iteration count passed to timeit; also the divisor used to report per-iteration time.
NUMBER = 200


def seconds_to_display(seconds: float) -> str:
    """Format a total elapsed time as a per-iteration duration with a unit suffix.

    ``seconds`` is divided by ``NUMBER`` and rendered with one decimal place:
    microseconds when below 1e-4 s, milliseconds when below 1e-1 s, and
    seconds otherwise.
    """
    per_call = seconds / NUMBER
    if per_call < 1e-4:
        text = f'{per_call * 1e6: .1f} (µs)'
    elif per_call < 1e-1:
        text = f'{per_call * 1e3: .1f} (ms)'
    else:
        # Also reached for NaN, since both comparisons above are False.
        text = f'{per_call: .1f} (s)'
    return text
50+
51+
52+
def plot_performance(frame):
    """Render a grid of bar charts from the benchmark results and open the image.

    ``frame`` is indexed by the columns ``'fixture'``, ``'size'``,
    ``'cls_processor'`` and ``'time'``. One row of subplots is drawn per
    distinct ``size`` and one column per key of
    ``FixtureFactory.DENSITY_TO_DISPLAY``; each subplot shows one bar per
    processor class. The figure is written to ``/tmp/array2d_to_array1d.png``
    and then opened with ``eog`` on Linux or ``open`` elsewhere.
    """
    fixture_total = len(frame['fixture'].unique())
    cat_total = len(frame['size'].unique())
    processor_total = len(frame['cls_processor'].unique())
    # squeeze=False keeps `axes` two-dimensional even when there is only one
    # size or one fixture, so axes[row][col] indexing below always works.
    fig, axes = plt.subplots(cat_total, fixture_total, squeeze=False)

    cmap = plt.get_cmap('plasma')
    # One colour per processor; the divisor is at least 3 so that a small
    # number of processors does not span the whole colour map.
    color = cmap(np.arange(processor_total) / max(processor_total, 3))

    # category is the size of the array
    for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')):
        # each fixture is a collection of tests for one display
        fixtures = {fixture_label: fixture for fixture_label, fixture in cat.groupby('fixture')}
        for fixture_count, (fixture_label, fixture) in enumerate(
                (k, fixtures[k]) for k in FixtureFactory.DENSITY_TO_DISPLAY):
            ax = axes[cat_count][fixture_count]

            # Order bars by each processor's SORT value. assign() returns a
            # new frame rather than writing into the groupby slice.
            fixture = fixture.assign(
                    sort=[f.SORT for f in fixture['cls_processor']],
                    ).sort_values('sort')

            results = fixture['time'].values.tolist()
            names_display = [cls.NAME for cls in fixture['cls_processor']]
            post = ax.bar(names_display, results, color=color)

            # cat_label is the size of the array
            title = f'{cat_label:.0e}\n{FixtureFactory.DENSITY_TO_DISPLAY[fixture_label]}'

            ax.set_title(title, fontsize=6)
            ax.set_box_aspect(0.75) # height / width: wider than tall
            time_max = fixture['time'].max()
            ax.set_yticks([0, time_max * 0.5, time_max])
            ax.set_yticklabels(['',
                    seconds_to_display(time_max * .5),
                    seconds_to_display(time_max),
                    ], fontsize=4)
            # Bars are identified by the figure legend, so hide x tick marks and labels.
            ax.tick_params(
                    axis='x',
                    which='both',
                    bottom=False,
                    top=False,
                    labelbottom=False,
                    )

    fig.set_size_inches(8, 4) # width, height
    # The legend uses the bars and names from the last subplot drawn.
    fig.legend(post, names_display, loc='center right', fontsize=6)
    # horizontal, vertical
    fig.text(.05, .96, f'array2d_to_array1d() Performance: {NUMBER} Iterations', fontsize=10)
    fig.text(.05, .90, get_versions(), fontsize=6)

    fp = '/tmp/array2d_to_array1d.png'
    plt.subplots_adjust(
            left=0.05,
            bottom=0.05,
            right=0.8,
            top=0.85,
            wspace=0.9, # width
            hspace=0.5,
            )
    plt.savefig(fp, dpi=300)

    if sys.platform.startswith('linux'):
        os.system(f'eog {fp}&')
    else:
        os.system(f'open {fp}')
124+
125+
126+
#-------------------------------------------------------------------------------
127+
128+
class FixtureFactory:
    """Base factory for the labelled 2D integer arrays used as benchmark fixtures."""
    NAME = ''

    @staticmethod
    def get_array(size: int, width_ratio: int) -> np.ndarray:
        """Return ``np.arange(size)`` reshaped to ``size // width_ratio`` rows of ``width_ratio`` columns."""
        row_count = size // width_ratio
        values = np.arange(size)
        return values.reshape(row_count, width_ratio)

    @classmethod
    def get_label_array(cls, size: int) -> tp.Tuple[str, np.ndarray]:
        """Return the pair ``(cls.NAME, cls.get_array(size))``.

        ``get_array`` is called with ``size`` only, so this is usable on
        subclasses whose ``get_array`` takes a single argument.
        """
        return cls.NAME, cls.get_array(size)

    # Maps a fixture NAME to the text displayed for it in plot titles.
    DENSITY_TO_DISPLAY = {
        'column-2': '2 Column',
        'column-5': '5 Column',
        'column-10': '10 Column',
        'column-20': '20 Column',
    }
146+
147+
# POSITION_TO_DISPLAY = {
148+
# 'first_third': 'Fill 1/3 to End',
149+
# 'second_third': 'Fill 2/3 to End',
150+
# }
151+
152+
153+
class FFC2(FixtureFactory):
    """Fixture factory producing arrays with 2 columns."""
    NAME = 'column-2'

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        """Return the base factory's array for ``size`` with a width of 2."""
        return FixtureFactory.get_array(size, 2)
160+
161+
class FFC5(FixtureFactory):
    """Fixture factory producing arrays with 5 columns."""
    NAME = 'column-5'

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        """Return the base factory's array for ``size`` with a width of 5."""
        return FixtureFactory.get_array(size, 5)
168+
169+
class FFC10(FixtureFactory):
    """Fixture factory producing arrays with 10 columns."""
    NAME = 'column-10'

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        """Return the base factory's array for ``size`` with a width of 10."""
        return FixtureFactory.get_array(size, 10)
176+
177+
class FFC20(FixtureFactory):
    """Fixture factory producing arrays with 20 columns."""
    NAME = 'column-20'

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        """Return the base factory's array for ``size`` with a width of 20."""
        return FixtureFactory.get_array(size, 20)
184+
185+
def get_versions() -> str:
    """Return a one-line summary of the OS name and the ArrayKit and NumPy versions, ending in a newline."""
    import platform
    os_name = platform.system()
    return f'OS: {os_name} / ArrayKit: {ak.__version__} / NumPy: {np.__version__}\n'
188+
189+
190+
# Processor classes to be timed against every fixture.
CLS_PROCESSOR = (AKArray2D1D, PyArray2D1D)

# Fixture factories, one per array width.
CLS_FF = (FFC2, FFC5, FFC10, FFC20)
201+
202+
203+
def run_test():
    """Time every processor on every fixture at each array size, then print and plot the results.

    For each size and each factory in ``CLS_FF``, a fixture array is built and
    each class in ``CLS_PROCESSOR`` is instantiated with it and timed for
    ``NUMBER`` iterations. Results are collected into a DataFrame with the
    columns ``cls_processor``, ``number``, ``fixture``, ``size`` and ``time``,
    which is printed and passed to ``plot_performance``.
    """
    records = []
    for size in (1_000, 10_000, 100_000, 1_000_000):
        for ff in CLS_FF:
            fixture_label, fixture = ff.get_label_array(size)
            for cls in CLS_PROCESSOR:
                runner = cls(fixture)

                record = [cls, NUMBER, fixture_label, size]
                print(record)
                try:
                    # Expose only the runner to the timed statement.
                    result = timeit.timeit(
                            'runner()',
                            globals={'runner': runner},
                            number=NUMBER)
                except OSError:
                    # Record a missing measurement rather than aborting the run.
                    result = np.nan
                record.append(result)
                records.append(record)

    frame = pd.DataFrame.from_records(records,
            columns=('cls_processor', 'number', 'fixture', 'size', 'time')
            )
    print(frame)
    plot_performance(frame)


if __name__ == '__main__':
    run_test()
234+
235+
236+

0 commit comments

Comments
 (0)