Skip to content

Commit 1c30fc8

Browse files
committed
Using qiita.allocation_equations table in util.py
1 parent dd12143 commit 1c30fc8

File tree

3 files changed

+99
-55
lines changed

3 files changed

+99
-55
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
INSERT INTO qiita.allocation_equations(equation_name, expression)
2+
VALUES ('mem_model1', 'k * np.log(x) + x * a + b'),
3+
('mem_model2', 'k * np.log(x) + b * np.log(x)**2 + a'),
4+
('mem_model3', 'k * np.log(x) + b * np.log(x)**2 + a * np.log(x)**3'),
5+
('mem_model4', 'k * np.log(x) + b * np.log(x)**2 + a * np.log(x)**2.5'),
6+
('time_model1', 'a + b + np.log(x) * k'),
7+
('time_model2', 'a + b * x + np.log(x) * k'),
8+
('time_model3', 'a + b * np.log(x)**2 + np.log(x) * k'),
9+
('time_model4', 'a * np.log(x)**3 + b * np.log(x)**2 + np.log(x) * k');

qiita_db/test/test_util.py

+16-13
Original file line numberDiff line numberDiff line change
@@ -1343,42 +1343,45 @@ def test_minimize_const(self):
13431343
self.df[self.col_name] = self.df.samples * self.df['columns']
13441344
fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False)
13451345

1346-
bm, options = qdb.util._resource_allocation_plot_helper(
1347-
self.df, axs[0], 'MaxRSSRaw', qdb.util.MODELS_MEM, self.col_name)
1346+
mem_models, time_models = qdb.util._retrieve_equations()
1347+
bm_name, bm, options = qdb.util._resource_allocation_plot_helper(
1348+
self.df, axs[0], 'MaxRSSRaw', mem_models, self.col_name)
13481349
# check that the algorithm chooses correct model for MaxRSSRaw and
13491350
# has 0 failures
13501351
k, a, b = options.x
13511352
failures_df = qdb.util._resource_allocation_success_failures(
13521353
self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')[-1]
13531354
failures = failures_df.shape[0]
1354-
self.assertEqual(bm, qdb.util.mem_model3,
1355+
1356+
self.assertEqual(bm_name, 'mem_model3',
1357+
msg=f"""Best memory model
1358+
doesn't match
1359+
{bm_name} != 'mem_model3'""")
1360+
self.assertEqual(bm, mem_models['mem_model3'],
13551361
msg=f"""Best memory model
13561362
doesn't match
13571363
Coefficients:{k} {a} {b}
1358-
{qdb.util.mem_model1}, "qdb.util.mem_model1"
1359-
{qdb.util.mem_model2}, "qdb.util.mem_model2"
1360-
{qdb.util.mem_model3}, "qdb.util.mem_model3"
1361-
{qdb.util.mem_model4}, "qdb.util.mem_model4"
13621364
""")
13631365
self.assertEqual(failures, 0, "Number of failures must be 0")
13641366

13651367
# check that the algorithm chooses correct model for ElapsedRaw and
13661368
# has 1 failure
1367-
bm, options = qdb.util._resource_allocation_plot_helper(
1368-
self.df, axs[1], 'ElapsedRaw', qdb.util.MODELS_TIME, self.col_name)
1369+
bm_name, bm, options = qdb.util._resource_allocation_plot_helper(
1370+
self.df, axs[1], 'ElapsedRaw', time_models, self.col_name)
13691371
k, a, b = options.x
13701372
failures_df = qdb.util._resource_allocation_success_failures(
13711373
self.df, k, a, b, bm, self.col_name, 'ElapsedRaw')[-1]
13721374
failures = failures_df.shape[0]
13731375

1376+
self.assertEqual(bm_name, 'time_model1',
1377+
msg=f"""Best time model
1378+
doesn't match
1379+
{bm_name} != 'time_model1'""")
1380+
13741381
self.assertEqual(bm, qdb.util.time_model1,
13751382
msg=f"""Best time model
13761383
doesn't match
13771384
Coefficients:{k} {a} {b}
1378-
{qdb.util.time_model1}, "qdb.util.time_model1"
1379-
{qdb.util.time_model2}, "qdb.util.time_model2"
1380-
{qdb.util.time_model3}, "qdb.util.time_model3"
1381-
{qdb.util.time_model4}, "qdb.util.time_model4"
13821385
""")
13831386
self.assertEqual(failures, 1, "Number of failures must be 1")
13841387

qiita_db/util.py

+74-42
Original file line numberDiff line numberDiff line change
@@ -81,41 +81,23 @@
8181
from json import loads
8282
from scipy.optimize import minimize
8383

84-
# memory constant functions defined for @resource_allocation_plot
85-
mem_model1 = (lambda x, k, a, b: k * np.log(x) + x * a + b)
86-
mem_model2 = (lambda x, k, a, b: k * np.log(x) + b * np.log(x)**2 + a)
87-
mem_model3 = (lambda x, k, a, b: k * np.log(x) + b * np.log(x)**2 +
88-
a * np.log(x)**3)
89-
mem_model4 = (lambda x, k, a, b: k * np.log(x) + b * np.log(x)**2 +
90-
a * np.log(x)**2.5)
91-
MODELS_MEM = [mem_model1, mem_model2, mem_model3, mem_model4]
92-
93-
# time constant functions defined for @resource_allocation_plot
94-
time_model1 = (lambda x, k, a, b: a + b + np.log(x) * k)
95-
time_model2 = (lambda x, k, a, b: a + b * x + np.log(x) * k)
96-
time_model3 = (lambda x, k, a, b: a + b * np.log(x)**2 + np.log(x) * k)
97-
time_model4 = (lambda x, k, a, b: a * np.log(x)**3 + b * np.log(x)**2 +
98-
np.log(x) * k)
99-
100-
MODELS_TIME = [time_model1, time_model2, time_model3, time_model4]
101-
10284

10385
# Human-readable formula for every known allocation model, keyed by the
# model name that is passed to get_model_name.
_MODEL_DESCRIPTIONS = {
    'mem_model1': "k * log(x) + x * a + b",
    'mem_model2': "k * log(x) + b * log(x)^2 + a",
    'mem_model3': "k * log(x) + b * log(x)^2 + a * log(x)^3",
    'mem_model4': "k * log(x) + b * log(x)^2 + a * log(x)^2.5",
    'time_model1': "a + b + log(x) * k",
    'time_model2': "a + b * x + log(x) * k",
    'time_model3': "a + b * log(x)^2 + log(x) * k",
    'time_model4': "a * log(x)^3 + b * log(x)^2 + log(x) * k",
}


def get_model_name(model):
    '''
    Returns the human-readable formula for a resource allocation model.

    Parameters
    ----------
    model : str
        Name of the model, e.g. 'mem_model1' or 'time_model3'.

    Returns
    -------
    str
        The formula of the model, or "Unknown model" when the name is not
        one of the known model names.
    '''
    # Anything that is not a string (including unhashable objects, which a
    # dict lookup would reject with TypeError) cannot match a model name.
    if not isinstance(model, str):
        return "Unknown model"
    return _MODEL_DESCRIPTIONS.get(model, "Unknown model")
@@ -2387,19 +2369,63 @@ def resource_allocation_plot(df, col_name):
23872369
fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False)
23882370

23892371
ax = axs[0]
2372+
mem_models, time_models = _retrieve_equations()
2373+
23902374
# models for memory
23912375
_resource_allocation_plot_helper(
2392-
df, ax, "MaxRSSRaw", MODELS_MEM, col_name)
2393-
2376+
df, ax, "MaxRSSRaw", mem_models, col_name)
23942377
ax = axs[1]
23952378
# models for time
23962379
_resource_allocation_plot_helper(
2397-
df, ax, "ElapsedRaw", MODELS_TIME, col_name)
2380+
df, ax, "ElapsedRaw", time_models, col_name)
23982381

23992382
return fig, axs
24002383

24012384

2385+
def _retrieve_equations():
    '''
    Helper function for resource_allocation_plot.
    Retrieves equations from db. Creates dictionary for memory and time models.

    Every row of qiita.allocation_equations becomes a callable
    ``model(x, k, a, b)`` that evaluates the stored expression, keyed by its
    equation_name. Equations whose name contains 'mem' are treated as memory
    models; all the others are treated as time models.

    Returns
    -------
    tuple
        dict
            memory models - potential memory models for resource allocations
        dict
            time models - potential time models for resource allocations
    '''
    def _build_model(expression):
        # A factory is required here: a lambda created directly in the loop
        # below would look up the loop variable when it is *called*, so every
        # model would end up evaluating the expression of the last row.
        # Compiling once also avoids re-parsing the text on every call made
        # by the optimizer.
        # NOTE(review): eval executes whatever is stored in
        # qiita.allocation_equations; that table must only be writable by
        # trusted administrators.
        code = compile(expression, '<allocation_equation>', 'eval')
        # names such as `np` used by the expressions are resolved through
        # the module globals
        namespace = globals()

        def model(x, k, a, b):
            return eval(code, namespace, {'x': x, 'k': k, 'a': a, 'b': b})
        return model

    memory_models = {}
    time_models = {}
    with qdb.sql_connection.TRN:
        # explicit columns so the code does not depend on the column order
        # of the table
        sql = ''' SELECT equation_name, expression
                  FROM qiita.allocation_equations; '''
        qdb.sql_connection.TRN.add(sql)
        res = qdb.sql_connection.TRN.execute_fetchindex()

    for name, expression in res:
        # both dictionaries are keyed by the equation name
        target = memory_models if 'mem' in name else time_models
        target[name] = _build_model(expression)
    return (memory_models, time_models)
2410+
2411+
24022412
def retrieve_resource_data(cname, sname, version, columns):
2413+
'''
2414+
Retrieves resource data from db and constructs a DataFrame with relevant
2415+
fields.
2416+
2417+
Parameters
2418+
----------
2419+
cname - command name for which we retrieve the resources
2420+
sname - software name for which we retrieve the resources
2421+
version - version of software for which we retrieve the resources
2422+
columns - column names for the DataFrame returned by this function
2423+
2424+
Returns
2425+
-------
2426+
pd.DataFrame
2427+
DataFrame with resources.
2428+
'''
24032429
with qdb.sql_connection.TRN:
24042430
sql = """
24052431
SELECT
@@ -2457,8 +2483,8 @@ def _resource_allocation_plot_helper(
24572483
Specifies x axis for the graph
24582484
curr: str, required
24592485
Either MaxRSSRaw or ElapsedRaw (y axis)
2460-
models: list, required
2461-
List of functions that will be used for visualization
2486+
models: dictionary, required
2487+
Dictionary of functions that will be used for visualization
24622488
24632489
"""
24642490

@@ -2494,7 +2520,7 @@ def _resource_allocation_plot_helper(
24942520
ax.set_xlabel(col_name)
24952521

24962522
# 50 - number of maximum iterations, 3 - number of failures we tolerate
2497-
best_model, options = _resource_allocation_calculate(
2523+
best_model_name, best_model, options = _resource_allocation_calculate(
24982524
df, x_data, y_data, models, curr, col_name, 50, 3)
24992525
k, a, b = options.x
25002526
x_plot = np.array(sorted(df[col_name].unique()))
@@ -2522,22 +2548,24 @@ def _resource_allocation_plot_helper(
25222548
label="failures")
25232549
success_df['node_name'] = success_df['node_name'].fillna('unknown')
25242550
slurm_hosts = set(success_df['node_name'].tolist())
2525-
cmap = colormaps.get_cmap('Accent').resampled(len(slurm_hosts))
2526-
colors = [cmap(
2527-
i / (len(slurm_hosts) - 1)) for i in range(len(slurm_hosts))]
2551+
cmap = colormaps.get_cmap('Accent')
2552+
if len(slurm_hosts) > len(cmap.colors):
2553+
raise ValueError(f"""'Accent' colormap only has {len(cmap.colors)}
2554+
colors, but {len(slurm_hosts)} hosts are provided.""")
2555+
colors = cmap.colors[:len(slurm_hosts)]
25282556

25292557
for i, host in enumerate(slurm_hosts):
25302558
host_df = success_df[success_df['node_name'] == host]
25312559
ax.scatter(host_df[col_name], host_df[curr], color=colors[i], s=3,
25322560
label=host)
25332561
ax.set_title(
25342562
f'k||a||b: {k}||{a}||{b}\n'
2535-
f'model: {get_model_name(best_model)}\n'
2563+
f'model: {get_model_name(best_model_name)}\n'
25362564
f'real: {mini} || {maxi}\n'
25372565
f'calculated: {cmin} || {cmax}\n'
25382566
f'failures: {failures}')
25392567
ax.legend(loc='upper left')
2540-
return best_model, options
2568+
return best_model_name, best_model, options
25412569

25422570

25432571
def _resource_allocation_calculate(
@@ -2555,27 +2583,30 @@ def _resource_allocation_calculate(
25552583
current type (e.g. MaxRSSRaw)
25562584
col_name: str, required
25572585
Specifies x axis for the graph
2558-
models: list, required
2559-
List of functions that will be used for visualization
2586+
models: dictionary, required
2587+
Dictionary of functions that will be used for visualization
25602588
depth: int, required
25612589
Maximum number of iterations in binary search
25622590
tolerance: int, required,
25632591
Tolerance to number of failures possible to be considered as a model
25642592
25652593
Returns
25662594
----------
2595+
best_model_name: string
2596+
the name of the best model from the table
25672597
best_model: function
2568-
best fitting function for the current list models
2598+
best fitting function for the current dictionary models
25692599
best_result: object
25702600
object containing constants for the best model (e.g. k, a, b in kx+b*a)
25712601
"""
25722602

25732603
init = [1, 1, 1]
2604+
best_model_name = None
25742605
best_model = None
25752606
best_result = None
25762607
best_failures = np.inf
25772608
best_max = np.inf
2578-
for model in models:
2609+
for model_name, model in models.items():
25792610
# start values for binary search, where sl is left, sr is right
25802611
# penalty weight must be positive & non-zero, hence, sl >= 1.
25812612
# the upper bound for error can be an arbitrary large number
@@ -2646,9 +2677,10 @@ def _resource_allocation_calculate(
26462677
if min_max <= best_max:
26472678
best_failures = prev_failures
26482679
best_max = min_max
2680+
best_model_name = model_name
26492681
best_model = model
26502682
best_result = res
2651-
return best_model, best_result
2683+
return best_model_name, best_model, best_result
26522684

26532685

26542686
def _resource_allocation_custom_loss(params, x, y, model, p):

0 commit comments

Comments
 (0)