Skip to content

Commit 6f1a3d4

Browse files
authored
Resource Allocation data in Redis (#3430)
* Update to DB qiita.slurm_resource_allocations * qiita-cron-job initialize-resource-allocations-redis * Populate redis with resource-allocation data * Removed changes to qiita_pet This pull request should only contain changes with uploading data to redis. I accidentally committed some changes to qiita_pet here. * Minor changes * Updates to Antonio’s comments * Update meta_util.py
1 parent e34df41 commit 6f1a3d4

File tree

6 files changed

+181
-19
lines changed

6 files changed

+181
-19
lines changed

qiita_db/meta_util.py

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,18 @@
3737
from re import sub
3838
from json import loads, dump, dumps
3939

40-
from qiita_db.util import create_nested_path
40+
from qiita_db.util import create_nested_path, retrieve_resource_data
41+
from qiita_db.util import resource_allocation_plot
4142
from qiita_core.qiita_settings import qiita_config, r_client
4243
from qiita_core.configuration_manager import ConfigurationManager
4344
import qiita_db as qdb
4445

46+
# global constant list used in resource_allocation_page
47+
COLUMNS = [
48+
"sName", "sVersion", "cID", "cName", "processing_job_id",
49+
"parameters", "samples", "columns", "input_size", "extra_info",
50+
"MaxRSSRaw", "ElapsedRaw", "Start", "node_name", "node_model"]
51+
4552

4653
def _get_data_fpids(constructor, object_id):
4754
"""Small function for getting filepath IDS associated with data object
@@ -546,3 +553,101 @@ def generate_plugin_releases():
546553
# important to "flush" variables to avoid errors
547554
r_client.delete(redis_key)
548555
f(redis_key, v)
556+
557+
558+
def get_software_commands(active):
    """Groups the command names of every software by name and version

    Parameters
    ----------
    active : bool
        Forwarded as the `active` argument of `qdb.software.Software.iter`.

    Returns
    -------
    dict of {str: dict of {str: list of str}}
        Command names keyed first by software name and then by software
        version. A software without commands does not appear in the result.

    Notes
    -----
    Only plain `dict` objects are returned, at every nesting level, so that
    the `str()` of the result is a plain dictionary literal and does not
    contain a `defaultdict(...)` repr.
    """
    software_list = list(qdb.software.Software.iter(active=active))
    software_commands = {}

    for software in software_list:
        sname = software.name
        sversion = software.version

        for command in software.commands:
            # entries are created lazily, so software/versions without any
            # command never show up in the result
            versions = software_commands.setdefault(sname, {})
            versions.setdefault(sversion, []).append(command.name)

    return software_commands
571+
572+
573+
def _axis_to_png_data_uri(ax):
    """Re-draws the content of one axis on its own figure, encoded as PNG

    Parameters
    ----------
    ax : matplotlib axis
        The axis to copy. Its first collection is re-drawn as the "data"
        scatter, its first line as the orange curve and, when present, its
        second collection as the red "failures" scatter.

    Returns
    -------
    str
        The new figure as a 'data:image/png;base64,' URI.
    """
    new_fig = plt.figure()
    try:
        new_ax = new_fig.add_subplot(111)

        scatter_data = ax.collections[0]
        offsets = scatter_data.get_offsets()
        new_ax.scatter(offsets[:, 0], offsets[:, 1],
                       s=scatter_data.get_sizes(), label="data")

        line = ax.lines[0]
        new_ax.plot(line.get_xdata(), line.get_ydata(),
                    linewidth=1, color='orange')

        if len(ax.collections) > 1:
            failure_offsets = ax.collections[1].get_offsets()
            new_ax.scatter(failure_offsets[:, 0], failure_offsets[:, 1],
                           color='red', s=3, label="failures")

        new_ax.set_xscale('log')
        new_ax.set_yscale('log')
        new_ax.set_xlabel(ax.get_xlabel())
        new_ax.set_ylabel(ax.get_ylabel())
        new_ax.legend(loc='upper left')

        new_fig.tight_layout()
        plot = BytesIO()
        new_fig.savefig(plot, format='png')
        return 'data:image/png;base64,' + quote(
            b64encode(plot.getvalue()).decode('ascii'))
    finally:
        # always release the figure, even if drawing or saving failed
        plt.close(new_fig)


def update_resource_allocation_redis(active=True):
    """Updates redis with plots and information about current software.

    Parameters
    ----------
    active: boolean, optional
        Defaults to True. Should only be False when testing.

    Notes
    -----
    The `str()` of the mapping returned by `get_software_commands` is stored
    under the 'resources:commands' key.

    For every (command, software, version) combination for which
    `retrieve_resource_data` returns at least one row, the following suffixes
    are stored under the
    'resources$#<command>$#<software>$#<version>$#<col_name>:<suffix>' key:
    'img_mem' and 'img_time' (PNG data URIs of the first and second axis
    returned by `resource_allocation_plot`), 'title_mem' and 'title_time'
    (the titles of those axes) and 'time' (the current date as '%m-%d-%y').
    """
    timestamp = datetime.now().strftime('%m-%d-%y')
    # the same column and key layout are used for every command
    col_name = "samples * columns"
    key_template = 'resources$#%s$#%s$#%s$#%s:%s'

    scommands = get_software_commands(active)
    r_client.set('resources:commands', str(scommands))

    for sname, versions in scommands.items():
        for version, commands in versions.items():
            for cname in commands:
                df = retrieve_resource_data(cname, sname, version, COLUMNS)
                if len(df) == 0:
                    continue

                fig, axs = resource_allocation_plot(
                    df, cname, sname, col_name)
                titles = [0, 0]
                images = [0, 0]
                try:
                    # Splitting 1 image plot into 2 separate for better
                    # layout: one image for memory and one for time.
                    for i, ax in enumerate(axs):
                        titles[i] = ax.get_title()
                        ax.set_title("")
                        images[i] = _axis_to_png_data_uri(ax)
                finally:
                    # the combined figure is never stored, only its copies
                    plt.close(fig)

                values = (
                    ("img_mem", images[0]),
                    ("img_time", images[1]),
                    ('time', timestamp),
                    ("title_mem", titles[0]),
                    ("title_time", titles[1]),
                )

                for k, v in values:
                    redis_key = key_template % (
                        cname, sname, version, col_name, k)
                    r_client.delete(redis_key)
                    r_client.set(redis_key, v)

qiita_db/test/test_meta_util.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,27 @@ def test_generate_plugin_releases(self):
519519
'-', '').replace(':', '').replace(' ', '-')
520520
self.assertEqual(tgz_obs, [time])
521521

522+
def test_update_resource_allocation_redis(self):
    # populate redis (active=False, as documented for testing) and check
    # that the stored titles report the expected models
    cname = "Split libraries FASTQ"
    sname = "QIIMEq2"
    col_name = "samples * columns"
    version = "1.9.1"
    qdb.meta_util.update_resource_allocation_redis(False)

    expected_models = [
        ('title_mem',
         "model: "
         "k * log(x) + "
         "b * log(x)^2 + "
         "a * log(x)^3"),
        ('title_time', "model: a + b + log(x) * k"),
    ]
    for suffix, exp in expected_models:
        redis_key = 'resources$#%s$#%s$#%s$#%s:%s' % (
            cname, sname, version, col_name, suffix)
        obs = str(r_client.get(redis_key))
        # assertIn reports both strings on failure, unlike assertTrue
        self.assertIn(exp, obs)
542+
522543

523544
if __name__ == '__main__':
524545
main()

qiita_db/test/test_util.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1311,8 +1311,9 @@ def test_quick_mounts_purge(self):
13111311

13121312
class ResourceAllocationPlotTests(TestCase):
13131313
def setUp(self):
1314-
self.CNAME = "Split libraries FASTQ"
1315-
self.SNAME = "QIIMEq2"
1314+
self.cname = "Split libraries FASTQ"
1315+
self.sname = "QIIMEq2"
1316+
self.version = "1.9.1"
13161317
self.col_name = 'samples * columns'
13171318
self.columns = [
13181319
"sName", "sVersion", "cID", "cName", "processing_job_id",
@@ -1321,13 +1322,13 @@ def setUp(self):
13211322

13221323
# df is a dataframe that represents a table with columns specified in
13231324
# self.columns
1324-
self.df = qdb.util._retrieve_resource_data(
1325-
self.CNAME, self.SNAME, self.columns)
1325+
self.df = qdb.util.retrieve_resource_data(
1326+
self.cname, self.sname, self.version, self.columns)
13261327

13271328
def test_plot_return(self):
13281329
# check the plot returns correct objects
13291330
fig1, axs1 = qdb.util.resource_allocation_plot(
1330-
self.df, self.CNAME, self.SNAME, self.col_name)
1331+
self.df, self.cname, self.sname, self.col_name)
13311332
self.assertIsInstance(
13321333
fig1, Figure,
13331334
"Returned object fig1 is not a Matplotlib Figure")
@@ -1338,13 +1339,13 @@ def test_plot_return(self):
13381339

13391340
def test_minimize_const(self):
13401341
self.df = self.df[
1341-
(self.df.cName == self.CNAME) & (self.df.sName == self.SNAME)]
1342+
(self.df.cName == self.cname) & (self.df.sName == self.sname)]
13421343
self.df.dropna(subset=['samples', 'columns'], inplace=True)
13431344
self.df[self.col_name] = self.df.samples * self.df['columns']
13441345
fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False)
13451346

13461347
bm, options = qdb.util._resource_allocation_plot_helper(
1347-
self.df, axs[0], self.CNAME, self.SNAME, 'MaxRSSRaw',
1348+
self.df, axs[0], self.cname, self.sname, 'MaxRSSRaw',
13481349
qdb.util.MODELS_MEM, self.col_name)
13491350
# check that the algorithm chooses correct model for MaxRSSRaw and
13501351
# has 0 failures
@@ -1366,7 +1367,7 @@ def test_minimize_const(self):
13661367
# check that the algorithm chooses correct model for ElapsedRaw and
13671368
# has 1 failure
13681369
bm, options = qdb.util._resource_allocation_plot_helper(
1369-
self.df, axs[1], self.CNAME, self.SNAME, 'ElapsedRaw',
1370+
self.df, axs[1], self.cname, self.sname, 'ElapsedRaw',
13701371
qdb.util.MODELS_TIME, self.col_name)
13711372
k, a, b = options.x
13721373
failures_df = qdb.util._resource_allocation_failures(
@@ -1422,8 +1423,8 @@ def test_db_update(self):
14221423
qdb.util.update_resource_allocation_table(test=test_data)
14231424

14241425
for curr_cname, ids in types.items():
1425-
updated_df = qdb.util._retrieve_resource_data(
1426-
curr_cname, self.SNAME, self.columns)
1426+
updated_df = qdb.util.retrieve_resource_data(
1427+
curr_cname, self.sname, self.version, self.columns)
14271428
updated_ids_set = set(updated_df['processing_job_id'])
14281429
previous_ids_set = set(self.df['processing_job_id'])
14291430
for id in ids:

qiita_db/util.py

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,27 @@
9999
MODELS_TIME = [time_model1, time_model2, time_model3, time_model4]
100100

101101

102+
def get_model_name(model):
    """Returns the descriptive string associated with a fitting model

    Parameters
    ----------
    model : object
        The value to look up; compared with `==` against the mem_model1-4
        and time_model1-4 functions, in that order.

    Returns
    -------
    str
        The string registered for the first model that compares equal to
        `model`, or "Unknown model" if none does.
    """
    # NOTE(review): the strings presumably mirror the formula implemented by
    # each model function - confirm against the model definitions
    named_models = (
        (mem_model1, "k * log(x) + x * a + b"),
        (mem_model2, "k * log(x) + b * log(x)^2 + a"),
        (mem_model3, "k * log(x) + b * log(x)^2 + a * log(x)^3"),
        (mem_model4, "k * log(x) + b * log(x)^2 + a * log(x)^2.5"),
        (time_model1, "a + b + log(x) * k"),
        (time_model2, "a + b * x + log(x) * k"),
        (time_model3, "a + b * log(x)^2 + log(x) * k"),
        (time_model4, "a * log(x)^3 + b * log(x)^2 + log(x) * k"),
    )

    for known_model, description in named_models:
        if model == known_model:
            return description

    return "Unknown model"
121+
122+
102123
def scrub_data(s):
103124
r"""Scrubs data fields of characters not allowed by PostgreSQL
104125
@@ -2381,7 +2402,7 @@ def resource_allocation_plot(df, cname, sname, col_name):
23812402
return fig, axs
23822403

23832404

2384-
def _retrieve_resource_data(cname, sname, columns):
2405+
def retrieve_resource_data(cname, sname, version, columns):
23852406
with qdb.sql_connection.TRN:
23862407
sql = """
23872408
SELECT
@@ -2411,9 +2432,10 @@ def _retrieve_resource_data(cname, sname, columns):
24112432
ON pr.processing_job_id = sra.processing_job_id
24122433
WHERE
24132434
sc.name = %s
2414-
AND s.name = %s;
2435+
AND s.name = %s
2436+
AND s.version = %s
24152437
"""
2416-
qdb.sql_connection.TRN.add(sql, sql_args=[cname, sname])
2438+
qdb.sql_connection.TRN.add(sql, sql_args=[cname, sname, version])
24172439
res = qdb.sql_connection.TRN.execute_fetchindex()
24182440
df = pd.DataFrame(res, columns=columns)
24192441
return df
@@ -2482,15 +2504,18 @@ def _resource_allocation_plot_helper(
24822504
y_plot = best_model(x_plot, k, a, b)
24832505
ax.plot(x_plot, y_plot, linewidth=1, color='orange')
24842506

2507+
cmin_value = min(y_plot)
2508+
cmax_value = max(y_plot)
2509+
24852510
maxi = naturalsize(df[curr].max(), gnu=True) if curr == "MaxRSSRaw" else \
24862511
timedelta(seconds=float(df[curr].max()))
2487-
cmax = naturalsize(max(y_plot), gnu=True) if curr == "MaxRSSRaw" else \
2488-
timedelta(seconds=float(max(y_plot)))
2512+
cmax = naturalsize(cmax_value, gnu=True) if curr == "MaxRSSRaw" else \
2513+
str(timedelta(seconds=round(cmax_value, 2))).rstrip('0').rstrip('.')
24892514

24902515
mini = naturalsize(df[curr].min(), gnu=True) if curr == "MaxRSSRaw" else \
24912516
timedelta(seconds=float(df[curr].min()))
2492-
cmin = naturalsize(min(y_plot), gnu=True) if curr == "MaxRSSRaw" else \
2493-
timedelta(seconds=float(min(y_plot)))
2517+
cmin = naturalsize(cmin_value, gnu=True) if curr == "MaxRSSRaw" else \
2518+
str(timedelta(seconds=round(cmin_value, 2))).rstrip('0').rstrip('.')
24942519

24952520
x_plot = np.array(df[col_name])
24962521
failures_df = _resource_allocation_failures(
@@ -2500,7 +2525,10 @@ def _resource_allocation_plot_helper(
25002525
ax.scatter(failures_df[col_name], failures_df[curr], color='red', s=3,
25012526
label="failures")
25022527

2503-
ax.set_title(f'{cname}: {sname}\n real: {mini} || {maxi}\n'
2528+
ax.set_title(
2529+
f'k||a||b: {k}||{a}||{b}\n'
2530+
f'model: {get_model_name(best_model)}\n'
2531+
f'real: {mini} || {maxi}\n'
25042532
f'calculated: {cmin} || {cmax}\n'
25052533
f'failures: {failures}')
25062534
ax.legend(loc='upper left')

scripts/all-qiita-cron-job

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@ qiita-cron-job empty-trash-upload-folder
44
qiita-cron-job generate-biom-and-metadata-release
55
qiita-cron-job purge-filepaths
66
qiita-cron-job update-redis-stats
7+
qiita-cron-job update-resource-allocation-redis
78
qiita-cron-job generate-plugin-releases
89
qiita-cron-job purge-json-web-tokens

scripts/qiita-cron-job

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ from qiita_db.util import (
1616
quick_mounts_purge as qiita_quick_mounts_purge)
1717
from qiita_db.meta_util import (
1818
update_redis_stats as qiita_update_redis_stats,
19+
update_resource_allocation_redis as qiita_update_resource_allocation_redis,
1920
generate_biom_and_metadata_release as
2021
qiita_generate_biom_and_metadata_release,
2122
generate_plugin_releases as qiita_generate_plugin_releases)
@@ -48,6 +49,11 @@ def update_redis_stats():
4849
qiita_update_redis_stats()
4950

5051

52+
@commands.command()
53+
def update_resource_allocation_redis():
54+
qiita_update_resource_allocation_redis()
55+
56+
5157
@commands.command()
5258
def generate_biom_and_metadata_release():
5359
qiita_generate_biom_and_metadata_release('public')

0 commit comments

Comments
 (0)