Skip to content

Commit 55e460e

Browse files
authored
Test util db (#3406)
* Update to DB qiita.slurm_resource_allocations
* connected tests to database
* Update util.py
* debugging changes to test
* Update test_util.py
* Update test_util.py
* Tests update
* Update test_meta_util.py
* Updates to @antgonza comments
* Updates to @charles-cowart comments
1 parent c0cdb4b commit 55e460e

File tree

6 files changed

+988
-28
lines changed

6 files changed

+988
-28
lines changed

qiita_db/support_files/patches/test_db_sql/92.sql

+927-3
Large diffs are not rendered by default.

qiita_db/test/test_meta_util.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ def _get_daily_stats():
286286
('num_studies_ebi', b'1', r_client.get),
287287
('num_samples_ebi', b'27', r_client.get),
288288
('number_samples_ebi_prep', b'54', r_client.get),
289-
('num_processing_jobs', b'14', r_client.get)
289+
('num_processing_jobs', b'474', r_client.get)
290290
# not testing img/time for simplicity
291291
# ('img', r_client.get),
292292
# ('time', r_client.get)

qiita_db/test/test_software.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -494,13 +494,20 @@ def test_processing_jobs(self):
494494
'6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f',
495495
'063e553b-327c-4818-ab4a-adfe58e49860',
496496
'ac653cb5-76a6-4a45-929e-eb9b2dee6b63']
497-
exp = [qdb.processing_job.ProcessingJob(j) for j in exp_jids]
498-
self.assertCountEqual(qdb.software.Command(1).processing_jobs, exp)
497+
498+
jobs = qdb.software.Command(1).processing_jobs
499+
set_jobs = set(jobs)
500+
501+
# comparing the length of jobs and set_jobs, since there could've been
502+
# duplicates in the tests
503+
self.assertEqual(len(jobs), len(set_jobs))
504+
505+
exp = set([qdb.processing_job.ProcessingJob(j) for j in exp_jids])
506+
self.assertEqual(len(set_jobs & exp), len(exp_jids))
499507

500508
exp_jids = ['bcc7ebcd-39c1-43e4-af2d-822e3589f14d']
501509
exp = [qdb.processing_job.ProcessingJob(j) for j in exp_jids]
502510
self.assertCountEqual(qdb.software.Command(2).processing_jobs, exp)
503-
504511
self.assertCountEqual(qdb.software.Command(4).processing_jobs, [])
505512

506513

qiita_db/test/test_user.py

-7
Original file line numberDiff line numberDiff line change
@@ -491,17 +491,10 @@ def test_jobs(self):
491491
limit=1, ignore_status=ignore_status), [
492492
PJ('b72369f9-a886-4193-8d3d-f7b504168e75')])
493493

494-
# no jobs
495-
self.assertEqual(qdb.user.User('[email protected]').jobs(
496-
ignore_status=ignore_status), [])
497-
498494
# generates expected jobs
499495
jobs = qdb.user.User('[email protected]').jobs()
500496
self.assertEqual(jobs, [])
501497

502-
# no jobs
503-
self.assertEqual(qdb.user.User('[email protected]').jobs(), [])
504-
505498
def test_update_email(self):
506499
user = qdb.user.User('[email protected]')
507500
with self.assertRaisesRegex(IncorrectEmailError, 'Bad email given:'):

qiita_db/test/test_util.py

+13-9
Original file line numberDiff line numberDiff line change
@@ -1309,19 +1309,23 @@ def test_quick_mounts_purge(self):
13091309

13101310
class ResourceAllocationPlotTests(TestCase):
13111311
def setUp(self):
1312-
1313-
self.PATH_TO_DATA = ('./qiita_db/test/test_data/'
1314-
'jobs_2024-02-21.tsv.gz')
1315-
self.CNAME = "Validate"
1316-
self.SNAME = "Diversity types - alpha_vector"
1312+
self.CNAME = "Split libraries FASTQ"
1313+
self.SNAME = "QIIMEq2"
13171314
self.col_name = 'samples * columns'
1318-
self.df = pd.read_csv(self.PATH_TO_DATA, sep='\t',
1319-
dtype={'extra_info': str})
1315+
self.columns = [
1316+
"sName", "sVersion", "cID", "cName", "processing_job_id",
1317+
"parameters", "samples", "columns", "input_size", "extra_info",
1318+
"MaxRSSRaw", "ElapsedRaw"]
1319+
1320+
# df is a dataframe that represents a table with columns specified in
1321+
# self.columns
1322+
self.df = qdb.util._retrieve_resource_data(
1323+
self.CNAME, self.SNAME, self.columns)
13201324

13211325
def test_plot_return(self):
13221326
# check the plot returns correct objects
13231327
fig1, axs1 = qdb.util.resource_allocation_plot(
1324-
self.PATH_TO_DATA, self.CNAME, self.SNAME, self.col_name)
1328+
self.df, self.CNAME, self.SNAME, self.col_name)
13251329
self.assertIsInstance(
13261330
fig1, Figure,
13271331
"Returned object fig1 is not a Matplotlib Figure")
@@ -1346,7 +1350,7 @@ def test_minimize_const(self):
13461350
failures_df = qdb.util._resource_allocation_failures(
13471351
self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')
13481352
failures = failures_df.shape[0]
1349-
self.assertEqual(bm, qdb.util.mem_model4, msg="""Best memory model
1353+
self.assertEqual(bm, qdb.util.mem_model3, msg="""Best memory model
13501354
doesn't match""")
13511355
self.assertEqual(failures, 0, "Number of failures must be 0")
13521356

qiita_db/util.py

+37-5
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,10 @@
7373
from email.mime.multipart import MIMEMultipart
7474
from email.mime.text import MIMEText
7575

76-
import pandas as pd
7776
from datetime import timedelta
7877
import matplotlib.pyplot as plt
7978
import numpy as np
79+
import pandas as pd
8080
from scipy.optimize import minimize
8181

8282
# memory constant functions defined for @resource_allocation_plot
@@ -2341,7 +2341,7 @@ def send_email(to, subject, body):
23412341
smtp.close()
23422342

23432343

2344-
def resource_allocation_plot(file, cname, sname, col_name):
2344+
def resource_allocation_plot(df, cname, sname, col_name):
23452345
"""Builds resource allocation plot for given DataFrame and jobs
23462346
23472347
Parameters
@@ -2361,9 +2361,6 @@ def resource_allocation_plot(file, cname, sname, col_name):
23612361
Returns a matplotlib object with a plot
23622362
"""
23632363

2364-
df = pd.read_csv(file, sep='\t', dtype={'extra_info': str})
2365-
df['ElapsedRawTime'] = pd.to_timedelta(df.ElapsedRawTime)
2366-
df = df[(df.cName == cname) & (df.sName == sname)]
23672364
df.dropna(subset=['samples', 'columns'], inplace=True)
23682365
df[col_name] = df.samples * df['columns']
23692366
df[col_name] = df[col_name].astype(int)
@@ -2383,6 +2380,41 @@ def resource_allocation_plot(file, cname, sname, col_name):
23832380
return fig, axs
23842381

23852382

2383+
def _retrieve_resource_data(cname, sname, columns):
2384+
with qdb.sql_connection.TRN:
2385+
sql = """
2386+
SELECT
2387+
s.name AS sName,
2388+
s.version AS sVersion,
2389+
sc.command_id AS cID,
2390+
sc.name AS cName,
2391+
pr.processing_job_id AS processing_job_id,
2392+
pr.command_parameters AS parameters,
2393+
sra.samples AS samples,
2394+
sra.columns AS columns,
2395+
sra.input_size AS input_size,
2396+
sra.extra_info AS extra_info,
2397+
sra.memory_used AS memory_used,
2398+
sra.walltime_used AS walltime_used
2399+
FROM
2400+
qiita.processing_job pr
2401+
JOIN
2402+
qiita.software_command sc ON pr.command_id = sc.command_id
2403+
JOIN
2404+
qiita.software s ON sc.software_id = s.software_id
2405+
JOIN
2406+
qiita.slurm_resource_allocations sra
2407+
ON pr.processing_job_id = sra.processing_job_id
2408+
WHERE
2409+
sc.name = %s
2410+
AND s.name = %s;
2411+
"""
2412+
qdb.sql_connection.TRN.add(sql, sql_args=[cname, sname])
2413+
res = qdb.sql_connection.TRN.execute_fetchindex()
2414+
df = pd.DataFrame(res, columns=columns)
2415+
return df
2416+
2417+
23862418
def _resource_allocation_plot_helper(
23872419
df, ax, cname, sname, curr, models, col_name):
23882420
"""Helper function for resource allocation plot. Builds plot for MaxRSSRaw

0 commit comments

Comments
 (0)