Skip to content

Commit b558499

Browse files
committed
Address @charles-cowart's comment and add some extra tests
1 parent 3e0d0c3 commit b558499

File tree

2 files changed

+36
-3
lines changed

2 files changed

+36
-3
lines changed

qiita_client/qiita_client.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
MAX_RETRIES = 3
2727
MIN_TIME_SLEEP = 180
2828
MAX_TIME_SLEEP = 360
29-
MIN_FILEPATH_SIZE = 100
29+
BLANK_FILE_THRESHOLD = 100
3030

3131

3232
class ArtifactInfo(object):
@@ -575,6 +575,10 @@ def artifact_and_preparation_files(self, artifact_id,
575575
576576
Returns
577577
-------
578+
dict
579+
files available in the artifact
580+
pandas.DataFrame
581+
the prep information file for that artifact
578582
579583
Raises
580584
------
@@ -621,7 +625,7 @@ def _process_files_per_sample_fastq(self, files, prep_info,
621625
used_prefixes = []
622626
for i, (fwd, rev) in enumerate(zip_longest(fwds, revs)):
623627
fwd_fn = basename(fwd['filepath'])
624-
file_smaller_than_min = fwd['size'] < MIN_FILEPATH_SIZE
628+
file_smaller_than_min = fwd['size'] < BLANK_FILE_THRESHOLD
625629

626630
# iterate over run prefixes and make sure only one matches
627631
run_prefix = None
@@ -647,7 +651,7 @@ def _process_files_per_sample_fastq(self, files, prep_info,
647651
# matches the run prefix:
648652
rev_fn = basename(rev['filepath'])
649653
if not file_smaller_than_min:
650-
file_smaller_than_min = rev['size'] < MIN_FILEPATH_SIZE
654+
file_smaller_than_min = rev['size'] < BLANK_FILE_THRESHOLD
651655
if not rev_fn.startswith(run_prefix):
652656
raise ValueError(
653657
'Reverse read does not match run prefix. run_prefix: '

qiita_client/tests/test_qiita_client.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,35 @@ def test_artifact_and_preparation_files(self):
309309
self.assertEqual(fobs, fexp)
310310
self.assertEqual(piobs.shape, (2, 1))
311311

312+
# just fwd
313+
files = {
314+
'raw_forward_seqs': [
315+
{'filepath': '/X/file_3_R1.fastq.gz', 'size': 101},
316+
{'filepath': '/X/file_1_R1.fastq.gz', 'size': 99},
317+
{'filepath': '/X/file_2_R1.fastq.gz', 'size': 101}]}
318+
prep_info = pd.DataFrame.from_dict({
319+
'run_prefix': {"sample.1": 'file_1',
320+
"sample.2": 'file_2',
321+
"sample.3": 'file_3'}}, dtype=str)
322+
prep_info.index.name = 'sample_name'
323+
fobs, piobs = self.tester._process_files_per_sample_fastq(
324+
files, prep_info, False)
325+
fexp = {
326+
'sample.1': ({'filepath': '/X/file_1_R1.fastq.gz', 'size': 99},
327+
None),
328+
'sample.2': ({'filepath': '/X/file_2_R1.fastq.gz', 'size': 101},
329+
None),
330+
'sample.3': ({'filepath': '/X/file_3_R1.fastq.gz', 'size': 101},
331+
None)}
332+
self.assertEqual(fobs, fexp)
333+
self.assertEqual(piobs.shape, (3, 1))
334+
335+
fobs, piobs = self.tester._process_files_per_sample_fastq(
336+
files, prep_info, True)
337+
del fexp['sample.1']
338+
self.assertEqual(fobs, fexp)
339+
self.assertEqual(piobs.shape, (2, 1))
340+
312341

313342
if __name__ == '__main__':
314343
main()

0 commit comments

Comments
 (0)