Skip to content

Commit 8cef17e

Browse files
antgonzawasademcmk3ElDeveloper
authored
rm create qiime mapping file (#3043)
* Version 092020 (#3034) * inject study_type in EBI and improvements to current automatic processing pipeline (#3023) * inject study_type in ebi and improvements to current automatic processing pipeline * addressing @ElDeveloper comments * some general fixes/additions for next release (#3026) * some general fixes/additions for next release * adding test for not None job.release_validator_job * fix #2839 * fix #2868 (#3028) * fix #2868 * 2nd round * fix errors * more changes * fix errors * fix ProcessingJobTest * fix PY_PATCH * add missing TRN.add * encapsulated_query -> perform_as_transaction * fix #3022 (#3030) * fix #3022 * adding tests * fix #2320 (#3031) * fix #2320 * adding prints to debug * children -> 1 * APIArtifactHandlerTest -> APIArtifactHandlerTests * configure_biom * qdb.util.activate_or_update_plugins * improving code * almost there * add values.template * fix filepaths * filepaths -> files * fixing errors * add prep.artifact insertion * addressing @ElDeveloper comments * fix artifact_definition active command * != -> == * Added three tutorial sections to the Qiita documentation (#3032) * Added three tutorial sections to the Qiita documentation: 'Retrieving Public Data for Own Analysis' and 'Processing public data retrieved with redbiom' to the redbiom tab, and 'Statistical Analysis to Justify Clinical Trial Sample Size Tutorial' to the analyzing samples tab. * Update redbiom.rst * Update redbiom.rst * Update redbiom.rst * Further updates to redbiom.rst and the Stats tutorial. 
* update redbiom.rst * Finished proof-reading * Placed all three tutorials/sections together under Introduction to the download and analysis of public Qiita data * added a new introduction, with links to the three sections * Added figures to stats tutorial and contexts explanation * Added figures to stats tutorial and contexts explanation * Apply suggestions from code review [skip ci] Co-authored-by: Yoshiki Vázquez Baeza <[email protected]> Co-authored-by: Antonio Gonzalez <[email protected]> Co-authored-by: Yoshiki Vázquez Baeza <[email protected]> * 092020 (#3033) * 092020 * connect artifact with job * rm INSERT qiita.artifact_processing_job * Apply suggestions from code review [skip ci] Co-authored-by: Yoshiki Vázquez Baeza <[email protected]> Co-authored-by: Yoshiki Vázquez Baeza <[email protected]> Co-authored-by: Daniel McDonald <[email protected]> Co-authored-by: Mirte Kuijpers <[email protected]> Co-authored-by: Yoshiki Vázquez Baeza <[email protected]> * rm create_qiime_mapping_file * fixing some tests * fixing more tests * fix even more tests * rm npt.assert_warns * qiime-map -> sample-file * update not_merged_samples.txt * adding @ElDeveloper changes Co-authored-by: Daniel McDonald <[email protected]> Co-authored-by: Mirte Kuijpers <[email protected]> Co-authored-by: Yoshiki Vázquez Baeza <[email protected]>
1 parent d1ddfbb commit 8cef17e

File tree

15 files changed

+89
-231
lines changed

15 files changed

+89
-231
lines changed

qiita_db/analysis.py

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,13 +1040,16 @@ def _build_mapping_file(self, samples, rename_dup_samples=False):
10401040
with qdb.sql_connection.TRN:
10411041
all_ids = set()
10421042
to_concat = []
1043+
sample_infos = dict()
10431044
for aid, samps in samples.items():
1044-
pt = qdb.artifact.Artifact(aid).prep_templates[0]
1045-
qiime_map_fp = pt.qiime_map_fp
1045+
artifact = qdb.artifact.Artifact(aid)
1046+
si = artifact.study.sample_template
1047+
if si not in sample_infos:
1048+
sample_infos[si] = si.to_dataframe()
1049+
pt = artifact.prep_templates[0]
1050+
pt_df = pt.to_dataframe()
10461051

1047-
# Parse the mapping file
1048-
qm = qdb.metadata_template.util.load_template_to_dataframe(
1049-
qiime_map_fp, index='#SampleID')
1052+
qm = pt_df.join(sample_infos[si], lsuffix="_prep")
10501053

10511054
# if we are not going to merge the duplicated samples
10521055
# append the aid to the sample name
@@ -1076,15 +1079,6 @@ def _build_mapping_file(self, samples, rename_dup_samples=False):
10761079

10771080
merged_map = pd.concat(to_concat)
10781081

1079-
# forcing QIIME column order
1080-
cols = merged_map.columns.values.tolist()
1081-
cols.remove('BarcodeSequence')
1082-
cols.remove('LinkerPrimerSequence')
1083-
cols.remove('Description')
1084-
cols = (['BarcodeSequence', 'LinkerPrimerSequence'] + cols +
1085-
['Description'])
1086-
merged_map = merged_map[cols]
1087-
10881082
# Save the mapping file
10891083
_, base_fp = qdb.util.get_mountpoint(self._table)[0]
10901084
mapping_fp = join(base_fp, "%d_analysis_mapping.txt" % self._id)

qiita_db/handlers/prep_template.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,21 +66,25 @@ def get(self, prep_id):
6666
'investigation_type': prep info investigation type
6767
'study': study that the prep info belongs to
6868
'status': prep info status
69-
'qiime-map': the path to the qiime mapping file
69+
'sample-file': the path to the sample information file
7070
'prep-file': the path to the prep info file
7171
"""
7272
with qdb.sql_connection.TRN:
7373
pt = _get_prep_template(prep_id)
7474
prep_files = [fp for _, fp in pt.get_filepaths()
7575
if 'qiime' not in basename(fp)]
7676
artifact = pt.artifact.id if pt.artifact is not None else None
77+
sid = pt.study_id
7778
response = {
7879
'data_type': pt.data_type(),
7980
'artifact': artifact,
8081
'investigation_type': pt.investigation_type,
81-
'study': pt.study_id,
82+
'study': sid,
8283
'status': pt.status,
83-
'qiime-map': pt.qiime_map_fp,
84+
# get_filepaths returns an ordered list of [filepath_id,
85+
# filepath] and we want the first pair (index 0)
86+
'sample-file': qdb.study.Study(
87+
sid).sample_template.get_filepaths()[0][1],
8488
# The first element in the prep_files is the newest
8589
# prep information file - hence the correct one
8690
'prep-file': prep_files[0]

qiita_db/handlers/tests/test_prep_template.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ def test_get(self):
5454
self.assertEqual(obs['investigation_type'], 'Metagenomics')
5555
self.assertEqual(obs['study'], 1)
5656
self.assertEqual(obs['status'], 'private')
57-
self.assertTrue(obs['qiime-map'].startswith(
58-
path_builder('1_prep_1_qiime_')))
57+
self.assertTrue(obs['sample-file'].startswith(
58+
path_builder('1_')))
5959
self.assertTrue(obs['prep-file'].startswith(
6060
path_builder('1_prep_1_')))
6161

qiita_db/metadata_template/prep_template.py

Lines changed: 0 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,8 @@
99
from os.path import join
1010
from time import strftime
1111
from copy import deepcopy
12-
import warnings
1312
from skbio.util import find_duplicates
1413

15-
import pandas as pd
16-
1714
from qiita_core.exceptions import IncompetentQiitaDeveloperError
1815
import qiita_db as qdb
1916
from .constants import (PREP_TEMPLATE_COLUMNS, TARGET_GENE_DATA_TYPES,
@@ -519,116 +516,6 @@ def generate_files(self, samples=None, columns=None):
519516
fp_id = qdb.util.convert_to_id("prep_template", "filepath_type")
520517
self.add_filepath(fp, fp_id=fp_id)
521518

522-
# creating QIIME mapping file
523-
self.create_qiime_mapping_file()
524-
525-
def create_qiime_mapping_file(self):
526-
"""This creates the QIIME mapping file and links it in the db.
527-
528-
Returns
529-
-------
530-
filepath : str
531-
The filepath of the created QIIME mapping file
532-
533-
Raises
534-
------
535-
ValueError
536-
If the prep template is not a subset of the sample template
537-
QiitaDBWarning
538-
If the QIIME-required columns are not present in the template
539-
540-
Notes
541-
-----
542-
We cannot ensure that the QIIME-required columns are present in the
543-
metadata map. However, we have to generate a QIIME-compliant mapping
544-
file. Since the user may need a QIIME mapping file, but not these
545-
QIIME-required columns, we are going to create them and
546-
populate them with the value XXQIITAXX.
547-
"""
548-
with qdb.sql_connection.TRN:
549-
rename_cols = {
550-
'barcode': 'BarcodeSequence',
551-
'primer': 'LinkerPrimerSequence',
552-
'description': 'Description',
553-
}
554-
555-
if 'reverselinkerprimer' in self.categories():
556-
rename_cols['reverselinkerprimer'] = 'ReverseLinkerPrimer'
557-
new_cols = ['BarcodeSequence', 'LinkerPrimerSequence',
558-
'ReverseLinkerPrimer']
559-
else:
560-
new_cols = ['BarcodeSequence', 'LinkerPrimerSequence']
561-
562-
# Retrieve the latest sample template
563-
# Since we sorted the filepath retrieval, the first result contains
564-
# the filepath that we want. `retrieve_filepaths` returns a
565-
# 3-tuple, in which the fp is the second element
566-
sample_template_fp = qdb.util.retrieve_filepaths(
567-
"sample_template_filepath", "study_id", self.study_id,
568-
sort='descending')[0]['fp']
569-
570-
# reading files via pandas
571-
st = qdb.metadata_template.util.load_template_to_dataframe(
572-
sample_template_fp)
573-
pt = self.to_dataframe()
574-
575-
st_sample_names = set(st.index)
576-
pt_sample_names = set(pt.index)
577-
578-
if not pt_sample_names.issubset(st_sample_names):
579-
raise ValueError(
580-
"Prep template is not a sub set of the sample template, "
581-
"file: %s - samples: %s"
582-
% (sample_template_fp,
583-
', '.join(pt_sample_names-st_sample_names)))
584-
585-
mapping = pt.join(st, lsuffix="_prep")
586-
mapping.rename(columns=rename_cols, inplace=True)
587-
588-
# Pre-populate the QIIME-required columns with the value XXQIITAXX
589-
index = mapping.index
590-
placeholder = ['XXQIITAXX'] * len(index)
591-
missing = []
592-
for val in rename_cols.values():
593-
if val not in mapping:
594-
missing.append(val)
595-
mapping[val] = pd.Series(placeholder, index=index)
596-
597-
if missing:
598-
warnings.warn(
599-
"Some columns required to generate a QIIME-compliant "
600-
"mapping file are not present in the template. A "
601-
"placeholder value (XXQIITAXX) has been used to populate "
602-
"these columns. Missing columns: %s"
603-
% ', '.join(sorted(missing)),
604-
qdb.exceptions.QiitaDBWarning)
605-
606-
# Gets the original mapping columns and readjusts the order to comply
607-
# with QIIME requirements
608-
cols = mapping.columns.values.tolist()
609-
cols.remove('BarcodeSequence')
610-
cols.remove('LinkerPrimerSequence')
611-
cols.remove('Description')
612-
new_cols.extend(cols)
613-
new_cols.append('Description')
614-
mapping = mapping[new_cols]
615-
616-
# figuring out the filepath for the QIIME map file
617-
_id, fp = qdb.util.get_mountpoint('templates')[0]
618-
filepath = join(fp, '%d_prep_%d_qiime_%s.txt' % (self.study_id,
619-
self.id, strftime("%Y%m%d-%H%M%S")))
620-
621-
# Save the mapping file
622-
mapping.to_csv(filepath, index_label='#SampleID', na_rep='',
623-
sep='\t', encoding='utf-8')
624-
625-
# adding the fp to the object
626-
self.add_filepath(
627-
filepath,
628-
fp_id=qdb.util.convert_to_id("qiime_map", "filepath_type"))
629-
630-
return filepath
631-
632519
@property
633520
def status(self):
634521
"""The status of the prep template

qiita_db/metadata_template/test/test_prep_template.py

Lines changed: 6 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -954,9 +954,9 @@ def _common_creation_checks(self, pt, fp_count, name):
954954
for s_id in exp_sample_ids:
955955
self.assertEqual(pt[s_id]._to_dict(), exp_dict[s_id])
956956

957-
# prep and qiime files have been created
957+
# prep files have been created
958958
filepaths = pt.get_filepaths()
959-
self.assertEqual(len(filepaths), 2)
959+
self.assertEqual(len(filepaths), 1)
960960

961961
def test_validate_restrictions(self):
962962
PT = qdb.metadata_template.prep_template.PrepTemplate
@@ -1019,28 +1019,9 @@ def test_generate_files(self):
10191019
fp_count = qdb.util.get_count("qiita.filepath")
10201020
self.tester.generate_files()
10211021
obs = qdb.util.get_count("qiita.filepath")
1022-
# We just make sure that the count has been increased by 2, since
1022+
# We just make sure that the count has been increased by 1, since
10231023
# the contents of the files have been tested elsewhere.
1024-
self.assertEqual(obs, fp_count + 2)
1025-
1026-
def test_create_qiime_mapping_file(self):
1027-
pt = qdb.metadata_template.prep_template.PrepTemplate(1)
1028-
1029-
# creating prep template file
1030-
_id, fp = qdb.util.get_mountpoint('templates')[0]
1031-
1032-
obs_fp = pt.create_qiime_mapping_file()
1033-
exp_fp = join(fp, '1_prep_1_qiime_19700101-000000.txt')
1034-
1035-
obs = pd.read_csv(obs_fp, sep='\t', infer_datetime_format=False,
1036-
parse_dates=False, index_col=False, comment='\t')
1037-
exp = pd.read_csv(
1038-
exp_fp, sep='\t', infer_datetime_format=False,
1039-
parse_dates=False, index_col=False, comment='\t')
1040-
obs = obs.reindex(sorted(obs.columns), axis=1)
1041-
exp = exp.reindex(sorted(exp.columns), axis=1)
1042-
1043-
assert_frame_equal(obs, exp, check_like=True)
1024+
self.assertEqual(obs, fp_count + 1)
10441025

10451026
def test_create_data_type_id(self):
10461027
"""Creates a new PrepTemplate passing the data_type_id"""
@@ -1119,9 +1100,9 @@ def test_create_warning(self):
11191100
for s_id in exp_sample_ids:
11201101
self.assertEqual(pt[s_id]._to_dict(), exp_dict[s_id])
11211102

1122-
# prep and qiime files have been created
1103+
# prep files have been created
11231104
filepaths = pt.get_filepaths()
1124-
self.assertEqual(len(filepaths), 2)
1105+
self.assertEqual(len(filepaths), 1)
11251106

11261107
# cleaning
11271108
qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id)

qiita_db/metadata_template/test/test_sample_template.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1365,9 +1365,9 @@ def test_generate_files(self):
13651365
fp_count = qdb.util.get_count("qiita.filepath")
13661366
self.tester.generate_files()
13671367
obs = qdb.util.get_count("qiita.filepath")
1368-
# We just make sure that the count has been increased by 6, since
1368+
# We just make sure that the count has been increased by 3, since
13691369
# the contents of the files have been tested elsewhere.
1370-
self.assertEqual(obs, fp_count + 5)
1370+
self.assertEqual(obs, fp_count + 3)
13711371

13721372
def test_to_file(self):
13731373
"""to file writes a tab delimited file with all the metadata"""

0 commit comments

Comments
 (0)