Skip to content

Commit 19b4fe3

Browse files
committed
fixes 1880
1 parent a369cad commit 19b4fe3

File tree

4 files changed

+157
-22
lines changed

4 files changed

+157
-22
lines changed

qiita_db/metadata_template/base_metadata_template.py

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,7 +1124,7 @@ def extend(self, md_template):
11241124
self.validate(self.columns_restrictions)
11251125
self.generate_files()
11261126

1127-
def update(self, md_template):
1127+
def _update(self, md_template):
11281128
r"""Update values in the template
11291129
11301130
Parameters
@@ -1143,22 +1143,19 @@ def update(self, md_template):
11431143
passed md_template
11441144
"""
11451145
with qdb.sql_connection.TRN:
1146-
# Clean and validate the metadata template given
1147-
new_map = self._clean_validate_template(
1148-
md_template, self.study_id, current_columns=self.categories())
11491146
# Retrieving current metadata
11501147
current_map = self.to_dataframe()
11511148

11521149
# simple validations of sample ids and column names
1153-
samples_diff = set(new_map.index).difference(current_map.index)
1150+
samples_diff = set(md_template.index).difference(current_map.index)
11541151
if samples_diff:
11551152
raise qdb.exceptions.QiitaDBError(
11561153
'The new template differs from what is stored '
11571154
'in database by these samples names: %s'
11581155
% ', '.join(samples_diff))
11591156

1160-
if not set(current_map.columns).issuperset(new_map.columns):
1161-
columns_diff = set(new_map.columns).difference(
1157+
if not set(current_map.columns).issuperset(md_template.columns):
1158+
columns_diff = set(md_template.columns).difference(
11621159
current_map.columns)
11631160
raise qdb.exceptions.QiitaDBError(
11641161
'Some of the columns in your template are not present in '
@@ -1168,15 +1165,16 @@ def update(self, md_template):
11681165

11691166
# In order to speed up some computation, let's compare only the
11701167
# common columns and rows. current_map.columns and
1171-
# current_map.index are supersets of new_map.columns and
1172-
# new_map.index, respectivelly, so this will not fail
1173-
current_map = current_map[new_map.columns].loc[new_map.index]
1168+
# current_map.index are supersets of md_template.columns and
1169+
# md_template.index, respectively, so this will not fail
1170+
current_map = current_map[
1171+
md_template.columns].loc[md_template.index]
11741172

11751173
# Get the values that we need to change
11761174
# diff_map is a DataFrame that holds boolean values. If a cell is
1177-
# True, means that the new_map is different from the current_map
1178-
# while False means that the cell has the same value
1179-
diff_map = current_map != new_map
1175+
# True, it means that the md_template is different from the
1176+
# current_map while False means that the cell has the same value
1177+
diff_map = current_map != md_template
11801178
# ne_stacked holds a MultiIndexed DataFrame in which the first
11811179
# level of indexing is the sample_name and the second one is the
11821180
# columns. We only have 1 column, which holds if that
@@ -1195,8 +1193,8 @@ def update(self, md_template):
11951193
changed.index.names = ['sample_name', 'column']
11961194
# the combination of np.where and boolean indexing produces
11971195
# a numpy array with only the values that actually changed
1198-
# between the current_map and new_map
1199-
changed_to = new_map.values[np.where(diff_map)]
1196+
# between the current_map and md_template
1197+
changed_to = md_template.values[np.where(diff_map)]
12001198

12011199
# to_update is a MultiIndexed DataFrame, in which the index 0 is
12021200
# the samples and the index 1 is the columns, we define these
@@ -1235,12 +1233,57 @@ def update(self, md_template):
12351233
""".format(self._table_name(self._id), sql_eq_cols,
12361234
single_value, sql_cols)
12371235
for sample in samples_to_update:
1238-
sample_vals = [new_map[col][sample] for col in cols_to_update]
1236+
sample_vals = [md_template[col][sample]
1237+
for col in cols_to_update]
12391238
sample_vals.insert(0, sample)
12401239
qdb.sql_connection.TRN.add(sql, sample_vals)
12411240

12421241
qdb.sql_connection.TRN.execute()
12431242

1243+
def update(self, md_template):
1244+
r"""Update values in the template
1245+
1246+
Parameters
1247+
----------
1248+
md_template : DataFrame
1249+
The metadata template file contents indexed by samples ids
1250+
1251+
Raises
1252+
------
1253+
QiitaDBError
1254+
If md_template and db do not have the same sample ids
1255+
If md_template and db do not have the same column headers
1256+
If self.can_be_updated is not True
1257+
QiitaDBWarning
1258+
If there are no differences between the contents of the DB and the
1259+
passed md_template
1260+
"""
1261+
with qdb.sql_connection.TRN:
1262+
# Clean and validate the metadata template given
1263+
new_map = self._clean_validate_template(
1264+
md_template, self.study_id, current_columns=self.categories())
1265+
self._update(new_map)
1266+
self.validate(self.columns_restrictions)
1267+
self.generate_files()
1268+
1269+
def extend_and_update(self, md_template):
1270+
"""Performs the update and extend operations at once
1271+
1272+
Parameters
1273+
----------
1274+
md_template : DataFrame
1275+
The metadata template contents indexed by sample ids
1276+
1277+
See Also
1278+
--------
1279+
update
1280+
extend
1281+
"""
1282+
with qdb.sql_connection.TRN:
1283+
md_template = self._clean_validate_template(
1284+
md_template, self.study_id, current_columns=self.categories())
1285+
self._common_extend_steps(md_template)
1286+
self._update(md_template)
12441287
self.validate(self.columns_restrictions)
12451288
self.generate_files()
12461289

qiita_db/metadata_template/test/test_prep_template.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,8 +1280,7 @@ def test_extend_update(self):
12801280
self.metadata['str_column']['SKB7.640196'] = 'NEW VAL'
12811281

12821282
npt.assert_warns(
1283-
qdb.exceptions.QiitaDBWarning, pt.extend, self.metadata)
1284-
pt.update(self.metadata)
1283+
qdb.exceptions.QiitaDBWarning, pt.extend_and_update, self.metadata)
12851284

12861285
sql = "SELECT * FROM qiita.prep_{0}".format(pt.id)
12871286
obs = [dict(o) for o in self.conn_handler.execute_fetchall(sql)]

qiita_db/metadata_template/test/test_sample_template.py

Lines changed: 96 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,8 +1732,8 @@ def test_extend_update(self):
17321732
md_ext['TOT_NITRO'] = pd.Series(['val1', 'val2', 'val3', 'val4'],
17331733
index=md_ext.index)
17341734

1735-
npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, md_ext)
1736-
st.update(md_ext)
1735+
npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend_and_update,
1736+
md_ext)
17371737
exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id,
17381738
"%s.Sample3" % st.id, "%s.Sample4" % st.id}
17391739
self.assertEqual(st._get_sample_ids(), exp_sample_ids)
@@ -1800,6 +1800,100 @@ def test_extend_update(self):
18001800
for s_id in exp_sample_ids:
18011801
self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id])
18021802

1803+
# def test_extend_and_update(self):
1804+
# st = qdb.metadata_template.sample_template.SampleTemplate.create(
1805+
# self.metadata, self.new_study)
1806+
# self.metadata_dict['Sample4'] = {
1807+
# 'physical_specimen_location': 'location1',
1808+
# 'physical_specimen_remaining': 'true',
1809+
# 'dna_extracted': 'true',
1810+
# 'sample_type': 'type1',
1811+
# 'collection_timestamp': '2014-05-29 12:24:15',
1812+
# 'host_subject_id': 'NotIdentified',
1813+
# 'Description': 'Test Sample 4',
1814+
# 'latitude': '42.42',
1815+
# 'longitude': '41.41',
1816+
# 'taxon_id': '9606',
1817+
# 'scientific_name': 'homo sapiens'}
1818+
#
1819+
# # Change a couple of values on the existing samples to test that
1820+
# # they actually change
1821+
# self.metadata_dict['Sample1']['Description'] = 'Changed'
1822+
# self.metadata_dict['Sample2']['dna_extracted'] = 'Changed dynamic'
1823+
#
1824+
# md_ext = pd.DataFrame.from_dict(self.metadata_dict, orient='index',
1825+
# dtype=str)
1826+
# md_ext['TOT_NITRO'] = pd.Series(['val1', 'val2', 'val3', 'val4'],
1827+
# index=md_ext.index)
1828+
#
1829+
# npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, md_ext)
1830+
#
1831+
# exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id,
1832+
# "%s.Sample3" % st.id, "%s.Sample4" % st.id}
1833+
# self.assertEqual(st._get_sample_ids(), exp_sample_ids)
1834+
# self.assertEqual(len(st), 4)
1835+
# exp_categories = {'collection_timestamp', 'description',
1836+
# 'dna_extracted', 'host_subject_id', 'latitude',
1837+
# 'longitude', 'physical_specimen_location',
1838+
# 'physical_specimen_remaining', 'sample_type',
1839+
# 'scientific_name', 'taxon_id', 'tot_nitro'}
1840+
# self.assertItemsEqual(st.categories(), exp_categories)
1841+
# exp_dict = {
1842+
# "%s.Sample1" % st.id: {
1843+
# 'collection_timestamp': '2014-05-29 12:24:15',
1844+
# 'description': "Changed",
1845+
# 'dna_extracted': 'true',
1846+
# 'host_subject_id': "NotIdentified",
1847+
# 'latitude': '42.42',
1848+
# 'longitude': '41.41',
1849+
# 'physical_specimen_location': "location1",
1850+
# 'physical_specimen_remaining': 'true',
1851+
# 'sample_type': "type1",
1852+
# 'taxon_id': '9606',
1853+
# 'scientific_name': 'homo sapiens',
1854+
# 'tot_nitro': 'val1'},
1855+
# "%s.Sample2" % st.id: {
1856+
# 'collection_timestamp': '2014-05-29 12:24:15',
1857+
# 'description': "Test Sample 2",
1858+
# 'dna_extracted': 'Changed dynamic',
1859+
# 'host_subject_id': "NotIdentified",
1860+
# 'latitude': '4.2',
1861+
# 'longitude': '1.1',
1862+
# 'physical_specimen_location': "location1",
1863+
# 'physical_specimen_remaining': 'true',
1864+
# 'sample_type': "type1",
1865+
# 'taxon_id': '9606',
1866+
# 'scientific_name': 'homo sapiens',
1867+
# 'tot_nitro': 'val2'},
1868+
# "%s.Sample3" % st.id: {
1869+
# 'collection_timestamp': '2014-05-29 12:24:15',
1870+
# 'description': "Test Sample 3",
1871+
# 'dna_extracted': 'true',
1872+
# 'host_subject_id': "NotIdentified",
1873+
# 'latitude': '4.8',
1874+
# 'longitude': '4.41',
1875+
# 'physical_specimen_location': "location1",
1876+
# 'physical_specimen_remaining': 'true',
1877+
# 'sample_type': "type1",
1878+
# 'taxon_id': '9606',
1879+
# 'scientific_name': 'homo sapiens',
1880+
# 'tot_nitro': 'val3'},
1881+
# '%s.Sample4' % st.id: {
1882+
# 'physical_specimen_location': 'location1',
1883+
# 'physical_specimen_remaining': 'true',
1884+
# 'dna_extracted': 'true',
1885+
# 'sample_type': 'type1',
1886+
# 'collection_timestamp': '2014-05-29 12:24:15',
1887+
# 'host_subject_id': 'NotIdentified',
1888+
# 'description': 'Test Sample 4',
1889+
# 'latitude': '42.42',
1890+
# 'longitude': '41.41',
1891+
# 'taxon_id': '9606',
1892+
# 'scientific_name': 'homo sapiens',
1893+
# 'tot_nitro': 'val4'}}
1894+
# for s_id in exp_sample_ids:
1895+
# self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id])
1896+
18031897
def test_to_dataframe(self):
18041898
st = qdb.metadata_template.sample_template.SampleTemplate.create(
18051899
self.metadata, self.new_study)

qiita_ware/dispatchable.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,7 @@ def update_sample_template(study_id, fp):
199199
# deleting previous uploads and inserting new one
200200
st = SampleTemplate(study_id)
201201
df = load_template_to_dataframe(fp)
202-
st.extend(df)
203-
st.update(df)
202+
st.extend_and_update(df)
204203
remove(fp)
205204

206205
# join all the warning messages into one. Note that this info

0 commit comments

Comments
 (0)