
Commit d8cb8db

Merge branch 'dev' of github.com:biocore/qiita
2 parents: c0e715b + ef26847

33 files changed: +693 -261 lines

CHANGELOG.md (+13 -1)

@@ -1,5 +1,17 @@
 # Qiita changelog
 
+Version 2025.01
+---------------
+
+Deployed on January 15th, 2025
+
+* The Analysis owner is now displayed in the analysis list and on the individual analysis page.
+* Admins can now use the per-preparation "Download Data Release" button to get a "BIOM" release; this version focuses on NPH data releases.
+* Improved complete_job creation time, which should result in Qiita jobs ([multiple steps](https://qiita.ucsd.edu/static/doc/html/dev/resource_allocation.html)) finishing faster; for benchmarks visit [patch 93.sql](https://github.com/qiita-spots/qiita/blob/master/qiita_db/support_files/patches/93.sql).
+* SPP improvements: TellSeq support added; the plugin was refactored to allow easier additions like TellSeq in the future. Job restart greatly improved. Much improved handling of sample names and ids that contain substrings like 'I1' and 'R2'. The new SequenceCount job can count sequences and base pairs in parallel for any list of fastq files.
+* Other general fixes: [#3440](https://github.com/qiita-spots/qiita/pull/3440), [#3445](https://github.com/qiita-spots/qiita/pull/3445), [#3446](https://github.com/qiita-spots/qiita/pull/3446).
+
 Version 2024.10
 ---------------
 
@@ -206,7 +218,7 @@ Version 2021.11
 * Allow chucked download of metadata files in analyses; this allows to process large meta-analysis (like those for The Microsetta Initiative) without worker blockage.
 * Added to the qp-qiime2 plugin the possibility of filtering tables based on system available "FeatureData[Sequence]"; to start we added 90/100/150 bps bloom tables.
 * Now we can instantiate a study via their title (Study.from_title); this will facilitate orchestration with qebil.
-* Speed up Study listing for admins and general users; the admin study display came down from 20 to 2 seconds.
+* Speed up Study listing for admins and general users; the admin study display came down from 20 to 2 seconds.
 * Fixed the following issues: [3142](https://github.com/qiita-spots/qiita/issues/3142), [3149](https://github.com/qiita-spots/qiita/issues/3149), [3150](https://github.com/qiita-spots/qiita/issues/3150), [3119](https://github.com/qiita-spots/qiita/issues/3119), and [3160](https://github.com/qiita-spots/qiita/issues/3160).

qiita_core/__init__.py (+1 -1)

@@ -6,4 +6,4 @@
 # The full license is in the file LICENSE, distributed with this software.
 # -----------------------------------------------------------------------------
 
-__version__ = "2024.10"
+__version__ = "2025.01"

qiita_db/__init__.py (+1 -1)

@@ -27,7 +27,7 @@
 from . import user
 from . import processing_job
 
-__version__ = "2024.10"
+__version__ = "2025.01"
 
 __all__ = ["analysis", "artifact", "archive", "base", "commands",
            "environment_manager", "exceptions", "investigation", "logger",

qiita_db/analysis.py (+16)

@@ -215,6 +215,22 @@ def create(cls, owner, name, description, from_default=False,
         job.submit()
         return instance
 
+    @classmethod
+    def delete_analysis_artifacts(cls, _id):
+        """Deletes the artifacts linked to an analysis and then the analysis
+
+        Parameters
+        ----------
+        _id : int
+            The analysis id
+        """
+        analysis = cls(_id)
+        aids = [a.id for a in analysis.artifacts if not a.parents]
+        aids.sort(reverse=True)
+        for aid in aids:
+            qdb.artifact.Artifact.delete(aid)
+        cls.delete(analysis.id)
+
     @classmethod
     def delete(cls, _id):
         """Deletes an analysis

qiita_db/archive.py (+17 -2)

@@ -116,6 +116,7 @@ def get_merging_scheme_from_job(cls, job):
         acmd = job.command
         parent = job.input_artifacts[0]
         parent_pparameters = parent.processing_parameters
+        phms = None
         if parent_pparameters is None:
             parent_cmd_name = None
             parent_parameters = None
@@ -125,12 +126,26 @@ def get_merging_scheme_from_job(cls, job):
             parent_cmd_name = pcmd.name
             parent_parameters = parent_pparameters.values
             parent_merging_scheme = pcmd.merging_scheme
-
-        return qdb.util.human_merging_scheme(
+            if not parent_merging_scheme['ignore_parent_command']:
+                gp = parent.parents[0]
+                gp_params = gp.processing_parameters
+                if gp_params is not None:
+                    gp_cmd = gp_params.command
+                    phms = qdb.util.human_merging_scheme(
+                        parent_cmd_name, parent_merging_scheme,
+                        gp_cmd.name, gp_cmd.merging_scheme,
+                        parent_parameters, [], gp_params.values)
+
+        hms = qdb.util.human_merging_scheme(
             acmd.name, acmd.merging_scheme,
             parent_cmd_name, parent_merging_scheme,
             job.parameters.values, [], parent_parameters)
 
+        if phms is not None:
+            hms = qdb.util.merge_overlapping_strings(hms, phms)
+
+        return hms
+
     @classmethod
     def retrieve_feature_values(cls, archive_merging_scheme=None,
                                 features=None):
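
The overlap merge is the key new step: hms describes this job plus its parent, while phms describes the parent plus its grandparent, so the two strings share the parent's portion. A minimal standalone sketch of that idea, under the assumption that qdb.util.merge_overlapping_strings joins two strings on their longest shared boundary (the command names below are made up):

    def merge_overlapping_strings(hms, phms):
        # append phms to hms, dropping the longest prefix of phms that
        # already appears as a suffix of hms
        for i in range(len(phms), 0, -1):
            if hms.endswith(phms[:i]):
                return hms + phms[i:]
        return hms + phms

    # 'Deblur | Split libraries' + 'Split libraries | Pick OTUs'
    # -> 'Deblur | Split libraries | Pick OTUs'
    print(merge_overlapping_strings('Deblur | Split libraries',
                                    'Split libraries | Pick OTUs'))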

qiita_db/handlers/processing_job.py (+3 -1)

@@ -146,7 +146,9 @@ def post(self, job_id):
             cmd, values_dict={'job_id': job_id,
                               'payload': self.request.body.decode(
                                   'ascii')})
-        job = qdb.processing_job.ProcessingJob.create(job.user, params)
+        # complete_job are unique so it is fine to force them to be created
+        job = qdb.processing_job.ProcessingJob.create(
+            job.user, params, force=True)
         job.submit()
 
         self.finish()
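
Because each job has at most one complete_job, the handler can skip the duplicate-parameters scan entirely. A hedged sketch of the call shape, assuming user and params are the qdb objects the handler builds above:

    import qiita_db as qdb

    def recreate_complete_job(user, params):
        # default ProcessingJob.create scans for queued/running/successful
        # jobs with identical parameters and raises ValueError on a match;
        # force=True skips that scan, which is what makes complete_job
        # creation cheap and is safe because these payloads are unique
        return qdb.processing_job.ProcessingJob.create(
            user, params, force=True)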

qiita_db/handlers/tests/test_processing_job.py (+2 -2)

@@ -233,9 +233,9 @@ def test_post_job_success(self):
         self.assertIsNotNone(cj)
         # additionally we can test that job.print_trace is correct
         self.assertEqual(job.trace, [
-            f'{job.id} [Not Available]: Validate | '
+            f'{job.id} [Not Available] (success): Validate | '
             '-p qiita -N 1 -n 1 --mem 90gb --time 150:00:00 --nice=10000',
-            f' {cj.id} [{cj.external_id}] | '
+            f' {cj.id} [{cj.external_id}] (success)| '
             '-p qiita -N 1 -n 1 --mem 16gb --time 10:00:00 --nice=10000'])
 
     def test_post_job_success_with_archive(self):

qiita_db/metadata_template/prep_template.py (+28 -4)

@@ -135,7 +135,7 @@ def create(cls, md_template, study, data_type, investigation_type=None,
         # data_type being created - if possible
         if investigation_type is None:
             if data_type_str in TARGET_GENE_DATA_TYPES:
-                investigation_type = 'Amplicon'
+                investigation_type = 'AMPLICON'
             elif data_type_str == 'Metagenomic':
                 investigation_type = 'WGS'
             elif data_type_str == 'Metatranscriptomic':
@@ -280,8 +280,22 @@ def delete(cls, id_):
             qdb.sql_connection.TRN.add(sql, args)
             archived_artifacts = set(
                 qdb.sql_connection.TRN.execute_fetchflatten())
+            ANALYSIS = qdb.analysis.Analysis
             if archived_artifacts:
                 for aid in archived_artifacts:
+                    # before we can delete the archived artifact, we need
+                    # to delete the analyses where they were used.
+                    sql = """SELECT analysis_id
+                             FROM qiita.analysis
+                             WHERE analysis_id IN (
+                                 SELECT DISTINCT analysis_id
+                                 FROM qiita.analysis_sample
+                                 WHERE artifact_id IN %s)"""
+                    qdb.sql_connection.TRN.add(sql, [tuple([aid])])
+                    analyses = set(
+                        qdb.sql_connection.TRN.execute_fetchflatten())
+                    for _id in analyses:
+                        ANALYSIS.delete_analysis_artifacts(_id)
                     qdb.artifact.Artifact.delete(aid)
 
         # Delete the prep template filepaths
@@ -794,14 +808,24 @@ def _get_node_info(workflow, node):
 
             parent_cmd_name = None
             parent_merging_scheme = None
+            phms = None
             if pcmd is not None:
                 parent_cmd_name = pcmd.name
                 parent_merging_scheme = pcmd.merging_scheme
+                if not parent_merging_scheme['ignore_parent_command']:
+                    phms = _get_node_info(workflow, parent)
 
-            return qdb.util.human_merging_scheme(
+            hms = qdb.util.human_merging_scheme(
                 ccmd.name, ccmd.merging_scheme, parent_cmd_name,
                 parent_merging_scheme, cparams, [], pparams)
 
+            # if the parent should not ignore its parent command, then we
+            # need to merge the previous result with the new one
+            if phms is not None:
+                hms = qdb.util.merge_overlapping_strings(hms, phms)
+
+            return hms
+
         def _get_predecessors(workflow, node):
             # recursive method to get predecessors of a given node
             pred = []
@@ -857,7 +881,7 @@ def _get_predecessors(workflow, node):
                 'artifact transformation']
             merging_schemes = {
                 qdb.archive.Archive.get_merging_scheme_from_job(j): {
-                    x: y.id for x, y in j.outputs.items()}
+                    x: str(y.id) for x, y in j.outputs.items()}
                 # we are going to select only the jobs that were a 'success', that
                 # are not 'hidden' and that have an output - jobs that are not
                 # hidden and a successs but that do not have outputs are jobs which
@@ -975,7 +999,7 @@ def _get_predecessors(workflow, node):
                 init_artifacts = {
                     wkartifact_type: f'{starting_job.id}:'}
             else:
-                init_artifacts = {wkartifact_type: self.artifact.id}
+                init_artifacts = {wkartifact_type: str(self.artifact.id)}
 
             cmds_to_create.reverse()
             current_job = None
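
The delete() change encodes an ordering constraint: an archived artifact cannot be removed while an analysis still references it, so those analyses (and their artifacts) must go first. A self-contained sketch of that cleanup order, with hypothetical callables standing in for the qdb operations above:

    def delete_archived_artifacts(archived_artifacts, analyses_for_artifact,
                                  delete_analysis, delete_artifact):
        # mirror of the loop above: drop every analysis that uses each
        # archived artifact before dropping the artifact itself
        for aid in archived_artifacts:
            for analysis_id in analyses_for_artifact(aid):
                delete_analysis(analysis_id)
            delete_artifact(aid)

    # toy run: artifact 42 is used by analysis 7, artifact 43 by none
    delete_archived_artifacts(
        [42, 43],
        analyses_for_artifact=lambda aid: [7] if aid == 42 else [],
        delete_analysis=lambda i: print(f'analysis {i} deleted'),
        delete_artifact=lambda i: print(f'artifact {i} deleted'))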

qiita_db/metadata_template/test/test_prep_template.py (+3 -3)

@@ -911,7 +911,7 @@ def _common_creation_checks(self, pt, fp_count, name):
         self.assertEqual(pt.data_type(), self.data_type)
         self.assertEqual(pt.data_type(ret_id=True), self.data_type_id)
         self.assertEqual(pt.artifact, None)
-        self.assertEqual(pt.investigation_type, 'Amplicon')
+        self.assertEqual(pt.investigation_type, 'AMPLICON')
         self.assertEqual(pt.study_id, self.test_study.id)
         self.assertEqual(pt.status, "sandbox")
         exp_sample_ids = {'%s.SKB8.640193' % self.test_study.id,
@@ -1076,7 +1076,7 @@ def test_create_warning(self):
         self.assertEqual(pt.data_type(), self.data_type)
         self.assertEqual(pt.data_type(ret_id=True), self.data_type_id)
         self.assertEqual(pt.artifact, None)
-        self.assertEqual(pt.investigation_type, 'Amplicon')
+        self.assertEqual(pt.investigation_type, 'AMPLICON')
         self.assertEqual(pt.study_id, self.test_study.id)
         self.assertEqual(pt.status, 'sandbox')
         exp_sample_ids = {'%s.SKB8.640193' % self.test_study.id,
@@ -1247,7 +1247,7 @@ def test_investigation_type_setter(self):
         """Able to update the investigation type"""
         pt = qdb.metadata_template.prep_template.PrepTemplate.create(
             self.metadata, self.test_study, self.data_type_id)
-        self.assertEqual(pt.investigation_type, 'Amplicon')
+        self.assertEqual(pt.investigation_type, 'AMPLICON')
         pt.investigation_type = "Other"
         self.assertEqual(pt.investigation_type, 'Other')
         with self.assertRaises(qdb.exceptions.QiitaDBColumnError):

qiita_db/processing_job.py (+46 -43)

@@ -582,10 +582,10 @@ def create(cls, user, parameters, force=False):
         TTRN = qdb.sql_connection.TRN
         with TTRN:
             command = parameters.command
-
-            # check if a job with the same parameters already exists
-            sql = """SELECT processing_job_id, email, processing_job_status,
-                            COUNT(aopj.artifact_id)
+            if not force:
+                # check if a job with the same parameters already exists
+                sql = """SELECT processing_job_id, email,
+                                processing_job_status, COUNT(aopj.artifact_id)
                      FROM qiita.processing_job
                      LEFT JOIN qiita.processing_job_status
                          USING (processing_job_status_id)
@@ -596,41 +596,42 @@ def create(cls, user, parameters, force=False):
                      GROUP BY processing_job_id, email,
                               processing_job_status"""
 
-            # we need to use ILIKE because of booleans as they can be
-            # false or False
-            params = []
-            for k, v in parameters.values.items():
-                # this is necessary in case we have an Iterable as a value
-                # but that is string
-                if isinstance(v, Iterable) and not isinstance(v, str):
-                    for vv in v:
-                        params.extend([k, str(vv)])
-                else:
-                    params.extend([k, str(v)])
-
-            if params:
-                # divided by 2 as we have key-value pairs
-                len_params = int(len(params)/2)
-                sql = sql.format(' AND ' + ' AND '.join(
-                    ["command_parameters->>%s ILIKE %s"] * len_params))
-                params = [command.id] + params
-                TTRN.add(sql, params)
-            else:
-                # the sql variable expects the list of parameters but if there
-                # is no param we need to replace the {0} with an empty string
-                TTRN.add(sql.format(""), [command.id])
-
-            # checking that if the job status is success, it has children
-            # [2] status, [3] children count
-            existing_jobs = [r for r in TTRN.execute_fetchindex()
-                             if r[2] != 'success' or r[3] > 0]
-            if existing_jobs and not force:
-                raise ValueError(
-                    'Cannot create job because the parameters are the same as '
-                    'jobs that are queued, running or already have '
-                    'succeeded:\n%s' % '\n'.join(
-                        ["%s: %s" % (jid, status)
-                         for jid, _, status, _ in existing_jobs]))
+                # we need to use ILIKE because of booleans as they can be
+                # false or False
+                params = []
+                for k, v in parameters.values.items():
+                    # this is necessary in case we have an Iterable as a value
+                    # but that is string
+                    if isinstance(v, Iterable) and not isinstance(v, str):
+                        for vv in v:
+                            params.extend([k, str(vv)])
+                    else:
+                        params.extend([k, str(v)])
+
+                if params:
+                    # divided by 2 as we have key-value pairs
+                    len_params = int(len(params)/2)
+                    sql = sql.format(' AND ' + ' AND '.join(
+                        ["command_parameters->>%s ILIKE %s"] * len_params))
+                    params = [command.id] + params
+                    TTRN.add(sql, params)
+                else:
+                    # the sql variable expects the list of parameters but if
+                    # there is no param we need to replace the {0} with an
+                    # empty string
+                    TTRN.add(sql.format(""), [command.id])
+
+                # checking that if the job status is success, it has children
+                # [2] status, [3] children count
+                existing_jobs = [r for r in TTRN.execute_fetchindex()
+                                 if r[2] != 'success' or r[3] > 0]
+                if existing_jobs:
+                    raise ValueError(
+                        'Cannot create job because the parameters are the '
+                        'same as jobs that are queued, running or already '
+                        'have succeeded:\n%s' % '\n'.join(
+                            ["%s: %s" % (jid, status)
+                             for jid, _, status, _ in existing_jobs]))
 
             sql = """INSERT INTO qiita.processing_job
                         (email, command_id, command_parameters,
@@ -2052,23 +2053,25 @@ def complete_processing_job(self):
     def trace(self):
         """ Returns as a text array the full trace of the job, from itself
         to validators and complete jobs"""
-        lines = [f'{self.id} [{self.external_id}]: '
+        lines = [f'{self.id} [{self.external_id}] ({self.status}): '
                  f'{self.command.name} | {self.resource_allocation_info}']
         cjob = self.complete_processing_job
         if cjob is not None:
-            lines.append(f' {cjob.id} [{cjob.external_id}] | '
+            lines.append(f' {cjob.id} [{cjob.external_id}] ({cjob.status})| '
                          f'{cjob.resource_allocation_info}')
         vjob = self.release_validator_job
         if vjob is not None:
             lines.append(f'  {vjob.id} [{vjob.external_id}] '
-                         f'| {vjob.resource_allocation_info}')
+                         f' ({vjob.status}) | '
+                         f'{vjob.resource_allocation_info}')
         for v in self.validator_jobs:
-            lines.append(f'   {v.id} [{v.external_id}]: '
+            lines.append(f'   {v.id} [{v.external_id}] ({v.status}): '
                          f'{v.command.name} | {v.resource_allocation_info}')
            cjob = v.complete_processing_job
            if cjob is not None:
                lines.append(f'    {cjob.id} [{cjob.external_id}] '
-                             f'| {cjob.resource_allocation_info}')
+                             f'({cjob.status}) | '
+                             f'{cjob.resource_allocation_info}')
         return lines
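
The duplicate check above flattens the parameter dict into alternating key/value pairs, one command_parameters->>%s ILIKE %s clause per pair. A standalone sketch of that flattening (the sample parameter values are made up):

    from collections.abc import Iterable

    def flatten_params(values):
        # each key/value becomes a (key, str(value)) pair; iterable,
        # non-string values contribute one pair per element
        params = []
        for k, v in values.items():
            if isinstance(v, Iterable) and not isinstance(v, str):
                for vv in v:
                    params.extend([k, str(vv)])
            else:
                params.extend([k, str(v)])
        return params

    # {'trim': 100, 'tables': ['a.biom', 'b.biom']} flattens to
    # ['trim', '100', 'tables', 'a.biom', 'tables', 'b.biom']
    print(flatten_params({'trim': 100, 'tables': ['a.biom', 'b.biom']}))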
