|
9 | 9 | from tornado.web import authenticated, HTTPError
|
10 | 10 | from tornado.gen import coroutine
|
11 | 11 |
|
12 |
| -from os.path import basename, getsize, join, isdir |
| 12 | +from os.path import basename, getsize, join, isdir, getctime |
13 | 13 | from os import walk
|
14 | 14 |
|
15 | 15 | from .base_handlers import BaseHandler
|
|
23 | 23 | from qiita_db.util import (filepath_id_to_rel_path, get_db_files_base_dir,
|
24 | 24 | get_filepath_information, get_mountpoint,
|
25 | 25 | filepath_id_to_object_id, get_data_types,
|
26 |
| - retrieve_filepaths) |
| 26 | + retrieve_filepaths, get_work_base_dir) |
27 | 27 | from qiita_db.meta_util import validate_filepath_access_by_user
|
28 | 28 | from qiita_db.metadata_template.sample_template import SampleTemplate
|
29 | 29 | from qiita_db.metadata_template.prep_template import PrepTemplate
|
|
35 | 35 | from uuid import uuid4
|
36 | 36 | from base64 import b64encode
|
37 | 37 | from datetime import datetime, timedelta, timezone
|
| 38 | +from tempfile import mkdtemp |
| 39 | +from zipfile import ZipFile |
| 40 | +from io import BytesIO |
38 | 41 |
|
39 | 42 |
|
40 | 43 | class BaseHandlerDownload(BaseHandler):
|
@@ -374,6 +377,138 @@ def get(self, path):
|
374 | 377 | self.finish()
|
375 | 378 |
|
376 | 379 |
|
class DownloadDataReleaseFromPrep(BaseHandlerDownload):
    """Serve the data-release (data-delivery) ZIP of a single prep.

    Only users whose level is 'admin' or 'web-lab admin' can download it.
    """

    # "user/human" friendly names for the known BIOM filenames
    _HUMAN_NAMES = {
        'ec.biom': 'KEGG Enzyme (EC)',
        'per-gene.biom': 'Per gene Predictions',
        'none.biom': 'Per genome Predictions',
        'cell_counts.biom': 'Cell counts',
        'pathway.biom': 'KEGG Pathway',
        'ko.biom': 'KEGG Ontology (KO)',
        'rna_copy_counts.biom': 'RNA copy counts',
    }

    @staticmethod
    def _latest_bioms_and_coverage(study_id, prep_id):
        """Find the BIOMs (and coverage file) generated from a prep.

        Parameters
        ----------
        study_id : int
            The study whose BIOM artifacts are inspected
        prep_id : int
            Only artifacts whose first prep has this id are considered

        Returns
        -------
        dict of {str: [artifact, dict]}, str or None
            The BIOMs keyed by filename, each with its artifact and its
            filepath entry, and the path of the first ``coverages.tgz``
            found (None if there is none)
        """
        bioms = {}
        coverages = None
        for a in Study(study_id).artifacts(artifact_type='BIOM'):
            prep_templates = a.prep_templates
            # skip artifacts without a prep or that belong to another prep
            if not prep_templates or prep_templates[0].id != prep_id:
                continue
            biom = None
            for fp in a.filepaths:
                if fp['fp_type'] == 'biom':
                    biom = fp
                if coverages is None and 'coverages.tgz' == basename(fp['fp']):
                    coverages = fp['fp']
            if biom is None:
                continue
            biom_fn = basename(biom['fp'])
            # there is a small but real chance that the same prep has the
            # same artifact more than once, so keep the latest one
            current = bioms.get(biom_fn)
            if (current is None or
                    getctime(biom['fp']) > getctime(current[1]['fp'])):
                bioms[biom_fn] = [a, biom]
        return bioms, coverages

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, prep_template_id):
        """Build and send a ZIP with the files of a data-prep release.

        The ZIP holds the sample information (for the samples passed from
        the prep), the prep information, the latest BIOM of each filename
        generated from the prep, the first ``coverages.tgz`` found and a
        ``README.txt`` listing those files and, for each BIOM, the
        command, software, version and parameters of its ancestors.

        Parameters
        ----------
        prep_template_id : str or int
            The id of the prep; it is converted with ``int``

        Raises
        ------
        HTTPError
            403 if the current user is not 'admin' or 'web-lab admin'
        """
        # local import: keeps the block usable without touching the
        # file-level imports
        from shutil import rmtree

        user = self.current_user
        if user.level not in ('admin', 'web-lab admin'):
            raise HTTPError(403, reason="%s doesn't have access to download "
                            "the data release files" % user.email)

        pid = int(prep_template_id)
        pt = PrepTemplate(pid)
        sid = pt.study_id
        st = SampleTemplate(sid)
        date = datetime.now().strftime('%m%d%y-%H%M%S')

        sample_fn = f'sample_information_from_prep_{pid}.tsv'
        prep_fn = f'prep_information_{pid}.tsv'
        readme = [
            f'Delivery created on {date}',
            '',
            f'Host (human) removal: {pt.artifact.human_reads_filter_method}',
            '',
            # this is not changing in the near future so just leaving
            # hardcoded for now
            'Main woltka reference: WoLr2, more info visit: '
            'https://ftp.microbio.me/pub/wol2/',
            '',
            f"Qiita's prep: https://qiita.ucsd.edu/study/description/{sid}"
            f"?prep_id={pid}",
            '',
            f'Sample information: {sample_fn}',
            f'Prep information: {prep_fn}',
            '',
        ]

        # files that already exist on disk, as [path, name inside the zip]
        stored = []
        bioms, coverages = self._latest_bioms_and_coverage(sid, pid)
        for a, fp in bioms.values():
            aname = basename(fp['fp'])
            # prefix with the artifact id so the names in the zip are unique
            nname = f'{a.id}_{aname}'
            stored.append([fp['fp'], nname])

            hname = self._HUMAN_NAMES.get(aname, '')
            readme.append(f'{nname}\t{hname}')

            # one line per processing step that led to this BIOM
            for an in set(a.ancestors.nodes()):
                p = an.processing_parameters
                if p is None:
                    continue
                command = p.command
                software = command.software
                readme.append(f'\t{command.name}\t{software.name}'
                              f'\t{software.version}\t{p.dump()}')

        # if a coverage was found, add it to the list of files
        if coverages is not None:
            fn = basename(coverages)
            readme.append(f'{fn}\tcoverage files')
            stored.append([coverages, fn])

        # the generated files live in a scratch directory that has to be
        # removed once the zip is built (or if anything fails), otherwise
        # every request leaves its files behind in the work directory
        td = mkdtemp(dir=get_work_base_dir())
        try:
            sample_fp = join(td, sample_fn)
            st.to_dataframe(samples=list(pt)).to_csv(sample_fp, sep='\t')

            prep_fp = join(td, prep_fn)
            pt.to_dataframe().to_csv(prep_fp, sep='\t')

            readme_fp = join(td, 'README.txt')
            with open(readme_fp, 'w') as fh:
                fh.write('\n'.join(readme))

            files = [[sample_fp, sample_fn], [prep_fp, prep_fn]]
            files.extend(stored)
            files.append([readme_fp, basename(readme_fp)])

            # the zip is built in memory so nothing in td is needed after
            zp = BytesIO()
            with ZipFile(zp, 'w') as zipf:
                for fp, fn in files:
                    zipf.write(fp, fn)
            payload = zp.getvalue()
            zp.close()
        finally:
            rmtree(td, ignore_errors=True)

        zp_fn = f'data_release_{pid}_{date}.zip'
        self.set_header('Content-Type', 'application/zip')
        self.set_header("Content-Disposition", f"attachment; filename={zp_fn}")
        self.write(payload)
        self.finish()
| 510 | + |
| 511 | + |
377 | 512 | class DownloadPublicHandler(BaseHandlerDownload):
|
378 | 513 | @coroutine
|
379 | 514 | @execute_as_transaction
|
|
0 commit comments