Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add taxonomic analysis and human reads removal wf #192

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
version: 1.2
workflows:
- name: main
subclass: Galaxy
publish: true
primaryDescriptorPath: /Taxonomic-Analysis-of-SARS-CoV-2-Wastewater-Samples-with-Human-Read-Removal.ga
testParameterFiles:
- /Taxonomic-Analysis-of-SARS-CoV-2-Wastewater-Samples-with-Human-Read-Removal-tests.yml
authors:
- name: Polina Polunina
orcid: 0000-0002-0507-4602
- name: "B\xE9r\xE9nice Batut"
orcid: 0000-0001-9852-1987
- name: Wolfgang Maier
orcid: 0000-0002-9464-6640
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
- doc: Test outline for Taxonomic-Analysis-of-SARS-CoV-2-Wastewater-Samples-with-Human-Read-Removal
job:
SARS-CoV-2 reference genome:
class: File
path: test-data/SARS-CoV-2 reference genome.fasta
filetype: fasta
Paired Collection:
class: Collection
collection_type: list:paired
elements:
- class: Collection
type: paired
identifier: SRR12596170.fastq
elements:
- class: File
identifier: forward
path: test-data/Paired Collection_forward.fastqsanger.gz
- class: File
identifier: reverse
path: test-data/Paired Collection_reverse.fastqsanger.gz
- class: Collection
type: paired
identifier: SRR12596172.fastq
elements:
- class: File
identifier: forward
path: test-data/Paired Collection_forward.fastqsanger.gz
- class: File
identifier: reverse
path: test-data/Paired Collection_reverse.fastqsanger.gz
outputs:
kraken2 report output:
element_tests:
SRR12596170.fastq:
asserts:
has_text:
text: "root"
SRR12596172.fastq:
asserts:
has_text:
text: "unclassified"
Converted Kraken2 output:
element_tests:
SRR12596170.fastq:
asserts:
has_text:
text: "root"
SRR12596172.fastq:
asserts:
has_text:
text: "root"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"a_galaxy_workflow": "true", "annotation": "The workflow for SARS-CoV-2 wastewater Illumina-sequenced metatranscriptomics data analysis", "creator": [{"class": "Person", "identifier": "0000-0002-0507-4602", "name": "Polina Polunina"}, {"class": "Person", "identifier": "0000-0001-9852-1987", "name": "B\u00e9r\u00e9nice Batut"}, {"class": "Person", "identifier": "0000-0002-9464-6640", "name": "Wolfgang Maier"}], "format-version": "0.1", "license": "MIT", "release": "0.1", "name": "Taxonomic Analysis of SARS-CoV-2 Wastewater Samples with Human Read Removal", "steps": {"0": {"annotation": "SARS-CoV-2 reference fasta sequence (typically NC_045512.2, alias MN908947.3 of isolate Wuhan-Hu-1)", "content_id": null, "errors": null, "id": 0, "input_connections": {}, "inputs": [{"description": "SARS-CoV-2 reference fasta sequence (typically NC_045512.2, alias MN908947.3 of isolate Wuhan-Hu-1)", "name": "SARS-CoV-2 reference genome"}], "label": "SARS-CoV-2 reference genome", "name": "Input dataset", "outputs": [], "position": {"left": 14.870905894625793, "top": 0.0}, "tool_id": null, "tool_state": "{\"optional\": false, \"format\": [\"fasta\", \"fasta.gz\"], \"tag\": \"\"}", "tool_version": null, "type": "data_input", "uuid": "f16ff677-b40e-4479-b54f-96e705e82134", "when": null, "workflow_outputs": []}, "1": {"annotation": "Illumina reads with fastqsanger encoding", "content_id": null, "errors": null, "id": 1, "input_connections": {}, "inputs": [{"description": "Illumina reads with fastqsanger encoding", "name": "Paired Collection"}], "label": "Paired Collection", "name": "Input dataset collection", "outputs": [], "position": {"left": 0.0, "top": 374.662597070828}, "tool_id": null, "tool_state": "{\"optional\": false, \"format\": [\"fastqsanger\", \"fastqsanger.gz\"], \"tag\": \"\", \"collection_type\": \"list:paired\"}", "tool_version": null, "type": "data_collection_input", "uuid": "27a89012-46f7-4c46-b82d-7eac01a3024c", "when": null, "workflow_outputs": []}, "2": 
{"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/fastp/fastp/0.20.1+galaxy0", "errors": null, "id": 2, "input_connections": {"single_paired|paired_input": {"id": 1, "output_name": "output"}}, "inputs": [], "label": null, "name": "fastp", "outputs": [{"name": "output_paired_coll", "type": "input"}, {"name": "report_html", "type": "html"}, {"name": "report_json", "type": "json"}], "position": {"left": 283.89489706096515, "top": 224.05028983352798}, "post_job_actions": {"HideDatasetActionreport_json": {"action_arguments": {}, "action_type": "HideDatasetAction", "output_name": "report_json"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/fastp/fastp/0.20.1+galaxy0", "tool_shed_repository": {"changeset_revision": "dbf9c561ef29", "name": "fastp", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"filter_options\": {\"quality_filtering_options\": {\"disable_quality_filtering\": false, \"qualified_quality_phred\": null, \"unqualified_percent_limit\": null, \"n_base_limit\": null}, \"length_filtering_options\": {\"disable_length_filtering\": false, \"length_required\": null, \"length_limit\": null}, \"low_complexity_filter\": {\"enable_low_complexity_filter\": false, \"complexity_threshold\": null}}, \"output_options\": {\"report_html\": true, \"report_json\": true}, \"overrepresented_sequence_analysis\": {\"overrepresentation_analysis\": false, \"overrepresentation_sampling\": null}, \"read_mod_options\": {\"polyg_tail_trimming\": {\"trimming_select\": \"\", \"__current_case__\": 1, \"poly_g_min_len\": null}, \"polyx_tail_trimming\": {\"polyx_trimming_select\": \"\", \"__current_case__\": 1}, \"umi_processing\": {\"umi\": false, \"umi_loc\": \"\", \"umi_len\": null, \"umi_prefix\": \"\"}, \"cutting_by_quality_options\": {\"cut_by_quality5\": false, \"cut_by_quality3\": false, \"cut_window_size\": null, \"cut_mean_quality\": null}, \"base_correction_options\": {\"correction\": false}}, \"single_paired\": {\"single_paired_selector\": 
\"paired_collection\", \"__current_case__\": 2, \"paired_input\": {\"__class__\": \"ConnectedValue\"}, \"adapter_trimming_options\": {\"disable_adapter_trimming\": false, \"adapter_sequence1\": \"\", \"adapter_sequence2\": \"\"}, \"global_trimming_options\": {\"trim_front1\": null, \"trim_tail1\": null, \"trim_front2\": null, \"trim_tail2\": null}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "0.20.1+galaxy0", "type": "tool", "uuid": "574667b3-fb80-4da0-b76a-7c6941f94777", "when": null, "workflow_outputs": [{"label": "fastp_pe", "output_name": "output_paired_coll", "uuid": "95cc6cb9-8571-4ad8-9dfe-2fb81022a960"}, {"label": "fastp_html_report", "output_name": "report_html", "uuid": "b988c650-2522-4fb9-a255-ecab9a5a1dd9"}]}, "3": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/kraken2/kraken2/2.1.1+galaxy1", "errors": null, "id": 3, "input_connections": {"single_paired|input_pair": {"id": 1, "output_name": "output"}}, "inputs": [], "label": null, "name": "Kraken2", "outputs": [{"name": "report_output", "type": "tabular"}, {"name": "output", "type": "tabular"}], "position": {"left": 277.3152676445572, "top": 496.4841750655785}, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/kraken2/kraken2/2.1.1+galaxy1", "tool_shed_repository": {"changeset_revision": "e674066930b2", "name": "kraken2", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"confidence\": \"0.0\", \"kraken2_database\": \"viral2019-03\", \"min_base_quality\": \"0\", \"minimum_hit_groups\": \"2\", \"quick\": false, \"report\": {\"create_report\": true, \"use_mpa_style\": false, \"report_zero_counts\": false, \"report_minimizer_data\": false}, \"single_paired\": {\"single_paired_selector\": \"collection\", \"__current_case__\": 0, \"input_pair\": {\"__class__\": \"ConnectedValue\"}}, \"split_reads\": false, \"use_names\": false, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.1.1+galaxy1", 
"type": "tool", "uuid": "0e8ced84-9abd-4435-b45b-2362530d60a5", "when": null, "workflow_outputs": [{"label": "Kraken2 on input dataset(s): Classification", "output_name": "output", "uuid": "3a16b606-a868-48e7-9696-8128655dc1f1"}, {"label": "kraken2 report output", "output_name": "report_output", "uuid": "610c591a-9347-4957-938f-0a8f43b6b363"}]}, "4": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/read_it_and_keep/read_it_and_keep/0.2.2+galaxy0", "errors": null, "id": 4, "input_connections": {"reads|paired_reads": {"id": 2, "output_name": "output_paired_coll"}, "ref_source|ref_fasta": {"id": 0, "output_name": "output"}}, "inputs": [], "label": null, "name": "Read It and Keep", "outputs": [{"name": "output_collection", "type": "input"}], "position": {"left": 635.8491400306144, "top": 201.57228705933926}, "post_job_actions": {"HideDatasetActionoutput_collection": {"action_arguments": {}, "action_type": "HideDatasetAction", "output_name": "output_collection"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/read_it_and_keep/read_it_and_keep/0.2.2+galaxy0", "tool_shed_repository": {"changeset_revision": "1563b58905f4", "name": "read_it_and_keep", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"adv\": {\"min_map_length\": \"50\", \"min_map_length_pc\": \"50.0\", \"enumerate_names\": false}, \"reads\": {\"read_type\": \"paired_collection\", \"__current_case__\": 1, \"paired_reads\": {\"__class__\": \"ConnectedValue\"}}, \"ref_source\": {\"source\": \"history\", \"__current_case__\": 0, \"ref_fasta\": {\"__class__\": \"ConnectedValue\"}}, \"sequencing_tech\": \"illumina\", \"trim_reference\": true, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "0.2.2+galaxy0", "type": "tool", "uuid": "101ddc69-2eaf-472d-b8d0-b419b303ac3c", "when": null, "workflow_outputs": []}, "5": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/kraken2tax/Kraken2Tax/1.1", "errors": null, "id": 5, 
"input_connections": {"input": {"id": 3, "output_name": "report_output"}}, "inputs": [], "label": null, "name": "Convert Kraken", "outputs": [{"name": "out_file", "type": "taxonomy"}], "position": {"left": 534.9780458180148, "top": 525.8335410291587}, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/kraken2tax/Kraken2Tax/1.1", "tool_shed_repository": {"changeset_revision": "d844fdcce44e", "name": "kraken2tax", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"ncbi_taxonomy\": \"2020-12-03\", \"read_name\": \"2\", \"tax_id\": \"3\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1", "type": "tool", "uuid": "bc2a9bf1-9c2a-484d-a41c-d01b7247b18f", "when": null, "workflow_outputs": [{"label": "Converted Kraken2 output", "output_name": "out_file", "uuid": "e03381ed-e828-4037-8334-b5a6c910976e"}]}, "6": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/crs4/taxonomy_krona_chart/taxonomy_krona_chart/2.7.1", "errors": null, "id": 6, "input_connections": {"type_of_data|input": {"id": 5, "output_name": "out_file"}}, "inputs": [], "label": null, "name": "Krona pie chart", "outputs": [{"name": "output", "type": "html"}], "position": {"left": 808.8290375111392, "top": 428.5225963831433}, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/crs4/taxonomy_krona_chart/taxonomy_krona_chart/2.7.1", "tool_shed_repository": {"changeset_revision": "1334cb4c6b68", "name": "taxonomy_krona_chart", "owner": "crs4", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"combine_inputs\": false, \"root_name\": \"Root\", \"type_of_data\": {\"type_of_data_selector\": \"taxonomy\", \"__current_case__\": 0, \"input\": {\"__class__\": \"ConnectedValue\"}, \"max_rank\": \"8\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.7.1", "type": "tool", "uuid": "1d0f9115-bb1a-4d5a-94b0-a351185da810", "when": null, 
"workflow_outputs": [{"label": "Krona pie chart on input dataset(s): HTML", "output_name": "output", "uuid": "9504027b-01d5-40a2-bae7-d9625b26f74e"}]}, "7": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/crs4/taxonomy_krona_chart/taxonomy_krona_chart/2.7.1", "errors": null, "id": 7, "input_connections": {"type_of_data|input": {"id": 5, "output_name": "out_file"}}, "inputs": [], "label": null, "name": "Krona pie chart", "outputs": [{"name": "output", "type": "html"}], "position": {"left": 810.6724382640716, "top": 632.069973577796}, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/crs4/taxonomy_krona_chart/taxonomy_krona_chart/2.7.1", "tool_shed_repository": {"changeset_revision": "1334cb4c6b68", "name": "taxonomy_krona_chart", "owner": "crs4", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"combine_inputs\": true, \"root_name\": \"Root\", \"type_of_data\": {\"type_of_data_selector\": \"taxonomy\", \"__current_case__\": 0, \"input\": {\"__class__\": \"ConnectedValue\"}, \"max_rank\": \"8\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.7.1", "type": "tool", "uuid": "6f391336-5aad-45de-9f49-40f77d88a96c", "when": null, "workflow_outputs": [{"label": "Combined Krona pie chart on input dataset(s): HTML", "output_name": "output", "uuid": "e01b0fcd-79b5-4cbc-82d2-35c4cc28eb37"}]}}, "tags": [], "uuid": "b648d141-01d2-4f4d-8cb5-f9cbffe8179e", "version": 2}
Binary file not shown.
Binary file not shown.
Loading
Loading