Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion tools/busco/.shed.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
categories: [Sequence Analysis]
categories:
- Sequence Analysis
- Genome annotation
description: BUSCO assess genome and annotation completeness
homepage_url: https://gitlab.com/ezlab/busco/-/releases
long_description: Assessing genome assembly and annotation completeness with Benchmarking
Expand Down
185 changes: 152 additions & 33 deletions tools/busco/busco.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
## - download complete reference DB (~200G, final 105G) to tools/busco/test-data/test-db/busco_downloads
## ```
## busco --download_path tools/busco/test-data/test-db/busco_downloads/ --download all
## find tools/busco/test-data/test-db/busco_downloads/lineages/ -mindepth 1 -maxdepth 1 ! -name '*_odb10*' -exec rm -rf {} \;
## find tools/busco/test-data/test-db/busco_downloads/placement_files -mindepth 1 -maxdepth 1 ! -name '*_odb10*' -delete
## find tools/busco/test-data/test-db/busco_downloads/lineages/ -name "*.faa.gz" -exec gunzip {} \;;
## find tools/busco/test-data/busco_downloads/lineages/ -mindepth 1 -maxdepth 1 ! -name '*_odb10*' -exec rm -rf {} \;
## find tools/busco/test-data/busco_downloads/placement_files -mindepth 1 -maxdepth 1 ! -name '*_odb10*' -delete
## find tools/busco/test-data/busco_downloads/lineages/ -name "*.faa.gz" -exec gunzip {} \;;
## ```
## - test containerized (note: test-data is mounted ro in containerized tests)
##
Expand Down Expand Up @@ -96,22 +96,66 @@ busco
#end if
#end if

#if $outputs and 'image' in $outputs:
&& mkdir BUSCO_summaries
&& cp busco_galaxy/short_summary.*.txt BUSCO_summaries/
&& generate_plot.py -wd BUSCO_summaries -rt specific
#end if
#if $lineage.lineage_mode != "auto_detect":
&& cp busco_galaxy/run_*/full_table.tsv busco_galaxy/full_table_specific_lineage.tsv
&& cp busco_galaxy/run_*/missing_busco_list.tsv busco_galaxy/missing_busco_list_specific_lineage.tsv
#if $outputs and 'image' in $outputs:
&& mkdir BUSCO_summaries
&& cp busco_galaxy/short_summary.*.txt BUSCO_summaries/
&& generate_plot.py -wd BUSCO_summaries -rt specific
&& cp BUSCO_summaries/busco_figure.png busco_galaxy/busco_figure_specific_lineage.png
#end if

#if $outputs and 'gff' in $outputs:
&& echo "\##gff-version 3" > busco_output.gff
## gff files can be absent
&& (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.gff >> busco_output.gff 2> /dev/null || true)
#end if
#if $outputs and 'faa' in $outputs:
&& (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.faa >> busco_output.faa 2> /dev/null || true)
#if $outputs and 'gff' in $outputs:
## Write the GFF3 header into the same file the features are appended to;
## busco_galaxy/busco_output_specific.gff is the file collected by the busco_gff output.
&& echo "\##gff-version 3" > busco_galaxy/busco_output_specific.gff
## gff files can be absent
&& (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.gff >> busco_galaxy/busco_output_specific.gff 2> /dev/null || true)
#end if
#if $outputs and 'faa' in $outputs:
&& (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.faa >> busco_galaxy/busco_output_specific.faa 2> /dev/null || true)
#end if
#if $outputs and 'fna' in $outputs:
&& (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.fna >> busco_galaxy/busco_output_specific.fna 2> /dev/null || true)
#end if
#end if
#if $outputs and 'fna' in $outputs:
&& (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.fna >> busco_output.fna 2> /dev/null || true)

## If $lineage.lineage_mode == "auto_detect", BUSCO outputs several files with the same name, which causes conflicts, so the files need to be renamed
#if $lineage.lineage_mode == "auto_detect":
&& specific_lineage=\$(find ./busco_galaxy -name "short_summary.specific.*.txt" | head -n 1 | cut -d'.' -f4)
&& generic_lineage=\$(find ./busco_galaxy -name "short_summary.generic.*.txt" | head -n 1 | cut -d'.' -f4)

## Rename the output files to differentiate specific and generic lineage results
## Full table
&& cp busco_galaxy/run_\${specific_lineage}/full_table.tsv busco_galaxy/full_table_specific_lineage.tsv
&& cp busco_galaxy/run_\${generic_lineage}/full_table.tsv busco_galaxy/full_table_generic_lineage.tsv
##Missing busco list
&& cp busco_galaxy/run_\${specific_lineage}/missing_busco_list.tsv busco_galaxy/missing_busco_list_specific_lineage.tsv
&& cp busco_galaxy/run_\${generic_lineage}/missing_busco_list.tsv busco_galaxy/missing_busco_list_generic_lineage.tsv
#if $outputs and 'image' in $outputs:
&& mkdir BUSCO_summaries_specific
&& cp busco_galaxy/short_summary.specific.*.txt BUSCO_summaries_specific/
&& generate_plot.py -wd BUSCO_summaries_specific -rt specific
&& cp BUSCO_summaries_specific/busco_figure.png busco_galaxy/busco_figure_specific_lineage.png
&& mkdir BUSCO_summaries_generic
&& cp busco_galaxy/short_summary.generic.*.txt BUSCO_summaries_generic/
&& generate_plot.py -wd BUSCO_summaries_generic -rt generic
&& cp BUSCO_summaries_generic/busco_figure.png busco_galaxy/busco_figure_generic_lineage.png
#end if
#if $outputs and 'gff' in $outputs:
## gff files can be absent
&& echo "\##gff-version 3" > busco_galaxy/busco_output_specific.gff
&& (cat busco_galaxy/run_\${specific_lineage}/busco_sequences/*busco_sequences/*.gff >> busco_galaxy/busco_output_specific.gff 2> /dev/null || true)
&& echo "\##gff-version 3" > busco_galaxy/busco_output_generic.gff
&& (cat busco_galaxy/run_\${generic_lineage}/busco_sequences/*busco_sequences/*.gff >> busco_galaxy/busco_output_generic.gff 2> /dev/null || true)
#end if
#if $outputs and 'faa' in $outputs:
&& (cat busco_galaxy/run_\${specific_lineage}/busco_sequences/*busco_sequences/*.faa >> busco_galaxy/busco_output_specific.faa 2> /dev/null || true)
&& (cat busco_galaxy/run_\${generic_lineage}/busco_sequences/*busco_sequences/*.faa >> busco_galaxy/busco_output_generic.faa 2> /dev/null || true)
#end if
#if $outputs and 'fna' in $outputs:
&& (cat busco_galaxy/run_\${specific_lineage}/busco_sequences/*busco_sequences/*.fna >> busco_galaxy/busco_output_specific.fna 2> /dev/null || true)
&& (cat busco_galaxy/run_\${generic_lineage}/busco_sequences/*busco_sequences/*.fna >> busco_galaxy/busco_output_generic.fna 2> /dev/null || true)
#end if
#end if
]]></command>
<inputs>
Expand Down Expand Up @@ -172,6 +216,7 @@ busco
<option value="--auto-lineage-prok">Prokaryotes (--auto-lineage-prok)</option>
<option value="--auto-lineage-euk">Eukaryotes (--auto-lineage-euk)</option>
</param>
<!-- Opt-in switch for the generic-lineage datasets; the *_generic output filters test lineage['generic_results']. -->
<param name="generic_results" type="boolean" checked="false" label="Do you want to display the results from the generic lineage?" help="BUSCO generates files for the parent lineage (generic) and the final selected lineage (specific). Generally, the lineage to select for your assessments should be the most specific lineage available. The generic lineage datasets are those for the domains archaea, bacteria and eukaryota."/>
</when>
<when value="select_lineage">
<param argument="--lineage_dataset" type="select" label="Lineage">
Expand All @@ -183,9 +228,9 @@ busco
</conditional>

<param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated">
<option value="short_summary" selected="true">short summary text</option>
<option value="missing">list with missing IDs</option>
<option value="image">summary image</option>
<option value="short_summary" selected="true">Short summary text</option>
<option value="missing">List with missing IDs</option>
<option value="image">Summary image</option>
<option value="gff">gff</option>
<option value="faa">Protein sequences</option>
<option value="fna">Nucleotide sequences</option>
Expand All @@ -199,26 +244,49 @@ busco
</inputs>

<outputs>
<data name='busco_sum' format='txt' label="${tool.name} on ${on_string}: short summary" from_work_dir="busco_galaxy/run_*/short_summary.txt">
<data name='busco_sum' format='txt' label="${tool.name} on ${on_string}: Short summary - Specific lineage" from_work_dir="busco_galaxy/short_summary.specific.*.txt">
<filter>outputs and 'short_summary' in outputs</filter>
</data>
<data name='busco_table' format='tabular' label="${tool.name} on ${on_string}: full table" from_work_dir="busco_galaxy/run_*/full_table.tsv"/>
<data name='busco_missing' format='tabular' label="${tool.name} on ${on_string}: missing buscos" from_work_dir="busco_galaxy/run_*/missing_busco_list.tsv">
<data name='busco_sum_generic' format='txt' label="${tool.name} on ${on_string}: Short summary - Generic lineage" from_work_dir="busco_galaxy/short_summary.generic.*.txt">
<filter>(outputs and 'short_summary' in outputs) and lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
</data>
<data name='busco_table' format='tabular' label="${tool.name} on ${on_string}: Full table - Specific lineage" from_work_dir="busco_galaxy/full_table_specific_lineage.tsv">
</data>
<data name='busco_table_generic' format='tabular' label="${tool.name} on ${on_string}: Full table - Generic lineage" from_work_dir="busco_galaxy/full_table_generic_lineage.tsv">
<filter>lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
</data>
<data name='busco_missing' format='tabular' label="${tool.name} on ${on_string}: Missing buscos - Specific lineage" from_work_dir="busco_galaxy/missing_busco_list_specific_lineage.tsv">
<filter>outputs and 'missing' in outputs</filter>
</data>
<data name='summary_image' format='png' label="${tool.name} on ${on_string}: summary image" from_work_dir="BUSCO_summaries/busco_figure.png">
<data name='busco_missing_generic' format='tabular' label="${tool.name} on ${on_string}: Missing buscos - Generic lineage" from_work_dir="busco_galaxy/missing_busco_list_generic_lineage.tsv">
<filter>(outputs and 'missing' in outputs) and lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
</data>
<data name='summary_image' format='png' label="${tool.name} on ${on_string}: Summary image - Specific lineage" from_work_dir="busco_galaxy/busco_figure_specific_lineage.png">
<filter>outputs and 'image' in outputs</filter>
</data>
<data name='busco_gff' format='gff3' label="${tool.name} on ${on_string}: GFF" from_work_dir="busco_output.gff">
<data name='summary_image_generic' format='png' label="${tool.name} on ${on_string}: Summary image - Generic lineage" from_work_dir="busco_galaxy/busco_figure_generic_lineage.png">
<filter>(outputs and 'image' in outputs) and lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
</data>
<data name='busco_gff' format='gff3' label="${tool.name} on ${on_string}: GFF - Specific lineage" from_work_dir="busco_galaxy/busco_output_specific.gff">
<filter>outputs and 'gff' in outputs</filter>
</data>
<data name='busco_faa' format='fasta' label="${tool.name} on ${on_string}: Protein sequences" from_work_dir="busco_output.faa">
<data name='busco_gff_generic' format='gff3' label="${tool.name} on ${on_string}: GFF - Generic lineage" from_work_dir="busco_galaxy/busco_output_generic.gff">
<filter>(outputs and 'gff' in outputs) and lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
</data>
<data name='busco_faa' format='fasta' label="${tool.name} on ${on_string}: Protein sequences - Specific lineage" from_work_dir="busco_galaxy/busco_output_specific.faa">
<filter>outputs and 'faa' in outputs</filter>
</data>
<data name='busco_fna' format='fasta' label="${tool.name} on ${on_string}: Nucleotide sequences" from_work_dir="busco_output.fna">
<data name='busco_faa_generic' format='fasta' label="${tool.name} on ${on_string}: Protein sequences - Generic lineage" from_work_dir="busco_galaxy/busco_output_generic.faa">
<filter>(outputs and 'faa' in outputs) and lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
</data>
<data name='busco_fna' format='fasta' label="${tool.name} on ${on_string}: Nucleotide sequences - Specific lineage" from_work_dir="busco_galaxy/busco_output_specific.fna">
<filter>outputs and 'fna' in outputs</filter>
</data>
<data name='busco_fna_generic' format='fasta' label="${tool.name} on ${on_string}: Nucleotide sequences - Generic lineage" from_work_dir="busco_galaxy/busco_output_generic.fna">
<filter>(outputs and 'fna' in outputs) and lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
</data>
</outputs>


<tests>
<!-- <test expect_num_outputs="6">
Expand Down Expand Up @@ -418,12 +486,36 @@ busco
<has_text text="BUSCO analysis done"/>
</assert_stdout>
</test> -->
<test expect_num_outputs="5">
<test expect_num_outputs="2">
<!-- Auto-detected prokaryote lineage in genome mode with only the short summary requested:
     expects the specific-lineage short summary plus the full table (which has no output filter).
     generic_results is not set here, so no *_generic outputs are expected. -->
<param name="test" value="true"/>
<param name="input" value="bacilli_odb_test.fasta"/>
<conditional name="lineage">
<param name="lineage_mode" value="auto_detect"/>
<param name="auto_lineage" value="--auto-lineage-prok"/>
</conditional>
<conditional name="busco_mode">
<param name="mode" value="geno"/>
</conditional>
<param name="outputs" value="short_summary"/>
<output name="busco_sum">
<assert_contents>
<has_text text="Gene predictor used: prodigal"/>
</assert_contents>
</output>
<output name="busco_table">
<assert_contents>
<has_text text="# BUSCO version is: @TOOL_VERSION@"/>
<has_text text="The lineage dataset is: bacilli_odb10"/>
</assert_contents>
</output>
</test>
<test expect_num_outputs="10">
<param name="test" value="true"/>
<param name="input" value="genome.fa"/>
<conditional name="lineage">
<param name="lineage_mode" value="auto_detect"/>
<param name="auto_lineage" value="--auto-lineage"/>
<param name="generic_results" value="true"/>
</conditional>
<conditional name="busco_mode">
<param name="mode" value="geno"/>
Expand All @@ -434,23 +526,50 @@ busco
<param name="outputs" value="short_summary,missing,image,gff"/>
<output name="busco_sum">
<assert_contents>
<has_text text="Gene predictor used: metaeuk"/>
<has_text text="Gene predictor used: prodigal"/>
</assert_contents>
</output>
<output name="busco_sum_generic">
<assert_contents>
<has_text text="Gene predictor used: prodigal"/>
</assert_contents>
</output>
<output name="busco_table">
<assert_contents>
<has_text text="# BUSCO version is: @TOOL_VERSION@"/>
<has_text text="The lineage dataset is: eukaryota_odb10"/>
<has_text text="The lineage dataset is: "/>
</assert_contents>
</output>
<output name="busco_missing">
<output name="busco_table_generic">
<assert_contents>
<has_text text="# BUSCO version is: @TOOL_VERSION@"/>
<has_text text="The lineage dataset is: eukaryota_odb10"/>
<has_text text="The lineage dataset is: "/>
</assert_contents>
</output>
<output name="busco_missing">
<assert_contents>
<has_text text="# BUSCO version is: @TOOL_VERSION@"/>
<has_text text="The lineage dataset is: "/>
</assert_contents>
</output>
<output name="busco_missing_generic">
<assert_contents>
<has_text text="# BUSCO version is: @TOOL_VERSION@"/>
<has_text text="The lineage dataset is: "/>
</assert_contents>
</output>
<output name="summary_image" file="genome_results_metaeuk_auto/summary.png" compare="sim_size"/>
<output name="busco_gff" file="genome_results_metaeuk_auto/out.gff" compare="diff"/>
<output name="summary_image_generic" file="genome_results_metaeuk_auto/summary.png" compare="sim_size"/>
<output name="busco_gff">
<assert_contents>
<has_text text="##gff-version 3"/>
</assert_contents>
</output>
<output name="busco_gff_generic">
<assert_contents>
<has_text text="##gff-version 3"/>
</assert_contents>
</output>
<assert_stdout>
<has_text text="BUSCO analysis done"/>
</assert_stdout>
Expand Down
2 changes: 1 addition & 1 deletion tools/busco/macros.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0"?>
<macros>
<token name="@TOOL_VERSION@">5.8.0</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@VERSION_SUFFIX@">2</token>

<xml name="citations">
<citations>
Expand Down
6 changes: 6 additions & 0 deletions tools/busco/test-data/bacilli_odb_test.fasta

Large diffs are not rendered by default.

Loading