diff --git a/meta/bio/salmon_tximport/meta.yaml b/meta/bio/salmon_tximport/meta.yaml index 6ac09a58b77..00b05a75182 100644 --- a/meta/bio/salmon_tximport/meta.yaml +++ b/meta/bio/salmon_tximport/meta.yaml @@ -18,3 +18,16 @@ description: > +----------------+----------+------------------------------------------------+ authors: - Thibault Dayris +pathvars: + default: + - results + - resources + - logs + custom: + transcriptome_sequence: Path to FASTA file with transcriptome sequence + genome_sequence: Path to FASTA file with genome sequence + genome_annotation: Path to GTF file with genome annotation + reads_r1: Path/pattern for FASTQ files with R1 reads + reads_r2: Path/pattern for FASTQ files with R2 reads + tx_to_gene: Path to TSV table with transcript_id gene_id + per: Pattern for sample identifiers, e.g. ``"{sample}"`` diff --git a/meta/bio/salmon_tximport/meta_wrapper.smk b/meta/bio/salmon_tximport/meta_wrapper.smk index 107bfd53e9f..e33f2d4316b 100644 --- a/meta/bio/salmon_tximport/meta_wrapper.smk +++ b/meta/bio/salmon_tximport/meta_wrapper.smk @@ -1,24 +1,24 @@ rule salmon_decoy_sequences: input: - transcriptome="resources/transcriptome.fasta", - genome="resources/genome.fasta", + transcriptome="", + genome="", output: - gentrome=temp("resources/gentrome.fasta"), - decoys=temp("resources/decoys.txt"), + gentrome=temp("/gentrome.fasta"), + decoys=temp("/decoys.txt"), threads: 1 log: - "decoys.log", + "/decoys.log", wrapper: "master/bio/salmon/decoys" rule salmon_index_gentrome: input: - sequences="resources/gentrome.fasta", - decoys="resources/decoys.txt", + sequences="/gentrome.fasta", + decoys="/decoys.txt", output: multiext( - "salmon/transcriptome_index/", + "/salmon_gentrome_index/", "complete_ref_lens.bin", "ctable.bin", "ctg_offsets.bin", @@ -37,7 +37,7 @@ rule salmon_index_gentrome: ), cache: True log: - "logs/salmon/transcriptome_index.log", + "/salmon/gentrome_index.log", threads: 2 params: # optional parameters @@ -48,9 +48,10 @@ rule salmon_index_gentrome: rule salmon_quant_reads: input: - r="reads/{sample}.fastq.gz", + r1="", + r2="", index=multiext( - "salmon/transcriptome_index/", + "/salmon_gentrome_index/", "complete_ref_lens.bin", "ctable.bin", "ctg_offsets.bin", @@ -67,17 +68,17 @@ rule salmon_quant_reads: "seq.bin", "versionInfo.json", ), - gtf="resources/annotation.gtf", + gtf="", output: - quant=temp("pseudo_mapping/{sample}/quant.sf"), - quant_gene=temp("pseudo_mapping/{sample}/quant.genes.sf"), - lib=temp("pseudo_mapping/{sample}/lib_format_counts.json"), - aux_info=temp(directory("pseudo_mapping/{sample}/aux_info")), - cmd_info=temp("pseudo_mapping/{sample}/cmd_info.json"), - libparams=temp(directory("pseudo_mapping/{sample}/libParams")), - logs=temp(directory("pseudo_mapping/{sample}/logs")), + quant=temp("/pseudo_mapping//quant.sf"), + quant_gene=temp("/pseudo_mapping//quant.genes.sf"), + lib=temp("/pseudo_mapping//lib_format_counts.json"), + aux_info=temp(directory("/pseudo_mapping//aux_info")), + cmd_info=temp("/pseudo_mapping//cmd_info.json"), + libparams=temp(directory("/pseudo_mapping//libParams")), + logs=temp(directory("/pseudo_mapping//logs")), log: - "logs/salmon/{sample}.log", + "/salmon/.log", params: # optional parameters libtype="A", @@ -90,28 +91,35 @@ rule salmon_quant_reads: rule tximport: input: quant=expand( - "pseudo_mapping/{sample}/quant.sf", sample=["S1", "S2", "S3", "S4"] + "/pseudo_mapping/{sample}/quant.sf", + sample=["S1", "S2"], ), lib=expand( - "pseudo_mapping/{sample}/lib_format_counts.json", - sample=["S1", "S2", "S3", "S4"], + "/pseudo_mapping/{sample}/lib_format_counts.json", + sample=["S1", "S2"], ), aux_info=expand( - "pseudo_mapping/{sample}/aux_info", sample=["S1", "S2", "S3", "S4"] + "/pseudo_mapping/{sample}/aux_info", + sample=["S1", "S2"], ), cmd_info=expand( - "pseudo_mapping/{sample}/cmd_info.json", sample=["S1", "S2", "S3", "S4"] + "/pseudo_mapping/{sample}/cmd_info.json", + sample=["S1", "S2"], ), libparams=expand( - "pseudo_mapping/{sample}/libParams", sample=["S1", "S2", "S3", "S4"] + "/pseudo_mapping/{sample}/libParams", + sample=["S1", "S2"], ), - logs=expand("pseudo_mapping/{sample}/logs", sample=["S1", "S2", "S3", "S4"]), - tx_to_gene="resources/tx2gene.tsv", + logs=expand( + "/pseudo_mapping/{sample}/logs", + sample=["S1", "S2"], + ), + tx_to_gene="", output: - txi="tximport/SummarizedExperimentObject.RDS", + txi="/tximport/SummarizedExperimentObject.RDS", params: extra="type='salmon'", log: - "logs/tximport.log" + "/tximport.log", wrapper: "master/bio/tximport" diff --git a/meta/bio/salmon_tximport/test/Snakefile b/meta/bio/salmon_tximport/test/Snakefile index ef1032f04e9..cd81b946d99 100644 --- a/meta/bio/salmon_tximport/test/Snakefile +++ b/meta/bio/salmon_tximport/test/Snakefile @@ -2,7 +2,15 @@ from snakemake.utils import min_version min_version("9.13.1") + +configfile: "config.yaml" + + module salmon_tximport: - meta_wrapper: "master/meta/bio/salmon_tximport" + meta_wrapper: + "master/meta/bio/salmon_tximport" + config: + config + -use rule * from salmon_tximport \ No newline at end of file +use rule * from salmon_tximport diff --git a/meta/bio/salmon_tximport/test/config.yaml b/meta/bio/salmon_tximport/test/config.yaml new file mode 100644 index 00000000000..016d6dd25d3 --- /dev/null +++ b/meta/bio/salmon_tximport/test/config.yaml @@ -0,0 +1,9 @@ +pathvars: + transcriptome_sequence: "resources/transcriptome.fasta" + genome_sequence: "resources/genome.fasta" + genome_annotation: "resources/annotation.gtf" + tx_to_gene: "resources/tx2gene.tsv" + per: "{sample}" + reads_r1: "reads/{sample}_R1.fq.gz" + reads_r2: "reads/{sample}_R2.fq.gz" + resources: "resources" diff --git a/meta/bio/salmon_tximport/test/reads/S1.fastq.gz b/meta/bio/salmon_tximport/test/reads/S1.fastq.gz deleted file mode 100644 index 2c40df8fe83..00000000000 Binary files a/meta/bio/salmon_tximport/test/reads/S1.fastq.gz and /dev/null differ diff --git a/meta/bio/salmon_tximport/test/reads/S1_R1.fq.gz b/meta/bio/salmon_tximport/test/reads/S1_R1.fq.gz new file mode 100644 index 00000000000..e0e861674c9 Binary files /dev/null and b/meta/bio/salmon_tximport/test/reads/S1_R1.fq.gz differ diff --git a/meta/bio/salmon_tximport/test/reads/S1_R2.fq.gz b/meta/bio/salmon_tximport/test/reads/S1_R2.fq.gz new file mode 100644 index 00000000000..65429390686 Binary files /dev/null and b/meta/bio/salmon_tximport/test/reads/S1_R2.fq.gz differ diff --git a/meta/bio/salmon_tximport/test/reads/S2.fastq.gz b/meta/bio/salmon_tximport/test/reads/S2.fastq.gz deleted file mode 100644 index dcd2929ce54..00000000000 Binary files a/meta/bio/salmon_tximport/test/reads/S2.fastq.gz and /dev/null differ diff --git a/meta/bio/salmon_tximport/test/reads/S2_R1.fq.gz b/meta/bio/salmon_tximport/test/reads/S2_R1.fq.gz new file mode 100644 index 00000000000..b63eda9ded6 Binary files /dev/null and b/meta/bio/salmon_tximport/test/reads/S2_R1.fq.gz differ diff --git a/meta/bio/salmon_tximport/test/reads/S2_R2.fq.gz b/meta/bio/salmon_tximport/test/reads/S2_R2.fq.gz new file mode 100644 index 00000000000..41b877deecb Binary files /dev/null and b/meta/bio/salmon_tximport/test/reads/S2_R2.fq.gz differ diff --git a/meta/bio/salmon_tximport/test/reads/S3.fastq.gz b/meta/bio/salmon_tximport/test/reads/S3.fastq.gz deleted file mode 100644 index d6c64d7aaf7..00000000000 Binary files a/meta/bio/salmon_tximport/test/reads/S3.fastq.gz and /dev/null differ diff --git a/meta/bio/salmon_tximport/test/reads/S4.fastq.gz b/meta/bio/salmon_tximport/test/reads/S4.fastq.gz deleted file mode 100644 index eb36adcd09b..00000000000 Binary files a/meta/bio/salmon_tximport/test/reads/S4.fastq.gz and /dev/null differ diff --git a/meta/bio/salmon_tximport/test/resources/genome.fasta.fai b/meta/bio/salmon_tximport/test/resources/genome.fasta.fai new file mode 100644 index 00000000000..ade3417e50c --- /dev/null +++ b/meta/bio/salmon_tximport/test/resources/genome.fasta.fai @@ -0,0 +1 @@ +chromosome1 927 13 79 80 diff --git a/meta/bio/salmon_tximport/test/resources/transcriptome.fasta b/meta/bio/salmon_tximport/test/resources/transcriptome.fasta index 42493ff0db2..7e6c881c561 100644 --- a/meta/bio/salmon_tximport/test/resources/transcriptome.fasta +++ b/meta/bio/salmon_tximport/test/resources/transcriptome.fasta @@ -1,4 +1,5 @@ >transcript1 -CCAGGCTCGTATGTACATCGCTCCTCAAAGTGAGGGGAAGTCCTAAT +CAGGCTCGTATGTACATCGCTCCTCAAAGTGAGGGGAAGTCCTAATCGG >transcript2 -CATCTCCCTGAGTCGGTTTAAAGATTGTCTTGTATGCGTACTCTTGATAGGTAACCCG +CAGGCTCGTATGTACATCGCTCCTCAAAGTGAGGGGAAGTCCTAATCGGATACCGATTGGACTCTTGAGT +ACCGGCCCTGT diff --git a/meta/bio/salmon_tximport/test/resources/transcriptome.fasta.fai b/meta/bio/salmon_tximport/test/resources/transcriptome.fasta.fai new file mode 100644 index 00000000000..e09f09b0557 --- /dev/null +++ b/meta/bio/salmon_tximport/test/resources/transcriptome.fasta.fai @@ -0,0 +1,2 @@ +transcript1 49 13 49 50 +transcript2 81 76 70 71 diff --git a/test_wrappers.py b/test_wrappers.py index 6b4a32f16f7..3c322c2e734 100644 --- a/test_wrappers.py +++ b/test_wrappers.py @@ -1101,7 +1101,7 @@ def test_salmon_tximport_meta(run): "--cores", "2", "--use-conda", - "tximport/SummarizedExperimentObject.RDS", + "results/tximport/SummarizedExperimentObject.RDS", ], )