From dd49c54cbe396b61fa8c407f5f1a32a0441d2661 Mon Sep 17 00:00:00 2001 From: skchronicles Date: Wed, 25 Jan 2023 13:58:43 -0500 Subject: [PATCH] Adding new rule to get transcript counts --- config/cluster.json | 4 ++++ config/config.json | 1 + workflow/Snakefile | 9 ++++++++- workflow/rules/quant.smk | 28 ++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 workflow/rules/quant.smk diff --git a/config/cluster.json b/config/cluster.json index 9322f59..8b3b67c 100644 --- a/config/cluster.json +++ b/config/cluster.json @@ -31,5 +31,9 @@ "nanostat": { "threads": "4", "mem": "16g" + }, + "nanocount": { + "threads": "2", + "mem": "16g" } } diff --git a/config/config.json b/config/config.json index 7056c94..67358bb 100644 --- a/config/config.json +++ b/config/config.json @@ -1,4 +1,5 @@ { "options": { + "nanocount_em_iter": "100" } } diff --git a/workflow/Snakefile b/workflow/Snakefile index 9982035..9dc71e9 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -120,6 +120,12 @@ rule all: join(workpath, "{name}", "bams", "{name}.sorted.genome.metrics"), name=samples ), + # NanoCount to quantify isoform counts + # @imported from `rule nanocount` in rules/quant.smk + expand( + join(workpath, "{name}", "counts", "{name}.nanocount.transcripts.tsv"), + name=samples + ), # MultiQC to aggregate results into a report # @imported from `rule multiqc` in rules/qc.smk join(workpath, "reports", "multiqc_report.html"), @@ -130,4 +136,5 @@ include: join("rules", "common.smk") include: join("rules", "download.smk") include: join("rules", "trim.smk") include: join("rules", "qc.smk") -include: join("rules", "map.smk") \ No newline at end of file +include: join("rules", "map.smk") +include: join("rules", "quant.smk") \ No newline at end of file diff --git a/workflow/rules/quant.smk b/workflow/rules/quant.smk new file mode 100644 index 0000000..4a89f2e --- /dev/null +++ b/workflow/rules/quant.smk @@ -0,0 +1,28 @@ +# Quantification rules +rule 
nanocount: + """ + Quantification step to get transcript (isoform) counts from a BAM file + aligned to the transcriptome, run per sample with NanoCount. + Github: https://github.com/a-slide/NanoCount + @Input: + Sorted Transcriptomic BAM file (scatter) + @Output: + NanoCount transcript counts file (TSV) + """ + input: + bam = join(workpath, "{name}", "bams", "{name}.sorted.transcriptome.bam"), + output: + counts = join(workpath, "{name}", "counts", "{name}.nanocount.transcripts.tsv"), + params: + rname = "nanocount", + em_iter = config['options']['nanocount_em_iter'], + conda: depending(join(workpath, config['conda']['modr']), use_conda) + container: depending(config['images']['modr'], use_singularity) + threads: int(allocated("threads", "nanocount", cluster)) + shell: """ + NanoCount \\ + -i {input.bam} \\ + --extra_tx_info \\ + -e {params.em_iter} \\ + -o {output.counts} + """