From dd49c54cbe396b61fa8c407f5f1a32a0441d2661 Mon Sep 17 00:00:00 2001
From: skchronicles <kuhnsa3@gmail.com>
Date: Wed, 25 Jan 2023 13:58:43 -0500
Subject: [PATCH] Adding new rule to get transcript counts

---
 config/cluster.json      |  4 ++++
 config/config.json       |  1 +
 workflow/Snakefile       |  9 ++++++++-
 workflow/rules/quant.smk | 28 ++++++++++++++++++++++++++++
 4 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 workflow/rules/quant.smk

diff --git a/config/cluster.json b/config/cluster.json
index 9322f59..8b3b67c 100644
--- a/config/cluster.json
+++ b/config/cluster.json
@@ -31,5 +31,9 @@
     "nanostat": {
         "threads": "4",
         "mem": "16g"
+    },
+    "nanocount": {
+        "threads": "2",
+        "mem": "16g"
     }
 }
diff --git a/config/config.json b/config/config.json
index 7056c94..67358bb 100644
--- a/config/config.json
+++ b/config/config.json
@@ -1,4 +1,5 @@
 {
     "options": {
+        "nanocount_em_iter": "100"
     }
 }
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 9982035..9dc71e9 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -120,6 +120,12 @@ rule all:
             join(workpath, "{name}", "bams", "{name}.sorted.genome.metrics"),
             name=samples
         ),
+        # NanoCount to quantify isoform counts
+        # @imported from `rule nanocount` in rules/quant.smk
+        expand(
+            join(workpath, "{name}", "counts", "{name}.nanocount.transcripts.tsv"),
+            name=samples
+        ),
         # MultiQC to aggregate results into a report
         # @imported from `rule multiqc` in rules/qc.smk
         join(workpath, "reports", "multiqc_report.html"),
@@ -130,4 +136,5 @@ include: join("rules", "common.smk")
 include: join("rules", "download.smk")
 include: join("rules", "trim.smk")
 include: join("rules", "qc.smk")
-include: join("rules", "map.smk")
\ No newline at end of file
+include: join("rules", "map.smk")
+include: join("rules", "quant.smk")
\ No newline at end of file
diff --git a/workflow/rules/quant.smk b/workflow/rules/quant.smk
new file mode 100644
index 0000000..4a89f2e
--- /dev/null
+++ b/workflow/rules/quant.smk
@@ -0,0 +1,28 @@
+# Quantification rules
+rule nanocount:
+    """
+    Quantification step to get per-sample transcript (isoform) counts from a
+    BAM file; `-e` is read from config option `nanocount_em_iter`.
+    Github: https://github.com/a-slide/NanoCount
+    @Input:
+        Sorted Transcriptomic BAM file (scatter)
+    @Output:
+        NanoCount transcript counts file (TSV)
+    """
+    input:
+        bam = join(workpath, "{name}", "bams", "{name}.sorted.transcriptome.bam"),
+    output:
+        counts = join(workpath, "{name}", "counts", "{name}.nanocount.transcripts.tsv"),
+    params:
+        rname   = "nanocount",
+        em_iter = config['options']['nanocount_em_iter'],
+    conda: depending(join(workpath, config['conda']['modr']), use_conda)
+    container: depending(config['images']['modr'], use_singularity)
+    threads: int(allocated("threads", "nanocount", cluster))
+    shell: """
+    NanoCount \\
+        -i {input.bam} \\
+        --extra_tx_info \\
+        -e {params.em_iter} \\
+        -o {output.counts} 
+    """