From e7bc9342fbfb4869f01ce3072e6ebfd961c0d031 Mon Sep 17 00:00:00 2001
From: Darryl Nousome
Date: Fri, 8 Dec 2023 15:45:07 -0500
Subject: [PATCH] feat: added python cli for SV

---
 bin/add_gt_lofreq.sh                  |  65 +++
 bin/ascat.R                           |  47 +++
 bin/assess_significance.R             |  59 +++
 bin/freec_paired.pl                   |  43 ++
 bin/lofreq_convert.sh                 |  32 ++
 bin/makeGraph.R                       | 134 ++++++
 bin/make_freec_genome.pl              |  39 ++
 bin/run_sequenza.R                    |  65 +++
 bin/sexscript.sh                      |  18 +
 main.nf                               |   5 +-
 modules/local/copynumber.nf           | 425 +++++++++++++++----
 modules/local/structural_variant.nf   | 242 +++++++++--
 modules/local/variant_calling.nf      | 576 ++++++++++++++++++++------
 nextflow.config                       |  42 +-
 subworkflows/local/workflows.nf       | 487 +++++++++++++++-------
 subworkflows/local/workflows_tonly.nf | 242 +++++++----
 wgs-seek_bak                          | 501 ----------------------
 17 files changed, 2087 insertions(+), 935 deletions(-)
 create mode 100755 bin/add_gt_lofreq.sh
 create mode 100644 bin/ascat.R
 create mode 100644 bin/assess_significance.R
 create mode 100644 bin/freec_paired.pl
 create mode 100755 bin/lofreq_convert.sh
 create mode 100644 bin/makeGraph.R
 create mode 100644 bin/make_freec_genome.pl
 create mode 100644 bin/run_sequenza.R
 create mode 100755 bin/sexscript.sh
 delete mode 100755 wgs-seek_bak

diff --git a/bin/add_gt_lofreq.sh b/bin/add_gt_lofreq.sh
new file mode 100755
index 0000000..ac2b373
--- /dev/null
+++ b/bin/add_gt_lofreq.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#Author: Dr Charles Foster http://github.com/charlesfoster
+
+i_flag=''
+g_flag=''
+h_flag=''
+n_flag=''
+o_flag=''
+
+print_usage() {
+  printf "Usage: bash add_gt_lofreq.sh -i in.vcf.gz [-g genotype] [-n sample_name] -o out.vcf.gz\n"
+  printf "Genotype defaults to 1 if not specified\n"
+  printf "Sample name guessed from infile name if not specified\n"
+}
+
+if [[ $# -eq 0 ]] ; then
+  print_usage
+  exit 1
+fi
+
+while getopts 'i:g:n:ho:' flag; do
+  case "${flag}" in
+    i) IN="${OPTARG}" ;;
+    n) NAME="${OPTARG}" ;;
+    g) GENOTYPE="${OPTARG}" ;;
+    h) print_usage
+       exit 1 ;;
+    o) OUT="${OPTARG}" ;;
+    *) print_usage
+       exit 1 ;;
+  esac
+done
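+
+# Example invocation (illustrative file names, not pipeline outputs):
+#   bash add_gt_lofreq.sh -i tumor.lofreq.vcf.gz -g 0/1 -n Tumor1 -o tumor.lofreq_withGT.vcf.gz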
+
+if [ ! -f ${IN} ]; then
+  printf "\nError: input file not found\n"
+  print_usage
+  exit 1
+fi
+
+if [ -z "${GENOTYPE}" ]
+  then
+    printf "\nNo genotype specified: setting to 1"
+    GENOTYPE=1
+fi
+
+if [ -z "${NAME}" ]
+  then
+    NAME=$(basename ${IN} | cut -f1 -d ".")
+    printf "\nNo name specified: guessed it to be ${NAME}"
+fi
+
+if [ -z "${OUT}" ]
+  then
+    OUT=$(echo ${IN} | sed "s/.vcf.gz/_withGT.vcf.gz/")
+    printf "\nNo outfile specified: setting to ${OUT}\n"
+fi
+
+gunzip -kc ${IN} | \
+sed -e '6i##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">' \
+-e "s|FILTER\tINFO|FILTER\tINFO\tFORMAT\t${NAME}|g" | \
+awk -F'\t' -v genotype=${GENOTYPE} -v OFS="\t" '/^[^#]/{ $9 = "GT"; $10 = genotype }1' | \
+bgzip -c > ${OUT}
+tabix -p vcf ${OUT}
+printf "VCF with artificial genotype written to ${OUT}\n"
+exit 0
diff --git a/bin/ascat.R b/bin/ascat.R
new file mode 100644
index 0000000..334afe2
--- /dev/null
+++ b/bin/ascat.R
@@ -0,0 +1,47 @@
+#!/usr/bin/env Rscript
+
+#######
+#
+#R Script for ASCAT
+#
+######
+library(ASCAT)
+library(RColorBrewer)
+
+args = commandArgs(trailingOnly=TRUE)
+tumor_bam=args[1]
+normal_bam=args[2]
+#sample names (assumed to be passed as the 3rd and 4th arguments)
+tumor_name=args[3]
+normal_name=args[4]
+
+genome="hg38"
+
+ascat.prepareHTS(
+  tumourseqfile = tumor_bam,
+  normalseqfile = normal_bam,
+  tumourname = tumor_name,
+  normalname = normal_name,
+  allelecounter_exe = "/PATH/TO/allelecounter",
+  alleles.prefix = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/ASCAT/G1000_alleles",
+  loci.prefix = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/ASCAT_G1000_loci",
+  gender = "XX",
+  genomeVersion = genome,
+  nthreads = 8,
+  tumourLogR_file = "Tumor_LogR.txt",
+  tumourBAF_file = "Tumor_BAF.txt",
+  normalLogR_file = "Germline_LogR.txt",
+  normalBAF_file = "Germline_BAF.txt")
+
+ascat.bc = ascat.loadData(Tumor_LogR_file = "Tumor_LogR.txt", Tumor_BAF_file = "Tumor_BAF.txt",
+  Germline_LogR_file = "Germline_LogR.txt", Germline_BAF_file = "Germline_BAF.txt", gender = 'XX', genomeVersion = genome)
+
+ascat.plotRawData(ascat.bc, img.prefix = "Before_correction_")
+ascat.bc = ascat.correctLogR(ascat.bc, GCcontentfile = "GC_file.txt", replictimingfile = "RT_file.txt")
+ascat.plotRawData(ascat.bc, img.prefix = "After_correction_")
+ascat.bc = ascat.aspcf(ascat.bc)
+ascat.plotSegmentedData(ascat.bc)
+ascat.output = ascat.runAscat(ascat.bc, gamma=1, write_segments = T)
+QC = ascat.metrics(ascat.bc,ascat.output)
+save(ascat.bc, ascat.output, QC, file = 'ASCAT_objects.Rdata')
+
+
+#####
\ No newline at end of file
diff --git a/bin/assess_significance.R b/bin/assess_significance.R
new file mode 100644
index 0000000..7f7216c
--- /dev/null
+++ b/bin/assess_significance.R
@@ -0,0 +1,59 @@
+#!/usr/bin/env Rscript
+
+library(rtracklayer)
+
+args <- commandArgs()
+
+dataTable <-read.table(args[5], header=TRUE);
+ratio<-data.frame(dataTable)
+
+dataTable <- read.table(args[4], header=FALSE)
+cnvs<- data.frame(dataTable)
+
+ratio$Ratio[which(ratio$Ratio==-1)]=NA
+
+cnvs.bed=GRanges(cnvs[,1],IRanges(cnvs[,2],cnvs[,3]))
+ratio.bed=GRanges(ratio$Chromosome,IRanges(ratio$Start,ratio$Start),score=ratio$Ratio)
+
+overlaps <- subsetByOverlaps(ratio.bed,cnvs.bed)
+normals <- setdiff(ratio.bed,cnvs.bed)
+normals <- subsetByOverlaps(ratio.bed,normals)
+
+#mu <- mean(score(normals),na.rm=TRUE)
+#sigma<- sd(score(normals),na.rm=TRUE)
+
+#hist(score(normals),n=500,xlim=c(0,2))
+#hist(log(score(normals)),n=500,xlim=c(-1,1))
+
+#shapiro.test(score(normals)[which(!is.na(score(normals)))][5001:10000])
+#qqnorm (score(normals)[which(!is.na(score(normals)))],ylim=(c(0,10)))
+#qqline(score(normals)[which(!is.na(score(normals)))], col = 2)
+
+#shapiro.test(log(score(normals))[which(!is.na(score(normals)))][5001:10000])
+#qqnorm (log(score(normals))[which(!is.na(score(normals)))],ylim=(c(-6,10)))
+#qqline(log(score(normals))[which(!is.na(score(normals)))], col = 2)
+
+numberOfCol=length(cnvs)
+wscore=c()
+kscore=c()
+for (i in c(1:length(cnvs[,1]))) {
+values <- score(subsetByOverlaps(ratio.bed,cnvs.bed[i]))
+resultw <- class(try(wilcox.test(values,score(normals)), silent = TRUE))
+ifelse(resultw == "try-error", wscore <- c(wscore, "NA"), wscore <- c(wscore, wilcox.test(values,score(normals))$p.value))
+resultks <- class(try(ks.test(values,score(normals)), silent = TRUE))
+ifelse(resultks == "try-error",kscore <- c(kscore, "NA"),kscore <- c(kscore, ks.test(values,score(normals))$p.value))
+}
+cnvs = cbind(cnvs, as.numeric(wscore), as.numeric(kscore))
+
+if (numberOfCol==7) {
+  names(cnvs)=c("chr","start","end","copy number","status","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
+}
+if (numberOfCol==9) {
+  names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
+}
+if (numberOfCol==11) {
+  names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","somatic/germline","precentageOfGermline","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
+}
+write.table(cnvs, file=paste(args[4],".p.value.txt",sep=""),sep="\t",quote=F,row.names=F)
+
+
diff --git a/bin/freec_paired.pl b/bin/freec_paired.pl
new file mode 100644
index 0000000..161e24e
--- /dev/null
+++ b/bin/freec_paired.pl
@@ -0,0 +1,43 @@
+#!/usr/bin/perl -w
+use strict;
+use List::Util 'shuffle';
+
+#INPUT
+
+#my $mergedmaf = $ARGV[1] . '_out/oncotator_out/' . $ARGV[1] . '_merged.maf'; #to fix...
+#open C, ">$mergedmaf";
+
+my $outfile = $ARGV[0] . '/freec_genome_config.txt';
+my $chrLenFile = $ARGV[1];
+my $chrFiles = $ARGV[2];
+my $tumormateFile = $ARGV[3];
+my $controlmateFile = $ARGV[4];
+my $makePileup = $ARGV[5];
+my $fastaFile = $ARGV[6];
+my $SNPfile = $ARGV[7];
+
+open C, ">$outfile";
+
+print C '[general]' . "\n\n";
+
+print C "BedGraphOutput = TRUE\ndegree = 3\nforceGCcontentNormalization = 0\nminCNAlength = 1\nreadCountThreshold = 10\n";
+print C "chrLenFile = $chrLenFile\n";
+print C "ploidy = 2,3,4\nbreakPointThreshold = 0.8\nwindow = 50000\n";
+print C "chrFiles = $chrFiles\n";
+print C "minimalSubclonePresence = 20\nmaxThreads = 8\n";
+print C "outputDir = $ARGV[0]\n\n";
+
+print C '[sample]' . "\n\n";
+
+print C "mateFile = $tumormateFile\n";
+print C "inputFormat = BAM\nmateOrientation = FR\n\n";
+
+#control BAM goes in its own [control] section; [BAF] holds the pileup/SNP settings
+print C '[control]' . "\n\n";
+
+print C "mateFile = $controlmateFile\n";
+print C "inputFormat = BAM\nmateOrientation = FR\n\n";
+
+print C '[BAF]' . "\n\n";
+
+print C "makePileup = $makePileup\n";
+print C "fastaFile = $fastaFile\n";
+print C "minimalCoveragePerPosition = 20\nminimalQualityPerPosition = 20\n";
+print C "SNPfile = $SNPfile";
diff --git a/bin/lofreq_convert.sh b/bin/lofreq_convert.sh
new file mode 100755
index 0000000..1d5edda
--- /dev/null
+++ b/bin/lofreq_convert.sh
@@ -0,0 +1,32 @@
+INPUT_FILE="$1"
+TUMOR_NAME="$2"
+export TUMOR_NAME
+
+zcat "${INPUT_FILE}" \
+  | awk '($4=="A" || $4 == "C" || $4=="T" || $4=="G" || /^\#/)' \
+  | perl -ne 'print if /^#|^(chr)*[\dX]+\s.+/' \
+  | perl -ne 's/AF=/VAF=/g;s/ID=AF/ID=VAF/;print;' \
+  | perl -ne '
+    # Add 2 new rows to the description and 2 new columns in the header
+    if(/^#/){
+        if(/^##INFO=<ID=VAF/){
+            print;
+            print "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n";
+            print "##FORMAT=<ID=VAF,Number=1,Type=Float,Description=\"Variant allele frequency\">\n";
+        } elsif(/^#CHROM/){
+            chomp;
+            print "$_\tFORMAT\t$ENV{TUMOR_NAME}\n";
+        } else {
+            print;
+        }
+    } else {
+        # emit GT and VAF for the single tumor sample
+        chomp;
+        my ($vaf) = /VAF=([^;\t]+)/;
+        print "$_\tGT:VAF\t1:$vaf\n";
+    }' \
+  | bgzip -c
+# compressed VCF is written to stdout
diff --git a/bin/makeGraph.R b/bin/makeGraph.R
new file mode 100644
--- /dev/null
+++ b/bin/makeGraph.R
@@ -0,0 +1,134 @@
+args <- commandArgs()
+
+dataTable <-read.table(args[5], header=TRUE);
+ratio<-data.frame(dataTable)
+ploidy <- type.convert(args[4])
+
+png(filename = paste(args[5],".log2.png",sep = ""), width = 1180, height = 1180,
+    units = "px", pointsize = 20, bg = "white", res = NA)
+plot(1:10)
+op <- par(mfrow = c(5,5))
+
+for (i in c(1:22,'X','Y')) {
+ tt <- which(ratio$Chromosome==i)
+ if (length(tt)>0) {
+  plot(ratio$Start[tt],log2(ratio$Ratio[tt]),xlab = paste ("position, chr",i),ylab = "normalized copy number profile (log2)",pch = ".",col = colors()[88])
+  tt <- which(ratio$Chromosome==i & ratio$CopyNumber>ploidy )
+  points(ratio$Start[tt],log2(ratio$Ratio[tt]),pch = ".",col = colors()[136])
+
+  tt <- which(ratio$Chromosome==i & ratio$CopyNumber<ploidy & ratio$CopyNumber!= -1)
+  points(ratio$Start[tt],log2(ratio$Ratio[tt]),pch = ".",col = colors()[461])
+ }
+}
+dev.off()
+
+png(filename = paste(args[5],".png",sep = ""), width = 1180, height = 1180,
+    units = "px", pointsize = 20, bg = "white", res = NA)
+plot(1:10)
+op <- par(mfrow = c(5,5))
+
+maxLevelToPlot <- 3
+for (i in c(1:length(ratio$Ratio))) {
+ if (ratio$Ratio[i]>maxLevelToPlot) {
+  ratio$Ratio[i]=maxLevelToPlot;
+ }
+}
+
+
+for (i in c(1:22,'X','Y')) {
+ tt <- which(ratio$Chromosome==i)
+ if (length(tt)>0) {
+  plot(ratio$Start[tt],ratio$Ratio[tt]*ploidy,ylim = c(0,maxLevelToPlot*ploidy),xlab = paste ("position, chr",i),ylab = "normalized copy number profile",pch = ".",col = colors()[88])
+  tt <- which(ratio$Chromosome==i & ratio$CopyNumber>ploidy )
+  points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136])
+
+  tt <- which(ratio$Chromosome==i & ratio$Ratio==maxLevelToPlot & ratio$CopyNumber>ploidy)
+  points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136],cex=4)
+
+  tt <- which(ratio$Chromosome==i & ratio$CopyNumber<ploidy & ratio$CopyNumber!= -1)
+  points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[461])
+ }
+}
+dev.off()
+
+
+if (length(args)>5) {
+ dataTable <-read.table(args[6], header=TRUE);
+ BAF<-data.frame(dataTable)
+
+ png(filename = paste(args[6],".png",sep = ""), width = 1180, height = 1180,
+     units = "px", pointsize = 20, bg = "white", res = NA)
+ plot(1:10)
+ op <- par(mfrow = c(5,5))
+
+ for (i in c(1:22,'X','Y')) {
+  tt <- which(BAF$Chromosome==i)
+  if (length(tt)>0){
+   lBAF <-BAF[tt,]
+   plot(lBAF$Position,lBAF$BAF,ylim = c(-0.1,1.1),xlab = paste ("position, chr",i),ylab = "BAF",pch = ".",col = colors()[1])
+
+   tt <- which(lBAF$A==0.5)
+   points(lBAF$Position[tt],lBAF$BAF[tt],pch = ".",col = colors()[92])
+   tt <- which(lBAF$A!=0.5 & lBAF$A>=0)
+   points(lBAF$Position[tt],lBAF$BAF[tt],pch = ".",col = colors()[62])
+   tt <- 1
+   pres <- 1
+
+   if (length(lBAF$A)>4) {
+    for (j in c(2:(length(lBAF$A)-pres-1))) {
+     if (lBAF$A[j]==lBAF$A[j+pres]) {
+      tt[length(tt)+1] <- j
+     }
+    }
+    points(lBAF$Position[tt],lBAF$A[tt],pch = ".",col = colors()[24],cex=4)
+    points(lBAF$Position[tt],lBAF$B[tt],pch = ".",col = colors()[24],cex=4)
+   }
+
+   tt <- 1
+   pres <- 1
+   if (length(lBAF$FittedA)>4) {
+    for (j in c(2:(length(lBAF$FittedA)-pres-1))) {
+     if (lBAF$FittedA[j]==lBAF$FittedA[j+pres]) {
+      tt[length(tt)+1] <- j
+     }
+    }
+    points(lBAF$Position[tt],lBAF$FittedA[tt],pch = ".",col = colors()[463],cex=4)
+    points(lBAF$Position[tt],lBAF$FittedB[tt],pch = ".",col = colors()[463],cex=4)
+   }
+
+  }
+
+ }
+ dev.off()
+
+}
diff --git a/bin/make_freec_genome.pl b/bin/make_freec_genome.pl
new file mode 100644
index 0000000..28aec4c
--- /dev/null
+++ b/bin/make_freec_genome.pl
@@ -0,0 +1,39 @@
+#!/usr/bin/perl -w
+use strict;
+use List::Util 'shuffle';
+
+#INPUT
+
+#my $mergedmaf = $ARGV[1] . 
'_out/oncotator_out/' . $ARGV[1] . '_merged.maf'; #to fix... +#open C, ">$mergedmaf"; + +my $outfile = $ARGV[0] . '/freec_genome_config.txt'; +my $chrLenFile = $ARGV[1]; +my $chrFiles = $ARGV[2]; +my $tumormateFile = $ARGV[3]; +my $makePileup = $ARGV[4]; +my $fastaFile = $ARGV[5]; +my $SNPfile = $ARGV[6]; + +open C, ">$outfile"; + +print C '[general]' . "\n\n"; + +print C "BedGraphOutput = TRUE\ndegree = 3\nforceGCcontentNormalization = 0\nminCNAlength = 1\nreadCountThreshold = 10\n"; +print C "chrLenFile = $chrLenFile\n"; +print C "ploidy = 2,3,4\nbreakPointThreshold = 0.8\nwindow = 50000\n"; +print C "chrFiles = $chrFiles\n"; +print C "minimalSubclonePresence = 20\nmaxThreads = 4\n"; +print C "outputDir = $ARGV[0]\n\n"; + +print C '[sample]' . "\n\n"; + +print C "mateFile = $tumormateFile\n"; +print C "inputFormat = BAM\nmateOrientation = FR\n\n"; + +print C '[BAF]' . "\n\n"; + +print C "makePileup = $makePileup\n"; +print C "fastaFile = $fastaFile\n"; +print C "minimalCoveragePerPosition = 20\nminimalQualityPerPosition = 20\n"; +print C "SNPfile = $SNPfile"; diff --git a/bin/run_sequenza.R b/bin/run_sequenza.R new file mode 100644 index 0000000..3758b8a --- /dev/null +++ b/bin/run_sequenza.R @@ -0,0 +1,65 @@ + +args = commandArgs(trailingOnly=TRUE) + +library(sequenza) +Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 10) + + +if (length(args)==0) { + stop("Must provide a seqz file") +} else { + seqz_file = args[1] + if (! file.exists(seqz_file)) { + stop(paste0("Can't find this SEQZ output file: ", seqz_file)) + } +} + +if (length(args) > 1) { + out_dir = args[2] +} else { + out_dir = dirname(seqz_file) +} + +if (length(args) > 2) { + sampleid = args[3] +} else { + sampleid = gsub(".seqz.gz","",basename(seqz_file)) +} + +if (length(args) > 3) { + n_cores = as.numeric(args[4]) +} else { + n_cores = 1 +} + +if (is.na(n_cores)) { + n_cores = 1 +} + + +print(paste0("Using ",n_cores," cores...")) +date() +print("Extracting seqz data...") +seqzdata <- sequenza.extract(seqz_file, min.reads = 30, min.reads.normal= 10, parallel=n_cores) + +date() +print("Fitting model...") +CP.example <- sequenza.fit(seqzdata, mc.cores = n_cores) + +## Sequenza.extract seems to fail if too few mutations +num_mutations <- unlist(lapply(seqzdata$mutations, nrow)) +chrom_list <- names(num_mutations)[num_mutations > 3] +## But it might actually be segments, idk? 
+#num_segments <- unlist(lapply(seqzdata$segments, nrow)) +#chrom_list <- names(num_mutations)[num_segments > 1] + +not_included <- setdiff(names(num_mutations), chrom_list) +print("Printing results...") +if (length(not_included) > 0) { + print("Excluding these chromosomes because of too few mutations...") + print(not_included) +} +sequenza.results(sequenza.extract = seqzdata,cp.table = CP.example, sample.id = sampleid, out.dir=out_dir, chromosome.list=chrom_list) + +date() +print("Done") diff --git a/bin/sexscript.sh b/bin/sexscript.sh new file mode 100755 index 0000000..8e94555 --- /dev/null +++ b/bin/sexscript.sh @@ -0,0 +1,18 @@ + #!/bin/bash + + x_map=$(samtools idxstats $1 | grep "chrX\s" | cut -f 3) + x_len=$(samtools idxstats $1 | grep "chrX\s" | cut -f 2) + x_cov=$(echo "scale=3; ${x_map}/${x_len}" | bc) + + y_map=$(samtools idxstats $1 | grep "chrY\s" | cut -f 3) + y_len=$(samtools idxstats $1 | grep "chrY\s" | cut -f 2) + y_cov=$(echo "scale=3; ${y_map}/${y_len}" | bc) + + ratio=$(echo "scale=2; ${x_cov}/${y_cov}" | bc) + echo $ratio + + if (( $(echo "$ratio > 4.00" | bc -l) )); then + echo "F" + else + echo "M" + fi \ No newline at end of file diff --git a/main.nf b/main.nf index 31a753d..4babf3a 100644 --- a/main.nf +++ b/main.nf @@ -22,8 +22,9 @@ include {INPUT_PIPE;TRIM_ALIGN_PIPE; GERMLINE_PIPE;VARIANTCALL_PIPE;INPUT_BAMVC_PIPE;SV_PIPE; QC_PIPE} from "./subworkflows/local/workflows.nf" -include {INPUT_TONLY_PIPE;TRIM_ALIGN_TONLY_PIPE; - VARIANT_TONLY_PIPE;INPUT_TONLY_BAMVC_PIPE;QC_TONLY_PIPE} from "./subworkflows/local/workflows_tonly.nf" +include {INPUT_TONLY; INPUT_TONLY_BAM; + ALIGN_TONLY; + VC_TONLY; SV_TONLY; CNVhuman_tonly; CNVmouse_tonly; QC_TONLY } from "./subworkflows/local/workflows_tonly.nf" log.info """\ diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index 7618bc4..b15a8c2 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -1,39 +1,231 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl=2 +GENOMEREF=file(params.genomes[params.genome].genome) +SEQUENZAGC=file(params.genomes[params.genome].SEQUENZAGC) +SEQUENZA_SCRIPT=params.script_sequenza + +if (params.genome=="mm10"){ +FREECLENGTHS=file(params.genomes[params.genome].FREEC.FREECLENGTHS) +FREECCHROMS=file(params.genomes[params.genome].FREEC.FREECCHROMS) +FREECPILEUP=file(params.genomes[params.genome].FREEC.FREECPILEUP) +FREECSNPS = file(params.genomes[params.genome].FREEC.FREECSNPS) +FREECTARGETS=file(params.genomes[params.genome].intervals) +FREECSCRIPT = params.script_freec +FREECPAIR_SCRIPT = params.script_freecpaired +FREECSIGNIFICANCE = params.freec_significance +FREECPLOT = params.freec_plot +} -GENOME=file(params.genome) GERMLINEHET="/data/SCLC-BRAINMETS/cn/copy_number/GermlineHetPon.38.vcf.gz" GCPROFILE='/data/SCLC-BRAINMETS/cn/copy_number/GC_profile.1000bp.38.cnp' DIPLODREG='/data/SCLC-BRAINMETS/cn/copy_number/DiploidRegions.38.bed.gz' ENSEMBLCACHE='/data/SCLC-BRAINMETS/cn/common/ensembl_data' DRIVERS='/data/SCLC-BRAINMETS/cn/common/DriverGenePanel.38.tsv' HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz' -cobalt="/data/SCLC-BRAINMETS/cn/cobalt_v1.14.jar" -amber="/data/SCLC-BRAINMETS/cn/amber-3.9.jar" -purple="/data/SCLC-BRAINMETS/cn/purple_v3.8.2.jar" - -date = new Date().format( 'yyyyMMdd' ) +//DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) +//ascatR= outdir=file(params.output) +//mm10 Paired-Sequenza, FREEC-tumor only +process seqz_sequenza_bychr { + label 'process_low' -log.info """\ - W G S S E E K P I P E L I N E 
- ============================= - Samples: ${params.sample_sheet} - """ - .stripIndent() + input: + tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), val(chr) + + output: + tuple val(pairid), path("${tumorname}_${normalname}_${chr}.seqz.gz") + + script: + """ + sequenza-utils bam2seqz \ + -gc ${SEQUENZAGC} \ + -F $GENOMEREF \ + -C ${chr} \ + -n ${normal} \ + -t ${tumor} | gzip > "${tumorname}_${normalname}_${chr}.seqz.gz" + + """ + + stub: + """ + touch "${tumorname}_${normalname}_${chr}.seqz.gz" + """ +} + + + + +process sequenza { + label 'process_highcpu' + publishDir("${outdir}/cnv/sequenza", mode: 'copy') + + input: + tuple val(pairid), path(seqz) + + output: + tuple val(pairid), + path("${pairid}_alternative_solutions.txt"), + path("${pairid}_alternative_fit.pdf"), + path("${pairid}_model_fit.pdf"), + path("${pairid}_confints_CP.txt"), + path("${pairid}_CN_bars.pdf"), + path("${pairid}_genome_view.pdf"), + path("${pairid}_chromosome_view.pdf"), + path("${pairid}_mutations.txt"), + path("${pairid}_segments.txt"), + path("${pairid}_CP_contours.pdf"), + path("${pairid}_sequenza_cp_table.RData"), + path("${pairid}_chromosome_depths.pdf"), + path("${pairid}_gc_plots.pdf"), + path("${pairid}_sequenza_extract.RData") + + //samtools mpileup ${tumor} -f $GENOMEREF -Q 20 |gzip > ${tumorname}.mpileup.gz + //samtools mpileup ${normal} -f $GENOMEREF -Q 20 |gzip > ${normalname}.mpileup.gz + //sequenza-utils seqz_binning --seqz --window 50 -o ${sample}_bin50.seqz.gz + + shell: + ''' + + zcat !{seqz} | awk '{if (NR==1) {print $0} else {if ($1!="chromosome"){print $0}}}' |\ + sequenza-utils seqz_binning \ + -w 100 \ + -s - > !{pairid}.bin100.seqz + + Rscript !{SEQUENZA_SCRIPT} \ + !{pairid}.bin100.seqz \ + . \ + !{pairid} \ + !{task.cpus} + + ''' + + stub: + + """ + touch "${pairid}_alternative_solutions.txt" + touch "${pairid}_alternative_fit.pdf" + touch "${pairid}_model_fit.pdf" + touch "${pairid}_confints_CP.txt" + touch "${pairid}_CN_bars.pdf" + touch "${pairid}_genome_view.pdf" + touch "${pairid}_chromosome_view.pdf" + touch "${pairid}_mutations.txt" + touch "${pairid}_segments.txt" + touch "${pairid}_CP_contours.pdf" + touch "${pairid}_sequenza_cp_table.RData" + touch "${pairid}_chromosome_depths.pdf" + touch "${pairid}_gc_plots.pdf" + touch "${pairid}_sequenza_extract.RData" + + """ + +} + +process freec_paired { + label 'process_highcpu' + publishDir("${outdir}/cnv/freec", mode: 'copy') + + input: + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) + + shell: """ + + perl $FREECPAIR_SCRIPT \ + . 
\ + $FREECLENGTHS \ + $FREECCHROMS \ + ${tumor} \ + ${normal} \ + $FREECPILEUP \ + $GENOMEREF \ + $FREECSNPS \ + $FREECTARGETS + + freec -conf freec_genome_config.txt + + cat $FREECSIGNIFICANCE | \ + R --slave \ + --args ${tumor}_CNVs \ + ${tumor}_ratio.txt + + cat $FREECPLOT | \ + R --slave \ + --args 2 \ + ${tumorname}_vs_${normalname}.bam_ratio.txt \ + ${tumorname}_vs_${normalname}.bam_BAF.txt + + """ + + stub: + """ + touch ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt + touch ${tumorname}_vs_${normalname}.bam_ratio.txt + touch ${tumorname}_vs_${normalname}.bam_BAF.txt + touch ${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png + touch ${tumorname}_vs_${normalname}.bam_ratio.txt.png + + """ +} + + +process freec { + label 'process_mid' + publishDir("${outdir}/cnv/freec", mode: 'copy') -//Purple -process amber { - module=["java/12.0.1","R/3.6.3"] - publishDir("${outdir}/amber", mode: 'copy') input: - tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bam.bai") + tuple val(tumorname), path(tumor), path(tumorbai) + + shell: """ + + perl $FREECSCRIPT \ + . \ + $FREECLENGTHS \ + $FREECCHROMS \ + ${tumor} \ + $FREECPILEUP \ + $GENOMEREF \ + $FREECSNPS \ + $FREECTARGETS + + freec -conf freec_genome_config.txt + + cat $FREECSIGNIFICANCE | \ + R --slave \ + --args ${tumor}_CNVs \ + ${tumor}_ratio.txt + + cat $FREECPLOT | \ + R --slave \ + --args 2 \ + ${tumor}_ratio.txt \ + ${tumor}_BAF.txt + + """ + + stub: + """ + touch ${tumor}_CNVs.p.value.txt + touch ${tumor}_ratio.txt + touch ${tumor}_BAF.txt + touch ${tumor}_ratio.txt.log2.png + touch ${tumor}_ratio.txt.png + + """ +} + + +process amber_tonly { + label 'process_mid' + publishDir("${outdir}/cnv/amber", mode: 'copy') + + input: + tuple val(tumorname), path(tumor), path(tumorbai) + output: - tuple val(samplename), path("${samplename}_amber") + tuple val(tumorname), path("${tumorname}_amber") //path("${samplename}.amber.baf.tsv.gz"), //path("${samplename}.amber.baf.pcf"), //path("${samplename}.amber.qc") @@ -43,10 +235,10 @@ process amber { """ - java -Xmx32G -cp $amber com.hartwig.hmftools.amber.AmberApplication \ - -tumor ${samplename} -tumor_bam ${samplename}.bqsr.bam \ - -output_dir ${samplename}_amber \ - -threads 16 \ + java -Xmx32G -cp amber.jar com.hartwig.hmftools.amber.AmberApplication \ + -tumor ${tumorname} -tumor_bam ${tumor} \ + -output_dir ${tumorname}_amber \ + -threads $task.cpus \ -ref_genome_version V38 \ -loci $GERMLINEHET @@ -55,20 +247,57 @@ process amber { stub: """ - mkdir ${samplename}_amber - touch ${samplename}_amber/${samplename}.amber.baf.tsv.gz ${samplename}_amber/${samplename}.amber.baf.pcf ${samplename}_amber/${samplename}.amber.qc + mkdir ${tumorname}_amber + touch ${tumorname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_amber/${tumorname}.amber.qc """ } -process cobalt { - module=["java/12.0.1","R/3.6.3"] - publishDir("${outdir}/cobalt", mode: 'copy') +process amber_tn { + label 'process_mid' + publishDir("${outdir}/cnv/amber", mode: 'copy') + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + output: + tuple val(tumorname), path("${tumorname}_vs_${normalname}_amber") + //path("${samplename}.amber.baf.tsv.gz"), + //path("${samplename}.amber.baf.pcf"), + //path("${samplename}.amber.qc") + //path("${samplename}.amber.contamination.vcf.gz") Contamination maybe only with tumor + + script: + + """ + + java -Xmx32G -cp amber.jar 
com.hartwig.hmftools.amber.AmberApplication \
+    -tumor ${tumorname} -tumor_bam ${tumor} \
+    -reference ${normalname} -reference_bam ${normal} \
+    -output_dir ${tumorname}_vs_${normalname}_amber \
+    -threads $task.cpus \
+    -ref_genome_version V38 \
+    -loci $GERMLINEHET
+
+    """
+
+    stub:
+
+    """
+    mkdir ${tumorname}_vs_${normalname}_amber
+    touch ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.qc
+    """
+}
+
+process cobalt_tonly {
+    label "process_mid"
+    publishDir("${outdir}/cnv/cobalt", mode: 'copy')

     input:
-        tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bam.bai")
+        tuple val(tumorname), path(tumor), path(tumorbai)

     output:
-        tuple val(samplename), path("${samplename}_cobalt")
+        tuple val(tumorname), path("${tumorname}_cobalt")
         //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"),
         //path("${samplename}/${samplename}.cobalt.ratio.pcf"),
         //path("${samplename}/${samplename}.cobalt.gc.median.tsv")
@@ -77,88 +306,142 @@

     script:

     """
-    java -jar -Xmx8G $cobalt \
-    -tumor ${samplename} -tumor_bam ${samplename}.bqsr.bam \
-    -output_dir ${samplename}_cobalt \
-    -threads 16 \
+    java -jar -Xmx8G cobalt.jar \
+    -tumor ${tumorname} -tumor_bam ${tumor} \
+    -output_dir ${tumorname}_cobalt \
+    -threads $task.cpus \
     -tumor_only_diploid_bed $DIPLODREG \
-    -gc_profile /data/SCLC-BRAINMETS/cn/copy_number/GC_profile.1000bp.38.cnp
+    -gc_profile $GCPROFILE

     """

     stub:

     """
-    mkdir ${samplename}_cobalt
-    touch ${samplename}_cobalt/${samplename}.cobalt.ratio.tsv.gz ${samplename}_cobalt/${samplename}.cobalt.ratio.pcf ${samplename}_cobalt/${samplename}.cobalt.gc.median.tsv
+    mkdir ${tumorname}_cobalt
+    touch ${tumorname}_cobalt/${tumorname}.cobalt.ratio.tsv.gz ${tumorname}_cobalt/${tumorname}.cobalt.ratio.pcf ${tumorname}_cobalt/${tumorname}.cobalt.gc.median.tsv
     """
 }

-process purple {
-    module=["java/12.0.1","R/3.6.3"]
-
-    publishDir("${outdir}/purple", mode: 'copy')
+process cobalt_tn {
+    label "process_mid"
+    publishDir("${outdir}/cnv/cobalt", mode: 'copy')

     input:
-        tuple val(samplename), path(cobaltin), path(amberin), path("${samplename}.tonly.final.mut2.vcf.gz")
+        tuple val(tumorname), path(tumor), path(tumorbai),
+        val(normalname), path(normal), path(normalbai)

     output:
-        tuple val(samplename), path("${samplename}")
+        tuple val(tumorname), path("${tumorname}_vs_${normalname}_cobalt")
+        //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"),
+        //path("${samplename}/${samplename}.cobalt.ratio.pcf"),
+        //path("${samplename}/${samplename}.cobalt.gc.median.tsv")

     script:

     """
-    java -jar $purple \
-    -tumor ${samplename} \
+    java -jar -Xmx8G cobalt.jar \
+    -tumor ${tumorname} -tumor_bam ${tumor} \
+    -reference ${normalname} -reference_bam ${normal} \
+    -output_dir ${tumorname}_vs_${normalname}_cobalt \
+    -threads $task.cpus \
+    -tumor_only_diploid_bed $DIPLODREG \
+    -gc_profile $GCPROFILE
+
+    """
+
+    stub:
+
+    """
+    mkdir ${tumorname}_vs_${normalname}_cobalt
+    touch ${tumorname}_vs_${normalname}_cobalt/${tumorname}.cobalt.ratio.tsv.gz ${tumorname}_vs_${normalname}_cobalt/${tumorname}.cobalt.ratio.pcf ${tumorname}_vs_${normalname}_cobalt/${tumorname}.cobalt.gc.median.tsv
+    """
+}
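+
+//PURPLE joins each tumor's AMBER and COBALT output directories with its
+//somatic VCF; e.g. (illustrative sample name) Tumor1_amber/ and
+//Tumor1_cobalt/ as touched in the stubs above.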
+
+
+process purple {
+    label 'process_mid'
+    publishDir("${outdir}/cnv/purple", mode: 'copy')
+
+    input:
+        tuple val(tumorname),
+        path(cobaltin),
+        path(amberin),
+        path(somaticvcf),
+        path(somaticvcfindex)
+
+    output:
+        tuple val(tumorname), path("${tumorname}")
+
+    script:
+
+    """
+    java -jar purple.jar \
+    -tumor ${tumorname} \
     -amber ${amberin} \
     -cobalt ${cobaltin} \
     -gc_profile $GCPROFILE \
     -ref_genome_version 38 \
-    -ref_genome $GENOME \
+    -ref_genome $GENOMEREF \
     -ensembl_data_dir $ENSEMBLCACHE \
-    -somatic_vcf ${samplename}.tonly.final.mut2.vcf.gz \
+    -somatic_vcf ${somaticvcf} \
     -driver_gene_panel $DRIVERS \
     -somatic_hotspots $HOTSPOTS \
-    -output_dir ${samplename}
-
+    -output_dir ${tumorname}
     """

     stub:

     """
-    mkdir ${samplename}
-    touch ${samplename}/${samplename}.purple.cnv.somatic.tsv ${samplename}/${samplename}.purple.cnv.gene.tsv ${samplename}/${samplename}.driver.catalog.somatic.tsv
+    mkdir ${tumorname}
+    touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv
     """
 }

+/*
+process ascat_tn {
+    module=["java/12.0.1","R/3.6.3"]
+
+    publishDir("${outdir}/purple", mode: 'copy')
+
+    input:
+        tuple val(samplename), path(cobaltin), path(amberin), path("${samplename}.tonly.final.mut2.vcf.gz")
+
+    output:
+        tuple val(samplename), path("${samplename}")
+
+    script:
+
+    """
+    Rscript ${ascatR}
+    """
+
+    stub:
+
+    """
+    touch ${prefix}.after_correction.gc_rt.test.tumour.germline.png
+    touch ${prefix}.after_correction.gc_rt.test.tumour.tumour.png
+    touch ${prefix}.before_correction.test.tumour.germline.png
+    touch ${prefix}.before_correction.test.tumour.tumour.png
+    touch ${prefix}.cnvs.txt
+    touch ${prefix}.metrics.txt
+    touch ${prefix}.normal_alleleFrequencies_chr21.txt
+    touch ${prefix}.normal_alleleFrequencies_chr22.txt
+    touch ${prefix}.purityploidy.txt
+    touch ${prefix}.segments.txt
+    touch ${prefix}.tumour.ASPCF.png
+    touch ${prefix}.tumour.sunrise.png
+    touch ${prefix}.tumour_alleleFrequencies_chr21.txt
+    touch ${prefix}.tumour_alleleFrequencies_chr22.txt
+    touch ${prefix}.tumour_normalBAF.txt
+    touch ${prefix}.tumour_normalLogR.txt
+    touch ${prefix}.tumour_tumourBAF.txt
+    touch ${prefix}.tumour_tumourLogR.txt
+    """
+
+}
+*/
-
-//Workflow
-workflow {
-    sample_sheet=Channel.fromPath(params.sample_sheet, checkIfExists: true).view()
-    .ifEmpty { "sample sheet not found" }
-    .splitCsv(header:true, sep: "\t", strip:true)
-    .map { row -> tuple(
-        row.ID,
-        row.bam,
-        row.vcf
-    )
-    }
-
-    baminput=sample_sheet
-    .map{samplename,bam,vcf-> tuple(samplename,file(bam),file("${bam}.bai"))}
-
-    somaticinput=sample_sheet
-    .map{samplename,bam,vcf-> tuple(samplename,file(vcf))}
-
-    cobalt(baminput)
-    amber(baminput)
-
-    pre1=cobalt.out.join(amber.out)
-    pre1.join(somaticinput).view()| purple
-}
-
diff --git a/modules/local/structural_variant.nf b/modules/local/structural_variant.nf
index 7a92b3f..d807753 100644
--- a/modules/local/structural_variant.nf
+++ b/modules/local/structural_variant.nf
@@ -1,23 +1,18 @@
-GENOME=file(params.genome)
-GENOMEDICT=file(params.genomedict)
-WGSREGION=file(params.wgsregion)
-MILLSINDEL=file(params.millsindel) //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz
-SHAPEITINDEL=file(params.shapeitindel) //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz
-KGP=file(params.kgp) //1000G_phase1.snps.high_confidence.hg38.vcf.gz"
-DBSNP=file(params.dbsnp) //dbsnp_138.hg38.vcf.gz"
-DBSNP_INDEL=file(params.dbsnp_indel) //dbsnp_138.hg38.vcf.gz"
-BWAGENOME=file(params.bwagenome)
-GNOMAD=file(params.gnomad) //somatic-hg38-af-only-gnomad.hg38.vcf.gz
-PON=file(params.pon)
+GENOMEREF=file(params.genomes[params.genome].genome)
+GENOME=params.genome
+BWAGENOME=file(params.genomes[params.genome].bwagenome)
+DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) outdir=file(params.output) process svaba_somatic { + label 'process_highcpu' + publishDir(path: "${outdir}/SV/svaba", mode: 'copy') input: - tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai) + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) output: tuple val(tumorname), @@ -46,14 +41,26 @@ process svaba_somatic { """ touch "${tumor.simpleName}.bps.txt.gz" touch "${tumor.simpleName}.contigs.bam" - touch "${tumor.simpleName}.discordants.txt.gz" + touch "${tumor.simpleName}.discordant.txt.gz" touch "${tumor.simpleName}.alignments.txt.gz" + touch "${tumor.simpleName}.svaba.germline.indel.vcf" + touch "${tumor.simpleName}.svaba.germline.sv.vcf" + touch "${tumor.simpleName}.svaba.somatic.indel.vcf" + touch "${tumor.simpleName}.svaba.somatic.sv.vcf" + touch "${tumor.simpleName}.svaba.unfiltered.germline.indel.vcf" + touch "${tumor.simpleName}.svaba.unfiltered.germline.sv.vcf" + touch "${tumor.simpleName}.svaba.unfiltered.somatic.indel.vcf" + touch "${tumor.simpleName}.svaba.unfiltered.somatic.sv.vcf" + touch "${tumor.simpleName}.log" + """ } + process manta_somatic { - //https://github.com/illumina/manta + + label 'process_highcpu' publishDir(path: "${outdir}/SV/manta", mode: 'copy') input: @@ -71,10 +78,10 @@ process manta_somatic { mkdir -p wd configManta.py \ - --normalBam=${normal} \ - --tumorBam=${tumor} \ - --referenceFasta=$GENOME \ - --runDir=wd + --normalBam=${normal} \ + --tumorBam=${tumor} \ + --referenceFasta=$GENOMEREF \ + --runDir=wd wd/runWorkflow.py -m local -j 10 -g 10 @@ -96,30 +103,215 @@ process manta_somatic { } -process annotsv_tn{ +process annotsv_tn { //AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files //Requires bedtools,bcftools + module = ['annotsv/3.3.1'] publishDir(path: "${outdir}/SV/annotated", mode: 'copy') input: - tuple val(tumorname), path(somaticvcf) + tuple val(tumorname), path(somaticvcf), val(sv) output: tuple val(tumorname), - path("${tumor.simpleName}.tsv"), - path("${tumor.simpleName}.unannotated.tsv"), + path("${sv}/${tumorname}.tsv"), + path("${sv}/${tumorname}.unannotated.tsv") script: """ - AnnotSV -SVinputFile ${tumor}.vcf.gz -SVinputInfo 1 -outputFile ${tumor.simpleName} -outputDir . 
+    mkdir ${sv}
+
+    AnnotSV -SVinputFile ${somaticvcf} \
+    -genomeBuild $GENOME \
+    -SVinputInfo 1 -outputFile ${tumorname} \
+    -outputDir ${sv}
+
+    """
+
+    stub:
+    """
+    mkdir ${sv}
+
+    touch "${sv}/${tumorname}.tsv"
+    touch "${sv}/${tumorname}.unannotated.tsv"
+    """
+}
+
+
+process manta_tonly {
+    label 'process_highcpu'
+    publishDir(path: "${outdir}/SV/manta_tonly", mode: 'copy')
+
+    input:
+        tuple val(tumorname), path(tumor), path(tumorbai)
+
+    output:
+        tuple val(tumorname),
+        path("${tumor.simpleName}.candidateSV.vcf.gz"),
+        path("${tumor.simpleName}.candidateSmallIndels.vcf.gz"),
+        path("${tumor.simpleName}.tumorSV.vcf.gz")
+
+
+    script:
+    """
+    mkdir -p wd
+
+    configManta.py \
+        --tumorBam=${tumor} \
+        --referenceFasta=$GENOMEREF \
+        --runDir=wd
+
+    wd/runWorkflow.py -m local -j 10 -g 10
+
+    mv wd/results/variants/candidateSV.vcf.gz ${tumor.simpleName}.candidateSV.vcf.gz
+    mv wd/results/variants/candidateSmallIndels.vcf.gz ${tumor.simpleName}.candidateSmallIndels.vcf.gz
+    mv wd/results/variants/tumorSV.vcf.gz ${tumor.simpleName}.tumorSV.vcf.gz
+
+    """
+
+    stub:
+
+    """
+    touch ${tumor.simpleName}.candidateSV.vcf.gz
+    touch ${tumor.simpleName}.candidateSmallIndels.vcf.gz
+    touch ${tumor.simpleName}.tumorSV.vcf.gz
+
+    """
+}
+
+
+process svaba_tonly {
+    label 'process_highcpu'
+    publishDir(path: "${outdir}/SV/svaba_tonly", mode: 'copy')
+
+    input:
+        tuple val(tumorname), path(tumor), path(tumorbai)
+
+    output:
+        tuple val(tumorname),
+        path("${tumor.simpleName}.bps.txt.gz"),
+        path("${tumor.simpleName}.contigs.bam"),
+        path("${tumor.simpleName}.discordant.txt.gz"),
+        path("${tumor.simpleName}.alignments.txt.gz"),
+        path("${tumor.simpleName}.svaba.indel.vcf"),
+        path("${tumor.simpleName}.svaba.sv.vcf"),
+        path("${tumor.simpleName}.svaba.unfiltered.indel.vcf"),
+        path("${tumor.simpleName}.svaba.unfiltered.sv.vcf"),
+        path("${tumor.simpleName}.log")
+
+
+    script:
+    """
+    svaba run -t ${tumor} -p $task.cpus -D $DBSNP_INDEL -a ${tumor.simpleName} -G $BWAGENOME
+    """
+
+    stub:
+
+    """
+    touch "${tumor.simpleName}.bps.txt.gz"
+    touch "${tumor.simpleName}.contigs.bam"
+    touch "${tumor.simpleName}.discordant.txt.gz"
+    touch "${tumor.simpleName}.alignments.txt.gz"
+    touch "${tumor.simpleName}.svaba.indel.vcf"
+    touch "${tumor.simpleName}.svaba.sv.vcf"
+    touch "${tumor.simpleName}.svaba.unfiltered.indel.vcf"
+    touch "${tumor.simpleName}.svaba.unfiltered.sv.vcf"
+    touch "${tumor.simpleName}.log"
+
+    """
+}
+
+
+process gunzip {
+
+    input:
+        tuple val(tumorname),
+        path(vcf), val(sv)
+
+    output:
+        tuple val(tumorname),
+        path("${tumorname}.tumorSV.vcf"), val(sv)
+
+    script:
+    """
+    #-c keeps the input intact and streams the decompressed VCF to stdout
+    gunzip -c ${vcf} > ${tumorname}.tumorSV.vcf
+    """
+
+    stub:
+
+    """
+    touch ${tumorname}.tumorSV.vcf
+    """
+
+}
+
+
+process survivor_sv {
+    module = ['survivor']
+    publishDir(path: "${outdir}/SV/survivor", mode: 'copy')
+
+    input:
+        tuple val(tumorname),
+        path(vcfs), val(svs)
+
+    output:
+        tuple val(tumorname),
+        path("${tumorname}_merged.vcf"),
+        val("survivor")
+
+
+    script:
+        strin = vcfs.join("\\n")
+
+    """
+    echo -e '$strin' > filelistin
+    SURVIVOR merge filelistin 1000 2 1 1 1 30 ${tumorname}_merged.vcf
+    """
+
+    stub:
+        strin = vcfs.join("\\n")
+    """
+    echo -e '$strin' > filelistin
+    touch "${tumorname}_merged.vcf"
+    """
+}
+
+
+process annotsv_tonly {
+    //AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files
+    //Requires bedtools,bcftools
+
+    module = ['annotsv/3.3.1']
+    publishDir(path: "${outdir}/SV/annotated_tonly", mode: 'copy')
+
input: + tuple val(tumorname), path(somaticvcf), val(sv) + + output: + tuple val(tumorname), + path("${sv}/${tumorname}.tsv"), + path("${sv}/${tumorname}.unannotated.tsv") + + + script: + """ + mkdir ${sv} + + AnnotSV -SVinputFile ${somaticvcf} \ + -genomeBuild $GENOME \ + -SVinputInfo 1 -outputFile ${tumorname} \ + -outputDir ${sv} + + """ + + stub: + """ + mkdir ${sv} + + touch "${sv}/${tumorname}.tsv" + touch "${sv}/${tumorname}.unannotated.tsv" + """ +} \ No newline at end of file diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index 807fa77..d7b3bf9 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -1,99 +1,106 @@ -GENOME=file(params.genome) -GENOMEDICT=file(params.genomedict) -WGSREGION=file(params.wgsregion) -MILLSINDEL=file(params.millsindel) //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -SHAPEITINDEL=file(params.shapeitindel) //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz -KGP=file(params.kgp) //1000G_phase1.snps.high_confidence.hg38.vcf.gz" -DBSNP=file(params.dbsnp) //dbsnp_138.hg38.vcf.gz" -GNOMAD=file(params.gnomad) //somatic-hg38-af-only-gnomad.hg38.vcf.gz -PON=file(params.pon) -VEP_CACHEDIR=file(params.vep_cache) +GENOMEREF=file(params.genomes[params.genome].genome) +GENOMEFAI=file(params.genomes[params.genome].genomefai) +GENOMEDICT=file(params.genomes[params.genome].genomedict) +KGPGERMLINE=params.genomes[params.genome].kgp +DBSNP=file(params.genomes[params.genome].dbsnp) +GNOMADGERMLINE=params.genomes[params.genome].gnomad +PON=file(params.genomes[params.genome].pon) +VEPCACHEDIR=file(params.genomes[params.genome].vepcache) +VEPSPECIES=params.genomes[params.genome].vepspecies +VEPBUILD=params.genomes[params.genome].vepbuild +SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest +GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest +LOFREQ_CONVERT=params.lofreq_convert //Output outdir=file(params.output) process mutect2 { + label 'process_somaticcaller' + input: - tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai), path(bed) + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), + path(bed) output: - tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz"), - path("${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz"), - path("${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats") + tuple val(tumorname), val(normalname), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz"), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz"), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats") script: - """ gatk Mutect2 \ - --reference ${GENOME} \ + --reference $GENOMEREF \ --intervals ${bed} \ --input ${tumor} \ --input ${normal} \ --normal-sample ${normal.simpleName} \ --tumor-sample ${tumor.simpleName} \ - --germline-resource ${GNOMAD} \ + $GNOMADGERMLINE \ --panel-of-normals ${PON} \ - --output ${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz \ - --f1r2-tar-gz ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz \ + --output ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz \ + --f1r2-tar-gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz \ --independent-mates """ stub: - """ - touch ${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz - touch ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz - touch 
${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats """ } process pileup_paired_t { + label 'process_highmem' + input: - tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai), path(bed) + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table") script: - """ gatk --java-options -Xmx48g GetPileupSummaries \ -I ${tumor} \ - -V ${KGP} \ + -V $KGPGERMLINE \ -L ${bed} \ -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table """ stub: - """ touch ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table - """ } process pileup_paired_n { + label 'process_highmem' + input: - tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai), path(bed) + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.normal.pileup.table") script: - """ gatk --java-options -Xmx48g GetPileupSummaries \ -I ${normal} \ - -V ${KGP} \ + -V $KGPGERMLINE \ -L ${bed} \ -O ${tumor.simpleName}_${bed.simpleName}.normal.pileup.table @@ -108,10 +115,10 @@ process pileup_paired_n { process contamination_paired { + label 'process_highmem' publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') - //OUTPUT THE CONTAMINATION TABLE input: tuple val(tumorname), path(tumor_pileups), @@ -132,11 +139,11 @@ process contamination_paired { """ gatk GatherPileupSummaries \ - --sequence-dictionary ${GENOMEDICT} \ + --sequence-dictionary $GENOMEDICT \ -I ${alltumor} -O ${tumorname}_allpileups.table gatk GatherPileupSummaries \ - --sequence-dictionary ${GENOMEDICT} \ + --sequence-dictionary $GENOMEDICT \ -I ${allnormal} -O ${tumorname}_normal.allpileups.table gatk CalculateContamination \ @@ -161,7 +168,10 @@ process contamination_paired { } + process learnreadorientationmodel { + label 'process_highmem' + publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') input: @@ -186,8 +196,9 @@ process learnreadorientationmodel { } - process mergemut2stats { + label 'process_low' + publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') input: @@ -213,68 +224,70 @@ process mergemut2stats { } - - process mutect2filter { + label 'process_mid' publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') input: - tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination) + tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs), + path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination) + output: - tuple val(sample), path("${sample}.mut2.marked.vcf.gz"), - path("${sample}.mut2.norm.vcf.gz"), path("${sample}.marked.vcf.gz.filteringStats.tsv") + tuple val("${tumor}_vs_${normal}"), + path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"), + path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi"), + path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"), + path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv") script: - //Include the stats and concat ${mutvcfs} -Oz -o 
${sample}.concat.vcf.gz mut2in = mutvcfs.join(" -I ") - """ - gatk GatherVcfs -I ${mut2in} -O ${sample}.concat.vcf.gz - gatk IndexFeatureFile -I ${sample}.concat.vcf.gz + gatk SortVcf -I ${mut2in} -O ${tumor}_vs_${normal}.concat.vcf.gz --CREATE_INDEX gatk FilterMutectCalls \ - -R ${GENOME} \ - -V ${sample}.concat.vcf.gz \ + -R $GENOMEREF \ + -V ${tumor}_vs_${normal}.concat.vcf.gz \ --ob-priors ${obs} \ --contamination-table ${tumorcontamination} \ --stats ${stats} \ - -O ${sample}.mut2.marked.vcf.gz - - + -O ${tumor}_vs_${normal}.mut2.marked.vcf.gz gatk SelectVariants \ - -R ${GENOME} \ - --variant ${sample}.mut2.marked.vcf.gz \ + -R $GENOMEREF \ + --variant ${tumor}_vs_${normal}.mut2.marked.vcf.gz \ --exclude-filtered \ - --output ${sample}.mut2.final.vcf.gz + --output ${tumor}_vs_${normal}.mut2.final.vcf.gz - bcftools sort ${sample}.mut2.final.vcf.gz -@ 16 -Oz |\ - bcftools norm --threads $task.cpus --check-ref s -f $GENOME -O v |\ + bcftools sort ${tumor}_vs_${normal}.mut2.final.vcf.gz |\ + bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ - sed '/^\$/d' > ${sample}.mut2.norm.vcf.gz + sed '/^\$/d' | bcftools view - -Oz -o ${tumor}_vs_${normal}.mut2.norm.vcf.gz + bcftools index -t ${tumor}_vs_${normal}.mut2.norm.vcf.gz """ stub: """ - touch ${sample}.mut2.marked.vcf.gz - touch ${sample}.mut2.norm.vcf.gz - touch ${sample}.marked.vcf.gz.filteringStats.tsv + touch ${tumor}_vs_${normal}.mut2.marked.vcf.gz ${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi + touch ${tumor}_vs_${normal}.mut2.norm.vcf.gz ${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi + touch ${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv """ } - process strelka_tn { - + label 'process_highcpu' input: - tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) output: - tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz"), - path("${tumor.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz") + tuple val(tumorname), val(normalname), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz"), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi"), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz"), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi") script: @@ -285,62 +298,80 @@ process strelka_tn { tabix ${bed}.gz configureStrelkaSomaticWorkflow.py \ - --ref=${GENOME} \ + --ref=$GENOMEREF \ --tumor=${tumor} \ --normal=${normal} \ --runDir=wd \ --callRegions ${bed}.gz ./wd/runWorkflow.py -m local -j $task.cpus - mv wd/results/variants/somatic.snvs.vcf.gz ${tumor.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz - mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + mv wd/results/variants/somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz + mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz + + printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname + + bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz \ + | bcftools 
view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz + bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz \ + | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz + bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + """ stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz - touch ${tumor.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi """ - } process vardict_tn { - + label 'process_highcpu' + input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) output: - tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.vardict.vcf") - + tuple val(tumorname), val(normalname), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz") + //bcbio notes of vardict filtering var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and + //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))” script: """ - VarDict -G $GENOME \ + bedtools makewindows -b ${bed} -w 50150 -s 50000 > temp_${bed} + + VarDict -G $GENOMEREF \ -f 0.01 \ --nosv \ - -b "${tumor}|${normal}" \ + -b "${tumor}|${normal}" --fisher \ -t -Q 20 -c 1 -S 2 -E 3 \ - ${bed} --th 6 \ - | teststrandbias.R \ + --th $task.cpus temp_${bed} \ | var2vcf_paired.pl \ -N "${tumor}|${normal}" \ -Q 20 \ -d 10 \ -v 6 \ -S \ - -f 0.05 > ${tumor.simpleName}_${bed.simpleName}.vardict.vcf + -f 0.05 > ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf + + printf "${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname + + bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf \ + | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz + """ stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.vardict.vcf + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz """ @@ -348,37 +379,185 @@ process vardict_tn { } - process varscan_tn { + label 'process_somaticcaller' + input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed), - path(tumorpileup), path(normalpileup), path(tumor_con_table), path(normal_con_table) + val(tumor1), + path(tumorpileup), path(normalpileup), + path(tumor_con_table), path(normal_con_table) output: - tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.varscan.vcf") + tuple val(tumorname), val(normalname), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz") shell: - ''' tumor_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{tumor_con_table} | cut -f2 ))" | bc -l) normal_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{normal_con_table} | cut -f2 ))" | bc -l) + dual_pileup="samtools mpileup -d 10000 -q 15 -Q 15 -f !{GENOMEREF} -l !{bed} 
!{normal} !{tumor}" varscan_opts="--strand-filter 1 --min-var-freq 0.01 --min-avg-qual 30 --somatic-p-value 0.05 --output-vcf 1 --normal-purity $normal_purity --tumor-purity $tumor_purity" - varscan somatic < samtools mpileup -d 10000 -q 15 -Q 15 -f !GENOME -l !{bed.simpleName} !{normal} !{rumor} !{tumor.simpleName}_{bed.simpleName}.vardict.vcf $varscan_opts --mpileup 1 + varscan_cmd="varscan somatic <($dual_pileup) !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1" + eval "$varscan_cmd" + + awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel \ + | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz + awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp \ + | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz + + gatk SortVcf -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \ + -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz \ + -R !{GENOMEREF} -SD !{GENOMEDICT} \ + -O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf + + printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname + + bcftools reheader -s sampname !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf \ + | bcftools view -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.gz + ''' + stub: + """ + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz + """ + +} + + +process octopus_tn { + //label 'process_highcpu' Using separate docker for octopus + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) + + + output: + tuple val("${tumorname}_vs_${normalname}"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz") + + script: + + """ + octopus -R $GENOMEREF -I ${normal} ${tumor} --normal-sample ${normalname} \ + -C cancer \ + --annotations AC AD DP -t ${bed} \ + --threads $task.cpus \ + $GERMLINE_FOREST \ + $SOMATIC_FOREST \ + --target-working-memory 64Gb \ + -o ${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz + """ + stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.varscan.vcf + touch "${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz" + """ + +} + + +process lofreq_tn { + label 'process_somaticcaller' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) + + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi") + + script: + """ + lofreq somatic -f $GENOMEREF -n ${normal} -t ${tumor} \ + -d $DBSNP \ + --threads $task.cpus \ + -l ${bed} \ + --call-indels \ + -o 
${tumorname}_vs_${normalname}_${bed.simpleName}_ + + bcftools concat ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz \ + ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \ + ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz + + $LOFREQ_CONVERT -i ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz -g 1/0 \ + -n ${tumorname} -o ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz + + bcftools view -h ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz >temphead + + sed 's/^##FORMAT=/##FORMAT=/' temphead > temphead1 + bcftools reheader ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz -h temphead1 |\ + bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz + + bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz + + """ + + stub: + + """ + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi" + + """ +} -} -process combineVariants { +process muse_tn { + label 'process_somaticcaller' + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}.vcf.gz") + + script: + + """ + MuSE call -f $GENOMEREF -O ${tumorname}_vs_${normalname} -n $task.cpus $tumor $normal + MuSE sump -I ${tumorname}_vs_${normalname}.MuSE.txt \ + -O ${tumorname}_vs_${normalname}.vcf -n $task.cpus -D $DBSNP -G + + bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}_temp.vcf.gz + + printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" > sampname + + bcftools reheader -s sampname ${tumorname}_vs_${normalname}_temp.vcf.gz \ + | bcftools view -Oz -o ${tumorname}_vs_${normalname}.vcf.gz + + """ + + stub: + + """ + touch "${tumorname}_vs_${normalname}.vcf.gz" + """ + +} + + +process combineVariants { + label 'process_highmem' publishDir(path: "${outdir}/vcfs/", mode: 'copy') input: @@ -386,22 +565,79 @@ process combineVariants { output: tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz") + path("${vc}/${sample}.${vc}.marked.vcf.gz"), + path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), + path("${vc}/${sample}.${vc}.norm.vcf.gz"), + path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") + + script: + vcfin = inputvcf.join(" -I ") + + """ + mkdir ${vc} + gatk --java-options "-Xmx48g" SortVcf \ + -O ${sample}.${vc}.marked.vcf.gz \ + -SD $GENOMEDICT \ + -I $vcfin + bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ + sed '/^\$/d' > ${sample}.${vc}.temp.vcf + + bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz + + mv ${sample}.${vc}.marked.vcf.gz ${vc} + mv ${sample}.${vc}.marked.vcf.gz.tbi ${vc} + + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t + """ + + stub: + + """ + mkdir 
${vc} + touch ${vc}/${sample}.${vc}.marked.vcf.gz + touch ${vc}/${sample}.${vc}.norm.vcf.gz + touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi + touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi + """ + +} + + + + +process combineVariants_alternative { + label 'process_highmem' + publishDir(path: "${outdir}/vcfs/", mode: 'copy') + + input: + tuple val(sample), path(vcfs), path(vcfsindex), val(vc) + + output: + tuple val(sample), + path("${vc}/${sample}.${vc}.marked.vcf.gz"), + path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), + path("${vc}/${sample}.${vc}.norm.vcf.gz"), + path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") script: - vcfin = inputvcf.join(" ") + vcfin = vcfs.join(" ") """ mkdir ${vc} - bcftools concat $vcfin -Oz -o ${sample}.${vc}.temp.vcf.gz - bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz - bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOME -O v |\ + bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz + bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf + bcftools sort ${sample}.${vc}.temp.vcf -Oz -o ${sample}.${vc}.marked.vcf.gz + bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ sed '/^\$/d' > ${sample}.${vc}.temp.vcf bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz mv ${sample}.${vc}.marked.vcf.gz ${vc} + + bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t """ stub: @@ -410,7 +646,35 @@ process combineVariants { mkdir ${vc} touch ${vc}/${sample}.${vc}.marked.vcf.gz touch ${vc}/${sample}.${vc}.norm.vcf.gz + touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi + touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi + + """ + +} + + +process bcftools_index_octopus { + label 'process_low' + + input: + tuple val(tumor), + path(vcf) + + output: + tuple val(tumor), + path(vcf), + path("${vcf}.tbi") + script: + """ + bcftools index -t ${vcf} + """ + + stub: + """ + touch ${vcf} + touch ${vcf}.tbi """ } @@ -418,79 +682,158 @@ process combineVariants { process combineVariants_strelka { //Concat all somatic snvs/indels across all files, strelka separates snv/indels - + label 'process_mid' publishDir(path: "${outdir}/vcfs/strelka", mode: 'copy') input: - tuple val(sample), path(strelkasnvs), path(strelkaindels) + tuple val(sample), + path(strelkasnvs), path(snvindex), + path(strelkaindels), path(indelindex) output: - tuple val(sample), path("${sample}.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz") + tuple val(sample), + path("${sample}.strelka.vcf.gz"), path("${sample}.strelka.vcf.gz.tbi"), + path("${sample}.filtered.strelka.vcf.gz"), path("${sample}.filtered.strelka.vcf.gz.tbi") script: - vcfin = strelkavcfs.join(" ") + vcfin = strelkasnvs.join(" ") indelsin = strelkaindels.join(" ") """ - bcftools concat $vcfin $indelsin -Oz -o ${sample}.temp.strelka.vcf.gz - bcftools sort ${sample}.temp.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz + bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a + bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ + sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz + + bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz bcftools view 
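--version > /dev/null 2>&1 || true  # (editorial no-op so the note below can sit mid-script)
+
+    # NOTE (editorial): `bcftools concat -a` (--allow-overlaps) is what lets the
+    # per-interval SNV and indel chunks be merged even though their coordinate
+    # ranges interleave. Also note ${sample}.temp1.strelka.vcf.gz is written by
+    # sed as plain text despite the .gz suffix; htslib detects the format from
+    # content rather than the file name, so the sort above still reads it.
+
     bcftools view 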
${sample}.strelka.vcf.gz --threads $task.cpus -f PASS -Oz -o ${sample}.filtered.strelka.vcf.gz + bcftools index ${sample}.strelka.vcf.gz -t + bcftools index ${sample}.filtered.strelka.vcf.gz -t """ stub: """ - touch ${sample}.strelka.vcf.gz - touch ${sample}.filtered.strelka.vcf.gz + touch ${sample}.strelka.vcf.gz ${sample}.strelka.vcf.gz.tbi + touch ${sample}.filtered.strelka.vcf.gz ${sample}.filtered.strelka.vcf.gz.tbi + + """ + +} + + +process somaticcombine { + label 'process_mid' + publishDir(path: "${outdir}/vcfs/combined", mode: 'copy') + + input: + tuple val(tumorsample), val(normal), + val(callers), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), val(normal), + path("${tumorsample}_vs_${normal}_combined.vcf.gz"), + path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi") + + script: + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") """ + java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ + -R $GENOMEREF \ + --genotypeMergeOption PRIORITIZE \ + --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \ + --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ + -O ${tumorsample}_vs_${normal}_combined.vcf.gz \ + $vcfin2 + """ + + stub: + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + """ + touch ${tumorsample}_vs_${normal}_combined.vcf.gz + touch ${tumorsample}_vs_${normal}_combined.vcf.gz.tbi + """ } + process annotvep_tn { publishDir(path: "${outdir}/mafs/", mode: 'copy') input: tuple val(tumorsample), val(normalsample), - val(vc), path(tumorvcf) + val(vc), path(tumorvcf), path(vcfindex) output: - path("paired/${vc}/${tumorsample}.maf") + path("paired/${vc}/${tumorsample}_vs_${normalsample}.maf") shell: - """ - - zcat !{tumorvcf}.vcf.gz > !{tumorvcf}.vcf + ''' + VCF_SAMPLE_IDS=($(bcftools query -l !{tumorvcf})) + TID_IDX=0 + NID_IDX="" + VCF_NID="" + NORM_VCF_ID_ARG="" + NSAMPLES=${#VCF_SAMPLE_IDS[@]} + if [ $NSAMPLES -gt 1 ]; then + # Assign tumor, normal IDs + # Look through column names and + # see if they match provided IDs + for (( i = 0; i < $NSAMPLES; i++ )); do + echo "${VCF_SAMPLE_IDS[$i]}" + if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then + TID_IDX=$i + fi + + if [ "${VCF_SAMPLE_IDS[$i]}" == !{normalsample} ]; then + NID_IDX=$i + fi + done + + if [ ! 
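-z "$NID_IDX" ]; then :; fi  # (editorial no-op; the real check continues below)
+
+    # NOTE (editorial): a minimal sketch of the ID matching above, assuming
+    # `bcftools query -l` lists sample columns in VCF column order. For a VCF
+    # whose columns are TUMOR01 and NORMAL01 (hypothetical names), the loop
+    # resolves:
+    #
+    #   VCF_SAMPLE_IDS=(TUMOR01 NORMAL01)
+    #   TID_IDX=0                                   # tumor column index
+    #   NID_IDX=1                                   # normal column index
+    #   NORM_VCF_ID_ARG="--vcf-normal-id NORMAL01"  # handed to vcf2maf below
+    #
+    # so vcf2maf maps genotype columns correctly even when a caller emits the
+    # normal column first.
+        if [ ! 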
-z $NID_IDX ]; then + VCF_NID=${VCF_SAMPLE_IDS[$NID_IDX]} + NORM_VCF_ID_ARG="--vcf-normal-id $VCF_NID" + fi + fi + VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]} + + zcat !{tumorvcf} > !{tumorvcf.baseName} + + mkdir -p paired/!{vc} vcf2maf.pl \ - --vep-forks 16 --input-vcf ${tumorvcf}.vcf \ - --output-maf !{vc}/!{tumorsample}.maf \ + --vep-forks !{task.cpus} --input-vcf !{tumorvcf.baseName} \ + --output-maf paired/!{vc}/!{tumorsample}_vs_!{normalsample}.maf \ --tumor-id !{tumorsample} \ --normal-id !{normalsample} \ --vep-path /opt/vep/src/ensembl-vep \ - --vep-data $VEP_CACHEDIR \ - --ncbi-build GRCh38 --species homo_sapiens --ref-fasta !{GENOME} + --vep-data !{VEPCACHEDIR} \ + --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \ + --vep-overwrite - """ + ''' stub: """ mkdir -p paired/${vc} - touch paired/${vc}/${tumorsample}.maf + touch paired/${vc}/${tumorsample}_vs_${normalsample}.maf """ } - - process combinemafs_tn { + label 'process_low' publishDir(path: "${outdir}/mafs/paired", mode: 'copy') input: @@ -514,3 +857,4 @@ process combinemafs_tn { """ } + diff --git a/nextflow.config b/nextflow.config index d076bf4..987ca4b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,4 +1,15 @@ +manifest { + name = "CCBR/LOGAN" + author = "CCR Collaborative Bioinformatics Resource" + homePage = "https://github.com/CCBR/LOGAN" + description = "one-line description of LOGAN goes here" + mainScript = "main.nf" +} + +includeConfig 'conf/hg38.config' +includeConfig 'conf/mm10.config' + params { // TODO create a separate genome config, with genome index dir that can change depending on platform. see https://github.com/CCBR/CHAMPAGNE/blob/main/conf/genomes.config genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" // file(params.genome) genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" @@ -133,7 +144,8 @@ profiles { cacheDir = "$PWD/singularity" envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID' // TODO refactor to no longer need bind mounts. These paths also only work on biowulf - runOptions = '-B /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/,/data/nousomedr/,/data/CCBR/projects/,/vf/users/,/gpfs/,/fdb' + runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' + } } @@ -344,6 +356,34 @@ env { JULIA_DEPOT_PATH = "/usr/local/share/julia" } +// Capture exit codes from upstream processes when piping +process.shell = ['/bin/bash', '-euo', 'pipefail'] + + //Container options + singularity { + enabled = true + autoMounts = true + cacheDir = "$PWD/singularity" + envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID' + runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' + } + + } +} + +includeConfig 'conf/genomes.config' +includeConfig 'conf/containers.config' + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
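
+// A hedged usage sketch (editorial): with the manifest, genome includes, and
+// container settings above, a typical FASTQ-based launch might look like the
+// following. The profile and flag names are inferred from params referenced in
+// this patch (fastq_input, sample_sheet, genome), so treat them as assumptions:
+//
+//   nextflow run main.nf -profile biowulf \
+//       --genome hg38 \
+//       --fastq_input "fastqs/*_R{1,2}.fastq.gz" \
+//       --sample_sheet pairs.tsv
+//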
+env {
+    PYTHONNOUSERSITE = 1
+    R_PROFILE_USER = "/.Rprofile"
+    R_ENVIRON_USER = "/.Renviron"
+    JULIA_DEPOT_PATH = "/usr/local/share/julia"
+}
+
 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']
diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index d40e6ba..436d677 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -1,40 +1,58 @@
-//All Worksflows in One Place
-// TODO split subworkflows out into one per file
-
-// TODO: this line should be moved to within a subworkflow or the main workflow
-intervalbedin = Channel.fromPath(params.intervals,checkIfExists: true,type: 'file')
+//All Workflows in One Place
+intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file')
 
 include {fc_lane; fastq_screen;kraken;qualimap_bamqc;fastqc;
     samtools_flagstats;vcftools;collectvariantcallmetrics;
     bcftools_stats;gatk_varianteval;
     snpeff;
-    somalier_extract;somalier_analysis;multiqc} from '../../modules/local/qc.nf'
+    somalier_extract;somalier_analysis_human;somalier_analysis_mouse;
+    multiqc} from '../../modules/local/qc.nf'
+
+include {fastp; bwamem2; //indelrealign;
+    bqsr; gatherbqsr; applybqsr; samtoolsindex} from '../../modules/local/trim_align.nf'
+
 include {deepvariant_step1;deepvariant_step2;deepvariant_step3;
     deepvariant_combined;glnexus} from '../../modules/local/germline.nf'
-include {fastp; bwamem2; //indelrealign;
-    bqsr; gatherbqsr; applybqsr; samtoolsindex} from '../../modules/local/trim_align.nf'
+
 include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n;
     contamination_paired; learnreadorientationmodel;mergemut2stats;
     strelka_tn; combineVariants_strelka;
-    varscan_tn; vardict_tn;
-    combineVariants as combineVariants_vardict; combineVariants as combineVariants_varscan;
-    combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan_tonly
-    annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict;
-    combinemafs_tn} from '../../modules/local/variant_calling.nf'
+    varscan_tn; vardict_tn; lofreq_tn; muse_tn;
+    octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly;
+    combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly;
+    combineVariants as combineVariants_varscan; combineVariants as combineVariants_varscan_tonly;
+    combineVariants_alternative as combineVariants_lofreq; combineVariants as combineVariants_muse;
+    combineVariants_alternative as combineVariants_octopus; combineVariants_alternative as combineVariants_octopus_tonly;
+    annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka;
+    annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus;
+    annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse;
+    annotvep_tn as annotvep_tn_combined;
+    combinemafs_tn; somaticcombine} from '../../modules/local/variant_calling.nf'
+
 include {mutect2_t_tonly; mutect2filter_tonly;
-    varscan_tonly; vardict_tonly;
+    varscan_tonly; vardict_tonly; octopus_tonly;
    contamination_tumoronly;
    learnreadorientationmodel_tonly;
    mergemut2stats_tonly;
-    annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; annotvep_tonly as annotvep_tonly_mut2;
-    combinemafs_tonly} from '../../modules/local/variant_calling_tonly.nf'
-include {svaba_somatic} from 
'../../modules/local/structural_variant.nf' -include {splitinterval} from "../../modules/local/splitbed.nf" + annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; + annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; + annotvep_tonly as annotvep_tonly_combined; + combinemafs_tonly;somaticcombine_tonly} from '../../modules/local/variant_calling_tonly.nf' +include {svaba_somatic; manta_somatic; + survivor_sv; gunzip; + annotsv_tn as annotsv_survivor_tn + annotsv_tn as annotsv_svaba;annotsv_tn as annotsv_manta} from '../../modules/local/structural_variant.nf' +include {amber_tn; cobalt_tn; purple; + sequenza; seqz_sequenza_bychr; freec; freec_paired } from './copynumber.nf' -workflow INPUT_PIPE { +include {splitinterval} from '../../modules/local/splitbed.nf' + + + +workflow INPUT { if(params.fastq_input){ fastqinput=Channel.fromFilePairs(params.fastq_input) @@ -67,7 +85,7 @@ workflow INPUT_PIPE { } -workflow TRIM_ALIGN_PIPE { +workflow ALIGN { take: fastqinput sample_sheet @@ -93,9 +111,7 @@ workflow TRIM_ALIGN_PIPE { tobqsr=bwamem2.out.combine(gatherbqsr.out,by:0) applybqsr(tobqsr) - //samtoolsindex(applybqsr.out) - - //samtoolsindex.out.view() + //sample_sheet.view() bamwithsample=applybqsr.out.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(applybqsr.out,by:0).map{it.swap(3,0)} @@ -111,7 +127,7 @@ workflow TRIM_ALIGN_PIPE { bqsrout=applybqsr.out } -workflow GERMLINE_PIPE { +workflow GL { //GERMLINE REQUIRES only BAMBYINTERVAL take: bambyinterval @@ -132,7 +148,7 @@ workflow GERMLINE_PIPE { } -workflow VARIANTCALL_PIPE { +workflow VC { take: //Input is the BAMby interval bamwithsample @@ -140,6 +156,9 @@ workflow VARIANTCALL_PIPE { sample_sheet main: + //Create Pairing for TN (in case of dups) + sample_sheet_paired=sample_sheet|map{tu,no -> tuple ("${tu}_vs_${no}",tu, no)} + bambyinterval=bamwithsample.combine(splitout.flatten()) //Paired Mutect2 @@ -158,148 +177,297 @@ workflow VARIANTCALL_PIPE { pileup_paired_all=pileup_paired_tout.join(pileup_paired_nout) - contamination_paired(pileup_paired_all) - - mut2out_lor=mutect2.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - f1r2.toSorted{ it -> (it.name =~ /${samplename}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } - )} - - learnreadorientationmodel(mut2out_lor) - - mut2out_mstats=mutect2.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - stats.toSorted{ it -> (it.name =~ /${samplename}_(.*?).mut2.vcf.gz.stats/)[0][1].toInteger() } - )} - - mergemut2stats(mut2out_mstats) - - allmut2tn=mutect2.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).mut2.vcf.gz/)[0][1].toInteger() } - )} + contamination_paired(pileup_paired_all) + + //Mutect2 TN + mutect2.out.groupTuple(by:[0,1]) + | multiMap { tumor,normal,vcfs,f1r2,stats -> + mut2out_lor: tuple("${tumor}_vs_${normal}", + f1r2.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) + mut2out_mstats: tuple( "${tumor}_vs_${normal}", + stats.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz.stats/)[0][1].toInteger() }) + allmut2tn: tuple( "${tumor}_vs_${normal}", + vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz/)[0][1].toInteger() } ) + } + | set{mut2out} - mut2tn_filter=allmut2tn - .join(mergemut2stats.out) - .join(learnreadorientationmodel.out) - .join(contamination_paired.out) - mutect2filter(mut2tn_filter) - - - //Tumor Only 
Calling + learnreadorientationmodel(mut2out.mut2out_lor) + mergemut2stats(mut2out.mut2out_mstats) + + mutect2_in=mut2out.allmut2tn + | join(mergemut2stats.out) + | join(learnreadorientationmodel.out) + | map{t,vcf,stats,ro -> tuple(t.split('_vs_')[0],t.split('_vs_')[1],vcf,stats,ro)} + | join(contamination_paired.out) + | mutect2filter + | join(sample_sheet_paired) + | map{sample,markedvcf,markedindex,normvcf,normindex,stats,tumor,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)} + + annotvep_tn_mut2(mutect2_in) + + //Mutect2 Tumor Only bambyinterval_t=bambyinterval.map{tumorname,tumor,tumorbai,normalname,normalbam,normalbai,bed ->tuple(tumorname,tumor,tumorbai,bed)} - mutect2_t_tonly(bambyinterval_t) - - //LOR - mut2tout_lor=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - f1r2.toSorted{ it -> (it.name =~ /${samplename}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } - )} - learnreadorientationmodel_tonly(mut2tout_lor) - - - //Stats - mut2tonly_mstats=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - stats.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() } - )} - mergemut2stats_tonly(mut2tonly_mstats) - + mutect2_t_tonly(bambyinterval_t) + + mutect2_t_tonly.out.groupTuple() + | multiMap { tumor,vcfs,f1r2,stats -> + mut2tout_lor: tuple(tumor, + f1r2.toSorted{ it -> (it.name =~ /${tumor}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) + mut2tonly_mstats: tuple( tumor, + stats.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() }) + allmut2tonly: tuple(tumor, + vcfs.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } ) + } + | set{mut2tonlyout} - //Contamination + + learnreadorientationmodel_tonly(mut2tonlyout.mut2tout_lor) + mergemut2stats_tonly(mut2tonlyout.mut2tonly_mstats) contamination_tumoronly(pileup_paired_tout) - //Final TUMOR ONLY FILTER - allmut2tonly=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } - )} + mutect2_in_tonly=mut2tonlyout.allmut2tonly + | join(mergemut2stats_tonly.out) + | join(learnreadorientationmodel_tonly.out) + | join(contamination_tumoronly.out) + | mutect2filter_tonly + | join(sample_sheet) + | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,"mutect2_tonly",normvcf,normindex)} + annotvep_tonly_mut2(mutect2_in_tonly) - mut2tonly_filter=allmut2tonly - .join(mergemut2stats_tonly.out) - .join(learnreadorientationmodel_tonly.out) - .join(contamination_tumoronly.out) - - mutect2filter_tonly(mut2tonly_filter) - - //VCF2maf - mutect2filter.out - .join(sample_sheet) - .map{tumor,markedvcf,finalvcf,stats,normal -> tuple(tumor,normal,"mutect2",finalvcf)} | annotvep_tn_mut2 - - mutect2filter_tonly.out - .join(sample_sheet) - .map{tumor,markedvcf,finalvcf,stats,normal -> tuple(tumor,"mutect2",finalvcf)} | annotvep_tonly_mut2 - - //Strelka - strelka_tn(bambyinterval) - strelkaout=strelka_tn.out.groupTuple() - .map { samplename,vcfs,indels -> tuple( samplename, - vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() }, - indels.toSorted{ it -> (it.name =~ /${samplename}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } - )} - combineVariants_strelka(strelkaout) - combineVariants_strelka.out.join(sample_sheet) - .map{tumor,markedvcf,finalvcf,normal -> 
tuple(tumor,normal,"strelka",finalvcf)} | annotvep_tn_strelka - - //Vardict - vardict_comb=vardict_tn(bambyinterval).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict")} | combineVariants_vardict - vardict_comb.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"vardict",normvcf)} | annotvep_tn_vardict - - //VarDict_tonly - vardict_tonly_comb=bambyinterval.map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed -> - tuple(tumorname,tumorbam,tumorbai,bed)} - vardict_tonly(vardict_tonly_comb).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} |combineVariants_vardict_tonly - combineVariants_vardict_tonly.out.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,"vardict_tonly",normvcf)} | annotvep_tonly_vardict - - - //VarScan - varscan_in=bambyinterval.join(contamination_paired.out) - varscan_comb=varscan_tn(varscan_in).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan - varscan_comb.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"varscan",normvcf)} | annotvep_tn_varscan - - //VarScan_tonly - varscan_tonly_comb=varscan_in.map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc -> - tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly - varscan_tonly_comb1=varscan_tonly_comb.groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly + //Strelka TN + strelka_in=strelka_tn(bambyinterval) | groupTuple(by:[0,1]) + | map { tumor,normal,vcfs,vcfindex,indels,indelindex -> tuple("${tumor}_vs_${normal}", + vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() },vcfindex, + indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } ,indelindex)} + | combineVariants_strelka | join(sample_sheet_paired) + | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} + annotvep_tn_strelka(strelka_in) + + //Vardict TN + vardict_in=vardict_tn(bambyinterval) | groupTuple(by:[0,1]) + | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).vardict.vcf/)[0][1].toInteger()},"vardict")} + | combineVariants_vardict | join(sample_sheet_paired) + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)} + annotvep_tn_vardict(vardict_in) + + //VarDict TOnly + vardict_in_tonly=bambyinterval + | map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed -> + tuple(tumorname,tumorbam,tumorbai,bed)} + | vardict_tonly | groupTuple() + | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly")} + | combineVariants_vardict_tonly | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} + annotvep_tonly_vardict(vardict_in_tonly) - varscan_tonly_comb1.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan - + //VarScan TN + varscan_in=bambyinterval.combine(contamination_paired.out) + | varscan_tn | groupTuple(by:[0,1]) + | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).varscan.vcf.gz/)[0][1].toInteger()},"varscan")} + | combineVariants_varscan | join(sample_sheet_paired) + | 
map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} + annotvep_tn_varscan(varscan_in) - //Combine All MAFs - annotvep_tn_mut2.out.concat(annotvep_tn_strelka.out).concat(annotvep_tn_vardict.out).concat(annotvep_tn_varscan.out) | combinemafs_tn + //VarScan TOnly + varscan_in_tonly=bambyinterval.combine(contamination_paired.out) + | map{tumor,bam,bai,normal,nbam,nbai,bed,tumorname2,tpile,npile,tumorc,normalc -> + tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly | groupTuple() + | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly")} + | combineVariants_varscan_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)} + annotvep_tonly_varscan(varscan_in_tonly) + + //Lofreq TN + lofreq_in=lofreq_tn(bambyinterval) | groupTuple(by:[0,1]) + | map{tu,no,snv,dbsnv,indel,dbindel,vcf,vcfindex-> tuple("${tu}_vs_${no}",vcf.toSorted{it -> (it.name =~ /${tu}_vs_${no}_(.*?)_lofreq.vcf.gz/)[0][1].toInteger()},vcfindex,"lofreq")} + | combineVariants_lofreq | join(sample_sheet_paired) + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} + annotvep_tn_lofreq(lofreq_in) + + //MuSE TN + muse_in=muse_tn(bamwithsample) + | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf,"muse")} + | combineVariants_muse | join(sample_sheet_paired) + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"muse",normvcf,normindex)} + annotvep_tn_muse(muse_in) + + //Octopus TN + octopus_in=octopus_tn(bambyinterval) | bcftools_index_octopus + | groupTuple() + | map{samplename,vcf,vcfindex-> tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus")} + | combineVariants_octopus + | map{samplename,marked,markedindex,normvcf,normindex -> + tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)} + annotvep_tn_octopus(octopus_in) + + //Octopus TOnly + octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> + tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly + | groupTuple() + | map{samplename,vcf,vcfindex->tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).tonly.octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus_tonly")} + | combineVariants_octopus_tonly + | join(sample_sheet) | + map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"octopus_tonly",normvcf,normindex)} + annotvep_tonly_octopus(octopus_in_tonly) + + //Combine All Variants Using VCF and Then Reannotate + mutect2_in|concat(strelka_in)|concat(octopus_in)|concat(muse_in)|concat(lofreq_in) + | concat(vardict_in) |concat(varscan_in) | groupTuple(by:[0,1]) + | somaticcombine + | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} + | annotvep_tn_combined + + mutect2_in_tonly|concat(octopus_in_tonly) + | concat(vardict_in_tonly)|concat(varscan_in_tonly) | groupTuple() + | somaticcombine_tonly + | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} + | annotvep_tonly_combined + + //Implement PCGR Annotator/CivIC Next + + emit: + somaticcall_input=octopus_in - annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly - //Implement Copy Number - //Implement PCGR Annotator/CivIC Next } -workflow SV_PIPE { +workflow SV 
{
    take:
        bamwithsample

    main:
        //Svaba
-        svaba_somatic(bamwithsample)
+        svaba_out=svaba_somatic(bamwithsample)
+            .map{ tumor,bps,contigs,discord,alignments,gindel,gsv,so_indel,so_sv,unfil_gindel,unfil_gsv,unfil_so_indel,unfil_sv,log ->
+                tuple(tumor,so_sv,"svaba")}
+        annotsv_svaba(svaba_out).ifEmpty("Empty SV input--No SV annotated")

        //Manta
+        manta_out=manta_somatic(bamwithsample)
+            .map{tumor,gsv,so_sv,unfil_sv,unfil_indel ->
+                tuple(tumor,so_sv,"manta")}
+        annotsv_manta(manta_out).ifEmpty("Empty SV input--No SV annotated")
+
+        //Delly-WIP
+
+        //Survivor
+        gunzip(manta_out).concat(svaba_out).groupTuple()
+        | survivor_sv | annotsv_survivor_tn | ifEmpty("Empty SV input--No SV annotated")
+
+}
+
+workflow CNVmouse {
+    take:
+        bamwithsample
+
+    main:
+        //Sequenza (Preferred for Paired)
+        chrs=Channel.fromList(params.genomes[params.genome].chromosomes)
+        seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai->
+            tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)}
+        seqzin.combine(chrs) | seqz_sequenza_bychr
+        seqz_sequenza_bychr.out.groupTuple()
+            .map{pair, seqz -> tuple(pair, seqz.sort{it.name})}
+            | sequenza
+
+        //FREEC Paired Mode
+        bamwithsample | freec_paired
+
+}
+
+workflow CNVhuman {
+    take:
+        bamwithsample
+        somaticcall_input
+
+    main:
+        //Sequenza
+        chrs=Channel.fromList(params.genomes[params.genome].chromosomes)
+        seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai->
+            tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)}
+        seqzin.combine(chrs) | seqz_sequenza_bychr
+        seqz_sequenza_bychr.out.groupTuple()
+            .map{pair, seqz -> tuple(pair, seqz.sort{it.name})}
+            | sequenza
+
+        //Purple
+        bamwithsample | amber_tn
+        bamwithsample | cobalt_tn
+        purplein=amber_tn.out.join(cobalt_tn.out)
+        purplein.join(somaticcall_input)|
+            map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,amber,cobalt,vcf,vcfindex)}
+        | purple
+
+}
+
+    /*
+    //baminput=sample_sheet
+    //    .map{samplename,bam,vcf-> tuple(samplename,file(bam),file("${bam}.bai"))}
+
+    //somaticinput=sample_sheet
+    //    .map{samplename,bam,vcf-> tuple(samplename,file(vcf))}
+
+
+    */
+
+
+
+
+workflow QC_NOGL {
+    take:
+        fastqin
+        fastpout
+        applybqsr
+
+    main:
+    //QC Steps
+    fc_lane(fastqin)
+    fastq_screen(fastpout)
+    kraken(fastqin)
+    qualimap_bamqc(applybqsr)
+    samtools_flagstats(applybqsr)
+    fastqc(applybqsr)
+
+    //Somalier
+    somalier_extract(applybqsr)
+    som_in=somalier_extract.out.collect()
+
+    //Prep for MultiQC input
+    fclane_out=fc_lane.out.map{samplename,info->info}.collect()
+    fqs_out=fastq_screen.out.collect()
+
+    kraken_out=kraken.out.map{samplename,taxa,krona -> tuple(taxa,krona)}.collect()
+    qualimap_out=qualimap_bamqc.out.map{genome,rep->tuple(genome,rep)}.collect()
+    fastqc_out=fastqc.out.map{samplename,html,zip->tuple(html,zip)}.collect()
+
+    samtools_flagstats_out=samtools_flagstats.out.collect()
+
+    if(params.genome=="hg38"){
+        somalier_analysis_human(som_in)
+        somalier_analysis_out=somalier_analysis_human.out.collect()
+    }
+    else if(params.genome=="mm10"){
+        somalier_analysis_mouse(som_in)
+        somalier_analysis_out=somalier_analysis_mouse.out.collect()
+    }
+
+    conall=fclane_out.concat(fqs_out,kraken_out,qualimap_out,samtools_flagstats_out,
+        somalier_analysis_out).flatten().toList()
+    multiqc(conall)
}


-workflow QC_PIPE {
+workflow QC_GL {
    take:
        fastqin
        fastpout
        applybqsr
-        glnexusout //GLnexus germline output
-        bcfout //DV germline output
+        glnexusout
+        bcfout

    main:
    //QC Steps
@@ -321,9 +489,19 @@ workflow QC_PIPE {
    //Somalier
    somalier_extract(applybqsr)
    som_in=somalier_extract.out.collect()
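
    // NOTE (editorial): a minimal sketch of how CNVhuman above assembles the
    // PURPLE input, assuming amber_tn.out and cobalt_tn.out each emit
    // tuple(tumorname, tool_output_dir) and somaticcall_input carries
    // tuple(tumor, normal, caller, vcf, tbi) as emitted by the VC workflow:
    //
    //   amber_tn.out                  // [ tumor, amber_dir ]
    //     .join(cobalt_tn.out)        // [ tumor, amber_dir, cobalt_dir ]
    //     .join(somaticcall_input)    // [ tumor, amber_dir, cobalt_dir, normal, caller, vcf, tbi ]
    //     .map{ t1,amber,cobalt,n1,vc,vcf,tbi -> tuple(t1,amber,cobalt,vcf,tbi) }
    //     | purple
    //
    // Every channel is keyed by the tumor sample name, so join() keeps the
    // per-tool outputs in register even when processes finish out of order.
-    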
somalier_analysis(som_in) + + //Prep for MultiQC input + if(params.genome=="hg38"){ + somalier_analysis_human(som_in) + somalier_analysis_out=somalier_analysis_human.out.collect() + } + else if(params.genome=="mm10"){ + somalier_analysis_mouse(som_in) + somalier_analysis_out=somalier_analysis_mouse.out.collect() + } + fclane_out=fc_lane.out.map{samplename,info->info}.collect() fqs_out=fastq_screen.out.collect() @@ -333,10 +511,9 @@ workflow QC_PIPE { samtools_flagstats_out=samtools_flagstats.out.collect() bcftools_stats_out= bcftools_stats.out.collect() gatk_varianteval_out= gatk_varianteval.out.collect() - snpeff_out=snpeff.out.collect()//map{vcf,csv,html->vcf,csv,html}.collect() + snpeff_out=snpeff.out.collect() vcftools_out=vcftools.out - collectvariantcallmetrics_out=collectvariantcallmetrics.out//.map{details,summary->details,summary} - somalier_analysis_out=somalier_analysis.out.collect() + collectvariantcallmetrics_out=collectvariantcallmetrics.out conall=fclane_out.concat(fqs_out,kraken_out,qualimap_out,samtools_flagstats_out,bcftools_stats_out, gatk_varianteval_out,snpeff_out,vcftools_out,collectvariantcallmetrics_out,somalier_analysis_out).flatten().toList() @@ -345,7 +522,7 @@ workflow QC_PIPE { //Variant Calling from BAM only -workflow INPUT_BAMVC_PIPE { +workflow INPUT_BAM { if(params.sample_sheet){ sample_sheet=Channel.fromPath(params.sample_sheet, checkIfExists: true) @@ -360,13 +537,28 @@ workflow INPUT_BAMVC_PIPE { //Either BAM Input or File sheet input if(params.bam_input){ - baminputonly=Channel.fromPath(params.bam_input) + //Check if Index is .bai or .bam.bai + bambai=params.bam_input +".bai" + baionly = bambai.replace(".bam", "") + bamcheck1=file(bambai) + bamcheck2=file(baionly) + + if (bamcheck1.size()>0){ + baminputonly=Channel.fromPath(params.bam_input) .map{it-> tuple(it.simpleName,it,file("${it}.bai"))} + } + if (bamcheck2.size()>0){ + bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)}.view() + baminputonly=Channel.fromPath(params.bam_input) + .map{it-> tuple(it.simpleName,it)} + .join(bai) + } + }else if(params.file_input) { baminputonly=Channel.fromPath(params.file_input) .splitCsv(header: false, sep: "\t", strip:true) - .map{ sample,bam -> - tuple(sample, file(bam),file("${bam}.bai")) + .map{ sample,bam,bai -> + tuple(sample, file(bam),file(bai)) } } @@ -382,3 +574,4 @@ workflow INPUT_BAMVC_PIPE { } + diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index 854444c..07c274c 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -2,36 +2,53 @@ // TODO split subworkflows out into one per file // TODO: this line should be moved to within a subworkflow or the main workflow -intervalbedin = Channel.fromPath(params.intervals,checkIfExists: true,type: 'file') +intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') + include {fc_lane; fastq_screen;kraken;qualimap_bamqc; samtools_flagstats;vcftools;collectvariantcallmetrics; bcftools_stats;gatk_varianteval; snpeff;fastqc; - somalier_extract;somalier_analysis;multiqc} from '../../modules/local/qc.nf' + somalier_extract;somalier_analysis_human;somalier_analysis_mouse; + multiqc} from '../../modules/local/qc.nf' + include {deepvariant_step1;deepvariant_step2;deepvariant_step3; deepvariant_combined;glnexus} from '../../modules/local/germline.nf' + include {fastp; bwamem2; bqsr; gatherbqsr; applybqsr; samtoolsindex} from '../../modules/local/trim_align.nf' + include {mutect2; 
mutect2filter; pileup_paired_t; pileup_paired_n; - contamination_paired; learnreadorientationmodel;mergemut2stats; + bcftools_index_octopus; + contamination_paired; learnreadorientationmodel; mergemut2stats; combineVariants as combineVariants_vardict; combineVariants as combineVariants_varscan; - combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan_tonly + combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan_tonly; + combineVariants_alternative ; annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; combinemafs_tn} from '../../modules/local/variant_calling.nf' -include {mutect2_t_tonly; mutect2filter_tonly; + +include {mutect2_t_tonly; mutect2filter_tonly; pileup_paired_tonly; varscan_tonly; vardict_tonly; + octopus_tonly; contamination_tumoronly; learnreadorientationmodel_tonly; mergemut2stats_tonly; - annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; annotvep_tonly as annotvep_tonly_mut2; - combinemafs_tonly} from '../../modules/local/variant_calling_tonly.nf' -include {splitinterval} from "../../modules/local/splitbed.nf" + annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; + annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; + annotvep_tonly as annotvep_tonly_combined; + combinemafs_tonly; somaticcombine_tonly} from '../../modules/local/variant_calling_tonly.nf' +include {manta_tonly; svaba_tonly; survivor_sv; gunzip; +annotsv_tonly as annotsv_manta_tonly; annotsv_tonly as annotsv_svaba_tonly; +annotsv_tonly as annotsv_survivor_tonly} from './structural_variant.nf' +include {freec; amber_tonly; cobalt_tonly; purple } from './copynumber.nf' -workflow INPUT_TONLY_PIPE { +include {splitinterval} from '../../modules/local/splitbed.nf' + + +workflow INPUT_TONLY { if(params.fastq_input){ fastqinput=Channel.fromFilePairs(params.fastq_input) @@ -66,7 +83,7 @@ workflow INPUT_TONLY_PIPE { } -workflow TRIM_ALIGN_TONLY_PIPE { +workflow ALIGN_TONLY { take: fastqinput sample_sheet @@ -90,9 +107,7 @@ workflow TRIM_ALIGN_TONLY_PIPE { tobqsr=bwamem2.out.combine(gatherbqsr.out,by:0) applybqsr(tobqsr) - //samtoolsindex(applybqsr.out) - //samtoolsindex.out.view() - //bamwithsample=samtoolsindex.out.join(sample_sheet).map{it.swap(3,0)}.join(samtoolsindex.out).map{it.swap(3,0)} + bamwithsample=applybqsr.out.join(sample_sheet) .map{samplename,tumor,tumorbai -> tuple( samplename,tumor,tumorbai) } @@ -104,14 +119,13 @@ workflow TRIM_ALIGN_TONLY_PIPE { fastpout=fastp.out fastqin=fastqinput splitout=splitinterval.out - //indelbambyinterval bqsrbambyinterval sample_sheet bqsrout=applybqsr.out } -workflow VARIANT_TONLY_PIPE { +workflow VC_TONLY { take: //Input is the BAMby interval bamwithsample @@ -129,67 +143,131 @@ workflow VARIANT_TONLY_PIPE { mutect2_t_tonly(bambyinterval) - //LOR - mut2tout_lor=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - f1r2.toSorted{ it -> (it.name =~ /${samplename}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } - )} - learnreadorientationmodel_tonly(mut2tout_lor) - - - //Stats - mut2tonly_mstats=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - stats.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() } - )} - mergemut2stats_tonly(mut2tonly_mstats) + mutect2_t_tonly.out.groupTuple() + | multiMap { 
tumor,vcfs,f1r2,stats -> + mut2tout_lor: tuple(tumor, + f1r2.toSorted{ it -> (it.name =~ /${tumor}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) + mut2tonly_mstats: tuple( tumor, + stats.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() }) + allmut2tonly: tuple(tumor, + vcfs.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } ) + } + | set{mut2tonlyout} + - //Contamination + learnreadorientationmodel_tonly(mut2tonlyout.mut2tout_lor) + mergemut2stats_tonly(mut2tonlyout.mut2tonly_mstats) contamination_tumoronly(pileup_paired_tout) - //Final TUMOR ONLY FILTER - allmut2tonly=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } - )} - - mut2tonly_filter=allmut2tonly + mut2tonly_filter=mut2tonlyout.allmut2tonly .join(mergemut2stats_tonly.out) .join(learnreadorientationmodel_tonly.out) .join(contamination_tumoronly.out) - mutect2filter_tonly(mut2tonly_filter) - - - //##VCF2MAF TO + mutect2_tonly_in=mutect2filter_tonly(mut2tonly_filter) + | join(sample_sheet) + | map{tumor,markedvcf,markedindex,finalvcf,finalindex,stats -> tuple(tumor,"mutect2_tonly",finalvcf,finalindex)} + annotvep_tonly_mut2(mutect2_tonly_in) - mutect2filter_tonly.out - .join(sample_sheet) - .map{tumor,markedvcf,finalvcf,stats -> tuple(tumor,"mutect2",finalvcf)} | annotvep_tonly_mut2 - //VarDict_tonly - vardict_tonly_comb=bambyinterval.map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed -> - tuple(tumorname,tumorbam,tumorbai,bed)} - vardict_tonly(vardict_tonly_comb).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} |combineVariants_vardict_tonly - combineVariants_vardict_tonly.out.join(sample_sheet) - .map{tumor,marked,normvcf ->tuple(tumor,"vardict_tonly",normvcf)} | annotvep_tonly_vardict + //VarDict + vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple()| map{tumor,vcf -> tuple(tumor,vcf,"vardict_tonly")} + | combineVariants_vardict_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"vardict_tonly",normvcf,normindex)} + annotvep_tonly_vardict(vardict_in_tonly) //VarScan_tonly - varscan_tonly_comb=varscan_in.map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc -> - tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly - varscan_tonly_comb1=varscan_tonly_comb.groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly - - varscan_tonly_comb1.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan + varscan_in_tonly=bambyinterval.join(contamination_tumoronly.out) + | varscan_tonly | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf,"varscan")} + | combineVariants_varscan_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"varscan_tonly",normvcf,normindex)} + annotvep_tonly_varscan(varscan_in_tonly) + + //Octopus_tonly + octopus_in_tonly=bambyinterval | octopus_tonly | bcftools_index_octopus + | groupTuple() + | map{tumor,vcf,vcfindex -> tuple(tumor,vcf.toSorted{it -> it.name} + ,vcfindex, "octopus_tonly")} + | combineVariants_alternative | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} + annotvep_tonly_octopus(octopus_in_tonly) + + + mutect2_tonly_in|concat(octopus_in_tonly) + | concat(vardict_in_tonly)|concat(varscan_in_tonly) + | 
somaticcombine_tonly + | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} + | annotvep_tonly_combined + + + + emit: + somaticcall_input=octopus_in_tonly - annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly } -workflow QC_TONLY_PIPE { +workflow SV_TONLY { + take: + bamwithsample + + main: + //Svaba + svaba_out=svaba_tonly(bamwithsample) + .map{ tumor,bps,contigs,discord,alignments,so_indel,so_sv,unfil_so_indel,unfil_sv,log -> + tuple(tumor,so_sv,"svaba_tonly")} + annotsv_svaba_tonly(svaba_out).ifEmpty("Empty SV input--No SV annotated") + + //Manta + manta_out=manta_tonly(bamwithsample) + .map{tumor, sv, indel, tumorsv -> + tuple(tumor,tumorsv,"manta_tonly")} + annotsv_manta_tonly(manta_out).ifEmpty("Empty SV input--No SV annotated") + + //Delly-WIP + + //Survivor + gunzip(manta_out).concat(svaba_out).groupTuple() + | survivor_sv | annotsv_survivor_tonly | ifEmpty("Empty SV input--No SV annotated") +} + + + +workflow CNVmouse_tonly { + take: + bamwithsample + + main: + freec(bamwithsample) +} + + +workflow CNVhuman_tonly { + take: + bamwithsample + somaticcall_input + + main: + //FREEC-Unpaired onlypu + bamwithsample | freec + + //Purple + bamwithsample | amber_tonly + bamwithsample | cobalt_tonly + purplein=amber_tonly.out.join(cobalt_tonly.out) + purplein.join(somaticcall_input)| + map{t1,amber,cobalt,vc,vcf,index -> tuple(t1,amber,cobalt,vcf,index)} + | purple + + +} + +workflow QC_TONLY { take: fastqin fastpout @@ -206,11 +284,16 @@ workflow QC_TONLY_PIPE { samtools_flagstats(bqsrout) qualimap_bamqc(bqsrout) - somalier_extract(bqsrout) som_in=somalier_extract.out.collect() - somalier_analysis(som_in) - + if(params.genome=="hg38"){ + somalier_analysis_human(som_in) + somalier_analysis_out=somalier_analysis_human.out.collect() + } + else if(params.genome=="mm10"){ + somalier_analysis_mouse(som_in) + somalier_analysis_out=somalier_analysis_mouse.out.collect() + } //Prep for MultiQC input fclane_out=fc_lane.out.map{samplename,info->info}.collect() @@ -221,7 +304,8 @@ workflow QC_TONLY_PIPE { fastqc_out=fastqc.out.map{samplename,html,zip->tuple(html,zip)}.collect() samtools_flagstats_out=samtools_flagstats.out.collect() - somalier_analysis_out=somalier_analysis.out.collect() + + conall=fclane_out.concat(fqs_out,kraken_out,qualimap_out,fastqc_out, samtools_flagstats_out, @@ -233,26 +317,41 @@ workflow QC_TONLY_PIPE { + + //Variant Calling from BAM only -workflow INPUT_TONLY_BAMVC_PIPE { +workflow INPUT_TONLY_BAM { main: - //Either BAM Input or File sheet input if(params.bam_input){ - baminputonly=Channel.fromPath(params.bam_input) + bambai = params.bam_input +".bai" + baionly = bambai.replace(".bam", "") + bamcheck1 = file(bambai) + bamcheck2 = file(baionly) + + if (bamcheck1.size()>0){ + baminputonly=Channel.fromPath(params.bam_input) .map{it-> tuple(it.simpleName,it,file("${it}.bai"))} + } + if (bamcheck2.size()>0){ + bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)}.view() + baminputonly=Channel.fromPath(params.bam_input) + .map{it-> tuple(it.simpleName,it)} + .join(bai) + } sample_sheet=baminputonly.map{samplename,bam,bai -> tuple ( - samplename)}.view() + samplename)} + }else if(params.file_input) { baminputonly=Channel.fromPath(params.file_input) .splitCsv(header: false, sep: "\t", strip:true) - .map{ sample,bam -> - tuple(sample, file(bam),file("${bam}.bai")) + .map{ sample,bam,bai -> + tuple(sample, file(bam),file(bai)) } + sample_sheet=baminputonly.map{samplename,bam,bai -> tuple ( - 
samplename)}.view() - + samplename)} } splitinterval(intervalbedin) @@ -262,8 +361,7 @@ workflow INPUT_TONLY_BAMVC_PIPE { emit: bamwithsample splitout=splitinterval.out - sample_sheet + sample_sheet - } diff --git a/wgs-seek_bak b/wgs-seek_bak deleted file mode 100755 index eb36bb1..0000000 --- a/wgs-seek_bak +++ /dev/null @@ -1,501 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: UTF-8 -*- - -""" -ABOUT: This is the main entry for the GATK4 WES pipeline. -REQUIRES: - - python>=3.5 - - nextflow - - singularity (recommended==latest) -DISCLAIMER: - PUBLIC DOMAIN NOTICE - CCR Collaborative Bioinformatics Resource (CCBR) - National Cancer Institute (NCI) -This software/database is a "United States Government Work" under -the terms of the United States Copyright Act. It was written as -part of the author's official duties as a United States Government -employee and thus cannot be copyrighted. This software is freely -available to the public for use. -Although all reasonable efforts have been taken to ensure the -accuracy and reliability of the software and data, CCBR do not and -cannot warrant the performance or results that may be obtained by -using this software or data. CCBR and NCI disclaim all warranties, -express or implied, including warranties of performance, -merchantability or fitness for any particular purpose. -Please cite the author and the "NIH Biowulf Cluster" in any work or -product based on this material. -USAGE: - $ wgs-seek [OPTIONS] -EXAMPLE: - $ wgs-seek run --input *.R{1,2}.fastq.gz --output WGS_hg38/ --mode biowulf --sample_sheet pairs.tsv - -""" - -# Python standard library -from __future__ import print_function -import sys, os, subprocess, re, json, textwrap - -# 3rd party imports from pypi -import argparse # potential python3 3rd party package, added in python/3.5 - - -def run(sub_args): - """Initialize, setup, and run the GATK4 WGS pipeline. - Calls initialize() to create output directory and copy over pipeline resources, - setup() to create the pipeline config file, dryrun() to ensure their are no issues - before running the pipeline, and finally run() to execute the Nextflow workflow. - @param sub_args : - Parsed arguments for run sub-command - """ - # Step 0. Check for required dependencies - # The pipelines has only two requirements: - # nextflow and singularity - require(['nextflow', 'singularity'], ['nextflow', 'singularity']) - - # Step 1. Initialize working directory, - # copy over required resources to run - # the pipeline - git_repo = __home__ - input_files = init( - repo_path = git_repo, - output_path = sub_args.output, - links = sub_args.input - ) - - # Step 2. Setup pipeline for execution, - # dynamically create config.json config - # file from user inputs and base config - # templates - config = setup(sub_args, - ifiles = input_files, - repo_path = git_repo, - output_path = sub_args.output - ) - - # Step 3. Resolve docker/singularity bind - # paths from the config file. - bindpaths = bind( - sub_args, - config = config - ) - - # Optional Step: Dry-run pipeline - if sub_args.dry_run: - # Dryrun pipeline - dryrun_output = dryrun(outdir = sub_args.output) # python3 returns byte-string representation - print("\nDry-running exome-seek pipeline:\n{}".format(dryrun_output.decode("utf-8"))) - sys.exit(0) - - # Step 4. 
Orchestrate pipeline execution, - # run pipeline in locally on a compute node - # for debugging purposes or submit the master - # job to the job scheduler, SLURM, and create - # logging file - if not exists(os.path.join(sub_args.output, 'logfiles')): - # Create directory for logfiles - os.makedirs(os.path.join(sub_args.output, 'logfiles')) - if sub_args.mode == 'local': - log = os.path.join(sub_args.output, 'logfiles', 'snakemake.log') - else: - log = os.path.join(sub_args.output, 'logfiles', 'master.log') - logfh = open(log, 'w') - wait = '' - if sub_args.wait: wait = '--wait' - mjob = runner(mode = sub_args.mode, - outdir = sub_args.output, - # additional_bind_paths = all_bind_paths, - alt_cache = sub_args.singularity_cache, - threads = int(sub_args.threads), - jobname = sub_args.job_name, - submission_script='runner', - logger = logfh, - additional_bind_paths = ",".join(bindpaths), - tmp_dir = sub_args.tmp_dir, - wait = wait - ) - - # Step 5. Wait for subprocess to complete, - # this is blocking and not asynchronous - if not sub_args.silent: - print("\nRunning GATK4 WES pipeline in '{}' mode...".format(sub_args.mode)) - mjob.wait() - logfh.close() - - # Step 6. Relay information about submission - # of the master job or the exit code of the - # pipeline that ran in local mode - if sub_args.mode == 'local': - if int(mjob.returncode) == 0: - print('GATK4 WES pipeline has successfully completed') - else: - fatal('GATK4 WES pipeline failed. Please see {} for more information.'.format( - os.path.join(sub_args.output, 'logfiles', 'snakemake.log'))) - elif sub_args.mode == 'slurm': - jobid = open(os.path.join(sub_args.output, 'logfiles', 'mjobid.log')).read().strip() - if not sub_args.silent: - if int(mjob.returncode) == 0: - print('Successfully submitted master job: ', end="") - else: - fatal('Error occurred when submitting the master job.') - print(jobid) - - -def unlock(sub_args): - """Unlocks a previous runs output directory. If snakemake fails ungracefully, - it maybe required to unlock the working directory before proceeding again. - This is rare but it does occasionally happen. Maybe worth add a --force - option to delete the '.snakemake/' directory in the future. - @param sub_args : - Parsed arguments for unlock sub-command - """ - - print("Unlocking the pipeline's output directory...") - outdir = sub_args.output - - try: - unlock_output = subprocess.check_output([ - 'snakemake', '--unlock', - '--cores', '1', - '--configfile=config.json' - ], cwd = outdir, - stderr=subprocess.STDOUT) - except subprocess.CalledProcessError as e: - # Unlocking process returned a non-zero exit code - sys.exit("{}\n{}".format(e, e.output)) - - print("Successfully unlocked the pipeline's working directory!") - - -def cache(sub_args): - """Caches remote resources or reference files stored on DockerHub and S3. - Local SIFs will be created from images defined in 'config/containers/images.json'. - @TODO: add option to cache other shared S3 resources (i.e. kraken db and fqscreen indices) - @param sub_args : - Parsed arguments for unlock sub-command - """ - print(sub_args) - fatal('NotImplementedError... 
Comming Soon!') - - sif_cache = sub_args.sif_cache - # Get absolute PATH to templates in exome-seek git repo - repo_path = os.path.dirname(os.path.abspath(__file__)) - images = os.path.join(repo_path, 'config','containers', 'images.json') - - # Create image cache - if not exists(sif_cache): - # Pipeline output directory does not exist on filesystem - os.makedirs(sif_cache) - elif exists(sif_cache) and os.path.isfile(sif_cache): - # Provided Path for pipeline output directory exists as file - raise OSError("""\n\tFatal: Failed to create provided sif cache directory! - User provided --sif-cache PATH already exists on the filesystem as a file. - Please {} cache again with a different --sif-cache PATH. - """.format(sys.argv[0]) - ) - - # Check if local SIFs already exist on the filesystem - with open(images, 'r') as fh: - data = json.load(fh) - - pull = [] - for image, uri in data['images'].items(): - sif = os.path.join(sif_cache, '{}.sif'.format(os.path.basename(uri).replace(':', '_'))) - if not exists(sif): - # If local sif does not exist on in cache, print warning - # and default to pulling from URI in config/containers/images.json - print('Image will be pulled from "{}".'.format(uri), file=sys.stderr) - pull.append(uri) - - if not pull: - # Nothing to do! - print('Singularity image cache is already up to update!') - else: - # There are image(s) that need to be pulled - if not sub_args.dry_run: - # submission_script for exome-seek cache is /path/to/output/resources/cacher - # Quote user provided values to avoid shell injections - masterjob = subprocess.Popen( - 'sbatch --parsable -J pl:cache --gres=lscratch:200 --time=10:00:00 --mail-type=BEGIN,END,FAIL ' + - str(os.path.join(repo_path, 'resources', 'cacher')) + ' slurm ' + - " -s '{}' ".format(sif_cache) + - " -i '{}' ".format(','.join(pull)) + - " -t '/lscratch/${SLURM_JOB_ID}/.singularity/' ", - cwd = sif_cache, shell=True, stderr = subprocess.STDOUT, stdout = subprocess.PIPE) - - masterjob.communicate() - print('exome-seek reference cacher submitted master job with exit-code: {}'.format(masterjob.returncode)) - - -def parsed_arguments(): - """Parses user-provided command-line arguments. Requires argparse and textwrap - package. argparse was added to standard lib in python 3.5 and textwrap was added - in python 3.5. To create custom help formatting for subparsers a docstring is - used create the help message for required options. argparse does not support named - subparser groups, which is normally what would be used to accomphish this reformatting. - As so, the help message for require options must be suppressed. If a new required arg - is added to a subparser, it must be added to the docstring and the usage statement - also must be updated. - """ - - # Create a top-level parser - parser = argparse.ArgumentParser(description = 'exome-seek: a highly-reproducible GATK4 WES pipeline') - - # Adding Verison information - #parser.add_argument('--version', action = 'version', version='%(prog)s {}'.format(__version__)) - - # Create sub-command parser - subparsers = parser.add_subparsers(help='List of available sub-commands') - - - # Sub-parser for the "run" sub-command - # Grouped sub-parser arguments are currently not supported by argparse. - # https://bugs.python.org/issue9341 - # Here is a work around to create more useful help message for named - # options that are required! Please note: if a required arg is added the - # description below should be updated (i.e. 
update usage and add new option) - required_run_options = textwrap.dedent("""\ - usage: exome-seek run [--help] \\ - [--mode {local, slurm}] \\ - [--job-name JOB_NAME] \\ - [--callers {mutect2,mutect,strelka, ...}] \\ - [--pairs PAIRS] \\ - [--ffpe] \\ - [--cnv] \\ - [--dry-run] \\ - [--silent] \\ - [--singularity-cache SINGULARITY_CACHE] \\ - [--sif-cache SIF_CACHE] \\ - [--tmpdir TMP_DIR] \\ - [--threads THREADS] \\ - [--wait] \\ - --input INPUT [INPUT ...] \\ - --output OUTPUT \\ - --genome {hg38, mm10, ...} \\ - --targets TARGETS - - required arguments: - --input INPUT [INPUT ...] - Input FastQ or BAM file(s) to process. One or more input - files can be provided. The pipeline does NOT support - single-end WES data. Please provide either a set of - FastQ files or a set of BAM files. The pipeline does - NOT support processing a mixture of FastQ files and - BAM files. - Example: --input .tests/*.R?.fastq.gz - --output OUTPUT - Path to an output directory. This location is where - the pipeline will create all of its output files, also - known as the pipeline's working directory. If the user - provided working directory has not been initialized, - it will be created automatically. - Example: --output /data/$USER/WES_hg38 - --genome {hg38, mm10, ...} - Reference genome. This option defines the reference - genome of the samples. Currently, hg38 and mm10 are supported. - Support for additional and custom genomes will be added soon. - Example: --genome hg38 - --targets TARGETS - Path to exome targets BED file. This file can be - obtained from the manufacturer of the target capture - kit that was used. - - """) - - # Display example usage in epilog - run_epilog = textwrap.dedent("""\ - example: - # Step 1.) Grab an interactive node (do not run on head node) - sinteractive --mem=8g --cpus-per-task=4 - module purge - module load singularity snakemake - - # Step 2A.) Dry-run the pipeline - ./exome-seek run --input .tests/*.R?.fastq.gz \\ - --output /data/$USER/WES_hg38 \\ - --genome hg38 \\ - --targets Agilent_SSv7_allExons_hg38.bed \\ - --mode slurm \\ - --dry-run - - # Step 2B.) Run the GATK4 WES pipeline - # The slurm mode will submit jobs to the cluster. - # It is recommended running exome-seek in this mode. 
-          ./exome-seek run --input .tests/*.R?.fastq.gz \\
-                           --output /data/$USER/WES_hg38 \\
-                           --genome hg38 \\
-                           --targets Agilent_SSv7_allExons_hg38.bed \\
-                           --mode biowulf
-
-        version:
-          {}
-        """)#.format(__version__))
-
-    # Suppress the help message of required args to work around the lack of named sub-parser groups
-    subparser_run = subparsers.add_parser('run',
-        help = 'Run the GATK4 WES pipeline with input files.',
-        usage = argparse.SUPPRESS,
-        formatter_class = argparse.RawDescriptionHelpFormatter,
-        description = required_run_options,
-        epilog = run_epilog
-    )
-
-    # Required Arguments
-    # Input FastQ files
-    subparser_run.add_argument('--input',
-        # Check if the file exists and if it is readable
-        type = lambda file: permissions(parser, file, os.R_OK),
-        required = True,
-        nargs = '+',
-        help = argparse.SUPPRESS
-    )
-
-    # Output Directory (analysis working directory)
-    subparser_run.add_argument('--output',
-        type = lambda option: os.path.abspath(os.path.expanduser(option)),
-        required = True,
-        help = argparse.SUPPRESS
-    )
-
-    # Reference Genome (to dynamically select reference files)
-    subparser_run.add_argument('--genome',
-        required = True,
-        #choices = ['hg38', 'mm10'],
-        type = lambda option: str(genome_options(subparser_run, option, ['hg38', 'mm10'])),
-        help = argparse.SUPPRESS
-    )
-
-    # Exome targets BED file
-    subparser_run.add_argument('--targets',
-        # Check if the file exists and if it is readable
-        type = lambda file: permissions(parser, file, os.R_OK),
-        required = True,
-        help = argparse.SUPPRESS
-    )
-
-    # Optional Arguments
-    # Execution Method (run locally on a compute node or submit to the SLURM job scheduler)
-    subparser_run.add_argument('--mode',
-        type = str,
-        required = False,
-        default = "biowulf",
-        choices = ['biowulf', 'local'],
-        help = 'Execution Method [Default: biowulf]. Defines the mode or method of execution. \
-        Valid mode options include: biowulf or local. \
-        local: uses the local method of execution. Local executions run serially on a \
-        single compute instance. This is useful for testing or debugging, or when a user \
-        does not have access to a high-performance computing environment. \
-        biowulf: uses the SLURM and Singularity backend. This execution method submits \
-        jobs to a cluster. It is recommended to run exome-seek in this mode, as execution \
-        will be significantly faster in a distributed environment. \
-        Example: --mode biowulf'
-    )
-
-    # Name of master job
-    subparser_run.add_argument('--job-name',
-        type = str,
-        required = False,
-        default = 'pl:wgs-seek',
-        help = 'Set the name of the pipeline\'s master job. \
-        When submitting the pipeline to a job scheduler, like SLURM, \
-        this option allows you to set the name of the pipeline\'s master \
-        job. By default, the name of the pipeline\'s master job \
-        is set to "pl:wgs-seek". \
-        Example: --job-name WES_hg38_main'
-    )
-
-    # Tumor-normal pairs file
-    subparser_run.add_argument('--pairs',
-        # Check if the file exists and if it is readable
-        type = lambda file: permissions(parser, file, os.R_OK),
-        required = False,
-        help = 'Tumor-normal pairs file. This tab-delimited file contains two columns with the \
-        names of tumor and normal samples, one pair per line. The header of the file needs to be \
-        "Tumor" for the tumor column and "Normal" for the normal column.'
-    )
-
-    # Correction for FFPE samples
-    subparser_run.add_argument('--ffpe',
-        action = 'store_true',
-        required = False,
-        default = False,
-        help = 'FFPE correction. Runs an additional filtering step for Formalin-Fixed \
-        Paraffin-Embedded (FFPE) samples. Do NOT use this option with non-FFPE samples.'
-    )
-
-    # Call CNVs
-    subparser_run.add_argument('--cnv',
-        action = 'store_true',
-        required = False,
-        default = False,
-        help = 'Call copy number variations (CNVs). CNVs will only be called from \
-        tumor-normal pairs. If this option is provided without a --pairs file, \
-        CNVs will NOT be called.'
-    )
-
-    # Silent output mode
-    subparser_run.add_argument('--silent',
-        action = 'store_true',
-        required = False,
-        default = False,
-        help = 'Silence standard output. Reduces the amount of information directed \
-        to standard output when submitting the master job to the job scheduler. Only the \
-        job id of the master job is returned.'
-    )
-
-    # Base directory to write temporary files
-    subparser_run.add_argument('--tmp-dir',
-        type = str,
-        required = False,
-        default = '/lscratch/$SLURM_JOBID/',
-        help = 'Path on the filesystem for writing intermediate, temporary output \
-        files. By default, this variable is set to \'/lscratch/$SLURM_JOBID\' \
-        for backwards compatibility with the NIH\'s Biowulf cluster; however, \
-        if you are running the pipeline on another cluster, this option will \
-        need to be specified. Ideally, this path should point to a dedicated \
-        location on the filesystem for writing tmp files. On many systems, this \
-        location is set to somewhere in /scratch. If you need to inject a variable \
-        into this string that should NOT be expanded, please quote this option\'s \
-        value in single quotes. As an example, on the NCI/NIH FRCE cluster the \
-        value of this option would be set to \
-        --tmp-dir \'/scratch/cluster_scratch/$USER/\', \
-        default: \'/lscratch/$SLURM_JOBID/\''
-    )
-
-    # Number of threads for the exome-seek pipeline's main process
-    subparser_run.add_argument('--threads',
-        type = int,
-        required = False,
-        default = 2,
-        help = 'Max number of threads for local processes. It is recommended \
-        to set this value to the maximum number of CPUs available on the host \
-        machine, default: 2.'
-    )
-
-    # Define handlers for each sub-parser
-    subparser_run.set_defaults(func = run)
-    #subparser_unlock.set_defaults(func = unlock)
-    #subparser_cache.set_defaults(func = cache)
-
-    # Parse command-line args
-    args = parser.parse_args()
-    return args
-
-
-def main():
-    # Collect args for the sub-command
-    args = parsed_arguments()
-
-    # Display version information
-    #err('wgs-seek ({})'.format(__version__))
-
-    # Mediator method to call the sub-command's handler function
-    args.func(args)
-
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
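
Note on the argparse workaround removed above: because argparse has no named sub-parser argument groups (https://bugs.python.org/issue9341), the old CLI suppressed per-argument help and documented the required options in a hand-written description string. The following is a minimal, self-contained sketch of that same pattern; the "demo" program name and its --input/--output options are illustrative placeholders, not part of this pipeline.

#!/usr/bin/env python3
import argparse
import textwrap

# Hand-written "required arguments" section: argparse cannot render a named
# argument group for a sub-parser on its own (https://bugs.python.org/issue9341).
required_run_options = textwrap.dedent("""\
    usage: demo run --input INPUT [INPUT ...] --output OUTPUT

    required arguments:
      --input INPUT [INPUT ...]
                            One or more input files to process.
      --output OUTPUT
                            Path to an output directory.
    """)

parser = argparse.ArgumentParser(description='demo: suppressed-help sub-parser pattern')
subparsers = parser.add_subparsers(help='List of available sub-commands')

subparser_run = subparsers.add_parser('run',
    help = 'Run the demo sub-command.',
    usage = argparse.SUPPRESS,                               # hide the auto-generated usage line
    formatter_class = argparse.RawDescriptionHelpFormatter,  # keep the dedented layout verbatim
    description = required_run_options,
)
# help=argparse.SUPPRESS hides each option from the generated help; the
# description string above is then the only place these options are documented,
# which is why it must be kept in sync whenever a required arg changes.
subparser_run.add_argument('--input', nargs='+', required=True, help=argparse.SUPPRESS)
subparser_run.add_argument('--output', required=True, help=argparse.SUPPRESS)

if __name__ == '__main__':
    print(parser.parse_args())

With this in place, "demo run --help" prints the hand-written section instead of argparse's generated one, which is the behavior the deleted parsed_arguments() relied on.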