Merge pull request #47 from CCBR/cnvcalllist

CNV callers
CCBR · May 29, 2024 · 76043b0 · 76043b0
2 parents 01e6a3e + 21133f7
commit 76043b0
Show file tree

Hide file tree

Showing 9 changed files with 293 additions and 116 deletions.
diff --git a/conf/base.config b/conf/base.config
@@ -59,7 +59,7 @@ process {
         time   = { check_max( 72.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_somaticcaller_high {
-        cpus   = { check_max( 17     * task.attempt, 'cpus'    ) }
+        cpus   = { check_max( 18     * task.attempt, 'cpus'    ) }
         memory = { check_max( 96.GB * task.attempt, 'memory'  ) }
         time   = { check_max( 72.h   * task.attempt, 'time'    ) }
     }

diff --git a/conf/biowulf.config b/conf/biowulf.config
@@ -22,7 +22,7 @@ singularity {
     autoMounts = true
     cacheDir = "/data/CCBR_Pipeliner/SIFS"
     envWhitelist = 'https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH'
-    runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
+    runOptions = '-B /gs10,/gs11,/gs12,/spin1,/data/CCBR_Pipeliner/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
 }
 
 env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS"

diff --git a/conf/genomes.config b/conf/genomes.config
@@ -1,8 +1,8 @@
 params {
     genomes {
         'hg38' {
-            genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta"
-            genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai"
+            genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa"
+            genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa.fai"
             bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta"
             genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict"
             wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list"
@@ -28,14 +28,22 @@ params {
             octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest"
             SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz"
             chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
+            //PURPLE
+            GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GermlineHetPon.38.vcf.gz"
+            GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GC_profile.1000bp.38.cnp"
+            DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DiploidRegions.38.bed.gz'
+            ENSEMBLCACHE = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/ensembl_data/'
+            DRIVERS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DriverGenePanel.38.tsv'
+            HOTSPOTS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/KnownHotspots.somatic.38.vcf.gz'
+
         }
 
         'hg19' {
             genome = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa"
             genomefai = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa.fai"
             bwagenome= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa"
             genomedict= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.dict"
-            intervals= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg19/hg19_noblacklistsort_vc.bed"
+            intervals= "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hg19_noblacklist_maincontig.bed"
             INDELREF = "/fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" 
             KNOWNINDELS = "-known /fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf -known /fdb/GATK_resource_bundle/b37/1000G_phase1.indels.b37.vcf"
             KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz'
@@ -53,10 +61,18 @@ params {
             vepspecies = "homo_sapiens"
             vepbuild = "GRCh37"
             annotsvgenome = "GRCh37"
-            octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest"
-            octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest"
+            octopus_sforest= "" //"--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest"
+            octopus_gforest= "" //"--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest"
             SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz"
             chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
+             //PURPLE
+            GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GermlineHetPon.38.vcf.gz"
+            GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GC_profile.1000bp.38.cnp"
+            DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DiploidRegions.38.bed.gz'
+            ENSEMBLCACHE = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/ensembl_data/'
+            DRIVERS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DriverGenePanel.38.tsv'
+            HOTSPOTS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/KnownHotspots.somatic.38.vcf.gz'
+
         }
 
         'mm10' {
@@ -93,6 +109,7 @@ params {
                 FREECSNPS= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.txt.gz"
             }
             chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chrX','chrY','chrM']
+
         }
     }
 }
diff --git a/main.nf b/main.nf
@@ -17,12 +17,13 @@ log.info """\
 
 
 include {DETERMINEBAM; INPUT; INPUT_BAM; ALIGN; GL;
-    VC; SV; CNVmouse; CNVhuman;
+    VC; SV; CNVmouse; CNVhuman; CNVhuman_novc;
     QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf"
 
 include {INPUT_TONLY; INPUT_TONLY_BAM;
     ALIGN_TONLY;
-    VC_TONLY; SV_TONLY; CNVhuman_tonly; CNVmouse_tonly; QC_TONLY } from "./subworkflows/local/workflows_tonly.nf"
+    VC_TONLY; SV_TONLY; CNVmouse_tonly;  CNVhuman_tonly; CNVhuman_novc_tonly;
+    QC_TONLY } from "./subworkflows/local/workflows_tonly.nf"
 
 
 workflow.onComplete {
@@ -56,8 +57,7 @@ workflow {
                 CNVmouse(ALIGN.out.bamwithsample)
             } else if (params.genome== "hg38"){
                 if (!params.vc){
-                    VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet)
-                    CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input)
+                    CNVhuman_novc(ALIGN.out.bamwithsample)
                 } else {
                     CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input)
                 }
@@ -73,7 +73,7 @@ workflow {
 
     //TUMOR-NORMAL BAM INPUT
     if ([params.bam_input,params.bam_file_input].any() && params.sample_sheet){
-        println "Tumor-Normal with BAMs"
+        println "Tumor-Normal BAM"
         INPUT_BAM()
         if (params.vc){
             VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
@@ -84,10 +84,9 @@ workflow {
         if (params.cnv){
             if (params.genome == "mm10"){
                 CNVmouse(INPUT_BAM.out.bamwithsample)
-            } else if (params.genome== "hg38"){
+            } else if (params.genome == "hg38"){
                 if (!params.vc){
-                    VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
-                    CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input)
+                    CNVhuman_novc(INPUT_BAM.out.bamwithsample)
                 }else {
                     CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input)
                 }

diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf
@@ -14,15 +14,14 @@ if (params.genome=="mm10"){
     FREECPLOT = params.freec_plot
 }
 
-GERMLINEHET="/data/SCLC-BRAINMETS/cn/copy_number/GermlineHetPon.38.vcf.gz"
-GCPROFILE='/data/SCLC-BRAINMETS/cn/copy_number/GC_profile.1000bp.38.cnp'
-DIPLODREG='/data/SCLC-BRAINMETS/cn/copy_number/DiploidRegions.38.bed.gz'
-ENSEMBLCACHE='/data/SCLC-BRAINMETS/cn/common/ensembl_data'
-DRIVERS='/data/SCLC-BRAINMETS/cn/common/DriverGenePanel.38.tsv'
-HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz'
-
-//ascatR=
-
+// Human-only PURPLE resources. Each value is read from the selected genome's
+// entry in params.genomes (the keys grouped under the //PURPLE comment in
+// conf/genomes.config) and wrapped with file(). The variables are assigned
+// without `def` on purpose, so they stay visible to the processes below.
+// NOTE(review): the 'hg19' entry in conf/genomes.config currently lists the
+// hg38 PURPLE resource paths — confirm this is intended before relying on hg19.
+if (params.genome=="hg38" || params.genome=="hg19"){
+    GERMLINEHET=file(params.genomes[params.genome].GERMLINEHET)
+    GCPROFILE=file(params.genomes[params.genome].GCPROFILE)
+    DIPLODREG=file(params.genomes[params.genome].DIPLODREG)
+    ENSEMBLCACHE=file(params.genomes[params.genome].ENSEMBLCACHE)
+    DRIVERS=file(params.genomes[params.genome].DRIVERS)
+    HOTSPOTS=file(params.genomes[params.genome].HOTSPOTS)
+}
 
 //mm10 Paired-Sequenza, FREEC-tumor only
 process seqz_sequenza_bychr {
@@ -81,9 +80,6 @@ process sequenza {
         path("${pairid}_gc_plots.pdf"),
         path("${pairid}_sequenza_extract.RData")
 
-    //samtools mpileup ${tumor} -f $GENOMEREF -Q 20 |gzip > ${tumorname}.mpileup.gz
-    //samtools mpileup ${normal} -f $GENOMEREF -Q 20 |gzip > ${normalname}.mpileup.gz
-    //sequenza-utils seqz_binning --seqz --window 50 -o ${sample}_bin50.seqz.gz
 
     shell:
     '''
@@ -263,11 +259,7 @@ process amber_tonly {
 
     output:
         tuple val(tumorname), path("${tumorname}_amber")
-        //path("${samplename}.amber.baf.tsv.gz"),
-        //path("${samplename}.amber.baf.pcf"),
-        //path("${samplename}.amber.qc")
-        //path("${samplename}.amber.contamination.vcf.gz") Contamination maybe only with tumor
-
+
     script:
 
     """
@@ -300,11 +292,7 @@ process amber_tn {
 
     output:
         tuple val(tumorname), path("${tumorname}_vs_${normalname}_amber")
-        //path("${samplename}.amber.baf.tsv.gz"),
-        //path("${samplename}.amber.baf.pcf"),
-        //path("${samplename}.amber.qc")
-        //path("${samplename}.amber.contamination.vcf.gz") Contamination maybe only with tumor
-
+
     script:
 
     """
@@ -337,9 +325,6 @@ process cobalt_tonly {
 
     output:
         tuple val(tumorname), path("${tumorname}_cobalt")
-        //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"),
-        //path("${samplename}/${samplename}.cobalt.ratio.pcf"),
-        //path("${samplename}/${samplename}.cobalt.gc.median.tsv")
 
     script:
 
@@ -373,20 +358,15 @@ process cobalt_tn {
 
     output:
         tuple val(tumorname), path("${tumorname}_vs_${normalname}_cobalt")
-        //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"),
-        //path("${samplename}/${samplename}.cobalt.ratio.pcf"),
-        //path("${samplename}/${samplename}.cobalt.gc.median.tsv")
 
     script:
 
     """
-
     java -jar -Xmx8G /opt2/hmftools/cobalt.jar \
     -tumor ${tumorname} -tumor_bam ${tumorname} \
     -reference ${normalname} -reference_bam ${normal} \
     -output_dir ${tumorname}_vs_${normalname}_cobalt \
     -threads $task.cpus \
-    -tumor_only_diploid_bed $DIPLODREG \
     -gc_profile $GCPROFILE
 
     """
@@ -405,11 +385,85 @@ process purple {
     label 'process_medium'
 
     input:
-        tuple val(tumorname),
-        path(cobaltin),
-        path(amberin),
-        path(somaticvcf),
-        path(somaticvcfindex)
+        tuple val(tumorname), val(normalname),
+        path(cobaltin), path(amberin),
+        path(somaticvcf), path(somaticvcfindex)
+
+    output:
+        tuple val(tumorname), path("${tumorname}")
+
+    script:
+
+    """
+    java -jar /opt2/hmftools/purple.jar \
+    -tumor ${tumorname} \
+    -reference ${normalname} \
+    -amber ${amberin} \
+    -cobalt ${cobaltin} \
+    -gc_profile $GCPROFILE \
+    -ref_genome_version 38 \
+    -ref_genome $GENOME \
+    -ensembl_data_dir $ENSEMBLCACHE \
+    -somatic_vcf ${somaticvcf} \
+    -driver_gene_panel $DRIVERS \
+    -somatic_hotspots $HOTSPOTS \
+    -output_dir ${tumorname}
+    """
+
+    stub:
+
+    """
+    mkdir ${tumorname}
+    touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv
+    """
+
+}
+
+
+// Tumor/normal PURPLE run that takes no somatic VCF: compared with `purple`,
+// the command omits -somatic_vcf, -driver_gene_panel and -somatic_hotspots.
+process purple_novc {
+    container = "${params.containers.logan}"
+    label 'process_medium'
+
+    // tumorname / normalname : sample identifiers passed to -tumor / -reference
+    // cobaltin / amberin     : staged paths passed to -cobalt / -amber
+    input:
+        tuple val(tumorname), val(normalname),
+        path(cobaltin), path(amberin)
+
+    // Emits the tumor name together with the output directory, which is named
+    // after the tumor sample (see -output_dir in the command).
+    output:
+        tuple val(tumorname), path("${tumorname}")
+
+    // GCPROFILE and ENSEMBLCACHE are script-level variables set from
+    // params.genomes for hg38/hg19.
+    // NOTE(review): GENOME is not defined in the visible part of this file —
+    // confirm it is set elsewhere in the module.
+    // NOTE(review): -ref_genome_version is hard-coded to 38 although the
+    // resource lookup also accepts hg19 — confirm.
+    script:
+
+    """
+    java -jar /opt2/hmftools/purple.jar \
+    -tumor ${tumorname} \
+    -reference ${normalname} \
+    -amber ${amberin} \
+    -cobalt ${cobaltin} \
+    -gc_profile $GCPROFILE \
+    -ref_genome_version 38 \
+    -ref_genome $GENOME \
+    -ensembl_data_dir $ENSEMBLCACHE \
+    -output_dir ${tumorname}
+    """
+
+    // Stub: creates the output directory and three empty placeholder files.
+    stub:
+
+    """
+    mkdir ${tumorname}
+    touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv
+    """
+
+}
+
+
+process purple_tonly {
+    container = "${params.containers.logan}"
+    label 'process_medium'
+
+    input:
+        tuple val(tumorname), 
+        path(cobaltin), path(amberin),
+        path(somaticvcf), path(somaticvcfindex)
 
     output:
         tuple val(tumorname), path("${tumorname}")
@@ -440,6 +494,41 @@ process purple {
 
 }
 
+
+// Tumor-only PURPLE run that takes no somatic VCF: the command passes no
+// -reference, -somatic_vcf, -driver_gene_panel or -somatic_hotspots argument.
+process purple_tonly_novc {
+    container = "${params.containers.logan}"
+    label 'process_medium'
+
+    // tumorname          : sample identifier passed to -tumor
+    // cobaltin / amberin : staged paths passed to -cobalt / -amber
+    // NOTE(review): normalname is declared here but never referenced in the
+    // script or stub, and the sibling purple_tonly input tuple has no
+    // normalname. Confirm the upstream channel really emits a 4-element tuple;
+    // otherwise the remaining inputs would bind to the wrong positions.
+    input:
+        tuple val(tumorname), val(normalname),
+        path(cobaltin), path(amberin)
+
+    // Emits the tumor name together with the output directory, which is named
+    // after the tumor sample (see -output_dir in the command).
+    output:
+        tuple val(tumorname), path("${tumorname}")
+
+    // GCPROFILE and ENSEMBLCACHE are script-level variables set from
+    // params.genomes for hg38/hg19.
+    // NOTE(review): GENOME is not defined in the visible part of this file —
+    // confirm it is set elsewhere in the module.
+    // NOTE(review): -ref_genome_version is hard-coded to 38 although the
+    // resource lookup also accepts hg19 — confirm.
+    script:
+
+    """
+    java -jar /opt2/hmftools/purple.jar \
+    -tumor ${tumorname} \
+    -amber ${amberin} \
+    -cobalt ${cobaltin} \
+    -gc_profile $GCPROFILE \
+    -ref_genome_version 38 \
+    -ref_genome $GENOME \
+    -ensembl_data_dir $ENSEMBLCACHE \
+    -output_dir ${tumorname}
+    """
+
+    // Stub: creates the output directory and three empty placeholder files.
+    stub:
+
+    """
+    mkdir ${tumorname}
+    touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv
+    """
+
+}
+
 /*
 process ascat_tn {
     module=["java/12.0.1","R/3.6.3"]

diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
@@ -338,8 +338,7 @@ process octopus_tonly {
     """
     octopus -R $GENOMEREF -C cancer -I ${tumor} \
     --annotations AF AC AD DP \
-    --target-working-memory 92Gb \
-    -B 90Gb \
+    -B 92Gb \
     -t ${bed} \
     --threads ${task.cpus}\
     $SOMATIC_FOREST \

diff --git a/nextflow.config b/nextflow.config
@@ -52,6 +52,9 @@ params {
     bam_input=null
     BAMINPUT=null
 
+    callers = "mutect2,octopus,vardict,varscan"
+    cnvcallers= "purple,sequenza,freec"
+
     publish_dir_mode = 'symlink'
     outdir = 'results'
 
@@ -74,7 +77,7 @@ profiles {
         autoMounts = true
         cacheDir = "$PWD/singularity"
         envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID'
-        runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
+        runOptions = '-B /gs10,/gs11,/gs12,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
     }
     biowulf {
         includeConfig 'conf/biowulf.config'