From 8c593b033ecf6d060a1ee50bad5ac95d48b90bff Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 21 May 2024 15:22:39 -0400 Subject: [PATCH 1/7] feat: cnv callers --- conf/biowulf.config | 2 +- conf/genomes.config | 4 +- main.nf | 15 ++-- modules/local/copynumber.nf | 36 +++++++++ nextflow.config | 5 +- subworkflows/local/workflows.nf | 111 +++++++++++++++++--------- subworkflows/local/workflows_tonly.nf | 67 +++++++++++----- 7 files changed, 168 insertions(+), 72 deletions(-) diff --git a/conf/biowulf.config b/conf/biowulf.config index 77a06d5..d28a1ac 100644 --- a/conf/biowulf.config +++ b/conf/biowulf.config @@ -22,7 +22,7 @@ singularity { autoMounts = true cacheDir = "/data/CCBR_Pipeliner/SIFS" envWhitelist = 'https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' - runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' + runOptions = '-B /gs10,/gs11,/gs12,/spin1,/data/CCBR_Pipeliner/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' } env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS" diff --git a/conf/genomes.config b/conf/genomes.config index 3053157..7682167 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -1,8 +1,8 @@ params { genomes { 'hg38' { - genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" - genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai" + genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa" + genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa.fai" bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta" genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" diff --git a/main.nf b/main.nf index 4622d07..3009c56 100644 --- a/main.nf +++ b/main.nf @@ -17,12 +17,13 @@ log.info """\ include {DETERMINEBAM; INPUT; INPUT_BAM; ALIGN; GL; - VC; SV; CNVmouse; CNVhuman; + VC; SV; CNVmouse; CNVhuman; CNVhuman_novc; QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf" include {INPUT_TONLY; INPUT_TONLY_BAM; ALIGN_TONLY; - VC_TONLY; SV_TONLY; CNVhuman_tonly; CNVmouse_tonly; QC_TONLY } from "./subworkflows/local/workflows_tonly.nf" + VC_TONLY; SV_TONLY; CNVmouse_tonly; CNVhuman_tonly; CNVhuman_novc_tonly; + QC_TONLY } from "./subworkflows/local/workflows_tonly.nf" workflow.onComplete { @@ -56,8 +57,7 @@ workflow { CNVmouse(ALIGN.out.bamwithsample) } else if (params.genome== "hg38"){ if (!params.vc){ - VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet) - CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input) + CNVhuman_novc(ALIGN.out.bamwithsample) } else { CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input) } @@ -73,7 +73,7 @@ workflow { //TUMOR-NOMRAL BAM INPUT if ([params.bam_input,params.bam_file_input].any() && params.sample_sheet){ - println "Tumor-Normal with BAMs" + println "Tumor-Normal BAM" INPUT_BAM() if (params.vc){ VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) @@ -84,10 +84,9 @@ workflow { if (params.cnv){ if (params.genome == "mm10"){ CNVmouse(INPUT_BAM.out.bamwithsample) - } else if (params.genome== "hg38"){ + } else if (params.genome == "hg38"){ if (!params.vc){ - VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) - CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input) + CNVhuman_novc(INPUT_BAM.out.bamwithsample) }else { CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input) } diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index 9cd1e27..27abe0a 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -440,6 +440,42 @@ process purple { } + +process purple_novc { + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorname), + path(cobaltin), + path(amberin) + + output: + tuple val(tumorname), path("${tumorname}") + + script: + + """ + java -jar /opt2/hmftools/purple.jar \ + -tumor ${tumorname} \ + -amber ${amberin} \ + -cobalt ${cobaltin} \ + -gc_profile $GCPROFILE \ + -ref_genome_version 38 \ + -ref_genome $GENOME \ + -ensembl_data_dir $ENSEMBLCACHE \ + -output_dir ${tumorname} + """ + + stub: + + """ + mkdir ${tumorname} + touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv + """ + +} + /* process ascat_tn { module=["java/12.0.1","R/3.6.3"] diff --git a/nextflow.config b/nextflow.config index b317131..d9dac03 100644 --- a/nextflow.config +++ b/nextflow.config @@ -52,6 +52,9 @@ params { bam_input=null BAMINPUT=null + callers = "mutect2,octopus,vardict,varscan" + cnvcallers= "purple,sequenza,freec" + publish_dir_mode = 'symlink' outdir = 'results' @@ -74,7 +77,7 @@ profiles { autoMounts = true cacheDir = "$PWD/singularity" envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID' - runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' + runOptions = '-B /gs10,/gs11,/gs12,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' } biowulf { includeConfig 'conf/biowulf.config' diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 153c230..63a1afe 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -42,7 +42,7 @@ include {svaba_somatic; manta_somatic; annotsv_tn as annotsv_survivor_tn annotsv_tn as annotsv_svaba;annotsv_tn as annotsv_manta} from '../../modules/local/structural_variant.nf' -include {amber_tn; cobalt_tn; purple; +include {amber_tn; cobalt_tn; purple;purple_novc; sequenza; seqz_sequenza_bychr; freec; freec_paired } from '../../modules/local/copynumber.nf' include {splitinterval} from '../../modules/local/splitbed.nf' @@ -160,7 +160,7 @@ workflow VC { main: //Create Pairing for TN (in case of dups) - sample_sheet_paired=sample_sheet|map{tu,no -> tuple ("${tu}_vs_${no}",tu, no)} + sample_sheet_paired=sample_sheet|map{tu,no -> tuple ("${tu}_vs_${no}",tu, no)} |view() bambyinterval=bamwithsample.combine(splitout.flatten()) bambyinterval @@ -174,14 +174,13 @@ workflow VC { concat(bambyinterval_tonly.n1) |unique() //Prep Pileups - params.callers = "mutect2,octopus,muse,lofreq,vardict,varscan" - params.callist = params.callers.split(',') as List + call_list = params.callers.split(',') as List vc_all=Channel.empty() vc_tonly=Channel.empty() //Common for Mutect2/Varscan - if ("mutect2" in params.callist | "varscan" in params.callist){ + if ("mutect2" in call_list | "varscan" in call_list){ pileup_paired_t(bambyinterval) pileup_paired_n(bambyinterval) @@ -210,7 +209,7 @@ workflow VC { contamination_tumoronly(pileup_all) } - if ("mutect2" in params.callist){ + if ("mutect2" in call_list){ //Paired Mutect2 mutect2(bambyinterval) mutect2.out.groupTuple(by:[0,1]) @@ -269,7 +268,7 @@ workflow VC { } - if ("strelka" in params.callist){ + if ("strelka" in call_list){ //Strelka TN strelka_in=strelka_tn(bambyinterval) | groupTuple(by:[0,1]) | map { tumor,normal,vcfs,vcfindex,indels,indelindex -> tuple("${tumor}_vs_${normal}", @@ -284,7 +283,7 @@ workflow VC { } - if ("vardict" in params.callist){ + if ("vardict" in call_list){ //Vardict TN vardict_in=vardict_tn(bambyinterval) | groupTuple(by:[0,1]) | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).vardict.vcf/)[0][1].toInteger()},"vardict")} @@ -304,7 +303,7 @@ workflow VC { vc_tonly=vc_tonly|concat(vardict_in_tonly) } - if ("varscan" in params.callist){ + if ("varscan" in call_list){ //VarScan TN varscan_in=bambyinterval.combine(contamination_paired.out,by:0) | varscan_tn | groupTuple(by:[0,1]) @@ -327,7 +326,7 @@ workflow VC { } //Lofreq TN - if ("lofreq" in params.callist){ + if ("lofreq" in call_list){ lofreq_in=lofreq_tn(bambyinterval) | groupTuple(by:[0,1]) | map{tu,no,snv,dbsnv,indel,dbindel,vcf,vcfindex-> tuple("${tu}_vs_${no}",vcf.toSorted{it -> (it.name =~ /${tu}_vs_${no}_(.*?)_lofreq.vcf.gz/)[0][1].toInteger()},vcfindex,"lofreq")} | combineVariants_lofreq | join(sample_sheet_paired) @@ -339,7 +338,7 @@ workflow VC { //MuSE TN - if ("muse" in params.callist){ + if ("muse" in call_list){ muse_in=muse_tn(bamwithsample) | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf,"muse")} | combineVariants_muse | join(sample_sheet_paired) @@ -350,7 +349,7 @@ workflow VC { } //Octopus TN - if ("octopus" in params.callist){ + if ("octopus" in call_list){ octopus_in=octopus_tn(bambyinterval) | bcftools_index_octopus | groupTuple() | map{samplename,vcf,vcfindex-> tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus")} @@ -379,26 +378,26 @@ workflow VC { //Combine All Variants Using VCF -> Annotate - if (params.callist.size()>1){ + if (call_list.size()>1){ vc_all | groupTuple(by:[0,1]) | somaticcombine | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} | annotvep_tn_combined } - - if (params.callist.size()>1){ + + if (call_list.size()>1){ vc_tonly | somaticcombine_tonly | map{tumor,vcf,index ->tuple(tumor,normal,"combined_tonly",vcf,index)} | annotvep_tn_combined } - + //Implement PCGR Annotator/CivIC Next - if ("octopus" in params.callist){ + if ("octopus" in call_list){ somaticcall_input=octopus_in_sc - }else if("mutect2" in params.callist){ + }else if("mutect2" in call_list){ somaticcall_input=mutect2_in } @@ -436,7 +435,7 @@ workflow SV { workflow CNVmouse { take: bamwithsample - + main: //Sequenza (Preferred for Paired) chrs=Channel.fromList(params.genomes[params.genome].chromosomes) @@ -462,23 +461,59 @@ workflow CNVhuman { bamwithsample somaticcall_input - main: - //Sequenza - chrs=Channel.fromList(params.genomes[params.genome].chromosomes) - seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} - seqzin.combine(chrs) | seqz_sequenza_bychr - seqz_sequenza_bychr.out.groupTuple() - .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} - | sequenza + main: + cnvcall_list = params.cnvcallers.split(',') as List + + if ("purple" in cnvcall_list){ + //Purple + bamwithsample | amber_tn + bamwithsample | cobalt_tn + purplein=amber_tn.out.join(cobalt_tn.out) + purplein.join(somaticcall_input)| + map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,amber,cobalt,vcf,vcfindex)} + | purple + } - //Purple - bamwithsample | amber_tn - bamwithsample | cobalt_tn - purplein=amber_tn.out.join(cobalt_tn.out) - purplein.join(somaticcall_input)| - map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,amber,cobalt,vcf,vcfindex)} - | purple + if ("sequenza" in cnvcall_list){ + //Sequenza + chrs=Channel.fromList(params.genomes[params.genome].chromosomes) + seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} + seqzin.combine(chrs) | seqz_sequenza_bychr + seqz_sequenza_bychr.out.groupTuple() + .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} + | sequenza + } + +} + + +workflow CNVhuman_novc { + take: + bamwithsample + + main: + cnvcall_list = params.cnvcallers.split(',') as List + + if ("purple" in cnvcall_list){ + //Purple + bamwithsample | amber_tn + bamwithsample | cobalt_tn + purplein=amber_tn.out |join(cobalt_tn.out) + purplein | map{t1,amber,cobalt,n1 -> tuple(t1,amber,cobalt)} + | purple_novc + } + + if ("sequenza" in cnvcall_list){ + //Sequenza + chrs=Channel.fromList(params.genomes[params.genome].chromosomes) + seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} + seqzin.combine(chrs) | seqz_sequenza_bychr + seqz_sequenza_bychr.out.groupTuple() + .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} + | sequenza + } } @@ -600,7 +635,7 @@ workflow INPUT_BAM { row.Tumor, row.Normal ) - } + } } //Either BAM Input or File sheet input @@ -628,7 +663,7 @@ workflow INPUT_BAM { .splitCsv(header: false, sep: "\t", strip:true) .map{ sample,bam,bai -> tuple(sample, file(bam),file(bai)) - } + } } intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') splitinterval(intervalbedin) @@ -647,7 +682,7 @@ workflow INPUT_BAM { bamwithsample=baminput2.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(baminputonly,by:0).map{it.swap(3,0)} } else { - bamwithsample=baminputonly.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(baminputonly,by:0).map{it.swap(3,0)} + bamwithsample=baminputonly.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(baminputonly,by:0).map{it.swap(3,0)} } emit: diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index eeae1e9..f6d9402 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -131,12 +131,9 @@ workflow VC_TONLY { bambyinterval=bamwithsample.combine(splitout.flatten()) //Common steps - params.callers = "mutect2,octopus,vardict,varscan" - params.callist = params.callers.split(',') as List - vc_tonly=Channel.empty() - if ("mutect2" in params.callist | "varscan" in params.callist){ + if ("mutect2" in call_list | "varscan" in call_list){ pileup_paired_tonly(bambyinterval) pileup_paired_tout=pileup_paired_tonly.out.groupTuple() .map{samplename,pileups-> tuple( samplename, @@ -146,7 +143,7 @@ workflow VC_TONLY { } //Mutect2 - if ("mutect2" in params.callist){ + if ("mutect2" in call_list){ mutect2_t_tonly(bambyinterval) mutect2_t_tonly.out.groupTuple() @@ -177,7 +174,7 @@ workflow VC_TONLY { } //VarDict - if ("vardict" in params.callist){ + if ("vardict" in call_list){ vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly")} | combineVariants_vardict_tonly @@ -189,7 +186,7 @@ workflow VC_TONLY { } //VarScan_tonly - if ("varscan" in params.callist){ + if ("varscan" in call_list){ varscan_in_tonly=bambyinterval.combine(contamination_tumoronly.out,by: 0) | varscan_tonly | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly")} @@ -201,7 +198,7 @@ workflow VC_TONLY { } //Octopus_tonly - if ("octopus" in params.callist){ + if ("octopus" in call_list){ octopus_in_tonly=bambyinterval | octopus_tonly | bcftools_index_octopus | groupTuple() | map{tumor,vcf,vcfindex -> tuple(tumor,vcf.toSorted{it -> it.name} @@ -215,7 +212,7 @@ workflow VC_TONLY { } //Combined Variants and Annotated - if (params.callist.size()>1){ + if (call_list.size()>1){ vc_tonly | groupTuple() | view() | somaticcombine_tonly @@ -224,13 +221,13 @@ workflow VC_TONLY { } //Emit for SC downstream, take Oc/Mu2/Vard/Varscan - if("octopus" in params.callist){ + if("octopus" in call_list){ somaticcall_input=octopus_in_tonly_sc - }else if("mutect2" in params.callist){ + }else if("mutect2" in call_list){ somaticcall_input=mutect2_in_tonly - }else if("vardict" in params.calllist){ + }else if("vardict" in call_list){ somaticcall_input=vardict_in_tonly - }else if("varscan" in params.calllist){ + }else if("varscan" in call_list){ somaticcall_input=varscan_in_tonly } @@ -280,20 +277,46 @@ workflow CNVhuman_tonly { somaticcall_input main: - //FREEC-Unpaired onlypu - bamwithsample | freec + cnvcall_list = params.cnvcallers.split(',') as List + + if ("freec" in cnvcall_list){ + //FREEC-Unpaired only + bamwithsample | freec + } + + if ("purple" in cnvcall_list){ + //Purple + bamwithsample | amber_tonly + bamwithsample | cobalt_tonly + purplein=amber_tonly.out.join(cobalt_tonly.out) + purplein.join(somaticcall_input)| + map{t1,amber,cobalt,vc,vcf,index -> tuple(t1,amber,cobalt,vcf,index)} + | purple + } - //Purple - bamwithsample | amber_tonly - bamwithsample | cobalt_tonly - purplein=amber_tonly.out.join(cobalt_tonly.out) - purplein.join(somaticcall_input)| - map{t1,amber,cobalt,vc,vcf,index -> tuple(t1,amber,cobalt,vcf,index)} - | purple +} +workflow CNVhuman_novc_tonly { + take: + bamwithsample + + main: + if ("freec" in cnvcall_list){ + //FREEC-Unpaired only + bamwithsample | freec + } + if ("purple" in cnvcall_list){ + //Purple + bamwithsample | amber_tonly + bamwithsample | cobalt_tonly + purplein=amber_tonly.out.join(cobalt_tonly.out) + map{t1,amber,cobalt -> tuple(t1,amber,cobalt)} + | purple_novc + } } + workflow QC_TONLY { take: fastqin From 6c1f18c2d4446daa37c120ad3c1c09723ba0bb37 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 21 May 2024 16:04:00 -0400 Subject: [PATCH 2/7] feat: add tumoronly purple calls --- conf/genomes.config | 8 ++ modules/local/copynumber.nf | 111 +++++++++++++++++++++----- subworkflows/local/workflows.nf | 14 +++- subworkflows/local/workflows_tonly.nf | 16 ++-- 4 files changed, 117 insertions(+), 32 deletions(-) diff --git a/conf/genomes.config b/conf/genomes.config index 7682167..d598da1 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -28,6 +28,14 @@ params { octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + //PURPLE + GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GermlineHetPon.38.vcf.gz" + GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GC_profile.1000bp.38.cnp" + DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DiploidRegions.38.bed.gz' + ENSEMBLCACHE = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/ensembl_data/' + DRIVERS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DriverGenePanel.38.tsv' + HOTSPOTS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/KnownHotspots.somatic.38.vcf.gz' + } 'hg19' { diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index 27abe0a..e00ae88 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -14,14 +14,15 @@ if (params.genome=="mm10"){ FREECPLOT = params.freec_plot } -GERMLINEHET="/data/SCLC-BRAINMETS/cn/copy_number/GermlineHetPon.38.vcf.gz" -GCPROFILE='/data/SCLC-BRAINMETS/cn/copy_number/GC_profile.1000bp.38.cnp' -DIPLODREG='/data/SCLC-BRAINMETS/cn/copy_number/DiploidRegions.38.bed.gz' -ENSEMBLCACHE='/data/SCLC-BRAINMETS/cn/common/ensembl_data' -DRIVERS='/data/SCLC-BRAINMETS/cn/common/DriverGenePanel.38.tsv' -HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz' +GERMLINEHET=file(params.genomes[params.genome].GERMLINEHET) +GCPROFILE=file(params.genomes[params.genome].GCPROFILE) +DIPLODREG=file(params.genomes[params.genome].DIPLODREG) +ENSEMBLCACHE=file(params.genomes[params.genome].ENSEMBLCACHE) +DRIVERS=file(params.genomes[params.genome].DRIVERS) +HOTSPOTS=file(params.genomes[params.genome].HOTSPOTS) + + -//ascatR= //mm10 Paired-Sequenza, FREEC-tumor only @@ -337,9 +338,6 @@ process cobalt_tonly { output: tuple val(tumorname), path("${tumorname}_cobalt") - //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), - //path("${samplename}/${samplename}.cobalt.ratio.pcf"), - //path("${samplename}/${samplename}.cobalt.gc.median.tsv") script: @@ -373,9 +371,6 @@ process cobalt_tn { output: tuple val(tumorname), path("${tumorname}_vs_${normalname}_cobalt") - //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), - //path("${samplename}/${samplename}.cobalt.ratio.pcf"), - //path("${samplename}/${samplename}.cobalt.gc.median.tsv") script: @@ -386,7 +381,6 @@ process cobalt_tn { -reference ${normalname} -reference_bam ${normal} \ -output_dir ${tumorname}_vs_${normalname}_cobalt \ -threads $task.cpus \ - -tumor_only_diploid_bed $DIPLODREG \ -gc_profile $GCPROFILE """ @@ -405,11 +399,9 @@ process purple { label 'process_medium' input: - tuple val(tumorname), - path(cobaltin), - path(amberin), - path(somaticvcf), - path(somaticvcfindex) + tuple val(tumorname), val(normalname), + path(cobaltin), path(amberin), + path(somaticvcf), path(somaticvcfindex) output: tuple val(tumorname), path("${tumorname}") @@ -419,6 +411,7 @@ process purple { """ java -jar /opt2/hmftools/purple.jar \ -tumor ${tumorname} \ + -reference ${normalname} \ -amber ${amberin} \ -cobalt ${cobaltin} \ -gc_profile $GCPROFILE \ @@ -446,9 +439,83 @@ process purple_novc { label 'process_medium' input: - tuple val(tumorname), - path(cobaltin), - path(amberin) + tuple val(tumorname), val(normalname), + path(cobaltin), path(amberin) + + output: + tuple val(tumorname), path("${tumorname}") + + script: + + """ + java -jar /opt2/hmftools/purple.jar \ + -tumor ${tumorname} \ + -reference ${normalname} \ + -amber ${amberin} \ + -cobalt ${cobaltin} \ + -gc_profile $GCPROFILE \ + -ref_genome_version 38 \ + -ref_genome $GENOME \ + -ensembl_data_dir $ENSEMBLCACHE \ + -output_dir ${tumorname} + """ + + stub: + + """ + mkdir ${tumorname} + touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv + """ + +} + + +process purple_tonly { + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorname), + path(cobaltin), path(amberin), + path(somaticvcf), path(somaticvcfindex) + + output: + tuple val(tumorname), path("${tumorname}") + + script: + + """ + java -jar /opt2/hmftools/purple.jar \ + -tumor ${tumorname} \ + -amber ${amberin} \ + -cobalt ${cobaltin} \ + -gc_profile $GCPROFILE \ + -ref_genome_version 38 \ + -ref_genome $GENOME \ + -ensembl_data_dir $ENSEMBLCACHE \ + -somatic_vcf ${somaticvcf} \ + -driver_gene_panel $DRIVERS \ + -somatic_hotspots $HOTSPOTS \ + -output_dir ${tumorname} + """ + + stub: + + """ + mkdir ${tumorname} + touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv + """ + +} + + +process purple_tonly_novc { + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorname), val(normalname), + path(cobaltin), path(amberin) output: tuple val(tumorname), path("${tumorname}") diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 63a1afe..713612c 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -42,7 +42,7 @@ include {svaba_somatic; manta_somatic; annotsv_tn as annotsv_survivor_tn annotsv_tn as annotsv_svaba;annotsv_tn as annotsv_manta} from '../../modules/local/structural_variant.nf' -include {amber_tn; cobalt_tn; purple;purple_novc; +include {amber_tn; cobalt_tn; purple; purple_novc; sequenza; seqz_sequenza_bychr; freec; freec_paired } from '../../modules/local/copynumber.nf' include {splitinterval} from '../../modules/local/splitbed.nf' @@ -437,6 +437,10 @@ workflow CNVmouse { bamwithsample main: + cnvcall_list = params.cnvcallers.split(',') as List + + if ("sequenza" in cnvcall_list){ + //Sequenza (Preferred for Paired) chrs=Channel.fromList(params.genomes[params.genome].chromosomes) seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> @@ -445,7 +449,9 @@ workflow CNVmouse { seqz_sequenza_bychr.out.groupTuple() .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} | sequenza + } + if ("freec" in cnvcall_list){ //FREEC Paired Mode bamwithsample | freec_paired @@ -453,7 +459,7 @@ workflow CNVmouse { bamwithsample | map{tname,tumor,tbai,nname,norm,nbai->tuple(tname,tumor,tbai)} | freec - + } } workflow CNVhuman { @@ -470,7 +476,7 @@ workflow CNVhuman { bamwithsample | cobalt_tn purplein=amber_tn.out.join(cobalt_tn.out) purplein.join(somaticcall_input)| - map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,amber,cobalt,vcf,vcfindex)} + map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,n1,amber,cobalt,vcf,vcfindex)} | purple } @@ -500,7 +506,7 @@ workflow CNVhuman_novc { bamwithsample | amber_tn bamwithsample | cobalt_tn purplein=amber_tn.out |join(cobalt_tn.out) - purplein | map{t1,amber,cobalt,n1 -> tuple(t1,amber,cobalt)} + purplein | map{t1,amber,cobalt,n1 -> tuple(t1,n1,amber,cobalt)} | purple_novc } diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index f6d9402..a099e4f 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -35,7 +35,7 @@ include {manta_tonly; svaba_tonly; survivor_sv; gunzip; annotsv_tonly as annotsv_manta_tonly; annotsv_tonly as annotsv_svaba_tonly; annotsv_tonly as annotsv_survivor_tonly} from '../../modules/local/structural_variant.nf' -include {freec; amber_tonly; cobalt_tonly; purple } from '../../modules/local/copynumber.nf' +include {freec; amber_tonly; cobalt_tonly; purple_tonly_novc; purple_tonly } from '../../modules/local/copynumber.nf' include {splitinterval} from '../../modules/local/splitbed.nf' @@ -266,8 +266,12 @@ workflow CNVmouse_tonly { take: bamwithsample - main: + main: + cnvcall_list = params.cnvcallers.split(',') as List + + if ("freec" in cnvcall_list){ freec(bamwithsample) + } } @@ -291,7 +295,7 @@ workflow CNVhuman_tonly { purplein=amber_tonly.out.join(cobalt_tonly.out) purplein.join(somaticcall_input)| map{t1,amber,cobalt,vc,vcf,index -> tuple(t1,amber,cobalt,vcf,index)} - | purple + | purple_tonly } } @@ -302,8 +306,8 @@ workflow CNVhuman_novc_tonly { main: if ("freec" in cnvcall_list){ - //FREEC-Unpaired only - bamwithsample | freec + //FREEC-Unpaired only + bamwithsample | freec } if ("purple" in cnvcall_list){ @@ -312,7 +316,7 @@ workflow CNVhuman_novc_tonly { bamwithsample | cobalt_tonly purplein=amber_tonly.out.join(cobalt_tonly.out) map{t1,amber,cobalt -> tuple(t1,amber,cobalt)} - | purple_novc + | purple_tonly_novc } } From a177856e499884d207fdbb7b95b9db42ba405db7 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 21 May 2024 16:51:24 -0400 Subject: [PATCH 3/7] fix: call_list typo --- subworkflows/local/workflows_tonly.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index a099e4f..fe5ad17 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -131,6 +131,8 @@ workflow VC_TONLY { bambyinterval=bamwithsample.combine(splitout.flatten()) //Common steps + call_list = params.callers.split(',') as List + vc_tonly=Channel.empty() if ("mutect2" in call_list | "varscan" in call_list){ From 29fe134e6cc4fc6ff1de7d169eddd30d12adfc0d Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 21 May 2024 20:22:16 -0400 Subject: [PATCH 4/7] fix: purple only for human --- conf/genomes.config | 13 +++++++++++-- modules/local/copynumber.nf | 18 ++++++++---------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/conf/genomes.config b/conf/genomes.config index d598da1..1c12223 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -61,10 +61,18 @@ params { vepspecies = "homo_sapiens" vepbuild = "GRCh37" annotsvgenome = "GRCh37" - octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" - octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" + octopus_sforest= "" //"--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" + octopus_gforest= "" //"--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + //PURPLE + GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GermlineHetPon.38.vcf.gz" + GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GC_profile.1000bp.38.cnp" + DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DiploidRegions.38.bed.gz' + ENSEMBLCACHE = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/ensembl_data/' + DRIVERS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DriverGenePanel.38.tsv' + HOTSPOTS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/KnownHotspots.somatic.38.vcf.gz' + } 'mm10' { @@ -101,6 +109,7 @@ params { FREECSNPS= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.txt.gz" } chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chrX','chrY','chrM'] + } } } diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index e00ae88..2a08ad2 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -14,16 +14,14 @@ if (params.genome=="mm10"){ FREECPLOT = params.freec_plot } -GERMLINEHET=file(params.genomes[params.genome].GERMLINEHET) -GCPROFILE=file(params.genomes[params.genome].GCPROFILE) -DIPLODREG=file(params.genomes[params.genome].DIPLODREG) -ENSEMBLCACHE=file(params.genomes[params.genome].ENSEMBLCACHE) -DRIVERS=file(params.genomes[params.genome].DRIVERS) -HOTSPOTS=file(params.genomes[params.genome].HOTSPOTS) - - - - +if (params.genome=="hg38" | params.genome=="hg19"){ + GERMLINEHET=file(params.genomes[params.genome].GERMLINEHET) + GCPROFILE=file(params.genomes[params.genome].GCPROFILE) + DIPLODREG=file(params.genomes[params.genome].DIPLODREG) + ENSEMBLCACHE=file(params.genomes[params.genome].ENSEMBLCACHE) + DRIVERS=file(params.genomes[params.genome].DRIVERS) + HOTSPOTS=file(params.genomes[params.genome].HOTSPOTS) +} //mm10 Paired-Sequenza, FREEC-tumor only process seqz_sequenza_bychr { From 7898dfcd57e5026a765d8a4435a922b38dc4d0ca Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 21 May 2024 20:23:04 -0400 Subject: [PATCH 5/7] fix: remove not needed text --- modules/local/copynumber.nf | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index 2a08ad2..225ca65 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -80,9 +80,6 @@ process sequenza { path("${pairid}_gc_plots.pdf"), path("${pairid}_sequenza_extract.RData") - //samtools mpileup ${tumor} -f $GENOMEREF -Q 20 |gzip > ${tumorname}.mpileup.gz - //samtools mpileup ${normal} -f $GENOMEREF -Q 20 |gzip > ${normalname}.mpileup.gz - //sequenza-utils seqz_binning --seqz --window 50 -o ${sample}_bin50.seqz.gz shell: ''' @@ -262,11 +259,7 @@ process amber_tonly { output: tuple val(tumorname), path("${tumorname}_amber") - //path("${samplename}.amber.baf.tsv.gz"), - //path("${samplename}.amber.baf.pcf"), - //path("${samplename}.amber.qc") - //path("${samplename}.amber.contamination.vcf.gz") Contamination maybe only with tumor - + script: """ @@ -299,11 +292,7 @@ process amber_tn { output: tuple val(tumorname), path("${tumorname}_vs_${normalname}_amber") - //path("${samplename}.amber.baf.tsv.gz"), - //path("${samplename}.amber.baf.pcf"), - //path("${samplename}.amber.qc") - //path("${samplename}.amber.contamination.vcf.gz") Contamination maybe only with tumor - + script: """ @@ -373,7 +362,6 @@ process cobalt_tn { script: """ - java -jar -Xmx8G /opt2/hmftools/cobalt.jar \ -tumor ${tumorname} -tumor_bam ${tumorname} \ -reference ${normalname} -reference_bam ${normal} \ From 60a359bd598bdc9504716d25b1cccd3c668a7fce Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 23 May 2024 10:05:57 -0400 Subject: [PATCH 6/7] fix: hg19 references --- conf/base.config | 2 +- conf/genomes.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/base.config b/conf/base.config index aaba800..006a35b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -59,7 +59,7 @@ process { time = { check_max( 72.h * task.attempt, 'time' ) } } withLabel:process_somaticcaller_high { - cpus = { check_max( 17 * task.attempt, 'cpus' ) } + cpus = { check_max( 18 * task.attempt, 'cpus' ) } memory = { check_max( 96.GB * task.attempt, 'memory' ) } time = { check_max( 72.h * task.attempt, 'time' ) } } diff --git a/conf/genomes.config b/conf/genomes.config index 1c12223..a0f6084 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -43,7 +43,7 @@ params { genomefai = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa.fai" bwagenome= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa" genomedict= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.dict" - intervals= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg19/hg19_noblacklistsort_vc.bed" + intervals= "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hg19_noblacklist.bed" INDELREF = "/fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" KNOWNINDELS = "-known /fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf -known /fdb/GATK_resource_bundle/b37/1000G_phase1.indels.b37.vcf" KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz' From 21133f7c7644ec5b73beb5836714342d8e7a3ea0 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 23 May 2024 14:36:39 -0400 Subject: [PATCH 7/7] fix: change hg19 reference intervals --- conf/genomes.config | 2 +- modules/local/variant_calling_tonly.nf | 3 +-- subworkflows/local/workflows.nf | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/conf/genomes.config b/conf/genomes.config index a0f6084..d489f56 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -43,7 +43,7 @@ params { genomefai = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa.fai" bwagenome= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa" genomedict= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.dict" - intervals= "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hg19_noblacklist.bed" + intervals= "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hg19_noblacklist_maincontig.bed" INDELREF = "/fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" KNOWNINDELS = "-known /fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf -known /fdb/GATK_resource_bundle/b37/1000G_phase1.indels.b37.vcf" KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz' diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 7b9f2ff..530836f 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -338,8 +338,7 @@ process octopus_tonly { """ octopus -R $GENOMEREF -C cancer -I ${tumor} \ --annotations AF AC AD DP \ - --target-working-memory 92Gb \ - -B 90Gb \ + -B 92Gb \ -t ${bed} \ --threads ${task.cpus}\ $SOMATIC_FOREST \ diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 713612c..af9d31f 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -393,7 +393,7 @@ workflow VC { | annotvep_tn_combined } - + //Implement PCGR Annotator/CivIC Next if ("octopus" in call_list){ somaticcall_input=octopus_in_sc