Skip to content

Commit

Permalink
Merge pull request #47 from CCBR/cnvcalllist
Browse files Browse the repository at this point in the history
CNV callers
  • Loading branch information
dnousome authored May 29, 2024
2 parents 01e6a3e + 21133f7 commit 76043b0
Show file tree
Hide file tree
Showing 9 changed files with 293 additions and 116 deletions.
2 changes: 1 addition & 1 deletion conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ process {
time = { check_max( 72.h * task.attempt, 'time' ) }
}
withLabel:process_somaticcaller_high {
cpus = { check_max( 17 * task.attempt, 'cpus' ) }
cpus = { check_max( 18 * task.attempt, 'cpus' ) }
memory = { check_max( 96.GB * task.attempt, 'memory' ) }
time = { check_max( 72.h * task.attempt, 'time' ) }
}
Expand Down
2 changes: 1 addition & 1 deletion conf/biowulf.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ singularity {
autoMounts = true
cacheDir = "/data/CCBR_Pipeliner/SIFS"
envWhitelist = 'https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH'
runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
runOptions = '-B /gs10,/gs11,/gs12,/spin1,/data/CCBR_Pipeliner/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
}

env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS"
Expand Down
27 changes: 22 additions & 5 deletions conf/genomes.config
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
params {
genomes {
'hg38' {
genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta"
genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai"
genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa"
genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa.fai"
bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta"
genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict"
wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list"
Expand All @@ -28,14 +28,22 @@ params {
octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest"
SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz"
chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
//PURPLE
GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GermlineHetPon.38.vcf.gz"
GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GC_profile.1000bp.38.cnp"
DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DiploidRegions.38.bed.gz'
ENSEMBLCACHE = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/ensembl_data/'
DRIVERS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DriverGenePanel.38.tsv'
HOTSPOTS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/KnownHotspots.somatic.38.vcf.gz'

}

'hg19' {
genome = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa"
genomefai = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa.fai"
bwagenome= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa"
genomedict= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.dict"
intervals= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg19/hg19_noblacklistsort_vc.bed"
intervals= "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hg19_noblacklist_maincontig.bed"
INDELREF = "/fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz"
KNOWNINDELS = "-known /fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf -known /fdb/GATK_resource_bundle/b37/1000G_phase1.indels.b37.vcf"
KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz'
Expand All @@ -53,10 +61,18 @@ params {
vepspecies = "homo_sapiens"
vepbuild = "GRCh37"
annotsvgenome = "GRCh37"
octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest"
octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest"
octopus_sforest= "" //"--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest"
octopus_gforest= "" //"--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest"
SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz"
chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
//PURPLE
GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GermlineHetPon.38.vcf.gz"
GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GC_profile.1000bp.38.cnp"
DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DiploidRegions.38.bed.gz'
ENSEMBLCACHE = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/ensembl_data/'
DRIVERS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DriverGenePanel.38.tsv'
HOTSPOTS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/KnownHotspots.somatic.38.vcf.gz'

}

'mm10' {
Expand Down Expand Up @@ -93,6 +109,7 @@ params {
FREECSNPS= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.txt.gz"
}
chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chrX','chrY','chrM']

}
}
}
15 changes: 7 additions & 8 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ log.info """\


include {DETERMINEBAM; INPUT; INPUT_BAM; ALIGN; GL;
VC; SV; CNVmouse; CNVhuman;
VC; SV; CNVmouse; CNVhuman; CNVhuman_novc;
QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf"

include {INPUT_TONLY; INPUT_TONLY_BAM;
ALIGN_TONLY;
VC_TONLY; SV_TONLY; CNVhuman_tonly; CNVmouse_tonly; QC_TONLY } from "./subworkflows/local/workflows_tonly.nf"
VC_TONLY; SV_TONLY; CNVmouse_tonly; CNVhuman_tonly; CNVhuman_novc_tonly;
QC_TONLY } from "./subworkflows/local/workflows_tonly.nf"


workflow.onComplete {
Expand Down Expand Up @@ -56,8 +57,7 @@ workflow {
CNVmouse(ALIGN.out.bamwithsample)
} else if (params.genome== "hg38"){
if (!params.vc){
VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet)
CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input)
CNVhuman_novc(ALIGN.out.bamwithsample)
} else {
CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input)
}
Expand All @@ -73,7 +73,7 @@ workflow {

//TUMOR-NORMAL BAM INPUT
if ([params.bam_input,params.bam_file_input].any() && params.sample_sheet){
println "Tumor-Normal with BAMs"
println "Tumor-Normal BAM"
INPUT_BAM()
if (params.vc){
VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
Expand All @@ -84,10 +84,9 @@ workflow {
if (params.cnv){
if (params.genome == "mm10"){
CNVmouse(INPUT_BAM.out.bamwithsample)
} else if (params.genome== "hg38"){
} else if (params.genome == "hg38"){
if (!params.vc){
VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input)
CNVhuman_novc(INPUT_BAM.out.bamwithsample)
}else {
CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input)
}
Expand Down
159 changes: 124 additions & 35 deletions modules/local/copynumber.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,14 @@ if (params.genome=="mm10"){
FREECPLOT = params.freec_plot
}

GERMLINEHET="/data/SCLC-BRAINMETS/cn/copy_number/GermlineHetPon.38.vcf.gz"
GCPROFILE='/data/SCLC-BRAINMETS/cn/copy_number/GC_profile.1000bp.38.cnp'
DIPLODREG='/data/SCLC-BRAINMETS/cn/copy_number/DiploidRegions.38.bed.gz'
ENSEMBLCACHE='/data/SCLC-BRAINMETS/cn/common/ensembl_data'
DRIVERS='/data/SCLC-BRAINMETS/cn/common/DriverGenePanel.38.tsv'
HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz'

//ascatR=

// Reference resources for the human copy-number processes in this module,
// resolved from the per-genome entries in conf/genomes.config
// (params.genomes[<genome>]). They are only assigned for hg38 and hg19;
// for any other genome these names are left unset by this block.
//
// `||` (short-circuit logical OR) is used instead of the non-short-circuit
// `|`; the result is the same for these two boolean comparisons.
//
// NOTE(review): the hg19 entry in conf/genomes.config currently points at the
// hg38 PURPLE files (".../resources/hg38/PURPLE/*.38.*"), and the purple*
// processes pass "-ref_genome_version 38" -- confirm before running hg19.
if (params.genome == "hg38" || params.genome == "hg19") {
    GERMLINEHET  = file(params.genomes[params.genome].GERMLINEHET)
    GCPROFILE    = file(params.genomes[params.genome].GCPROFILE)
    DIPLODREG    = file(params.genomes[params.genome].DIPLODREG)
    ENSEMBLCACHE = file(params.genomes[params.genome].ENSEMBLCACHE)
    DRIVERS      = file(params.genomes[params.genome].DRIVERS)
    HOTSPOTS     = file(params.genomes[params.genome].HOTSPOTS)
}

//mm10 Paired-Sequenza, FREEC-tumor only
process seqz_sequenza_bychr {
Expand Down Expand Up @@ -81,9 +80,6 @@ process sequenza {
path("${pairid}_gc_plots.pdf"),
path("${pairid}_sequenza_extract.RData")

//samtools mpileup ${tumor} -f $GENOMEREF -Q 20 |gzip > ${tumorname}.mpileup.gz
//samtools mpileup ${normal} -f $GENOMEREF -Q 20 |gzip > ${normalname}.mpileup.gz
//sequenza-utils seqz_binning --seqz --window 50 -o ${sample}_bin50.seqz.gz

shell:
'''
Expand Down Expand Up @@ -263,11 +259,7 @@ process amber_tonly {

output:
tuple val(tumorname), path("${tumorname}_amber")
//path("${samplename}.amber.baf.tsv.gz"),
//path("${samplename}.amber.baf.pcf"),
//path("${samplename}.amber.qc")
//path("${samplename}.amber.contamination.vcf.gz") Contamination maybe only with tumor


script:

"""
Expand Down Expand Up @@ -300,11 +292,7 @@ process amber_tn {

output:
tuple val(tumorname), path("${tumorname}_vs_${normalname}_amber")
//path("${samplename}.amber.baf.tsv.gz"),
//path("${samplename}.amber.baf.pcf"),
//path("${samplename}.amber.qc")
//path("${samplename}.amber.contamination.vcf.gz") Contamination maybe only with tumor


script:

"""
Expand Down Expand Up @@ -337,9 +325,6 @@ process cobalt_tonly {

output:
tuple val(tumorname), path("${tumorname}_cobalt")
//path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"),
//path("${samplename}/${samplename}.cobalt.ratio.pcf"),
//path("${samplename}/${samplename}.cobalt.gc.median.tsv")

script:

Expand Down Expand Up @@ -373,20 +358,15 @@ process cobalt_tn {

output:
tuple val(tumorname), path("${tumorname}_vs_${normalname}_cobalt")
//path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"),
//path("${samplename}/${samplename}.cobalt.ratio.pcf"),
//path("${samplename}/${samplename}.cobalt.gc.median.tsv")

script:

"""
java -jar -Xmx8G /opt2/hmftools/cobalt.jar \
-tumor ${tumorname} -tumor_bam ${tumorname} \
-reference ${normalname} -reference_bam ${normal} \
-output_dir ${tumorname}_vs_${normalname}_cobalt \
-threads $task.cpus \
-tumor_only_diploid_bed $DIPLODREG \
-gc_profile $GCPROFILE
"""
Expand All @@ -405,11 +385,85 @@ process purple {
label 'process_medium'

input:
tuple val(tumorname),
path(cobaltin),
path(amberin),
path(somaticvcf),
path(somaticvcfindex)
tuple val(tumorname), val(normalname),
path(cobaltin), path(amberin),
path(somaticvcf), path(somaticvcfindex)

output:
tuple val(tumorname), path("${tumorname}")

script:

"""
java -jar /opt2/hmftools/purple.jar \
-tumor ${tumorname} \
-reference ${normalname} \
-amber ${amberin} \
-cobalt ${cobaltin} \
-gc_profile $GCPROFILE \
-ref_genome_version 38 \
-ref_genome $GENOME \
-ensembl_data_dir $ENSEMBLCACHE \
-somatic_vcf ${somaticvcf} \
-driver_gene_panel $DRIVERS \
-somatic_hotspots $HOTSPOTS \
-output_dir ${tumorname}
"""

stub:

"""
mkdir ${tumorname}
touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv
"""

}


// purple_novc: tumor/normal PURPLE run without somatic variant calls.
// Invokes /opt2/hmftools/purple.jar with the AMBER and COBALT result
// directories for a tumor/normal pair. Unlike the `purple` process, no
// -somatic_vcf, -driver_gene_panel or -somatic_hotspots options are passed.
// GCPROFILE and ENSEMBLCACHE are the module-level resources resolved from
// params.genomes; GENOME is not defined in this block -- presumably set
// elsewhere in this file (TODO confirm).
process purple_novc {
container = "${params.containers.logan}"
label 'process_medium'

// One tuple per pair: tumor sample name, normal sample name, then two staged
// paths that are passed to -cobalt and -amber respectively.
input:
tuple val(tumorname), val(normalname),
path(cobaltin), path(amberin)

// Emits the PURPLE output directory, which is named after the tumor sample.
output:
tuple val(tumorname), path("${tumorname}")

script:

// -ref_genome_version is hard-coded to 38 regardless of params.genome.
"""
java -jar /opt2/hmftools/purple.jar \
-tumor ${tumorname} \
-reference ${normalname} \
-amber ${amberin} \
-cobalt ${cobaltin} \
-gc_profile $GCPROFILE \
-ref_genome_version 38 \
-ref_genome $GENOME \
-ensembl_data_dir $ENSEMBLCACHE \
-output_dir ${tumorname}
"""

stub:

// Stub: creates the output directory and three empty placeholder TSV files.
"""
mkdir ${tumorname}
touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv
"""

}


process purple_tonly {
container = "${params.containers.logan}"
label 'process_medium'

input:
tuple val(tumorname),
path(cobaltin), path(amberin),
path(somaticvcf), path(somaticvcfindex)

output:
tuple val(tumorname), path("${tumorname}")
Expand Down Expand Up @@ -440,6 +494,41 @@ process purple {

}


// purple_tonly_novc: tumor-only PURPLE run without somatic variant calls.
// Invokes /opt2/hmftools/purple.jar with the AMBER and COBALT result
// directories for a single tumor sample. No -reference, -somatic_vcf,
// -driver_gene_panel or -somatic_hotspots options are passed.
// GCPROFILE and ENSEMBLCACHE are the module-level resources resolved from
// params.genomes; GENOME is not defined in this block -- presumably set
// elsewhere in this file (TODO confirm).
process purple_tonly_novc {
container = "${params.containers.logan}"
label 'process_medium'

// NOTE(review): `normalname` is declared in the input tuple but is not used in
// either the script or the stub below. Confirm that the upstream channel
// really emits a 4-element tuple for tumor-only runs; `purple_tonly` declares
// no normal name in its input.
input:
tuple val(tumorname), val(normalname),
path(cobaltin), path(amberin)

// Emits the PURPLE output directory, which is named after the tumor sample.
output:
tuple val(tumorname), path("${tumorname}")

script:

// -ref_genome_version is hard-coded to 38 regardless of params.genome.
"""
java -jar /opt2/hmftools/purple.jar \
-tumor ${tumorname} \
-amber ${amberin} \
-cobalt ${cobaltin} \
-gc_profile $GCPROFILE \
-ref_genome_version 38 \
-ref_genome $GENOME \
-ensembl_data_dir $ENSEMBLCACHE \
-output_dir ${tumorname}
"""

stub:

// Stub: creates the output directory and three empty placeholder TSV files.
"""
mkdir ${tumorname}
touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv
"""

}

/*
process ascat_tn {
module=["java/12.0.1","R/3.6.3"]
Expand Down
3 changes: 1 addition & 2 deletions modules/local/variant_calling_tonly.nf
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,7 @@ process octopus_tonly {
"""
octopus -R $GENOMEREF -C cancer -I ${tumor} \
--annotations AF AC AD DP \
--target-working-memory 92Gb \
-B 90Gb \
-B 92Gb \
-t ${bed} \
--threads ${task.cpus}\
$SOMATIC_FOREST \
Expand Down
5 changes: 4 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ params {
bam_input=null
BAMINPUT=null

callers = "mutect2,octopus,vardict,varscan"
cnvcallers= "purple,sequenza,freec"

publish_dir_mode = 'symlink'
outdir = 'results'

Expand All @@ -74,7 +77,7 @@ profiles {
autoMounts = true
cacheDir = "$PWD/singularity"
envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID'
runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
runOptions = '-B /gs10,/gs11,/gs12,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
}
biowulf {
includeConfig 'conf/biowulf.config'
Expand Down
Loading

0 comments on commit 76043b0

Please sign in to comment.