Skip to content

Commit 2beaaa8

Browse files
authored
Merge pull request #20 from CCBR/dev/feature-SVCNV
Dev/feature svcnv
2 parents 21d7526 + fc25c2d commit 2beaaa8

8 files changed

+173
-61
lines changed

docker/logan_base/Dockerfile

+10-6
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,10 @@ WORKDIR /opt2
1717
# This section installs system packages required for your project
1818
# If you need extra system packages add them here.
1919
# python/3.8.0 and python/2.7.16 (strelka and manta)
20-
# JDK 17 for DISCVRSeq
2120
RUN apt-get update \
2221
&& apt-get -y upgrade \
2322
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
24-
bc \
25-
openjdk-17-jdk
23+
bc
2624

2725
# Common bioinformatics tools
2826
# bwa/0.7.17-4 bowtie/1.2.3 bowtie2/2.3.5.1
@@ -54,9 +52,15 @@ RUN wget https://github.com/broadinstitute/gatk/releases/download/4.3.0.0/gatk-4
5452
&& /opt2/gatk-4.3.0.0/gatk --list
5553
ENV PATH="/opt2/gatk-4.3.0.0:$PATH"
5654

57-
# Use DISCVRSeq For CombineVariants Replacement
58-
RUN wget https://github.com/BimberLab/DISCVRSeq/releases/download/1.3.61/DISCVRSeq-1.3.61.jar
59-
ENV DISCVRSeq_JAR="/opt2/DISCVRSeq-1.3.61.jar"
55+
# Install last release of GATK3 (GATK/3.8-1)
56+
# Only being used for the CombineVariants
57+
# command that is not available in GATK4
58+
# Available via env variable: $GATK_JAR
59+
# Requires Java 8 (also known as Java 1.8)
60+
RUN wget https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \
61+
&& tar -xvjf /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \
62+
&& rm /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2
63+
ENV GATK_JAR="/opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar"
6064

6165
# Install dependencies needed to add a new repository over HTTPS
6266
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \

nextflow.config

+5-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ params {
2323
script_freecpaired = "${projectDir}/workflow/scripts/freec_paired.pl"
2424
freec_significance = "${projectDir}/workflow/scripts/assess_significance.R"
2525
freec_plot = "${projectDir}/workflow/scripts/makeGraph.R"
26-
lofreq_convert = "${projectDir}/workflow/scripts/lofreq_convert.sh"
26+
lofreq_convert = "${projectDir}/workflow/scripts/add_gt_lofreq.sh"
2727
vep_cache = "/fdb/VEP/102/cache"
2828

2929
//Biowulf
@@ -84,6 +84,10 @@ profiles {
8484
withLabel: process_somaticcaller {
8585
container = 'docker://dnousome/ccbr_logan_base:v0.3.3'
8686
}
87+
//Name Based
88+
withName:bwamem2 {
89+
container = 'docker://dnousome/ccbr_logan_base:v0.3.3'
90+
}
8791
withName:fastq_screen {
8892
container = 'docker://nciccbr/ccbr_fastq_screen_0.13.0:v2.0'
8993
}

workflow/modules/trim_align.nf

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ process bwamem2 {
6363
${GENOMEREF} \
6464
${samplename}.R1.trimmed.fastq.gz ${samplename}.R2.trimmed.fastq.gz | \
6565
samblaster -M | \
66-
samtools sort -@$task.cpus -m 4G - -o ${samplename}.bam
66+
samtools sort -@ $task.cpus -m 4G - -o ${samplename}.bam
6767
6868
samtools index -@ $task.cpus ${samplename}.bam ${samplename}.bai
6969

workflow/modules/variant_calling.nf

+58-28
Original file line numberDiff line numberDiff line change
@@ -304,10 +304,19 @@ process strelka_tn {
304304
--runDir=wd \
305305
--callRegions ${bed}.gz
306306
./wd/runWorkflow.py -m local -j $task.cpus
307-
mv wd/results/variants/somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz
308-
mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz
309-
mv wd/results/variants/somatic.snvs.vcf.gz.tbi ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi
310-
mv wd/results/variants/somatic.indels.vcf.gz.tbi ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi
307+
mv wd/results/variants/somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz
308+
mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz
309+
310+
printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname
311+
312+
bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz \
313+
| bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz
314+
bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz \
315+
| bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz
316+
317+
bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz
318+
bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz
319+
311320
"""
312321

313322
stub:
@@ -329,7 +338,7 @@ process vardict_tn {
329338

330339
output:
331340
tuple val(tumorname), val(normalname),
332-
path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf")
341+
path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz")
333342
//bcbio notes on vardict filtering: “var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and
334343
//filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))”
335344
script:
@@ -351,12 +360,18 @@ process vardict_tn {
351360
-S \
352361
-f 0.05 > ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf
353362
363+
printf "${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname
364+
365+
bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf \
366+
| bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz
367+
368+
354369
"""
355370

356371
stub:
357372

358373
"""
359-
touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf
374+
touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz
360375
361376
"""
362377

@@ -376,7 +391,7 @@ process varscan_tn {
376391

377392
output:
378393
tuple val(tumorname), val(normalname),
379-
path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf")
394+
path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz")
380395

381396
shell:
382397
'''
@@ -388,23 +403,25 @@ process varscan_tn {
388403
eval "$varscan_cmd"
389404
390405
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel \
391-
| sed '/^$/d' > !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel_temp
406+
| sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz
392407
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp \
393-
| sed '/^$/d' > !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp_temp
408+
| sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz
394409
395-
java -jar $DISCVRSeq_JAR MergeVcfsAndGenotypes \
396-
-R !{GENOMEREF} \
397-
--assumeIdenticalSamples \
398-
--filteredrecordsmergetype KEEP_UNCONDITIONAL \
399-
--variant !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp_temp \
400-
--variant!{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel_temp \
401-
-O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf
410+
gatk SortVcf -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \
411+
-I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz \
412+
-R !{GENOMEREF} -SD !{GENOMEDICT} \
413+
-O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf
414+
415+
printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname
416+
417+
bcftools reheader -s sampname !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf \
418+
| bcftools view -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.gz
402419
403420
'''
404421

405422
stub:
406423
"""
407-
touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf
424+
touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz
408425
"""
409426

410427
}
@@ -476,8 +493,15 @@ process lofreq_tn {
476493
${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \
477494
${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz
478495
479-
$LOFREQ_CONVERT ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz ${tumorname} \
480-
| bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz
496+
$LOFREQ_CONVERT -i ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz -g 1/0 \
497+
-n ${tumorname} -o ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz
498+
499+
bcftools view -h ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz >temphead
500+
501+
sed 's/^##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">/##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref\\/fwd, ref\\/rev, var\\/fwd, var\\/rev">/' temphead > temphead1
502+
bcftools reheader ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz -h temphead1 |\
503+
bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz
504+
481505
bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz
482506
483507
"""
@@ -514,7 +538,13 @@ process muse_tn {
514538
MuSE sump -I ${tumorname}_vs_${normalname}.MuSE.txt \
515539
-O ${tumorname}_vs_${normalname}.vcf -n $task.cpus -D $DBSNP -G
516540
517-
bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}.vcf.gz
541+
bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}_temp.vcf.gz
542+
543+
printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" > sampname
544+
545+
bcftools reheader -s sampname ${tumorname}_vs_${normalname}_temp.vcf.gz \
546+
| bcftools view -Oz -o ${tumorname}_vs_${normalname}.vcf.gz
547+
518548
"""
519549

520550
stub:
@@ -596,8 +626,8 @@ process combineVariants_alternative {
596626
"""
597627
mkdir ${vc}
598628
bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz
599-
bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf.gz
600-
bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz
629+
bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf
630+
bcftools sort ${sample}.${vc}.temp.vcf -Oz -o ${sample}.${vc}.marked.vcf.gz
601631
bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
602632
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
603633
sed '/^\$/d' > ${sample}.${vc}.temp.vcf
@@ -715,12 +745,12 @@ process somaticcombine {
715745
vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
716746
vcfin2="-V:" + vcfin1.join(" -V:")
717747

718-
"""
719-
java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
720-
-R $GENOMEREF \
721-
--genotypeMergeOption PRIORITIZE \
722-
--priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \
723-
--filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
748+
"""
749+
java -jar \$GATK_JAR -T CombineVariants \
750+
-nt $task.cpus \
751+
--filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \
752+
--genotypemergeoption PRIORITIZE \
753+
--rod_priority_list mutect2,strelka,muse,lofreq,vardict,varscan \
724754
-O ${tumorsample}_vs_${normal}_combined.vcf.gz \
725755
$vcfin2
726756
"""

workflow/modules/variant_calling_tonly.nf

+22-12
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ process varscan_tonly {
241241

242242
output:
243243
tuple val(tumorname),
244-
path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf")
244+
path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz")
245245

246246
shell:
247247

@@ -251,13 +251,17 @@ process varscan_tonly {
251251
varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts"
252252
253253
eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf"
254+
255+
printf "TUMOR\t!{tumorname}\n" > sampname
256+
257+
bcftools reheader -s sampname !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf \
258+
| bcftools view -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz
259+
254260
'''
255261

256262
stub:
257-
258263
"""
259-
touch ${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf
260-
264+
touch ${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz
261265
"""
262266

263267
}
@@ -270,19 +274,20 @@ process vardict_tonly {
270274

271275
output:
272276
tuple val(tumorname),
273-
path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf")
277+
path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz")
274278

275279
script:
276280

277281
"""
278282
bedtools makewindows -b ${bed} -w 50150 -s 50000 > temp_${bed}
283+
279284
VarDict -G $GENOMEREF \
280-
-f 0.05 \
285+
-f 0.01 \
281286
-x 500 \
282287
--nosv \
283288
-b ${tumor} --fisher \
284289
-t -Q 20 -c 1 -S 2 -E 3 --th $task.cpus \
285-
-R temp_${bed} | var2vcf_valid.pl \
290+
temp_${bed} | var2vcf_valid.pl \
286291
-N ${tumor} \
287292
-Q 20 \
288293
-d 10 \
@@ -291,12 +296,17 @@ process vardict_tonly {
291296
-E \
292297
-f 0.05 > ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf
293298
299+
printf "${tumor.Name}\t${tumorname}\n" > sampname
300+
301+
bcftools reheader -s sampname ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf \
302+
| bcftools view -Oz -o ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz
303+
294304
"""
295305

296306
stub:
297307

298308
"""
299-
touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf
309+
touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz
300310
301311
"""
302312

@@ -354,12 +364,12 @@ process somaticcombine_tonly {
354364
vcfin2="-V:" + vcfin1.join(" -V:")
355365

356366
"""
357-
java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
358-
-R $GENOMEREF \
367+
java -jar \$GATK_JAR -T CombineVariants \
368+
-nt $task.cpus \
359369
--genotypeMergeOption PRIORITIZE \
360-
--priority_list mutect2,octopus,vardict,varscan \
370+
--priority_list mutect2_tonly,octopus_tonly,vardict_tonly,varscan_tonly \
361371
--filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
362-
-O ${tumorsample}_combined.vcf.gz \
372+
-O ${tumorsample}_combined_tonly.vcf.gz \
363373
$vcfin2
364374
"""
365375

0 commit comments

Comments
 (0)