@@ -304,10 +304,19 @@ process strelka_tn {
304
304
--runDir=wd \
305
305
--callRegions ${ bed} .gz
306
306
./wd/runWorkflow.py -m local -j $task . cpus
307
- mv wd/results/variants/somatic.snvs.vcf.gz ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .somatic.snvs.vcf.gz
308
- mv wd/results/variants/somatic.indels.vcf.gz ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .somatic.indels.vcf.gz
309
- mv wd/results/variants/somatic.snvs.vcf.gz.tbi ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .somatic.snvs.vcf.gz.tbi
310
- mv wd/results/variants/somatic.indels.vcf.gz.tbi ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .somatic.indels.vcf.gz.tbi
307
+ mv wd/results/variants/somatic.snvs.vcf.gz ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .somatic_temp.snvs.vcf.gz
308
+ mv wd/results/variants/somatic.indels.vcf.gz ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .somatic_temp.indels.vcf.gz
309
+
310
+ printf "NORMAL\t ${ normalname} \n TUMOR\t ${ tumorname} \n " >sampname
311
+
312
+ bcftools reheader -s sampname ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .somatic_temp.snvs.vcf.gz \
313
+ | bcftools view -Oz -o ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .somatic.snvs.vcf.gz
314
+ bcftools reheader -s sampname ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .somatic_temp.indels.vcf.gz \
315
+ | bcftools view -Oz -o ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .somatic.indels.vcf.gz
316
+
317
+ bcftools index -t ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .somatic.snvs.vcf.gz
318
+ bcftools index -t ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .somatic.indels.vcf.gz
319
+
311
320
"""
312
321
313
322
stub:
@@ -329,7 +338,7 @@ process vardict_tn {
329
338
330
339
output:
331
340
tuple val(tumorname), val(normalname),
332
- path(" ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .vardict.vcf" )
341
+ path(" ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .vardict.vcf.gz " )
333
342
// bcbio notes on vardict filtering: “var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and
334
343
// filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))”
335
344
script:
@@ -351,12 +360,18 @@ process vardict_tn {
351
360
-S \
352
361
-f 0.05 > ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .vardict.vcf
353
362
363
+ printf "${ normal.Name} \t ${ normalname} \n ${ tumor.Name} \t ${ tumorname} \n " > sampname
364
+
365
+ bcftools reheader -s sampname ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .vardict.vcf \
366
+ | bcftools view -Oz -o ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .vardict.vcf.gz
367
+
368
+
354
369
"""
355
370
356
371
stub:
357
372
358
373
"""
359
- touch ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .vardict.vcf
374
+ touch ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .vardict.vcf.gz
360
375
361
376
"""
362
377
@@ -376,7 +391,7 @@ process varscan_tn {
376
391
377
392
output:
378
393
tuple val(tumorname), val(normalname),
379
- path(" ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .varscan.vcf" )
394
+ path(" ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .varscan.vcf.gz " )
380
395
381
396
shell:
382
397
'''
@@ -388,23 +403,25 @@ process varscan_tn {
388
403
eval "$varscan_cmd"
389
404
390
405
awk '{{gsub(/\\ y[W|K|Y|R|S|M]\\ y/,"N",$4); OFS = "\\ t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel \
391
- | sed '/^$/d' > !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel_temp
406
+ | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp. vcf.gz
392
407
awk '{{gsub(/\\ y[W|K|Y|R|S|M]\\ y/,"N",$4); OFS = "\\ t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp \
393
- | sed '/^$/d' > !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp_temp
408
+ | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp. vcf.gz
394
409
395
- java -jar $DISCVRSeq_JAR MergeVcfsAndGenotypes \
396
- -R !{GENOMEREF} \
397
- --assumeIdenticalSamples \
398
- --filteredrecordsmergetype KEEP_UNCONDITIONAL \
399
- --variant !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp_temp \
400
- --variant!{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel_temp \
401
- -O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf
410
+ gatk SortVcf -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \
411
+ -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz \
412
+ -R !{GENOMEREF} -SD !{GENOMEDICT} \
413
+ -O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf
414
+
415
+ printf "NORMAL\t !{normalname}\n TUMOR\t !{tumorname}\n " > sampname
416
+
417
+ bcftools reheader -s sampname !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf \
418
+ | bcftools view -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.gz
402
419
403
420
'''
404
421
405
422
stub:
406
423
"""
407
- touch ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .varscan.vcf
424
+ touch ${ tumor.simpleName} _vs_${ normal.simpleName} _${ bed.simpleName} .varscan.vcf.gz
408
425
"""
409
426
410
427
}
@@ -476,8 +493,15 @@ process lofreq_tn {
476
493
${ tumorname} _vs_${ normalname} _${ bed.simpleName} _somatic_final_minus-dbsnp.indels.vcf.gz --threads $task . cpus -Oz -o \
477
494
${ tumorname} _vs_${ normalname} _${ bed.simpleName} _temp_lofreq.vcf.gz
478
495
479
- $LOFREQ_CONVERT ${ tumorname} _vs_${ normalname} _${ bed.simpleName} _temp_lofreq.vcf.gz ${ tumorname} \
480
- | bcftools view -Oz -o ${ tumorname} _vs_${ normalname} _${ bed.simpleName} _lofreq.vcf.gz
496
+ $LOFREQ_CONVERT -i ${ tumorname} _vs_${ normalname} _${ bed.simpleName} _temp_lofreq.vcf.gz -g 1/0 \
497
+ -n ${ tumorname} -o ${ tumorname} _vs_${ normalname} _${ bed.simpleName} _temp1_lofreq.vcf.gz
498
+
499
+ bcftools view -h ${ tumorname} _vs_${ normalname} _${ bed.simpleName} _temp1_lofreq.vcf.gz >temphead
500
+
501
+ sed 's/^##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">/##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref\\ /fwd, ref\\ /rev, var\\ /fwd, var\\ /rev">/' temphead > temphead1
502
+ bcftools reheader ${ tumorname} _vs_${ normalname} _${ bed.simpleName} _temp1_lofreq.vcf.gz -h temphead1 |\
503
+ bcftools view -Oz -o ${ tumorname} _vs_${ normalname} _${ bed.simpleName} _lofreq.vcf.gz
504
+
481
505
bcftools index -t ${ tumorname} _vs_${ normalname} _${ bed.simpleName} _lofreq.vcf.gz
482
506
483
507
"""
@@ -514,7 +538,13 @@ process muse_tn {
514
538
MuSE sump -I ${ tumorname} _vs_${ normalname} .MuSE.txt \
515
539
-O ${ tumorname} _vs_${ normalname} .vcf -n $task . cpus -D $DBSNP -G
516
540
517
- bcftools view ${ tumorname} _vs_${ normalname} .vcf -Oz -o ${ tumorname} _vs_${ normalname} .vcf.gz
541
+ bcftools view ${ tumorname} _vs_${ normalname} .vcf -Oz -o ${ tumorname} _vs_${ normalname} _temp.vcf.gz
542
+
543
+ printf "NORMAL\t ${ normalname} \n TUMOR\t ${ tumorname} \n " > sampname
544
+
545
+ bcftools reheader -s sampname ${ tumorname} _vs_${ normalname} _temp.vcf.gz \
546
+ | bcftools view -Oz -o ${ tumorname} _vs_${ normalname} .vcf.gz
547
+
518
548
"""
519
549
520
550
stub:
@@ -596,8 +626,8 @@ process combineVariants_alternative {
596
626
"""
597
627
mkdir ${ vc}
598
628
bcftools concat $vcfin -a -Oz -o ${ sample} .${ vc} .temp1.vcf.gz
599
- bcftools reheader -f $GENOMEFAI ${ sample} .${ vc} .temp1.vcf.gz -o ${ sample} .${ vc} .temp.vcf.gz
600
- bcftools sort ${ sample} .${ vc} .temp.vcf.gz -Oz -o ${ sample} .${ vc} .marked.vcf.gz
629
+ bcftools reheader -f $GENOMEFAI ${ sample} .${ vc} .temp1.vcf.gz -o ${ sample} .${ vc} .temp.vcf
630
+ bcftools sort ${ sample} .${ vc} .temp.vcf -Oz -o ${ sample} .${ vc} .marked.vcf.gz
601
631
bcftools norm ${ sample} .${ vc} .marked.vcf.gz -m- --threads $task . cpus --check-ref s -f $GENOMEREF -O v |\
602
632
awk '{{gsub(/\\ y[W|K|Y|R|S|M]\\ y/,"N",\$ 4); OFS = "\\ t"; print}}' |\
603
633
sed '/^\$ /d' > ${ sample} .${ vc} .temp.vcf
@@ -715,12 +745,12 @@ process somaticcombine {
715
745
vcfin1= [callers, vcfs]. transpose(). collect { a , b -> a + " " + b }
716
746
vcfin2= " -V:" + vcfin1. join(" -V:" )
717
747
718
- """
719
- java -jar \$ DISCVRSeq_JAR MergeVcfsAndGenotypes \
720
- -R $G ENOMEREF \
721
- --genotypeMergeOption PRIORITIZE \
722
- --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \
723
- --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
748
+ """
749
+ java -jar \$ GATK_JAR -T CombineVariants \
750
+ -nt $t ask . cpus \
751
+ --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \
752
+ --genotypemergeoption PRIORITIZE \
753
+ --rod_priority_list mutect2,strelka,muse,lofreq,vardict,varscan \
724
754
-O ${ tumorsample} _vs_${ normal} _combined.vcf.gz \
725
755
$vcfin2
726
756
"""
0 commit comments