diff --git a/Makefile b/Makefile
index ad57ecbe..b8554f01 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@ MAKELOG = log/$(@).$(NOW).log
 
 USE_CLUSTER ?= true
 QMAKE = modules/scripts/qmake.pl -n $@.$(NOW) $(if $(SLACK_CHANNEL),-c $(SLACK_CHANNEL)) -r $(NUM_ATTEMPTS) -m -s -- make
-NUM_JOBS ?= 50
+NUM_JOBS ?= 100
 
 define RUN_QMAKE
 $(QMAKE) -e -f $1 -j $2 $(TARGET) && \
@@ -29,11 +29,11 @@ RUN_MAKE = $(if $(findstring false,$(USE_CLUSTER))$(findstring n,$(MAKEFLAGS)),+
 #==================================================
 
 TARGETS += somatic_indels
-somatic_indels:
+somatic_indels :
 	$(call RUN_MAKE,modules/variant_callers/somatic/somaticIndels.mk)
 	
 TARGETS += somatic_variants
-somatic_variants:
+somatic_variants :
 	$(call RUN_MAKE,modules/variant_callers/somatic/somaticVariants.mk)
 	
 
@@ -69,11 +69,11 @@ tophat :
 	$(call RUN_MAKE,modules/aligners/tophatAligner.mk)
 
 TARGETS += star
-star:
+star :
 	$(call RUN_MAKE,modules/aligners/starAligner.mk)
 
 TARGETS += star_fusion_aligner
-star_fusion_aligner:
+star_fusion_aligner :
 	$(call RUN_MAKE,modules/aligners/starFusionAligner.mk)
 	
 TARGETS += blast_reads
@@ -102,11 +102,11 @@ snvmix :
 	$(call RUN_MAKE,modules/variant_callers/snvmix.mk)
 	
 TARGETS += tvcTN
-tvcTN:
+tvcTN :
 	$(call RUN_MAKE,modules/variant_callers/somatic/tvcTN.mk)
 
 TARGETS += tvc
-tvc:
+tvc :
 	$(call RUN_MAKE,modules/variant_callers/tvc.mk)
 
 TARGETS += varscanTN
@@ -150,13 +150,17 @@ samtools_het :
 	$(call RUN_MAKE,modules/variant_callers/samtoolsHet.mk)
 
 TARGETS += platypus
-platypus:
+platypus :
 	$(call RUN_MAKE,modules/variant_callers/somatic/platypus.mk)
 	
 TARGETS += msisensor
 msisensor :
 	$(call RUN_MAKE,modules/variant_callers/somatic/msisensor.mk)	
 
+TARGETS += mimsi
+mimsi :
+	$(call RUN_MAKE,modules/variant_callers/somatic/mimsi.mk)
+
 TARGETS += hla_polysolver
 hla_polysolver :
 	$(call RUN_MAKE,modules/variant_callers/somatic/polysolver.mk)
@@ -170,17 +174,9 @@ museqTN :
 	$(call RUN_MAKE,modules/variant_callers/somatic/museqTN.mk)
 	
 TARGETS += hotspot
-hotspot: 
+hotspot : 
 	$(call RUN_MAKE,modules/variant_callers/hotspot.mk)
 	
-TARGETS += genotype_hotspot
-genotype_hotspot:
-	$(call RUN_MAKE,modules/variant_callers/genotypehotspots.mk)
-	
-TARGETS += genotype_pdx
-genotype_pdx:
-	$(call RUN_MAKE,modules/variant_callers/genotypepdx.mk)
-	
 TARGETS += jsm
 jsm :
 	$(call RUN_MAKE,modules/variant_callers/somatic/jsm.mk)
@@ -188,10 +184,18 @@ jsm :
 TARGETS += sufam
 sufam:
 	$(call RUN_MAKE,modules/variant_callers/sufamsampleset.mk)
+
+TARGETS += sufam_gt
+sufam_gt :
+	$(call RUN_MAKE,modules/variant_callers/sufam_gt.mk)
+
+TARGETS += get_basecount
+get_basecount :
+	$(call RUN_MAKE,modules/variant_callers/get_basecounts.mk)
 	
-TARGETS += sufam_summary
-sufam_summary:
-	$(call RUN_MAKE,modules/variant_callers/sufammultisample.mk)
+TARGETS += strelka_varscan_indels
+strelka_varscan_indels :
+	$(call RUN_MAKE,modules/variant_callers/somatic/strelkaVarscanIndels.mk)
 
 
 #==================================================
@@ -201,7 +205,11 @@ sufam_summary:
 TARGETS += facets
 facets :
 	$(call RUN_MAKE,modules/copy_number/facets.mk)
-	
+
+TARGETS += facets_suite
+facets_suite :
+	$(call RUN_MAKE,modules/copy_number/facets_suite.mk)
+
 TARGETS += ascat
 ascat :
 	$(call RUN_MAKE,modules/copy_number/ascat.mk)
@@ -214,10 +222,6 @@ TARGETS += titan
 titan :
 	$(call RUN_MAKE,modules/copy_number/titan.mk)
 
-TARGETS += strelka_varscan_indels
-strelka_varscan_indels:
-	$(call RUN_MAKE,modules/variant_callers/somatic/strelkaVarscanIndels.mk)
-
 TARGETS += varscan_cnv
 varscan_cnv :
 	$(call RUN_MAKE,modules/copy_number/varscanCNV.mk)
@@ -256,75 +260,15 @@ snp6 :
 	
 TARGETS += cnv_kit
 cnv_kit :
-	$(call RUN_MAKE,modules/test/workflows/cnvkit.mk)
-
-TARGETS += cnvkit_coverage
-cnvkit_coverage :
-	$(call RUN_MAKE,modules/copy_number/cnvkitcoverage.mk)
-	
-TARGETS += cnvkit_reference
-cnvkit_reference :
-	$(call RUN_MAKE,modules/copy_number/cnvkitreference.mk)
-	
-TARGETS += cnvkit_fix
-cnvkit_fix :
-	$(call RUN_MAKE,modules/copy_number/cnvkitfix.mk)
-
-TARGETS += cnvkit_plot
-cnvkit_plot :
-	$(call RUN_MAKE,modules/copy_number/cnvkitplot.mk)
-	
-TARGETS += cnvkit_heatmap
-cnvkit_heatmap :
-	$(call RUN_MAKE,modules/copy_number/cnvkitheatmap.mk)
-	
-TARGETS += cnvkit_pca
-cnvkit_pca :
-	$(call RUN_MAKE,modules/copy_number/cnvkitprcomp.mk)
-	
-TARGETS += cnvkit_qc
-cnvkit_qc :
-	$(call RUN_MAKE,modules/copy_number/cnvkitqc.mk)
-	
-TARGETS += qdna_seq
-qdna_seq :
-	$(call RUN_MAKE,modules/test/workflows/qdnaseq.mk)
-	
-TARGETS += qdnaseq_extract_test
-qdnaseq_extract_test:
-	$(call RUN_MAKE,modules/test/copy_number/qdnaseqextract.mk)
-	
-TARGETS += qdnaseq_copynumber_test
-qdnaseq_copynumber_test:
-	$(call RUN_MAKE,modules/test/copy_number/qdnaseqcopynumber.mk)
-	
-TARGETS += copynumber_summary
-copynumber_summary:
-	$(call RUN_MAKE,modules/test/workflows/copynumber_summary.mk)
-	
-TARGETS += genome_altered
-genome_altered :
-	$(call RUN_MAKE,modules/copy_number/genomealtered.mk)
-	
-TARGETS += lst_score
-lst_score :
-	$(call RUN_MAKE,modules/copy_number/lstscore.mk)
-	
-TARGETS += ntai_score
-ntai_score :
-	$(call RUN_MAKE,modules/copy_number/ntaiscore.mk)
-	
-TARGETS += myriad_score
-myriad_score :
-	$(call RUN_MAKE,modules/copy_number/myriadhrdscore.mk)
+	$(call RUN_MAKE,modules/copy_number/cnvkit.mk)
 
 
 #==================================================
-# structural variant callers
+# RNAseq structural variant callers
 #==================================================
 
 TARGETS += star_fusion
-star_fusion:
+star_fusion :
 	$(call RUN_MAKE,modules/sv_callers/starFusion.mk)
 
 TARGETS += tophat_fusion
@@ -334,23 +278,52 @@ tophat_fusion :
 TARGETS += manta_rnaseq
 manta_rnaseq :
 	$(call RUN_MAKE,modules/sv_callers/mantaRnaseq.mk)
+	
+TARGETS += integrate_rnaseq
+integrate_rnaseq :
+	$(call RUN_MAKE,modules/sv_callers/integrateRnaseq.mk)
 
+TARGETS += soapfuse
+soapfuse :
+	$(call RUN_MAKE,modules/sv_callers/soapFuse.mk)
+
+TARGETS += mapsplice
+mapsplice :
+	$(call RUN_MAKE,modules/sv_callers/mapsplice.mk)
+
+TARGETS += fusioncatcher
+fusioncatcher :
+	$(call RUN_MAKE,modules/sv_callers/fusioncatcher.mk)
+	
+TARGETS += oncofuse
+oncofuse :
+	$(call RUN_MAKE,modules/sv_callers/oncofuse.mk)
+
+
+#==================================================
+# DNA structural variant callers
+#==================================================	
+
+TARGETS += manta_tumor_normal
+manta_tumor_normal :
+	$(call RUN_MAKE,modules/sv_callers/manta_tumor_normal.mk)
+	
+TARGETS += svaba_tumor_normal
+svaba_tumor_normal :
+	$(call RUN_MAKE,modules/sv_callers/svaba_tumor_normal.mk)
+	
+TARGETS += gridss_tumor_normal
+gridss_tumor_normal :
+	$(call RUN_MAKE,modules/sv_callers/gridss_tumor_normal.mk)
+	
 TARGETS += manta
 manta :
 	$(call RUN_MAKE,modules/sv_callers/manta.mk)
 
-TARGETS += mantaTN
-mantaTN :
-	$(call RUN_MAKE,modules/sv_callers/mantaTN.mk)
-
 TARGETS += brass
 brass :
 	$(call RUN_MAKE,modules/sv_callers/brass.mk)
 
-TARGETS += integrate_rnaseq
-integrate_rnaseq :
-	$(call RUN_MAKE,modules/sv_callers/integrateRnaseq.mk)
-
 TARGETS += integrate
 integrate :
 	$(call RUN_MAKE,modules/sv_callers/integrate.mk)
@@ -364,10 +337,6 @@ TARGETS += chimscan
 chimscan :
 	$(call RUN_MAKE_J,modules/sv_callers/chimerascan.mk,$(NUM_CHIMSCAN_JOBS))
 
-TARGETS += oncofuse
-oncofuse :
-	$(call RUN_MAKE,modules/sv_callers/oncofuse.mk)
-
 TARGETS += lumpy
 lumpy :
 	$(call RUN_MAKE,modules/sv_callers/lumpy.mk)
@@ -380,18 +349,6 @@ TARGETS += nfuse_wgss_wtss
 nfuse_wgss_wtss :
 	$(call RUN_MAKE,modules/sv_callers/nfuseWGSSWTSS.mk)
 
-TARGETS += soapfuse
-soapfuse :
-	$(call RUN_MAKE,modules/sv_callers/soapFuse.mk)
-
-TARGETS += mapsplice
-mapsplice :
-	$(call RUN_MAKE,modules/sv_callers/mapsplice.mk)
-
-TARGETS += fusioncatcher
-fusioncatcher :
-	$(call RUN_MAKE,modules/sv_callers/fusioncatcher.mk)
-
 TARGETS += crest
 crest :
 	$(call RUN_MAKE,modules/sv_callers/crest.mk)
@@ -399,23 +356,65 @@ crest :
 TARGETS += delly
 delly :
 	$(call RUN_MAKE,modules/sv_callers/delly.mk)
-
+	
 
 #==================================================
-# pre-processing
+# BAM tools
 #==================================================
 
-TARGETS += merge_fastq
-merge_fastq : 
-	$(call RUN_MAKE,modules/fastq_tools/mergeFastq.mk)
-
 TARGETS += fix_bam
 fix_bam :
-	$(call RUN_MAKE,modules/bam_tools/fixBam.mk)
+	$(call RUN_MAKE,modules/bam_tools/fix_bam.mk)
 
 TARGETS += fix_rg
 fix_rg :
-	$(call RUN_MAKE,modules/bam_tools/fixRG.mk)
+	$(call RUN_MAKE,modules/bam_tools/fix_rg.mk)
+
+TARGETS += fix_mate
+fix_mate :
+	$(call RUN_MAKE,modules/bam_tools/fix_mate.mk)
+
+TARGETS += merge_bam
+merge_bam :
+	$(call RUN_MAKE,modules/bam_tools/merge_bam.mk)
+	
+TARGETS += process_bam
+process_bam : 
+	$(call RUN_MAKE,modules/bam_tools/processBam.mk)
+	
+TARGETS += getbam_irb_mirror
+getbam_irb_mirror : 
+	$(call RUN_MAKE,modules/bam_tools/get_bam_irb_mirror.mk)
+	
+TARGETS += getbam_data_mirror
+getbam_data_mirror : 
+	$(call RUN_MAKE,modules/bam_tools/get_bam_data_mirror.mk)
+	
+TARGETS += putbam_data_mirror
+putbam_data_mirror : 
+	$(call RUN_MAKE,modules/bam_tools/put_bam_data_mirror.mk)
+	
+
+#==================================================
+# VCF tools
+#==================================================
+
+TARGETS += merge_sv
+merge_sv : 
+	$(call RUN_MAKE,modules/vcf_tools/merge_sv.mk)
+	
+TARGETS += annotate_sv
+annotate_sv : 
+	$(call RUN_MAKE,modules/vcf_tools/annotate_sv.mk)
+	
+
+#==================================================
+# FASTQ tools
+#==================================================
+
+TARGETS += merge_fastq
+merge_fastq : 
+	$(call RUN_MAKE,modules/fastq_tools/mergeFastq.mk)
 
 TARGETS += merge_split_fastq
 merge_split_fastq :
@@ -436,27 +435,23 @@ extract_unmapped_pairs :
 TARGETS += bam_to_fasta
 bam_to_fasta :
 	$(call RUN_MAKE,modules/fastq_tools/bamtoFasta.mk)
-	
-TARGETS += process_bam
-process_bam : 
-	$(call RUN_MAKE,modules/bam_tools/processBam.mk)
-
-TARGETS += merge_bam
-merge_bam :
-	$(call RUN_MAKE,modules/bam_tools/mergeBam.mk)
 
 
 #==================================================
-# quality control
+# QC
 #==================================================
 
 TARGETS += bam_metrics
 bam_metrics :
-	$(call RUN_MAKE,modules/qc/bamMetrics.mk)
+	$(call RUN_MAKE,modules/qc/bam_metrics.mk)
 
 TARGETS += bam_interval_metrics
 bam_interval_metrics :
-	$(call RUN_MAKE,modules/qc/bamIntervalMetrics.mk)
+	$(call RUN_MAKE,modules/qc/bam_interval_metrics.mk)
+
+TARGETS += wgs_metrics
+wgs_metrics :
+	$(call RUN_MAKE,modules/qc/wgs_metrics.mk)
 
 TARGETS += rnaseq_metrics
 rnaseq_metrics :
@@ -484,44 +479,30 @@ bam_stats :
 
 
 #==================================================
-# rna sequencing
+# RNA sequencing
 #==================================================
 
-TARGETS += cufflinks
-cufflinks : 
-	$(call RUN_MAKE,modules/rnaseq/cufflinks.mk)
-
 TARGETS += sum_reads
 sum_reads :
-	$(call RUN_MAKE,modules/rnaseq/sumRNASeqReads.mk)
+	$(call RUN_MAKE,modules/rnaseq/sumreads.mk)
 
-TARGETS += exon_counts
-exon_counts :
-	$(call RUN_MAKE,modules/rnaseq/dexseq.mk)
+TARGETS += kallisto
+kallisto :
+	$(call RUN_MAKE,modules/rnaseq/kallisto.mk)
 	
-
-#==================================================
-# chip sequencing
-#==================================================
+TARGETS += immune_deconv
+immune_deconv :
+	$(call RUN_MAKE,modules/rnaseq/immunedeconv.mk)
 	
-TARGETS += macs2TN
-macs2TN:
-	$(call RUN_MAKE,modules/variant_callers/somatic/macs2TN.mk)
-
 
 #==================================================
-# ploidy
+# Ploidy / Clonality
 #==================================================
 
 TARGETS += pyloh
 pyloh :
 	$(call RUN_MAKE,modules/ploidy/pyloh.mk)
 
-
-#==================================================
-# clonality
-#==================================================
-
 TARGETS += clonehd
 clonehd :
 	$(call RUN_MAKE,modules/clonality/clonehd.mk)
@@ -530,31 +511,34 @@ TARGETS += absolute_seq
 absolute_seq :
 	$(call RUN_MAKE,modules/clonality/absoluteSeq.mk)
 	
-TARGETS += ms_pyclone
-ms_pyclone :
-	$(call RUN_MAKE,modules/test/workflows/mspyclone.mk)
-	
-TARGETS += ss_pyclone
-ss_pyclone :
-	$(call RUN_MAKE,modules/test/workflows/pyclone.mk)
+TARGETS += pyclone_13
+pyclone_13 :
+	$(call RUN_MAKE,modules/clonality/pyclone_13.mk)
 	
+TARGETS += pyclone_vi
+pyclone_vi :
+	$(call RUN_MAKE,modules/clonality/pyclone_vi.mk)
 
 #==================================================
 # mutational signatures
 #==================================================
 
-TARGETS += emu
-emu :
-	$(call RUN_MAKE,modules/signatures/emu.mk)
-	
-TARGETS += mut_sig
-mut_sig :
-	$(call RUN_MAKE,modules/signatures/mut_sig.mk)
-	
 TARGETS += deconstruct_sigs
 deconstruct_sigs :
 	$(call RUN_MAKE,modules/signatures/deconstruct_sigs.mk)
 
+TARGETS += sv_signature
+sv_signature :
+	$(call RUN_MAKE,modules/signatures/sv_signature.mk)
+	
+TARGETS += star_fish
+star_fish :
+	$(call RUN_MAKE,modules/signatures/star_fish.mk)
+	
+TARGETS += hr_detect
+hr_detect :
+	$(call RUN_MAKE,modules/signatures/hr_detect.mk)
+
 
 #==================================================
 # miscellaneous
@@ -573,29 +557,16 @@ virus_detection_bowtie2 :
 	$(call RUN_MAKE,modules/virus/virus_detection_bowtie2.mk)
 	
 TARGETS += viral_detection
-viral_detection:
+viral_detection :
 	$(call RUN_MAKE,modules/test/workflows/viral_detection.mk)
 	
 TARGETS += krona_classify
 krona_classify :
 	$(call RUN_MAKE,modules/virus/krona_classify.mk)
 	
-TARGETS += fetch_impact
-fetch_impact :
-	$(call RUN_MAKE,modules/test/workflows/fetchimpact.mk)
-
-
-#==================================================
-# phylogeny
-#==================================================
-
-TARGETS += medicc
-medicc :
-	$(call RUN_MAKE,modules/test/workflows/medicc.mk)
-	
-TARGETS += pratchet
-pratchet :
-	$(call RUN_MAKE,modules/test/workflows/pratchet.mk)
+TARGETS += medicc2
+medicc2 :
+	$(call RUN_MAKE,modules/copy_number/medicc2.mk)
 
 
 #==================================================
@@ -614,9 +585,9 @@ TARGETS += mutation_summary
 mutation_summary :
 	$(call RUN_MAKE,modules/summary/mutationsummary.mk)
 	
-TARGETS += cravat_summary
-cravat_summary :
-	$(call RUN_MAKE,modules/summary/cravat_summary.mk)
+TARGETS += delmh_summary
+delmh_summary :
+	$(call RUN_MAKE,modules/summary/delmh_summary.mk)
 
 
 #==================================================
@@ -624,24 +595,28 @@ cravat_summary :
 #==================================================
 
 TARGETS += ann_ext_vcf
-ann_ext_vcf: 
+ann_ext_vcf : 
 	$(call RUN_MAKE,modules/vcf_tools/annotateExtVcf.mk)
 
 TARGETS += ann_somatic_vcf
-ann_somatic_vcf: 
+ann_somatic_vcf : 
 	$(call RUN_MAKE,modules/vcf_tools/annotateSomaticVcf.mk)
 
 TARGETS += ann_vcf
-ann_vcf: 
+ann_vcf : 
 	$(call RUN_MAKE,modules/vcf_tools/annotateVcf.mk)
 	
-TARGETS += cravat_annotation
-cravat_annotation :
-	$(call RUN_MAKE,modules/test/workflows/cravat_annotation.mk)
-	
 TARGETS += cravat_annotate
 cravat_annotate :
 	$(call RUN_MAKE,modules/vcf_tools/cravat_annotation.mk)
+	
+TARGETS += cravat_summary
+cravat_summary :
+	$(call RUN_MAKE,modules/summary/cravat_summary.mk)
+	
+TARGETS += ann_summary_vcf
+ann_summary_vcf : 
+	$(call RUN_MAKE,modules/vcf_tools/annotateSummaryVcf.mk)
 
 
 #==================================================
@@ -649,13 +624,9 @@ cravat_annotate :
 #==================================================
 
 TARGETS += hotspot_summary
-hotspot_summary:
+hotspot_summary :
 	$(MAKE) -f modules/variant_callers/genotypehotspots.mk -j $(NUM_JOBS)
 	$(call RUN_MAKE,modules/summary/hotspotsummary.mk)
 	
-#==================================================
-# alpha testing
-#==================================================
-
-
+	
 .PHONY : $(TARGETS)
diff --git a/Makefile.inc b/Makefile.inc
index 41f26d88..2a63beec 100644
--- a/Makefile.inc
+++ b/Makefile.inc
@@ -99,7 +99,13 @@ CREATE_SEQ_DICT = $(JAVA) -Xmx$(PICARD_MEM) -jar $(PICARD_DIR)/CreateSequenceDic
 CALC_HS_METRICS = $(JAVA) -Xmx$(PICARD_MEM) -jar $(PICARD_DIR)/CalculateHsMetrics.jar $(PICARD_OPTS)
 COLLECT_MULT_METRICS = $(JAVA) -Xmx$(PICARD_MEM) -jar $(PICARD_DIR)/CollectMultipleMetrics.jar $(PICARD_OPTS)
 COLLECT_TARGETED_METRICS = $(JAVA) -Xmx$(PICARD_MEM) -jar $(PICARD_DIR)/CollectTargetedPcrMetrics.jar $(PICARD_OPTS)
-
+COLLECT_ALIGNMENT_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectAlignmentSummaryMetrics $(PICARD_OPTS)
+COLLECT_INSERT_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectInsertSizeMetrics $(PICARD_OPTS)
+COLLECT_OXOG_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectOxoGMetrics $(PICARD_OPTS)
+COLLECT_GC_BIAS = $(PICARD) -Xmx$(PICARD_MEM) CollectGcBiasMetrics $(PICARD_OPTS)
+COLLECT_WGS_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectWgsMetrics $(PICARD_OPTS)
+COLLECT_DUP_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectDuplicateMetrics $(PICARD_OPTS)
+BAM_INDEX = $(PICARD) -Xmx$(PICARD_MEM) BamIndexStats $(PICARD_OPTS)
 FIX_MATE = $(call FIX_MATE_MEM,$(PICARD_MEM))
 FIX_MATE_MEM = $(JAVA) -Xmx$(1) -jar $(PICARD_DIR)/FixMateInformation.jar $(PICARD_OPTS) TMP_DIR=$(TMPDIR)
 SAM_TO_FASTQ = $(call SAM_TO_FASTQ_MEM,$(PICARD_MEM))
@@ -120,7 +126,7 @@ SNP_EFF_MEM = $(JAVA8) -Xmx$1 -jar $(SNP_EFF_JAR)
 SNP_SIFT_MEM = $(JAVA8) -Xmx$1 -jar $(SNP_SIFT_JAR)
 SNP_SIFT = $(call SNP_SIFT_MEM,$(DEFAULT_JAVA_MEM))
 VCF_EFF_ONE_PER_LINE = $(HOME)/share/usr/snpEff-4.1/scripts/vcfEffOnePerLine.pl
-VCF_JOIN_EFF = modules/scripts/joinEff.pl
+VCF_JOIN_EFF = modules/scripts/join_eff.pl
 
 COUNT_SAMPLES = $(shell expr `sed 's/\..*//; s:.*/::' <<< $1 | grep -o "_" | wc -l` + 1)
 
diff --git a/README.md b/README.md
index df602c2f..aa552e97 100644
--- a/README.md
+++ b/README.md
@@ -1,61 +1,2 @@
-# jrflab modules
-[![Build Status](https://travis-ci.org/cBioPortal/cbioportal.svg?branch=master)](https://travis-ci.org/jrflab/modules)
+# modules
 
-## Introduction
-This is the implementation of the jrflab pipeline.
-
-## Installation
-The easiest way to download this pipeline is to clone the repository.
-
-```
-git clone https://github.com/jrflab/modules.git
-```
-
-## Dependencies
-- An instance of [anaconda](https://www.anaconda.com) or [miniconda](https://conda.io/en/latest/miniconda.html)
-- IMB's Platform Load Sharing Facility (LSF) or Oracle's Sun Grid Engine (SGE) for resource management
-
-### Following R Packages
-- [xxx](https://)
-
-## Best practices
-	
-### Conventions
-- Sample names cannot have "/" or "." in them
-- Fastq files end in ".fastq.gz"
-- Fastq files are stored in DATA_DIR (Set as Environment Variable) 
-
-### Whole genome, whole exome and targeted sequencing
-- QC
-- BWA
-- Broad Standard Practices on bwa bam  
-- Haplotype Caller, Platypus, MuTect, Strelka
-- snpEff, Annovar, SIFT, pph2, vcf2maf, VEP, OncoKB, ClinVar
-- Copy number, tumor purity using Facets
-- Contamination using 
-- HLA Typing
-	* [xxx](http://)
-
-### RNA transcriptome sequencing
-- QC
-- Tophat, STAR
-- Cufflinks (ENS and UCSC)
-- In-house Exon Expression (ENS and UCSC)
-- fusion-catcher, tophat-fusion, deFuse
-- OncoFuse actionable fusion classification
-
-### Patient:
-- Genotyping On Patient. 
-	1000g sites are evaluated for every library and then compared (all vs all)
-	If two libraries come from a patient the match should be pretty good >80%
-- Still to develop:
-	If the match is below a certain threshold, break the pipeline for patient
-
-## Detailed usage
-[wiki](https://github.com/jrflab/modules/wiki)
-
-## Known issues
-
-### Known bugs
-
-### Currently under development
diff --git a/aligners/bwamemAligner.mk b/aligners/bwamemAligner.mk
index 6cf97dc7..c1e32194 100644
--- a/aligners/bwamemAligner.mk
+++ b/aligners/bwamemAligner.mk
@@ -21,12 +21,6 @@ BWAMEM_REF_FASTA ?= $(REF_FASTA)
 BWAMEM_THREADS = 8
 BWAMEM_MEM_PER_THREAD = $(if $(findstring true,$(PDX)),4G,2G)
 
-..DUMMY := $(shell mkdir -p version; $(BWA) &> version/bwamem.txt; echo "options: $(BWA_ALN_OPTS)" >> version/bwamem.txt )
-.SECONDARY:
-.DELETE_ON_ERROR: 
-.PHONY: bwamem
-
-
 BWA_BAMS = $(foreach sample,$(SAMPLES),bam/$(sample).bam)
 
 bwamem : $(BWA_BAMS) $(addsuffix .bai,$(BWA_BAMS))
@@ -50,6 +44,12 @@ bwamem/bam/%.bwamem.bam : fastq/%.fastq.gz
 
 fastq/%.fastq.gz : fastq/%.fastq
 	$(call RUN,,"gzip -c $< > $(@) && $(RM) $<")
+	
+
+..DUMMY := $(shell mkdir -p version; $(BWA) &> version/bwamem.txt; echo "options: $(BWA_ALN_OPTS)" >> version/bwamem.txt )
+.SECONDARY:
+.DELETE_ON_ERROR: 
+.PHONY: bwamem
 
 include modules/bam_tools/processBam.mk
 include modules/fastq_tools/fastq.mk
diff --git a/aligners/tmapAligner.mk b/aligners/tmapAligner.mk
index 6e4d415e..42d1493a 100644
--- a/aligners/tmapAligner.mk
+++ b/aligners/tmapAligner.mk
@@ -5,6 +5,7 @@ include modules/aligners/align.inc
 ALIGNER := tmap
 LOGDIR := log/tmap.$(NOW)
 
+
 SAMTOOLS_SORT_MEM = 2000000000
 
 FASTQ_CHUNKS := 10
diff --git a/bam_tools/fixBam.mk b/bam_tools/fixBam.mk
deleted file mode 100644
index 28e39a7b..00000000
--- a/bam_tools/fixBam.mk
+++ /dev/null
@@ -1,72 +0,0 @@
-include modules/Makefile.inc
-include modules/genome_inc/b37.inc
-
-LOGDIR ?= log/fix_bam.$(NOW)
-PHONY += fixed_bam
-
-VPATH = fixed_bam unprocessed_bam
-PICARD_JAR = ~/share/usr/picard/bin/picard.jar
-
-fix_bam : $(foreach sample,$(SAMPLES),fixed_bam/$(sample).bam)
-
-define fix-bam
-unprocessed_bam/%.ubam : unprocessed_bam/%.bam
-	$$(call RUN,-c -n 1 -s 12G -m 18G -w 7200,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) RevertSam \
-											   I=$$(<) \
-											   O=unprocessed_bam/$$(*).ubam \
-											   SANITIZE=true \
-											   MAX_DISCARD_FRACTION=0.005 \
-											   ATTRIBUTE_TO_CLEAR=XT \
-											   ATTRIBUTE_TO_CLEAR=XN \
-											   ATTRIBUTE_TO_CLEAR=AS \
-											   ATTRIBUTE_TO_CLEAR=OC \
-											   ATTRIBUTE_TO_CLEAR=OP \
-											   SORT_ORDER=queryname \
-											   RESTORE_ORIGINAL_QUALITIES=true \
-											   REMOVE_DUPLICATE_INFORMATION=true \
-											   REMOVE_ALIGNMENT_INFORMATION=true \
-											   TMP_DIR=$(TMPDIR)")
-unprocessed_bam/%.fixed.bam : unprocessed_bam/%.ubam
-	$$(call RUN, -c -n 1 -s 12G -m 18G -w 7200,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) MergeBamAlignment \
-												R=$$(DMP_FASTA) \
-												UNMAPPED_BAM=$$(<) \
-												ALIGNED_BAM=unprocessed_bam/$$(*).bam \
-												O=unprocessed_bam/$$(*).fixed.bam \
-												CREATE_INDEX=true \
-												ADD_MATE_CIGAR=true \
-												CLIP_ADAPTERS=true \
-												CLIP_OVERLAPPING_READS=true \
-												INCLUDE_SECONDARY_ALIGNMENTS=false \
-												MAX_INSERTIONS_OR_DELETIONS=-1 \
-												TMP_DIR=$(TMPDIR)")
-unprocessed_bam/%.dedup.bam : unprocessed_bam/%.fixed.bam
-	$$(call RUN, -c -n 1 -s 12G -m 18G -w 7200,"java -Djava.io.tmpdir=$$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) MarkDuplicates \
-												I=$$(<) \
-												O=unprocessed_bam/$$(*).dedup.bam \
-												M=unprocessed_bam/$$(*).txt \
-												TMP_DIR=$$(TMPDIR)")
-fixed_bam/%.bam : unprocessed_bam/%.dedup.bam
-	$$(call RUN, -c -n 1 -s 12G -m 18G -w 7200,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) AddOrReplaceReadGroups \
-												I=$$(<) \
-												O=fixed_bam/$$(*).bam \
-												RGID=$$(*) \
-												RGLB=$$(*) \
-												RGPL=illumina \
-												RGPU=NA \
-												RGSM=$$(*) \
-												TMP_DIR=$(TMPDIR) && \
-												samtools index fixed_bam/$$(*).bam && \
-												cp fixed_bam/$$(*).bam.bai fixed_bam/$$(*).bai && \
-												rm -rf unprocessed_bam/$$(*).ubam && \
-												rm -rf unprocessed_bam/$$(*).fixed.bam && \
-												rm -rf unprocessed_bam/$$(*).dedup.bam && \
-												rm -rf unprocessed_bam/$$(*).fixed.bai && \
-												rm -rf unprocessed_bam/$$(*).dedup.bai && \
-												rm -rf unprocessed_bam/$$(*).txt")
-endef
- $(foreach sample,$(SAMPLES),\
-		$(eval $(call fix-bam,$(sample))))
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/bam_tools/fix_bam.mk b/bam_tools/fix_bam.mk
new file mode 100644
index 00000000..3e6ce446
--- /dev/null
+++ b/bam_tools/fix_bam.mk
@@ -0,0 +1,75 @@
+include modules/Makefile.inc
+include modules/genome_inc/b37.inc
+
+LOGDIR = log/fix_bam.$(NOW)
+
+PICARD_JAR = ~/share/usr/picard/bin/picard.jar
+
+fix_bam : $(foreach sample,$(SAMPLES),fixed_bam/$(sample).bam)
+
+define fix-bam
+unprocessed_bam/$1.ubam : unprocessed_bam/$1.bam
+	$$(call RUN,-c -n 1 -s 12G -m 18G -w 72:00:00,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) RevertSam \
+						       I=$$(<) \
+						       O=$$(@) \
+						       SANITIZE=true \
+						       MAX_DISCARD_FRACTION=0.005 \
+						       ATTRIBUTE_TO_CLEAR=XT \
+						       ATTRIBUTE_TO_CLEAR=XN \
+						       ATTRIBUTE_TO_CLEAR=AS \
+						       ATTRIBUTE_TO_CLEAR=OC \
+						       ATTRIBUTE_TO_CLEAR=OP \
+						       SORT_ORDER=queryname \
+						       RESTORE_ORIGINAL_QUALITIES=true \
+						       REMOVE_DUPLICATE_INFORMATION=true \
+						       REMOVE_ALIGNMENT_INFORMATION=true \
+						       TMP_DIR=$(TMPDIR)")
+
+unprocessed_bam/$1.fixed.bam : unprocessed_bam/$1.bam unprocessed_bam/$1.ubam
+	$$(call RUN, -c -n 1 -s 12G -m 18G -w 72:00:00,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) MergeBamAlignment \
+							R=$$(DMP_FASTA) \
+							ALIGNED_BAM=$$(<) \
+							UNMAPPED_BAM=$$(<<) \
+							O=$$(@) \
+							CREATE_INDEX=true \
+							ADD_MATE_CIGAR=true \
+							CLIP_ADAPTERS=true \
+							CLIP_OVERLAPPING_READS=true \
+							INCLUDE_SECONDARY_ALIGNMENTS=false \
+							MAX_INSERTIONS_OR_DELETIONS=-1 \
+							TMP_DIR=$(TMPDIR)")
+
+unprocessed_bam/$1.dedup.bam : unprocessed_bam/$1.fixed.bam
+	$$(call RUN, -c -n 1 -s 12G -m 18G -w 72:00:00,"java -Djava.io.tmpdir=$$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) MarkDuplicates \
+							I=$$(<) \
+							O=$$(@) \
+							M=unprocessed_bam/$1.txt \
+							TMP_DIR=$$(TMPDIR)")
+
+fixed_bam/$1.bam : unprocessed_bam/$1.dedup.bam
+	$$(call RUN, -c -n 1 -s 12G -m 18G -w 72:00:00,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) AddOrReplaceReadGroups \
+							I=$$(<) \
+							O=$$(@) \
+							RGID=$1 \
+							RGLB=$1 \
+							RGPL=illumina \
+							RGPU=NA \
+							RGSM=$1 \
+							TMP_DIR=$(TMPDIR) && \
+							samtools index $$(@) && \
+							cp fixed_bam/$1.bam.bai fixed_bam/$1.bai && \
+							rm -rf unprocessed_bam/$1.ubam && \
+							rm -rf unprocessed_bam/$1.fixed.bam && \
+							rm -rf unprocessed_bam/$1.dedup.bam && \
+							rm -rf unprocessed_bam/$1.fixed.bai && \
+							rm -rf unprocessed_bam/$1.dedup.bai && \
+							rm -rf unprocessed_bam/$1.txt")
+endef
+ $(foreach sample,$(SAMPLES),\
+		$(eval $(call fix-bam,$(sample))))
+
+..DUMMY := $(shell mkdir -p version; \
+             echo "picard" > version/fix_bam.txt)
+.SECONDARY: 
+.DELETE_ON_ERROR:
+.PHONY: fix_bam
diff --git a/bam_tools/fixMate.mk b/bam_tools/fix_mate.mk
similarity index 100%
rename from bam_tools/fixMate.mk
rename to bam_tools/fix_mate.mk
diff --git a/bam_tools/fixRG.mk b/bam_tools/fix_rg.mk
similarity index 91%
rename from bam_tools/fixRG.mk
rename to bam_tools/fix_rg.mk
index d957c9ad..0e7735f8 100644
--- a/bam_tools/fixRG.mk
+++ b/bam_tools/fix_rg.mk
@@ -2,13 +2,13 @@ include modules/Makefile.inc
 include modules/variant_callers/gatk.inc
 include modules/aligners/align.inc
 
-LOGDIR ?= log/fixRG.$(NOW)
+LOGDIR ?= log/fix_rg.$(NOW)
 
 BAMS = $(foreach sample,$(SAMPLES),bam/$(sample).bam)
+
 fixed_bams : $(BAMS) $(addsuffix .bai,$(BAMS))
 
 bam/%.bam : unprocessed_bam/%.rg.bam
 	$(INIT) ln -f $(<) $(@)
 
-
 include modules/bam_tools/processBam.mk
diff --git a/bam_tools/get_bam_data_mirror.mk b/bam_tools/get_bam_data_mirror.mk
new file mode 100644
index 00000000..1f616d50
--- /dev/null
+++ b/bam_tools/get_bam_data_mirror.mk
@@ -0,0 +1,46 @@
+# Fetch per-sample BAM files and their indexes from the data mirror via rsync.
+# SAMPLES, NOW, USER, PWD and RUN are not defined in this file; presumably they
+# come from modules/Makefile.inc or the environment -- confirm.
+include modules/Makefile.inc
+
+LOGDIR = log/getbam_data_mirror.$(NOW)
+
+# Aggregate target: every sample's .bam, .bam.bai and .bai under bam/.
+get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) \
+	  $(foreach sample,$(SAMPLES),bam/$(sample).bam.bai) \
+	  $(foreach sample,$(SAMPLES),bam/$(sample).bai)
+	  
+# Remote project directory name = basename of the working directory. Assigned
+# with ':=' so the shell runs once, not on every expansion in the template below.
+PROJECT_NAME := $(shell basename $(PWD))
+
+# get-bam,<sample>: explicit rules that rsync <sample>.bam, <sample>.bam.bai
+# and <sample>.bai from the mirror into bam/.
+define get-bam
+bam/$1.bam :
+	$$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \
+					  rsync -aP -e ssh $(USER)@lilac-xfer01.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam \
+					  bam/")
+					  
+bam/$1.bam.bai :
+	$$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \
+					  rsync -aP -e ssh $(USER)@lilac-xfer01.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam.bai \
+					  bam/")
+					  
+bam/$1.bai :
+	$$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \
+					  rsync -aP -e ssh $(USER)@lilac-xfer01.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bai \
+					  bam/")
+
+
+endef
+ $(foreach sample,$(SAMPLES),\
+		$(eval $(call get-bam,$(sample))))
+
+# Parse-time side effect: record the path of the transfer tool this module
+# actually invokes (rsync) under version/.
+..DUMMY := $(shell mkdir -p version; \
+             which rsync > version/getbam_data_mirror.txt)
+.SECONDARY: 
+.DELETE_ON_ERROR:
+.PHONY: get_bam
\ No newline at end of file
diff --git a/bam_tools/get_bam_irb_mirror.mk b/bam_tools/get_bam_irb_mirror.mk
new file mode 100644
index 00000000..02a78b4b
--- /dev/null
+++ b/bam_tools/get_bam_irb_mirror.mk
@@ -0,0 +1,32 @@
+include modules/Makefile.inc
+
+LOGDIR = log/getbam_irb_mirror.$(NOW)
+
+get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) \
+	  $(foreach sample,$(SAMPLES),bam/$(sample).bam.bai) \
+	  $(foreach sample,$(SAMPLES),bam/$(sample).bai)
+
+define get-bam
+bam/$1.bam :
+	$$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \
+					  scp $(USER)@juno-xfer01.mskcc.org:/juno/dmp/share/irb12_245/`echo $1 | cut -c 1-1`/`echo $1 | cut -c 2-2`/$1.bam \
+					  bam/")
+					  
+bam/$1.bam.bai : bam/$1.bam
+	$$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \
+					  $(SAMTOOLS) index $$(<)")
+					  
+bam/$1.bai : bam/$1.bam bam/$1.bam.bai
+	$$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \
+					  cp $$(<<) $$(@)")
+
+
+endef
+ $(foreach sample,$(SAMPLES),\
+		$(eval $(call get-bam,$(sample))))
+
+..DUMMY := $(shell mkdir -p version; \
+             which scp > version/getbam_irb_mirror.txt)
+.SECONDARY: 
+.DELETE_ON_ERROR:
+.PHONY: get_bam
\ No newline at end of file
diff --git a/bam_tools/mergeBam.mk b/bam_tools/merge_bam.mk
similarity index 100%
rename from bam_tools/mergeBam.mk
rename to bam_tools/merge_bam.mk
index bfaeedb8..68eda7b0 100644
--- a/bam_tools/mergeBam.mk
+++ b/bam_tools/merge_bam.mk
@@ -2,10 +2,6 @@ include modules/Makefile.inc
 
 LOGDIR = log/merge.$(NOW)
 
-.SECONDARY:
-.DELETE_ON_ERROR: 
-.PHONY : merged_bam
-
 merged_bam : $(foreach sample,$(MERGE_SAMPLES),bam/$(sample).bam bam/$(sample).bam.bai)
 
 define merged-bam
@@ -32,4 +28,8 @@ $(foreach sample,$(MERGE_SAMPLES),\
 bam/%.bam : merged_bam/%.rg.bam
 	$(INIT) ln -f $< $@
 
+.SECONDARY:
+.DELETE_ON_ERROR: 
+.PHONY : merged_bam
+
 include modules/bam_tools/processBam.mk
diff --git a/bam_tools/put_bam_data_mirror.mk b/bam_tools/put_bam_data_mirror.mk
new file mode 100644
index 00000000..8bc19bdc
--- /dev/null
+++ b/bam_tools/put_bam_data_mirror.mk
@@ -0,0 +1,37 @@
+# Upload per-sample BAM files and their indexes to the data mirror via rsync.
+# SAMPLES, NOW, USER, PWD and RUN are not defined in this file; presumably they
+# come from modules/Makefile.inc or the environment -- confirm.
+include modules/Makefile.inc
+
+LOGDIR = log/putbam_data_mirror.$(NOW)
+
+# Aggregate target: one bam/<sample>.taskcomplete marker per sample.
+put_bam : $(foreach sample,$(SAMPLES),bam/$(sample).taskcomplete)
+	  
+# Remote project directory name = basename of the working directory. Assigned
+# with ':=' so the shell runs once, not on every expansion in the template below.
+PROJECT_NAME := $(shell basename $(PWD))
+
+# put-bam,<sample>: rsync bam/<sample>.bam and bam/<sample>.bam.bai to the
+# mirror (the index is sent twice, as .bam.bai and as .bai), then write the marker.
+# NOTE(review): the recipe reads bam/<sample>.bam.bai but only bam/<sample>.bam
+# is a declared prerequisite -- confirm the index is always present.
+define put-bam
+bam/$1.taskcomplete : bam/$1.bam
+	$$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \
+					  rsync -aP -e ssh bam/$1.bam $(USER)@lilac-xfer01.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam && \
+					  rsync -aP -e ssh bam/$1.bam.bai $(USER)@lilac-xfer01.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam.bai && \
+					  rsync -aP -e ssh bam/$1.bam.bai $(USER)@lilac-xfer01.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bai && \
+					  echo 'finished!' > $$(@)")
+					  
+endef
+ $(foreach sample,$(SAMPLES),\
+		$(eval $(call put-bam,$(sample))))
+
+# Parse-time side effect: record the path of the transfer tool this module
+# actually invokes (rsync) under version/.
+..DUMMY := $(shell mkdir -p version; \
+             which rsync > version/putbam_data_mirror.txt)
+.SECONDARY: 
+.DELETE_ON_ERROR:
+.PHONY: put_bam
\ No newline at end of file
diff --git a/clonality/plotpyclone.R b/clonality/plotpyclone.R
deleted file mode 100644
index 1648e82c..00000000
--- a/clonality/plotpyclone.R
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("readr"))
-suppressPackageStartupMessages(library("dplyr"))
-suppressPackageStartupMessages(library("magrittr"))
-suppressPackageStartupMessages(library("ggplot2"))
-
-optList = list(
-			   make_option("--sample_set", default = NULL, help = "sample set name"),
-			   make_option("--normal_samples", default = NULL, help = "normal sample names"),
-			   make_option("--min_depth", default = NA, help = "minimum depth to consider")
-			   )
-			   
-parser = OptionParser(usage = "%prog [options] mutation_file", option_list = optList)
-arguments = parse_args(parser, positional_arguments = T)
-opt = arguments$options
-
-tumor_samples = unlist(strsplit(opt$sample_set, split="_", fixed=TRUE))
-normal_sample = unlist(strsplit(opt$normal_samples, split=" ", fixed=TRUE))
-normal_sample = tumor_samples[tumor_samples %in% normal_sample]
-tumor_samples = tumor_samples[!(tumor_samples %in% normal_sample)]
-min_depth = ifelse(is.na(opt$min_depth) | is.null(opt$min_depth) | opt$min_depth=="" | opt$min_depth==" ", 50, opt$min_depth)
-
-mutation_summary = read_tsv(file=paste0("sufam/", opt$sample_set, ".tsv")) %>%
-				   mutate(mutation_id = paste0(Gene_Symbol, "_", HGVSp))
-index = apply(mutation_summary[,paste0("DP_", tumor_samples)], 1, function(x) {sum(x>=min_depth)})==length(tumor_samples)
-mutation_summary = mutation_summary[index,,drop=FALSE]
-pyclone_summary = read_tsv(file=paste0("pyclone/", opt$sample_set, "/report/pyclone.tsv"), col_types = cols(.default = col_character())) %>%
-				  type_convert() %>%
-				  full_join(mutation_summary, by="mutation_id") %>%
-				  arrange(cluster_id) %>%
-				  mutate(mutation_type = ifelse(Variant_Caller=="mutect", "SNV", "Indel")) %>%
-				  mutate(nref = nchar(Ref)) %>%
-				  mutate(nalt = nchar(Alt)) %>%
-				  filter(nref<=2 & nalt<=2)
-
-df = pyclone_summary[,c("mutation_id", "cluster_id", "mutation_type"),drop=FALSE]
-for (i in 1:length(tumor_samples)) {
-	x = pyclone_summary[,tumor_samples[i]] %>%
-		.[[1]]
-	c_x = pyclone_summary %>%
-		  .[[paste0("CALL_", tumor_samples[i])]]
-	m_x = pyclone_summary %>%
-		  .[[paste0("MAF_", tumor_samples[i])]]		  
-	x[x<.025 | c_x==0 | m_x<.05] = 0
-	df = cbind(df, x)
-	colnames(df)[i+3] = tumor_samples[i]
-}
-index = apply(df[,tumor_samples], 1, function(x) {sum(x==0)})==length(tumor_samples)
-df = df[!index,,drop=FALSE]
-pyclone_summary = pyclone_summary[!index,,drop=FALSE]
-index = apply(pyclone_summary[,paste0("DP_", tumor_samples)], 1, function(x) {sum(x>=500)})>=1
-df = df[!index,,drop=FALSE]
-pyclone_summary = pyclone_summary[!index,,drop=FALSE]
-
-
-pyclone_summary[,tumor_samples] = df[,tumor_samples]
-
-
-clusters = table(pyclone_summary$cluster_id)
-if (any(clusters==1)) {
-	pyclone_summary = pyclone_summary %>%
-					  filter(!(cluster_id %in% names(clusters)[clusters==1]))
-}
-
-df = pyclone_summary[,c("mutation_id", "cluster_id", "mutation_type"),drop=FALSE]
-for (i in 1:length(tumor_samples)) {
-	x = pyclone_summary[,tumor_samples[i]] %>%
-		.[[1]]
-	df = cbind(df, x)
-	colnames(df)[i+3] = tumor_samples[i]
-}
-
-
-pdf(file=paste0("pyclone/", opt$sample_set, "/report/pyclone.pdf"), width=6.5, height=6)
-for (i in 1:(length(tumor_samples)-1)) {
-	for (j in (i+1):length(tumor_samples)) {
-		x = df[,tumor_samples[i]]
-		y = df[,tumor_samples[j]]
-		z1 = df[,"cluster_id"]
-		z2 = df[,"mutation_type"]
-		tmp.0 = data_frame(x=x, y=y, z1=factor(z1, ordered=TRUE), z2=z2)
-		plot.0 =  ggplot(tmp.0, aes(x=x, y=y, fill=z1, color=z1, shape=z2)) +
-				  geom_point(alpha = .55, size=2.5) +
-				  theme_classic() +
-				  coord_cartesian(xlim=c(0,1), ylim=c(0,1)) +
-				  theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=9), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) +
-				  labs(x=paste0("\n",tumor_samples[i],"\n"), y=paste0("\n",tumor_samples[j],"\n")) +
-				  guides(color=guide_legend(title=c("Cluster")), shape=guide_legend(title=c("Type"))) +
-				  guides(fill=FALSE)
-		print(plot.0)
-	}
-}
-dev.off()
-
-write_tsv(pyclone_summary, path=paste0("pyclone/", opt$sample_set, "/report/summary.tsv"))
diff --git a/clonality/plotpyclone.mk b/clonality/plotpyclone.mk
deleted file mode 100644
index 20abed4c..00000000
--- a/clonality/plotpyclone.mk
+++ /dev/null
@@ -1,15 +0,0 @@
-include modules/Makefile.inc
-include modules/clonality/setuppyclone.mk
-
-LOGDIR ?= log/plot_pyclone.$(NOW)
-PHONY += pyclone
-
-plot_pyclone : $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/report/pyclone.pdf)
-
-define plot-pyclone
-pyclone/%/report/pyclone.pdf : pyclone/%/report/pyclone.tsv
-	$$(call RUN,-s 4G -m 6G -w 7200,"$(RSCRIPT) modules/clonality/plotpyclone.R --sample_set $$(*) --normal_samples $(NORMAL_SAMPLES) --min_depth $(MIN_DEPTH)")
-
-endef
-$(foreach set,$(SAMPLE_SETS),\
-		$(eval $(call plot-pyclone,$(set))))
diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk
new file mode 100644
index 00000000..7df04299
--- /dev/null
+++ b/clonality/pyclone_13.mk
@@ -0,0 +1,158 @@
+include modules/Makefile.inc
+
+LOGDIR ?= log/pyclone_13.$(NOW)
+
+# Environment and options for the sufam call in the r-sufam template.
+SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev
+SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000'
+
+# MCMC settings: MCMC_ITER is passed to pyclone_13.R as --num_iter; MCMC_BURNIN
+# and MCMC_THIN are passed to PyClone build_table as --burnin and --thin.
+MCMC_ITER = 10000
+MCMC_BURNIN = 2000
+MCMC_THIN = 1
+
+# Top-level goal: every per-sample and per-sample-set file generated by the
+# templates below.
+pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) \
+	  $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) \
+	  $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/taskcomplete) \
+	  $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) \
+	  $(foreach set,$(SAMPLE_SETS), \
+	  		$(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \
+	  $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) \
+	  $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/by_clusters.txt) \
+	  $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/by_loci.txt) \
+	  $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/scatter_by_sample.pdf) \
+	  $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/heatmap_by_sample.pdf)
+
+
+# r-sufam,<tumor sample>: build pyclone_13/<sample>/<sample>.vcf from
+# summary/tsv/all.tsv with sufam_gt.R, then run sufam on that VCF and
+# bam/<sample>.bam, redirecting its output to pyclone_13/<sample>/<sample>.txt.
+# NOTE(review): $(<<) is not a GNU Make automatic variable; presumably it is
+# defined in modules/Makefile.inc as the second prerequisite -- confirm.
+define r-sufam
+pyclone_13/$1/$1.vcf : summary/tsv/all.tsv
+	$$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \
+					 $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \
+					 --option 1 \
+					 --sample_set '$(set.$1)' \
+					 --normal_sample '$(normal.$1)' \
+					 --input_file $$(<) \
+					 --output_file $$(@)")
+					 
+pyclone_13/$1/$1.txt : pyclone_13/$1/$1.vcf bam/$1.bam
+	$$(call RUN,-c -n 1 -s 2G -m 3G -v $(SUFAM_ENV),"set -o pipefail && \
+					 		 sufam \
+							 --sample_name $1 \
+							 $$(SUFAM_OPTS) \
+							 $$(REF_FASTA) \
+							 $$(<) \
+							 $$(<<) \
+							 > $$(@)")
+							 
+endef
+$(foreach sample,$(TUMOR_SAMPLES),\
+		$(eval $(call r-sufam,$(sample))))
+		
+# r-pyclone-input,<sample set>: run pyclone_13.R --option 1 (writing the
+# taskcomplete marker on success) and --option 2 (writing config.yaml). Both
+# rules depend on the sufam output of every sample in TUMOR_SAMPLES.
+define r-pyclone-input
+pyclone_13/$1/taskcomplete : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt)
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \
+							   $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \
+							   --option 1 \
+							   --sample_set $1 \
+							   --normal_sample '$(normal.$1)' && \
+							   echo 'success' > $$(@)")
+							   
+pyclone_13/$1/config.yaml : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt)
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \
+							   $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \
+							   --option 2 \
+							   --sample_set $1 \
+							   --normal_sample '$(normal.$1)' \
+							   --output_file $$(@) \
+							   --num_iter $$(MCMC_ITER)")
+							   
+endef
+$(foreach set,$(SAMPLE_SETS),\
+		$(eval $(call r-pyclone-input,$(set))))
+		
+# r-pyclone-build-mutations,<sample set>,<sample>: convert
+# pyclone_13/<set>/<sample>.tsv into <sample>.yaml with
+# PyClone build_mutations_file. The .tsv is not a declared prerequisite;
+# presumably it is written by the taskcomplete step -- confirm.
+define r-pyclone-build-mutations
+pyclone_13/$1/$2.yaml : pyclone_13/$1/taskcomplete pyclone_13/$1/config.yaml
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_13_ENV),"set -o pipefail && \
+							      PyClone build_mutations_file \
+							      --in_file pyclone_13/$1/$2.tsv \
+							      --out_file $$(@) \
+							      --prior total_copy_number")
+							   
+endef
+$(foreach set,$(SAMPLE_SETS),\
+	$(foreach sample,$(tumors.$(set)),\
+		$(eval $(call r-pyclone-build-mutations,$(set),$(sample)))))
+		
+# r-pyclone-run-analysis,<sample set>: run PyClone run_analysis on the set's
+# config.yaml, build the cluster and loci tables with PyClone build_table, and
+# plot scatter_by_sample.pdf / heatmap_by_sample.pdf with pyclone_13.R
+# (--option 3 and --option 4).
+# NOTE(review): the run_analysis recipe never names trace/alpha.tsv.bz2;
+# presumably PyClone writes it at the location set in config.yaml -- confirm.
+define r-pyclone-run-analysis
+pyclone_13/$1/trace/alpha.tsv.bz2 : $(foreach sample,$(tumors.$1),pyclone_13/$1/$(sample).yaml) pyclone_13/$1/config.yaml
+	$$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV) -w 72:00:00,"set -o pipefail && \
+									   PyClone run_analysis \
+									   --config_file pyclone_13/$1/config.yaml")
+									   
+pyclone_13/$1/summary/by_clusters.txt : pyclone_13/$1/trace/alpha.tsv.bz2 pyclone_13/$1/config.yaml
+	$$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV),"set -o pipefail && \
+							       PyClone build_table \
+							       --config_file $$(<<) \
+							       --out_file $$(@) \
+							       --table_type cluster \
+							       --burnin $$(MCMC_BURNIN) \
+							       --thin $$(MCMC_THIN)")
+							       
+pyclone_13/$1/summary/by_loci.txt : pyclone_13/$1/trace/alpha.tsv.bz2 pyclone_13/$1/config.yaml
+	$$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV),"set -o pipefail && \
+							       PyClone build_table \
+							       --config_file $$(<<) \
+							       --out_file $$(@) \
+							       --table_type loci \
+							       --burnin $$(MCMC_BURNIN) \
+							       --thin $$(MCMC_THIN)")
+							       
+pyclone_13/$1/summary/scatter_by_sample.pdf : pyclone_13/$1/summary/by_loci.txt pyclone_13/$1/summary/by_clusters.txt
+	$$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \
+							   $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \
+							   --option 3 \
+							   --sample_set '$(tumors.$1)' \
+							   --input_file $$(<) \
+							   --output_file $$(@)")
+							   
+pyclone_13/$1/summary/heatmap_by_sample.pdf : pyclone_13/$1/summary/by_loci.txt pyclone_13/$1/summary/by_clusters.txt
+	$$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \
+							   $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \
+							   --option 4 \
+							   --sample_set '$(tumors.$1)' \
+							   --input_file $$(<) \
+							   --output_file $$(@)")
+
+							   
+endef
+$(foreach set,$(SAMPLE_SETS),\
+	$(eval $(call r-pyclone-run-analysis,$(set))))
+		
+
+# Record the R version at parse time.
+..DUMMY := $(shell mkdir -p version; \
+	     R --version > version/pyclone_13.txt)
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: pyclone
diff --git a/clonality/pyclone_vi.mk b/clonality/pyclone_vi.mk
new file mode 100644
index 00000000..746908b7
--- /dev/null
+++ b/clonality/pyclone_vi.mk
@@ -0,0 +1,134 @@
+include modules/Makefile.inc
+
+LOGDIR ?= log/pyclone_vi.$(NOW)
+
+# Environment and options for the sufam call in the r-sufam template.
+SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev
+SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000'
+
+# Top-level goal: every per-sample and per-sample-set file generated by the
+# templates below, plus the cross-set summary.
+pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).vcf) \
+	  $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).txt) \
+	  $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).tsv) \
+	  $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).hd5) \
+	  $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/by_loci.txt) \
+	  $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/scatter_by_sample.pdf) \
+	  $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/heatmap_by_sample.pdf) \
+	  pyclone_vi/summary.txt
+
+
+# r-sufam,<tumor sample>: build pyclone_vi/<sample>/<sample>.vcf from
+# summary/tsv/all.tsv with sufam_gt.R, run sufam on that VCF and
+# bam/<sample>.bam into <sample>.txt, and convert the VCF to <sample>.maf
+# with $(VCF2MAF).
+# NOTE(review): nothing in this file depends on the .maf target, so it is not
+# built as part of the pyclone goal.
+# NOTE(review): $(<<) is not a GNU Make automatic variable; presumably it is
+# defined in modules/Makefile.inc as the second prerequisite -- confirm.
+define r-sufam
+pyclone_vi/$1/$1.vcf : summary/tsv/all.tsv
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(INNOVATION_ENV),"set -o pipefail && \
+							      $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \
+							      --option 1 \
+							      --sample_set '$(set.$1)' \
+							      --normal_sample '$(normal.$1)' \
+							      --input_file $$(<) \
+							      --output_file $$(@)")
+					 
+pyclone_vi/$1/$1.txt : pyclone_vi/$1/$1.vcf bam/$1.bam
+	$$(call RUN,-c -n 1 -s 2G -m 3G -v $(SUFAM_ENV),"set -o pipefail && \
+					 		 sufam \
+							 --sample_name $1 \
+							 $$(SUFAM_OPTS) \
+							 $$(REF_FASTA) \
+							 $$(<) \
+							 $$(<<) \
+							 > $$(@)")
+							 
+pyclone_vi/$1/$1.maf : pyclone_vi/$1/$1.vcf
+	$$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \
+							$$(VCF2MAF) \
+							--input-vcf $$< \
+							--tumor-id $1 \
+							--filter-vcf $$(EXAC_NONTCGA) \
+							--ref-fasta $$(REF_FASTA) \
+							--vep-path $$(VEP_PATH) \
+							--vep-data $$(VEP_DATA) \
+							--tmp-dir `mktemp -d` \
+							--output-maf $$(@)")
+
+endef
+$(foreach sample,$(TUMOR_SAMPLES),\
+		$(eval $(call r-sufam,$(sample))))
+		
+# r-pyclone,<sample set>: prepare <set>.tsv with pyclone_vi.R --option 1, fit
+# the model with pyclone-vi fit (<set>.hd5), export it with
+# pyclone-vi write-results-file (summary/by_loci.txt) and plot the scatter and
+# heatmap PDFs with pyclone_vi.R --option 2 and --option 3. The .tsv rule
+# depends on the sufam output of every sample in TUMOR_SAMPLES.
+define r-pyclone
+pyclone_vi/$1/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).txt)
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \
+							   $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \
+							   --option 1 \
+							   --sample_set $1 \
+							   --normal_sample '$(normal.$1)' \
+							   --output_file $$(@)")
+							   
+pyclone_vi/$1/$1.hd5 : pyclone_vi/$1/$1.tsv
+	$$(call RUN,-c -n 1 -s 12G -m 24G -v $(PYCLONE_ENV) -w 72:00:00,"set -o pipefail && \
+							   		 pyclone-vi fit \
+									 --in-file $$(<) \
+									 --out-file $$(@) \
+									 --num-clusters 10 \
+									 --density beta-binomial \
+									 --num-grid-points 100 \
+									 --max-iters 1000000 \
+									 --mix-weight-prior 1 \
+									 --precision 500 \
+									 --num-restarts 100")
+									 
+pyclone_vi/$1/summary/by_loci.txt : pyclone_vi/$1/$1.hd5
+	$$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \
+							   pyclone-vi write-results-file \
+							   --in-file $$(<) \
+							   --out-file $$(@)")
+							     
+pyclone_vi/$1/summary/scatter_by_sample.pdf : pyclone_vi/$1/summary/by_loci.txt
+	$$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \
+							   $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \
+							   --option 2 \
+							   --sample_set '$(tumors.$1)' \
+							   --input_file $$(<) \
+							   --output_file $$(@)")
+							   
+pyclone_vi/$1/summary/heatmap_by_sample.pdf : pyclone_vi/$1/summary/by_loci.txt
+	$$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \
+							   $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \
+							   --option 3 \
+							   --sample_set '$(tumors.$1)' \
+							   --input_file $$(<) \
+							   --output_file $$(@)")
+
+endef
+$(foreach set,$(SAMPLE_SETS),\
+		$(eval $(call r-pyclone,$(set))))
+		
+
+# Summary across all sample sets, produced by pyclone_vi.R --option 4.
+# NOTE(review): the recipe does not pass $@; presumably the script writes
+# pyclone_vi/summary.txt itself -- confirm.
+pyclone_vi/summary.txt : $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/by_loci.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \
+							    $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \
+							    --option 4 \
+							    --sample_set '$(SAMPLE_SETS)'")
+
+		
+# Record the R version at parse time.
+..DUMMY := $(shell mkdir -p version; \
+	     R --version > version/pyclone_vi.txt)
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: pyclone
diff --git a/clonality/pycloneconfig.R b/clonality/pycloneconfig.R
deleted file mode 100644
index 110c8866..00000000
--- a/clonality/pycloneconfig.R
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-optList = list(make_option("--sample_set", default = NULL, help = "sample set name"),
-			   make_option("--normal_samples", default = NULL, help = "normal sample names"))
-
-parser = OptionParser(usage = "%prog [options] mutation_file", option_list = optList)
-arguments = parse_args(parser, positional_arguments = T)
-opt = arguments$options
-
-tumor_samples = unlist(strsplit(opt$sample_set, split="_", fixed=TRUE))
-normal_sample = unlist(strsplit(opt$normal_samples, split=" ", fixed=TRUE))
-normal_sample = tumor_samples[tumor_samples %in% normal_sample]
-tumor_samples = tumor_samples[!(tumor_samples %in% normal_sample)]
-
-cat("num_iters: 10000\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = FALSE)
-cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("base_measure_params:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("  alpha: 1\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("  beta: 1\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("concentration:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("  value: 1.0\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("  prior:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("    shape: 1.0\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("    rate: 0.001\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("density: pyclone_beta_binomial\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("beta_binomial_precision_params:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("  value: 1000\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("  prior:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("    shape: 1.0\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("    rate: 0.0001\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("  proposal:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("    precision: 0.5\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat(paste0("working_dir: pyclone/",opt$sample_set, "\n"), file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("trace_dir: trace", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("init_method: connected\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-cat("samples:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-
-for (i in 1:length(tumor_samples)) {
-	if (i!=1) {
-		cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-	}
-	cat(paste0("  ", tumor_samples[i], ":\n"), file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-	cat(paste0("    mutations_file: ", tumor_samples[i], ".yaml\n"), file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-	cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-	cat("    tumour_content:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-	load(paste0("ascat/ascat/", tumor_samples[i], "_", normal_sample, ".RData"))
-	cat(paste0("      value: ", ifelse(is.na(purity), 1.0, signif(purity, 2)),"\n"), file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-	cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-	cat("    error_rate: 0.01", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-	if (i!=length(tumor_samples)) {
-		cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE)
-	}
-}
-
-for (i in 1:length(tumor_samples)) {
-	system(paste0("source ~/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate ~/share/usr/anaconda-envs/PyClone-0.13.1 && PyClone build_mutations_file --in_file pyclone/",  opt$sample_set, "/", tumor_samples[i], ".tsv --out_file pyclone/", opt$sample_set, "/", tumor_samples[i], ".yaml  --prior parental_copy_number"))
-}
diff --git a/clonality/runpyclone.mk b/clonality/runpyclone.mk
deleted file mode 100644
index 0c86ddfa..00000000
--- a/clonality/runpyclone.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/run_pyclone.$(NOW)
-PHONY += pyclone
-
-run_pyclone : $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/report/pyclone.tsv)
-
-define run-pyclone
-pyclone/%/trace/alpha.tsv.bz2 : pyclone/%/config.yaml
-	$$(call RUN,-s 4G -m 6G -w 7200,"source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate /home/${USER}/share/usr/anaconda-envs/PyClone-0.13.1 && \
-							 		 PyClone run_analysis --config_file pyclone/$$*/config.yaml --seed 0")
-
-pyclone/%/report/pyclone.tsv : pyclone/%/trace/alpha.tsv.bz2
-	$$(call RUN,-s 4G -m 6G -w 7200,"make -p pyclone/$$*/report && \
-									 source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate /home/${USER}/share/usr/anaconda-envs/PyClone-0.13.1 && \
-							 		 PyClone build_table --config_file pyclone/$$*/config.yaml --out_file pyclone/$$*/report/pyclone.tsv --max_cluster 10 --table_type old_style --burnin 5000")
-							 
-endef
-$(foreach set,$(SAMPLE_SETS),\
-		$(eval $(call run-pyclone,$(set))))
diff --git a/clonality/setuppyclone.mk b/clonality/setuppyclone.mk
deleted file mode 100644
index ebd2f8f9..00000000
--- a/clonality/setuppyclone.mk
+++ /dev/null
@@ -1,22 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/setup_pyclone.$(NOW)
-PHONY += pyclone
-
-MIN_DEPTH ?= 50
-
-setup_pyclone : $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/config.yaml)
-
-define make-input-pyclone
-pyclone/%/config.yaml : sufam/%.tsv
-	$$(call RUN, -s 4G -m 6G,"mkdir -p pyclone/$$(*) && \
-							  $(RSCRIPT) modules/clonality/tsvforpyclone.R --sample_set $$(*) --normal_samples $(NORMAL_SAMPLES) --min_depth $(MIN_DEPTH) && \
-							  $(RSCRIPT) modules/clonality/pycloneconfig.R --sample_set $$(*) --normal_samples $(NORMAL_SAMPLES)")
-
-endef
-$(foreach set,$(SAMPLE_SETS),\
-		$(eval $(call make-input-pyclone,$(set))))
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/clonality/tsvforpyclone.R b/clonality/tsvforpyclone.R
deleted file mode 100644
index e6bc7be6..00000000
--- a/clonality/tsvforpyclone.R
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("readr"))
-suppressPackageStartupMessages(library("dplyr"))
-suppressPackageStartupMessages(library("magrittr"))
-
-optList = list(
-			   make_option("--sample_set", default = NULL, help = "sample set name"),
-			   make_option("--normal_samples", default = NULL, help = "normal sample names"),
-			   make_option("--min_depth", default = NA, help = "minimum depth to consider")
-			   )
-
-parser = OptionParser(usage = "%prog [options] mutation_file", option_list = optList)
-arguments = parse_args(parser, positional_arguments = T)
-opt = arguments$options
-
-tumor_samples = unlist(strsplit(opt$sample_set, split="_", fixed=TRUE))
-normal_sample = unlist(strsplit(opt$normal_samples, split=" ", fixed=TRUE))
-normal_sample = tumor_samples[tumor_samples %in% normal_sample]
-tumor_samples = tumor_samples[!(tumor_samples %in% normal_sample)]
-min_depth = ifelse(is.na(opt$min_depth) | is.null(opt$min_depth) | opt$min_depth=="" | opt$min_depth==" ", 50, opt$min_depth)
-
-mutation_summary = read_tsv(file=paste0("sufam/", opt$sample_set, ".tsv"))
-index = apply(mutation_summary[,paste0("DP_", tumor_samples)], 1, function(x) {sum(x>=min_depth)})==length(tumor_samples)
-mutation_summary = mutation_summary[index,,drop=FALSE]
-index = apply(mutation_summary[,paste0("CALL_", tumor_samples)], 1, function(x) {sum(x==0)})==length(tumor_samples)
-mutation_summary = mutation_summary[!index,,drop=FALSE]
-
-for (i in 1:length(tumor_samples)) {
-	mutation_id = paste0(mutation_summary$Gene_Symbol, "_", mutation_summary$HGVSp)
-	fsq = mutation_summary %>%
-		  .[[paste0("MAF_", tumor_samples[i])]]
-	qt = mutation_summary %>%
-		  .[[paste0("qt_", tumor_samples[i])]]
-	q2 = mutation_summary %>%
-		  .[[paste0("q2_", tumor_samples[i])]]
-	q1 = qt - q2
-	n = mutation_summary %>%
-		.[[paste0("DP_", tumor_samples[i])]]
-	flag = mutation_summary %>%
-		   .[[paste0("CALL_", tumor_samples[i])]]
-	
-	fsq[flag==0] = 0
-	var_counts = round(fsq*n)
-	ref_counts = round((1-fsq)*n)
-	normal_cn = rep(2, length(mutation_id))
-	major_cn = q2
-	minor_cn = q1
-	sample_summary = data.frame(mutation_id, ref_counts, var_counts, normal_cn, minor_cn, major_cn)
-	write.table(sample_summary, paste0("pyclone/", opt$sample_set, "/", tumor_samples[i], ".tsv"), sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE, append=FALSE)
-}
diff --git a/config.inc b/config.inc
index d1c44fcf..38cb6d34 100644
--- a/config.inc
+++ b/config.inc
@@ -14,20 +14,39 @@ R ?= R
 MY_RSCRIPT ?= Rscript
 RSCRIPT ?= Rscript
 
-# General python 2.7 environment
-ANACONDA_27_ENV ?= $(HOME)/share/usr/anaconda-envs/anaconda-2.7
-
-# SUFAM python environment
-SUFAM_ENV ?= $(HOME)/share/usr/anaconda-envs/sufam-dev
-
+ANACONDA_27_ENV = $(HOME)/share/usr/anaconda-envs/anaconda-2.7
+SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev
 MUTSIG_REPORT_ENV = $(HOME)/share/usr/anaconda-envs/mutsig-report-0.0.1
+JRFLAB_MODULES_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.4
+ONCOTATOR_ENV = $(HOME)/share/usr/venv/oncotator-1.9.2.0
+VEP_ENV = $(HOME)/share/usr/anaconda-envs/variant-effect-predictor-86
+ASCAT_ENV = $(HOME)/share/usr/anaconda-envs/ascat
+INNOVATION_ENV = $(HOME)/share/usr/env/innovation-lab-0.0.1
+PIGZ_ENV ?= $(HOME)/share/usr/env/pigz-2.6
+KALLISTO_ENV ?= $(HOME)/share/usr/env/kallisto-0.46.2
+IMMUNE_ENV ?= $(HOME)/share/usr/env/r-immunedeconv-2.1.0
+SUMREADS_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.6
+PYCLONE_ENV = $(HOME)/share/usr/env/pyclone-vi-0.1.2
+PYCLONE_13_ENV = $(HOME)/share/usr/env/pyclone-0.13.1
+GRIDSS_ENV = $(HOME)/share/usr/env/gridss-2.13.2
+SVABA_ENV ?= $(HOME)/share/usr/env/svaba-1.1.0
+SURVIVOR_ENV ?= $(HOME)/share/usr/env/survivor-1.0.7
+ANNOTATE_SV_ENV ?= $(HOME)/share/usr/env/annot_sv-3.1.3
+VIOLA_ENV = $(HOME)/share/usr/env/viola-sv-1.0.2
+SIGNATURE_TOOLS_ENV = $(HOME)/share/usr/env/r-signature.tools.lib-2.2.0
+CNVKIT_ENV ?= $(HOME)/share/usr/env/cnvkit-0.9.9
+STARFISH_ENV ?= $(HOME)/share/usr/env/r-starfish-0.11
+MEDICC_ENV = $(HOME)/share/usr/env/medicc2-0.8.1
+VARIANT_ANNOTATION_ENV = $(HOME)/share/usr/env/r-variantannotation-1.44.0
+FACETS_SUITE_ENV = $(HOME)/share/usr/env/r-facets-suite-2.0.8
+CRAVAT_ENV = $(HOME)/share/usr/anaconda-envs/open-cravat
+POLYSOLVER_ENV = $(HOME)/share/usr/anaconda-envs/hla-polysolver
+MSISENSOR_ENV = $(HOME)/share/usr/anaconda-envs/msisensor
+MIMSI_ENV = $(HOME)/share/usr/env/mimsi-0.4.4
 
 JARDIR ?= $(HOME)/share/usr/lib/java
 
-# jrflab modules environment
-JRFLAB_MODULES_ENV ?= $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.4
-
-### Applications
+## Applications
 UNZIP ?= /usr/bin/unzip
 FASTQC ?= $(PERL) $(HOME)/share/usr/FastQC/fastqc
 MUTECT_JAR ?= $(JARDIR)/muTect-1.1.7.jar
@@ -38,31 +57,30 @@ SAMTOOLS2 ?= samtools
 VCFUTILS ?= $(HOME)/share/usr/bin/vcfutils.pl
 BCFTOOLS2 ?= bcftools
 BCFTOOLS ?= bcftools
+PIGZ ?= pigz
 BEDTOOLS ?= $(HOME)/share/usr/bin/bedtools
 BGZIP ?= $(HOME)/share/usr/bin/bgzip
 IGVTOOLS ?= $(HOME)/share/usr/IGVTools/igvtools
 VCFTOOLS ?= $(HOME)/share/usr/bin/vcftools-0.1.10
 VCF_SORT ?= $(PERL) $(HOME)/share/usr/bin/vcfsorter.pl
-
 SNP_EFF_JAR ?= $(JARDIR)/snpEff-4.3.jar
 SNP_SIFT_JAR ?= $(JARDIR)/SnpSift-4.3.jar
 SNP_EFF_CONFIG ?= modules/config/snpEff.conf
 DB_NSFP ?= $(HOME)/share/reference/snpEff-4.1/dbNSFP3.0b1a.hg19.txt.gz
 NSFP_FIELDS ?= Uniprot_acc_Polyphen2 Uniprot_id_Polyphen2 Polyphen2_HVAR_score Polyphen2_HVAR_pred 1000Gp3_AF ESP6500_AA_AF ESP6500_EA_AF MutationAssessor_pred MutationAssessor_score MutationTaster_pred MutationTaster_score PROVEAN_pred ExAC_Adj_AF clinvar_rs clinvar_clnsig Interpro_domain
-
 CUFFLINKS ?= cufflinks
 CUFFCMP ?= cuffcompare
 TOPHAT ?= tophat
 DEFUSE ?= $(PERL) $(HOME)/share/usr/defuse-0.6.1/scripts/defuse.pl
-
 ONCOFUSE_JAR ?= $(HOME)/share/usr/oncofuse-1.0.9b2/Oncofuse.jar
 VARSCAN_JAR ?= $(JARDIR)/VarScan.v2.3.9.jar
+MEDICC ?= medicc2
 
-# PICARD tools
+## PICARD tools
 PICARD_DIR ?= $(JARDIR)/picard-1.92
 PICARD_JAR ?= $(JARDIR)/picard-tools-1.141/picard.jar
 
-# scripts
+## scripts
 SCRIPTS_DIR ?= modules/scripts
 MERGE ?= $(SCRIPTS_DIR)/merge.R
 VCF_TO_TABLE ?= $(SCRIPTS_DIR)/vcfToTable.R
@@ -70,6 +88,9 @@ INTRON_POSN_LOOKUP ?= $(SCRIPTS_DIR)/posnGeneLookup.pl
 RBIND ?= $(SCRIPTS_DIR)/rbind.R
 NORMAL_FILTER ?= $(PERL) $(SCRIPTS_DIR)/normalFilterVCF.pl
 SOMATIC_FILTER_VCF ?= $(PERL) $(SCRIPTS_DIR)/somaticFilterVCF.pl
+SUM_READS_RSCRIPT = $(RSCRIPT) $(SCRIPTS_DIR)/summarize_rnaseqreads.R
+SUM_EXONS_RSCRIPT = $(RSCRIPT) $(SCRIPTS_DIR)/summarize_rnaseqreads_byexon.R
+SUM_INTRONS_RSCRIPT = $(RSCRIPT) $(SCRIPTS_DIR)/summarize_rnaseqreads_byintron.R
 
 JAVA_BIN ?=  $(JAVA8_BIN)
 JAVA6_BIN ?= $(HOME)/share/usr/jdk1.6.0_45/bin/java
@@ -78,7 +99,7 @@ JAVA8_BIN ?= $(HOME)/share/usr/jdk1.8.0_121/bin/java
 
 GET_INSERT_SIZE ?= $(HOME)/share/usr/bin/getInsertSize.py
 
-#GATK
+## GATK
 GATK_JAR ?= $(JARDIR)/GenomeAnalysisTK.jar
 GATK_JAR2 ?= $(JARDIR)/GenomeAnalysisTK-3.7.jar
 
@@ -125,9 +146,6 @@ TVC ?= $(HOME)/share/usr/bin/tvc
 
 ANNOVAR = $(PERL) $(HOME)/share/usr/annovar-2017-07-16/table_annovar.pl
 
-ONCOTATOR_ENV = $(HOME)/share/usr/venv/oncotator-1.9.2.0
-
-VEP_ENV = $(HOME)/share/usr/anaconda-envs/variant-effect-predictor-86
 VEP_PATH = $(VEP_ENV)/bin
 
 SPLIT_BED = python modules/scripts/split_bed.py
@@ -137,35 +155,8 @@ SNP_FILTER_VCF = python modules/vcf_tools/snp_filter_vcf.py
 
 MERGE_VCF = python modules/vcf_tools/merge_vcf.py
 
-ASCAT_ENV = $(HOME)/share/usr/anaconda-envs/ascat
-
-MEDICC_ENV = $(HOME)/share/usr/anaconda-envs/medicc
-MEDICC_VAR = $(MEDICC_ENV)/PROFILE
-MEDICC_BIN = $(MEDICC_ENV)/opt/medicc
-
-PHYLO_ENV = $(HOME)/share/usr/anaconda-envs/phylotools
-
-CNTILP_ENV = $(HOME)/share/usr/anaconda-envs/cnt-ilp
-CNTILP_CONF = $(CNTILP_ENV)/PROFILE
-CNTILP_BIN = $(CNTILP_ENV)/opt/CNT-ILP/build
-
-CNTMD_ENV = $(HOME)/share/usr/anaconda-envs/cnt-md
-CNTMD_CONF = $(CNTMD_ENV)/PROFILE
-CNTMD_BIN = $(CNTMD_ENV)/opt/CNT-MD/build
-
-MACHINA_ENV = $(HOME)/share/usr/anaconda-envs/machina
-MACHINA_VAR = $(MACHINA_ENV)/PROFILE
-MACHINA_BIN = $(MACHINA_ENV)/opt/machina/build
-
-HATCHET_ENV = $(HOME)/share/usr/anaconda-envs/hatchet
-HATCHET_VAR = $(HATCHET_ENV)/PROFILE
-HATCHET_BIN = $(HATCHET_ENV)/opt/machina/build
-
-DECONSTRUCTSIGS_ENV = $(HOME)/share/usr/anaconda-envs/deconstructsigs
-
-PHANGORN_ENV = $(HOME)/share/usr/anaconda-envs/phangorn
-
-FGBIO_ENV = $(HOME)/share/usr/anaconda-envs/fgbio-0.8.1
+# gbc command line
+GBC ?= $(HOME)/share/usr/GetBaseCounts/GetBaseCounts
 
 endif
 CONFIG_INC = true
diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R
index ca114d7e..35ee71cc 100644
--- a/contamination/clusterSampleVcf.R
+++ b/contamination/clusterSampleVcf.R
@@ -2,51 +2,88 @@
 
 suppressPackageStartupMessages(library("optparse"))
 suppressPackageStartupMessages(library("VariantAnnotation"))
-suppressPackageStartupMessages(library("gplots"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("ggplot2"))
+suppressPackageStartupMessages(library("ComplexHeatmap"))
+suppressPackageStartupMessages(library("RColorBrewer"))
 
-options(error = quote(dump.frames("testdump", TRUE)))
-
-optList <- list(
-                make_option("--genome", default = 'b37', help = "genome build [default %default]"),
-                make_option("--outPrefix", default = NULL, help = "output prefix [default %default]"))
-
-parser <- OptionParser(usage = "%prog vcf.files", option_list = optList);
-arguments <- parse_args(parser, positional_arguments = T);
-opt <- arguments$options;
-
-if (is.null(opt$outPrefix)) {
-    cat("Need output prefix\n");
-    print_help(parser);
-    stop();
-} else if (length(arguments$args) < 1) {
-    cat("Need vcf files\n");
-    print_help(parser);
-    stop();
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
 }
 
-vcfFile <- arguments$args[1]
+optList <- list(make_option("--input_file", default = 'snp_vcf/snps_ft.vcf', help = "input file"),
+		make_option("--output_file", default = 'snp_vcf/snps_ft.pdf', help = "output file"),
+		make_option("--sample_pairs", default = NA, type = 'character', help = "sample pairs"),
+		make_option("--genome", default = 'b37', help = "genome build"))
 
+parser <- OptionParser(usage = "%prog [options]", option_list = optList)	# every input is a named option; no positional VCF files are read
+arguments <- parse_args(parser, positional_arguments = TRUE)	# returns a list with $options and $args; TRUE (unlike T) cannot be rebound
+opt <- arguments$options
 
-vcf <- readVcf(vcfFile, opt$genome)
-gt <- geno(vcf)$GT
-ad <- geno(vcf)$AD
-af <- structure(sapply(ad, function(x) x[2] / sum(x)), dim = dim(ad))
-X <- matrix(0, nrow = nrow(gt), ncol = ncol(gt), dimnames = list(rownames(gt), colnames(gt)))
-X[is.na(af)] <- NA
-X[af > 0.15 & af < 0.95] <- 1
-X[af >= 0.95] <- 2
-X[!gt %in% c("0/0", "0/1", "1/1")] <- NA
-#plot(hclust(dist(t(X), method = 'manhattan')))
+vcf = readVcf(as.character(opt$input_file), as.character(opt$genome))
+gt = geno(vcf)$GT
+ad = geno(vcf)$AD
+af = structure(sapply(ad, function(x) x[2] / sum(x)), dim = dim(ad))
+X = matrix(0, nrow = nrow(gt), ncol = ncol(gt), dimnames = list(rownames(gt), colnames(gt)))
+X[is.na(af)] = NA
+X[af > 0.15 & af < 0.95] = 1
+X[af >= 0.95] = 2
+X[!gt %in% c("0/0", "0/1", "1/1")] = NA
 
-gt <- matrix(as.integer(factor(X)), nrow = nrow(gt), ncol = ncol(gt), dimnames = list(rownames(gt), colnames(gt)))
+gt = matrix(as.integer(factor(X)), nrow = nrow(gt), ncol = ncol(gt), dimnames = list(rownames(gt), colnames(gt)))
+dt = as.matrix(dist(t(gt)))
 
-fn <- paste(opt$outPrefix, ".clust.pdf", sep = '')
-pdf(fn, height = 9, width = 15)
-null <- plot(hclust(dist(t(gt)), method = 'ward'))
+pair_parts = strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_")	# space-separated pairs, each split into its "_"-delimited fields
+tumor_samples = unlist(lapply(pair_parts, function(x) { x[1] }))	# first field of every pair
+normal_samples = unlist(lapply(pair_parts, function(x) { x[2] }))	# second field of every pair
+sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(normal_samples)), levels = rownames(dt), ordered = TRUE),
+			     normal_samples = c(normal_samples, unique(normal_samples))) %>%
+	       dplyr::arrange(tumor_samples) %>% dplyr::mutate(normal_samples = factor(normal_samples, levels = unique(normal_samples), ordered = TRUE))	# rows sorted into rownames(dt) order
+cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pairs[["normal_samples"]])))	# one colour per distinct normal sample
+names(cluster_color) = sort(unique(sample_pairs[["normal_samples"]]))
+		      
+row_annot = rowAnnotation(
+	cluster_id = sample_pairs %>% .[["normal_samples"]],
+	col = list(cluster_id = cluster_color),
+	show_annotation_name = FALSE,
+	simple_anno_size = unit(.5, "cm"),
+	show_legend = FALSE
+)
+col_annot = columnAnnotation(
+	cluster_id = sample_pairs %>% .[["normal_samples"]],
+	col = list(cluster_id = cluster_color),
+	show_annotation_name = FALSE,
+	simple_anno_size = unit(.5, "cm"),
+	show_legend = FALSE
+)
+col_pal = c(rep("#662506", 3),
+	    rev(brewer.pal(n = 7, name = "YlOrBr")),
+	    rep("#fff7bc", 3))
+	       
+pdf(as.character(opt$output_file), height = 21, width = 22)
+draw(Heatmap(matrix = dt,
+	     name = " ",
+	     rect_gp = gpar(col = "white"),
+	     border = NA,
+	     col = col_pal,
+	     cluster_rows = TRUE,
+	     show_row_dend = TRUE,
+	     row_dend_width = unit(3, "cm"),
+	     row_names_side = "right",
+	     row_names_gp = gpar(fontsize = 12),
+	     show_row_names = TRUE,
+	     left_annotation = row_annot,
+	          
+	     show_column_names = TRUE,
+	     column_names_side = "bottom",
+	     column_names_gp = gpar(fontsize = 12),
+	     cluster_columns = TRUE,
+	     show_column_dend = TRUE,
+	     column_dend_height = unit(3, "cm"),
+	     top_annotation = col_annot,
+	     
+	     use_raster = FALSE,
+	     show_heatmap_legend = TRUE,
+	     heatmap_legend_param = list(legend_height = unit(5, "cm"), legend_width = unit(5, "cm"))))
 dev.off()
-
-fn <- paste(opt$outPrefix, ".heatmap.pdf", sep = '')
-pdf(fn, height = 30, width = 30)
-null <- heatmap.2(as.matrix(dist(t(gt))), scale = 'none', trace = 'none', keysize = 0.3, cexRow = 2, cexCol = 2, margins = c(20,20))
-dev.off()
-
diff --git a/contamination/clusterSamples.mk b/contamination/clusterSamples.mk
index d3f953d5..975898c6 100644
--- a/contamination/clusterSamples.mk
+++ b/contamination/clusterSamples.mk
@@ -1,39 +1,60 @@
-# Run unified genotyper on snp positions and cluster samples using results
-##### DEFAULTS ######
-LOGDIR = log/cluster_samples.$(NOW)
-
-##### MAKE INCLUDES #####
 include modules/Makefile.inc
 include modules/variant_callers/gatk.inc
-VPATH ?= bam
 
-.DELETE_ON_ERROR:
-.SECONDARY: 
-.PHONY : all
+LOGDIR = log/cluster_samples.$(NOW)
 
+VPATH ?= bam
 ifeq ($(EXOME),true)
 DBSNP_SUBSET ?= $(HOME)/share/reference/dbsnp_137_exome.bed
 else
 DBSNP_SUBSET = $(HOME)/share/reference/dbsnp_tseq_intersect.bed
 endif
 
-CLUSTER_VCF = $(RSCRIPT) modules/contamination/clusterSampleVcf.R
+CLUSTER_VCF = modules/contamination/clusterSampleVcf.R
 
-all : snp_vcf/snps_filtered.clust.png
+snp_cluster : $(foreach sample,$(SAMPLES),snp_vcf/$(sample).snps.vcf) \
+	      snp_vcf/snps.vcf \
+	      snp_vcf/snps_ft.vcf \
+	      snp_vcf/snps_ft.pdf
 
-#snp_vcf/snps.vcf : $(foreach sample,$(SAMPLES),bam/$(sample).bam)
-#$(call RUN,-s 4G -m 8G,"$(SAMTOOLS) mpileup -f $(REF_FASTA) -g -l <(sed '/^#/d' $(DBSNP) | cut -f 1,2) $^ | $(BCFTOOLS) view -g - > $@")
+snp_vcf/%.snps.vcf : bam/%.bam 
+	$(call RUN,-n 4 -s 2.5G -m 3G,"set -o pipefail && \
+				       $(call GATK_MEM,8G) \
+				       -T UnifiedGenotyper \
+				       -rf BadCigar \
+				       -nt 4 \
+				       -R $(REF_FASTA) \
+				       --dbsnp $(DBSNP) \
+				       $(foreach bam,$(filter %.bam,$^),-I $(bam) ) \
+				       -L $(DBSNP_SUBSET) \
+				       -o $@ \
+				       --output_mode EMIT_ALL_SITES")
 
-snp_vcf/snps.vcf : $(foreach sample,$(SAMPLES),snp_vcf/$(sample).snps.vcf)
-	$(call RUN,-s 16G -m 20G,"$(call GATK_MEM,14G) -T CombineVariants $(foreach vcf,$^,--variant $(vcf) ) -o $@ --genotypemergeoption UNSORTED -R $(REF_FASTA)")
 
-snp_vcf/snps_filtered.vcf : snp_vcf/snps.vcf
+snp_vcf/snps.vcf : $(foreach sample,$(SAMPLES),snp_vcf/$(sample).snps.vcf)
+	$(call RUN,-s 16G -m 20G,"set -o pipefail && \
+				  $(call GATK_MEM,14G) -T CombineVariants \
+				  $(foreach vcf,$^,--variant $(vcf) ) \
+				  -o $@ \
+				  --genotypemergeoption UNSORTED \
+				  -R $(REF_FASTA)")
+
+snp_vcf/snps_ft.vcf : snp_vcf/snps.vcf
 	$(INIT) grep '^#' $< > $@ && grep -e '0/1' -e '1/1' $< >> $@
 
-snp_vcf/%.snps.vcf : bam/%.bam 
-	$(call RUN,-n 4 -s 2.5G -m 3G,"$(call GATK_MEM,8G) -T UnifiedGenotyper -nt 4 -R $(REF_FASTA) --dbsnp $(DBSNP) $(foreach bam,$(filter %.bam,$^),-I $(bam) ) -L $(DBSNP_SUBSET) -o $@ --output_mode EMIT_ALL_SITES")
-
-snp_vcf/%.clust.png : snp_vcf/%.vcf
-	$(INIT) $(CLUSTER_VCF) --outPrefix snp_vcf/$* $<
+# Plot the sample-clustering heatmap PDF from the filtered SNP VCF using the script named by CLUSTER_VCF.
+snp_vcf/snps_ft.pdf : snp_vcf/snps_ft.vcf
+	$(call RUN,-n 1 -s 16G -m 20G -v $(VARIANT_ANNOTATION_ENV),"set -o pipefail && \
+								    $(RSCRIPT) $(CLUSTER_VCF) \
+								    --input_file $(<) \
+								    --output_file $(@) \
+								    --sample_pairs '$(SAMPLE_PAIRS)' \
+								    --genome b37")
+	
+..DUMMY := $(shell mkdir -p version; \
+             echo "GATK" > version/cluster_samples.txt;)
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY : snp_cluster
 
 include modules/vcf_tools/vcftools.mk
diff --git a/copy_number/annotateFacetsCCF2Vcf.R b/copy_number/annotateFacetsCCF2Vcf.R
index 8285c9a7..dd80a89d 100644
--- a/copy_number/annotateFacetsCCF2Vcf.R
+++ b/copy_number/annotateFacetsCCF2Vcf.R
@@ -118,8 +118,8 @@ if (sum(pass) == 0) {
         alt <- sapply(geno(vcf[pass])$AD[!is.na(ol), tumorSample], function(x) x[2])
         vaf <- alt / (alt + ref)
 
-        ccfFit <- computeCCF(vaf = vaf, tcn, lcn, purity = purity)
-        conf <- confCCF(alt = alt, ref = ref, tcn, lcn, purity = purity,
+        ccfFit <- compute_ccf(vaf = vaf, tcn, lcn, purity = purity)
+        conf <- conf_ccf(alt = alt, ref = ref, tcn, lcn, purity = purity,
                             multiplicity = ccfFit$multiplicity)
         ccfLower <- conf$lower
         ccfUpper <- conf$upper
diff --git a/copy_number/ascat.R b/copy_number/ascat.R
index 24432af1..cacee796 100644
--- a/copy_number/ascat.R
+++ b/copy_number/ascat.R
@@ -10,13 +10,13 @@ if (!interactive()) {
 }
 
 args_list <- list(make_option("--type", default = NA, type = 'character', help = "type of analysis"),
-				  make_option("--file_in", default = NA, type = 'character', help = "input file name"),
-				  make_option("--file_out", default = NA, type = 'character', help = "output file name"),
-				  make_option("--gamma", default = NA, type = 'numeric', help = "gamma parameter in pcf"),
-				  make_option("--nlog2", default = NA, type = 'numeric', help = "number of clusters in Log2 ratio"),
-				  make_option("--nbaf", default = NA, type = 'numeric', help = "number of clusters in BAF"),
-				  make_option("--rho", default = NA, type = 'numeric', help = "purity for ASCAT"),
-				  make_option("--psi", default = NA, type = 'numeric', help = "ploidy for ASCAT"))
+		  make_option("--file_in", default = NA, type = 'character', help = "input file name"),
+		  make_option("--file_out", default = NA, type = 'character', help = "output file name"),
+		  make_option("--gamma", default = NA, type = 'numeric', help = "gamma parameter in pcf"),
+		  make_option("--nlog2", default = NA, type = 'numeric', help = "number of clusters in Log2 ratio"),
+		  make_option("--nbaf", default = NA, type = 'numeric', help = "number of clusters in BAF"),
+		  make_option("--rho", default = NA, type = 'numeric', help = "purity for ASCAT"),
+		  make_option("--psi", default = NA, type = 'numeric', help = "ploidy for ASCAT"))
 				  
 parser <- OptionParser(usage = "%prog", option_list = args_list)
 arguments <- parse_args(parser, positional_arguments = T)
@@ -51,9 +51,9 @@ if (opt$type=="log2") {
 	abline(v=max(CN[,"Position"]), col="goldenrod3", lty=3, lwd=1)
 	abline(h=0, col="red")
 	axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1)
-    rect(xleft=1-1e10, xright=max(CN[,"Position"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5)
+    	rect(xleft=1-1e10, xright=max(CN[,"Position"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5)
 	title(main = gsub(".pdf", "", gsub("ascat/log2/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1)
-    box(lwd=1.5)
+    	box(lwd=1.5)
 	dev.off()
 
 } else if (opt$type=="bafall") {
@@ -83,9 +83,9 @@ if (opt$type=="log2") {
 	abline(v=max(BAF[,"Position"]), col="goldenrod3", lty=3, lwd=1)
 	abline(h=0.5, col="red")
 	axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1)
-    rect(xleft=1-1e10, xright=max(BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5)
-	title(main = gsub(".pdf", "", gsub("ascat/bafall/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1)
-    box(lwd=1.5)
+    	rect(xleft=1-1e10, xright=max(BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5)
+	title(main = gsub(".pdf", "", gsub("ascat/baf_all/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1)
+    	box(lwd=1.5)
 	dev.off()
 
 } else if (opt$type=="bafhet") {
@@ -117,14 +117,14 @@ if (opt$type=="log2") {
 	abline(v=max(BAF[,"Position"]), col="goldenrod3", lty=3, lwd=1)
 	abline(h=0.5, col="red")
 	axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1)
-    rect(xleft=1-1e10, xright=max(BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5)
-	title(main = gsub(".pdf", "", gsub("ascat/bafhet/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1)
-    box(lwd=1.5)
+    	rect(xleft=1-1e10, xright=max(BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5)
+	title(main = gsub(".pdf", "", gsub("ascat/baf_het/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1)
+    	box(lwd=1.5)
 	dev.off()
 
 } else if (opt$type=="aspcf") {
 
-	gamma = ifelse(is.na(as.numeric(opt$gamma)), 70, as.numeric(opt$gamma))
+	gamma = ifelse(is.na(as.numeric(opt$gamma)), 20, as.numeric(opt$gamma))
 	
 	CN_and_BAF = out2$jointseg[,c("chrom", "maploc", "cnlr", "vafT"),drop=FALSE]
 	index = out2$jointseg[,"het"]==1
@@ -132,7 +132,18 @@ if (opt$type=="log2") {
 	colnames(CN_and_BAF) = c("Chromosome", "Position", "Log2Ratio", "BAF")
 	index = CN_and_BAF[,"BAF"]>0.5
 	CN_and_BAF[index,"BAF"] = 1 - CN_and_BAF[index,"BAF"]
+	TMP = CN_and_BAF
+	for (i in 1:23) {
+		CN_and_BAF$Position[CN_and_BAF$Chromosome == i] = 1:sum(CN_and_BAF$Chromosome == i)
+	}
 	tmp = multipcf(data=winsorize(data=CN_and_BAF, method="mad", tau=2.5, k=25, verbose=FALSE), gamma=gamma, fast=FALSE, verbose=FALSE)
+	for (i in 1:23) {	# multipcf ran on per-chromosome row indices; map segment bounds back to the positions kept in TMP
+		seg_rows = tmp$chrom == i
+		chr_positions = TMP$Position[TMP$Chromosome == i]
+		tmp[seg_rows,"start.pos"] = chr_positions[tmp$start.pos[seg_rows]]
+		tmp[seg_rows,"end.pos"] = chr_positions[tmp$end.pos[seg_rows]]
+	}
+	CN_and_BAF = TMP	# restore the original positions for the saved object
 	colnames(tmp) = c("Chromosome", "Arm", "Start", "End", "N", "Log2Ratio", "BAF")
 	save(CN_and_BAF, tmp, file=opt$file_out)
 
@@ -218,9 +229,9 @@ if (opt$type=="log2") {
 	abline(v=max(CN_and_BAF[,"Position"]), col="goldenrod3", lty=3, lwd=1)
 	abline(h=0, col="red")
 	axis(1, at = .5*(start+end), labels=rep(" ", 23), cex.axis = 0.85, las = 1)
-    rect(xleft=1-1e10, xright=max(CN_and_BAF[,"Position"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5)
-	title(main = gsub(".pdf", "", gsub("ascat/log2nbaf/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1.35, cex.main=.75, font.main=1)
-    box(lwd=1.5)
+    	rect(xleft=1-1e10, xright=max(CN_and_BAF[,"Position"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5)
+	title(main = gsub(".pdf", "", gsub("ascat/log2_baf/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1.35, cex.main=.75, font.main=1)
+    	box(lwd=1.5)
 
 	screen(zz[2])
 	plot(CN_and_BAF[,"Position"], CN_and_BAF[,"BAF"], type="p", pch=".", cex=1, col=col, axes=FALSE, frame=TRUE, xlab="", ylab="", main="", ylim=c(0,1.125))
@@ -238,10 +249,10 @@ if (opt$type=="log2") {
 	abline(v=max(CN_and_BAF[,"Position"]), col="goldenrod3", lty=3, lwd=1)
 	abline(h=0.5, col="red")
 	axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1)
-    rect(xleft=1-1e10, xright=max(CN_and_BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5)
-	title(main = gsub(".pdf", "", gsub("ascat/log2nbaf/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1.35, cex.main=.75, font.main=1)
-    box(lwd=1.5)
-    close.screen(all.screens=TRUE)
+    	rect(xleft=1-1e10, xright=max(CN_and_BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5)
+	title(main = gsub(".pdf", "", gsub("ascat/log2_baf/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1.35, cex.main=.75, font.main=1)
+    	box(lwd=1.5)
+    	close.screen(all.screens=TRUE)
 	dev.off()
 	
 } else if (opt$type=="run-ascat") {
@@ -317,31 +328,35 @@ if (opt$type=="log2") {
 	chrs = 1:23
 	gender = "2323"
 	sexchromosomes = c(23, 24)
-	tmp2 = list(Tumor_LogR=Tumor_LogR,
-		 	    Tumor_BAF=Tumor_BAF,
-			    Tumor_LogR_segmented=Tumor_LogR_segmented,
-			    Tumor_BAF_segmented=Tumor_BAF_segmented,
-			    SNPpos=SNPpos,
-			    chromosomes=ch,
-			    chrnames=chrs,
-			    gender=gender,
-			    sexchromosomes=sexchromosomes)
+	tmp2 = list(Tumor_LogR = Tumor_LogR,
+		    Tumor_BAF = Tumor_BAF,
+		    Tumor_LogR_segmented = Tumor_LogR_segmented,
+		    Tumor_BAF_segmented = Tumor_BAF_segmented,
+		    SNPpos = SNPpos,
+		    chromosomes = ch,
+		    chrnames = chrs,
+		    gender = gender,
+		    sexchromosomes = sexchromosomes)
 	
-    tmp3 = try(runASCAT(lrr=tmp2$Tumor_LogR,
-        	                baf=tmp2$Tumor_BAF,
-        	                lrrsegmented=tmp2$Tumor_LogR_segmented,
-        	                bafsegmented=tmp2$Tumor_BAF_segmented,
-        	                gender=tmp2$gender,
-        	                SNPpos=tmp2$SNPpos,
-        	                chromosomes=tmp2$chromosomes,
-        	                chrnames=tmp2$chrnames,
-        	                sexchromosomes=tmp2$sexchromosomes,
-        	                failedqualitycheck=FALSE,
-        	                distance = opt$file_out,
-        	                copynumberprofile = NULL,
-        	                nonroundedprofile = NULL, 
-        	                aberrationreliability = NULL,
-        	                gamma = 1, rho_manual = rho, psi_manual = psi, y_limit = 3, circos = NA))
+    	tmp3 = try(runASCAT(lrr = tmp2$Tumor_LogR,
+			    baf = tmp2$Tumor_BAF,
+			    lrrsegmented = tmp2$Tumor_LogR_segmented,
+			    bafsegmented = tmp2$Tumor_BAF_segmented,
+			    gender = tmp2$gender,
+			    SNPpos = tmp2$SNPpos,
+			    chromosomes = tmp2$chromosomes,
+			    chrnames = tmp2$chrnames,
+			    sexchromosomes = tmp2$sexchromosomes,
+			    failedqualitycheck = FALSE,
+			    distance = opt$file_out,
+			    copynumberprofile = NULL,
+			    nonroundedprofile = NULL, 
+			    aberrationreliability = NULL,
+			    gamma = 1,
+			    rho_manual = rho,
+			    psi_manual = psi,
+			    y_limit = 3,
+			    circos = NA))
                         
     if (!("try-error" %in% is(tmp3))) {
         purity = tmp3$rho
@@ -408,10 +423,10 @@ if (opt$type=="log2") {
 	abline(v=max(CN[,"pos"]), col="goldenrod3", lty=3, lwd=1)
 	abline(h=0, col="red")
 	axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1)
-    load(gsub(".pdf", ".RData", gsub("total", "ascat", opt$file_out)))
-    rect(xleft=1-1e10, xright=max(CN[,"pos"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5)
+    	load(gsub(".pdf", ".RData", gsub("total", "ascat", opt$file_out)))
+    	rect(xleft=1-1e10, xright=max(CN[,"pos"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5)
 	title(main = gsub(".pdf", "", gsub("ascat/total/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1)
-    box(lwd=1.5)
+    	box(lwd=1.5)
 	dev.off()
 	
 } else if (opt$type=="plot-chr") {
diff --git a/copy_number/ascat.mk b/copy_number/ascat.mk
index 3af26268..77cefcbd 100644
--- a/copy_number/ascat.mk
+++ b/copy_number/ascat.mk
@@ -1,29 +1,47 @@
 include modules/Makefile.inc
 
 LOGDIR ?= log/ascat.$(NOW)
-PHONY += ascat ascat/log2 ascat/bafall ascat/bafhet ascat/mad ascat/log2nbaf ascat/ascat ascat/total ascat/bychr
 
-ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf ascat/bafall/$(pair).pdf ascat/bafhet/$(pair).pdf ascat/mad/$(pair).RData ascat/log2nbaf/$(pair).pdf ascat/ascat/$(pair).pdf ascat/total/$(pair).pdf ascat/bychr/$(pair)/timestamp)
+ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf) \
+	$(foreach pair,$(SAMPLE_PAIRS),ascat/baf_all/$(pair).pdf) \
+	$(foreach pair,$(SAMPLE_PAIRS),ascat/baf_het/$(pair).pdf) \
+	$(foreach pair,$(SAMPLE_PAIRS),ascat/mad/$(pair).RData) \
+	$(foreach pair,$(SAMPLE_PAIRS),ascat/log2_baf/$(pair).pdf) \
+	$(foreach pair,$(SAMPLE_PAIRS),ascat/ascat/$(pair).pdf) \
+	$(foreach pair,$(SAMPLE_PAIRS),ascat/total/$(pair).pdf) \
+	$(foreach pair,$(SAMPLE_PAIRS),ascat/bychr/$(pair)/timestamp)
 
 define ascat-plot-log2
 ascat/log2/$1_$2.pdf : facets/cncf/$1_$2.Rdata
-	$$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"$(RSCRIPT) modules/copy_number/ascat.R --type log2 --file_in $$< --file_out ascat/log2/$1_$2.pdf")
+	$$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"set -o pipefail && \
+						    $(RSCRIPT) modules/copy_number/ascat.R \
+						    --type log2 \
+						    --file_in $$(<) \
+						    --file_out ascat/log2/$1_$2.pdf")
 	
 endef
 $(foreach pair,$(SAMPLE_PAIRS),\
 		$(eval $(call ascat-plot-log2,$(tumor.$(pair)),$(normal.$(pair)))))
 		
 define ascat-plot-bafall
-ascat/bafall/$1_$2.pdf : facets/cncf/$1_$2.Rdata
-	$$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"$(RSCRIPT) modules/copy_number/ascat.R --type bafall --file_in $$< --file_out ascat/bafall/$1_$2.pdf")
+ascat/baf_all/$1_$2.pdf : facets/cncf/$1_$2.Rdata
+	$$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"set -o pipefail && \
+						    $(RSCRIPT) modules/copy_number/ascat.R \
+						    --type bafall \
+						    --file_in $$(<) \
+						    --file_out ascat/baf_all/$1_$2.pdf")
 	
 endef
 $(foreach pair,$(SAMPLE_PAIRS),\
 		$(eval $(call ascat-plot-bafall,$(tumor.$(pair)),$(normal.$(pair)))))
 
 define ascat-plot-bafhet
-ascat/bafhet/$1_$2.pdf : facets/cncf/$1_$2.Rdata
-	$$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"$(RSCRIPT) modules/copy_number/ascat.R --type bafhet --file_in $$< --file_out ascat/bafhet/$1_$2.pdf")
+ascat/baf_het/$1_$2.pdf : facets/cncf/$1_$2.Rdata
+	$$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"set -o pipefail && \
+						    $(RSCRIPT) modules/copy_number/ascat.R \
+						    --type bafhet \
+						    --file_in $$(<) \
+						    --file_out ascat/baf_het/$1_$2.pdf")
 	
 endef
 $(foreach pair,$(SAMPLE_PAIRS),\
@@ -31,15 +49,26 @@ $(foreach pair,$(SAMPLE_PAIRS),\
 		
 define ascat-aspcf
 ascat/mad/$1_$2.RData : facets/cncf/$1_$2.Rdata
-	$$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/ascat.R --type aspcf --file_in $$< --file_out ascat/mad/$1_$2.RData --gamma '$${aspcf_gamma.$1}'")
+	$$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"set -o pipefail && \
+						    $(RSCRIPT) modules/copy_number/ascat.R \
+						    --type aspcf \
+						    --file_in $$(<) \
+						    --file_out ascat/mad/$1_$2.RData \
+						    --gamma '$${aspcf_gamma.$1}'")
 	
 endef
 $(foreach pair,$(SAMPLE_PAIRS),\
 		$(eval $(call ascat-aspcf,$(tumor.$(pair)),$(normal.$(pair)))))
 
 define ascat-plot-aspcf
-ascat/log2nbaf/$1_$2.pdf : ascat/mad/$1_$2.RData
-	$$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/ascat.R --type plot-aspcf --file_in $$< --file_out ascat/log2nbaf/$1_$2.pdf --nlog2 '$${aspcf_nlog2.$1}' --nbaf '$${aspcf_nbaf.$1}'")
+ascat/log2_baf/$1_$2.pdf : ascat/mad/$1_$2.RData
+	$$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"set -o pipefail && \
+						    $(RSCRIPT) modules/copy_number/ascat.R \
+						    --type plot-aspcf \
+						    --file_in $$(<) \
+						    --file_out ascat/log2_baf/$1_$2.pdf \
+						    --nlog2 '$${aspcf_nlog2.$1}' \
+						    --nbaf '$${aspcf_nbaf.$1}'")
 	
 endef
 $(foreach pair,$(SAMPLE_PAIRS),\
@@ -47,27 +76,41 @@ $(foreach pair,$(SAMPLE_PAIRS),\
 		
 define ascat-run-ascat
 ascat/ascat/$1_$2.pdf : ascat/mad/$1_$2.RData
-	$$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/ascat.R --type run-ascat --file_in $$< --file_out ascat/ascat/$1_$2.pdf --rho '$${ascat_rho.$1}' --psi '$${ascat_psi.$1}' --nlog2 '$${aspcf_nlog2.$1}' --nbaf '$${aspcf_nbaf.$1}'")
+	$$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"set -o pipefail && \
+						    $(RSCRIPT) modules/copy_number/ascat.R \
+						    --type run-ascat \
+						    --file_in $$(<) \
+						    --file_out ascat/ascat/$1_$2.pdf \
+						    --rho '$${ascat_rho.$1}' \
+						    --psi '$${ascat_psi.$1}' \
+						    --nlog2 '$${aspcf_nlog2.$1}' \
+						    --nbaf '$${aspcf_nbaf.$1}'")
 	
 ascat/total/$1_$2.pdf : facets/cncf/$1_$2.Rdata ascat/ascat/$1_$2.pdf
-	$$(call RUN,-c -v $(ASCAT_ENV) -s 6G -m 12G,"$(RSCRIPT) modules/copy_number/ascat.R --type total-copy --file_in $$< --file_out ascat/total/$1_$2.pdf")	
+	$$(call RUN,-c -v $(ASCAT_ENV) -s 6G -m 12G,"set -o pipefail && \
+						     $(RSCRIPT) modules/copy_number/ascat.R \
+						     --type total-copy \
+						     --file_in $$(<) \
+						     --file_out ascat/total/$1_$2.pdf")
 
 endef
-
 $(foreach pair,$(SAMPLE_PAIRS),\
 		$(eval $(call ascat-run-ascat,$(tumor.$(pair)),$(normal.$(pair)))))
 
 define ascat-plot-chr
 ascat/bychr/$1_$2/timestamp : facets/cncf/$1_$2.Rdata ascat/ascat/$1_$2.pdf
-	$$(call RUN, -v $(ASCAT_ENV) -s 6G -m 12G,"mkdir -p ascat/bychr/ && \
-											   mkdir -p ascat/bychr/$1_$2 && \
-											   $(RSCRIPT) modules/copy_number/ascat.R --type plot-chr --file_in $$< --file_out ascat/bychr/$1_$2")
+	$$(call RUN, -v $(ASCAT_ENV) -s 6G -m 12G,"set -o pipefail && \
+						   $(RSCRIPT) modules/copy_number/ascat.R \
+						   --type plot-chr \
+						   --file_in $$(<) \
+						   --file_out ascat/bychr/$1_$2")
 		
 endef
-
 $(foreach pair,$(SAMPLE_PAIRS),\
 		$(eval $(call ascat-plot-chr,$(tumor.$(pair)),$(normal.$(pair)))))
-		
-.DELETE_ON_ERROR:
+
+..DUMMY := $(shell mkdir -p version; \
+	     R --version > version/ascat.txt;)
 .SECONDARY:
-.PHONY: $(PHONY)
+.DELETE_ON_ERROR:
+.PHONY: ascat
diff --git a/copy_number/cnvkit.R b/copy_number/cnvkit.R
deleted file mode 100644
index 95cbe66c..00000000
--- a/copy_number/cnvkit.R
+++ /dev/null
@@ -1,168 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("copynumber"))
-suppressPackageStartupMessages(library("colorspace"))
-suppressPackageStartupMessages(library("ASCAT"))
-suppressPackageStartupMessages(library("GAP"))
-
-'plot_log2_' <- function(x, y, title = "", alpha=NA, psi=NA)
-{
-   	par(mar=c(5, 5, 4, 2)+.1)
-   	data("CytoBand")
-   	end = NULL
-	for (j in 1:23) {
-		end = c(end, max(CytoBand$End[CytoBand$Chromosome==j]))
-	}
-	end = cumsum(end)
-	start = rep(0, 23)
-	start[2:23] = end[1:22]+1
-	for (j in 1:23) {
-		y[y[,"Chromosome"]==j,"Start"] = y[y[,"Chromosome"]==j,"Start"] + start[j]
-		y[y[,"Chromosome"]==j,"End"] = y[y[,"Chromosome"]==j,"End"] + start[j]
-		x[x[,"chrom"]==j,"pos"] = x[x[,"chrom"]==j,"pos"] + start[j]
-	}
-	plot(x[,"pos"], x[,"Log2Ratio"], type="p", pch=".", cex=1, col="grey75", axes=FALSE, frame=TRUE, xlab="", ylab="", main="", ylim=c(-4,5))
-	for (j in 1:nrow(y)) {
- 		lines(x=c(y[j,"Start"], y[j,"End"]), y=rep(y[j,"Log2Ratio"],2), lty=1, lwd=1.75, col="red")
- 	}
-  	axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1)
-	mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25)
-	abline(v=1, col="goldenrod3", lty=3, lwd=.5)
-	abline(h=0, col="red", lty=1, lwd=1)
-	for (j in 2:23) {
-		v = start[j]
-		abline(v=v, col="goldenrod3", lty=3, lwd=.5)
-	}
-	abline(v=max(x[,"pos"]), col="goldenrod3", lty=3, lwd=.5)
-	axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1)	
-	rect(xleft=1-1e10, xright=x[nrow(x),"pos"]+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5)
-	title(main = paste0(title, " | alpha = ", signif(alpha, 3), " | psi = ", signif(psi, 3)), line=-1, cex.main=.75, font.main=1)
-    box(lwd=1.5)
-}
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--type", default = NA, type = 'character', help = "type of analysis"),
-				  make_option("--sample_name", default = NA, type = 'character', help = "sample name"))
-				  
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-if (opt$type=="total-copy") {
-	
-	'prunesegments.cn' <- function(x, n=10)
-	{
-		cnm = matrix(NA, nrow=nrow(x), ncol=nrow(x))
-		for (j in 1:nrow(x)) {
-			cnm[,j] = abs(2^x[j,"Log2Ratio"] - 2^x[,"Log2Ratio"])
-		}
-		cnt = hclust(as.dist(cnm), "average")
-		cnc = cutree(tree=cnt, k=n)
-		for (j in unique(cnc)) {
-			indx = which(cnc==j)
-			if (length(indx)>2) {
-				mcl = mean(x[indx,"Log2Ratio"])
-				scl = sd(x[indx,"Log2Ratio"])
-				ind = which(x[indx,"Log2Ratio"]<(mcl+1.96*scl) & x[indx,"Log2Ratio"]>(mcl-1.96*scl))
-				x[indx[ind],"Log2Ratio"] = mean(x[indx[ind],"Log2Ratio"])
-			} else {
-				x[indx,"Log2Ratio"] = mean(x[indx,"Log2Ratio"])
-			}
-		}
-		return(x)
-	}
-
-	data = read.csv(file=paste0("cnvkit/cnr/", opt$sample_name, ".cnr"), header=TRUE, sep="\t", stringsAsFactors=FALSE)
-	CN = data[,c("chromosome", "start", "log2"),drop=FALSE]
-	colnames(CN) = c("Chromosome", "Position", "Log2Ratio")
-	CN[,"Chromosome"] = gsub(pattern="chr", replacement="", x=CN[,"Chromosome"], fixed=TRUE)
-	CN[CN[,"Chromosome"]=="X","Chromosome"] = 23
-	CN[CN[,"Chromosome"]=="Y","Chromosome"] = 24
-	CN[,"Chromosome"] = as.numeric(CN[,"Chromosome"])
-	CN[CN[,"Log2Ratio"]<(-4) | CN[,"Log2Ratio"]>(4),"Log2Ratio"] = 0
-	CN = subset(CN, CN[,"Chromosome"]<=23)
-	tmp = pcf(data=winsorize(data=CN, method="mad", tau=2.5, k=10, verbose=FALSE), kmin = 10, gamma=40, fast=FALSE, verbose=FALSE)[,2:7,drop=FALSE]
-	colnames(tmp) = c("Chromosome", "Arm", "Start", "End", "N", "Log2Ratio")
-	save(CN, tmp, file=paste0("cnvkit/totalcopy/", opt$sample_name, ".RData"))
-	tmp = prunesegments.cn(x=tmp, n=10)
-	CN = winsorize(data=CN[,c("Chromosome","Position","Log2Ratio")], tau=2.5, k=15, verbose=FALSE)
-	pdf(file=paste0("cnvkit/segmented/", opt$sample_name, ".pdf"), width=10, height=4.25)
-	file_names = dir(path="facets/cncf", pattern=opt$sample_name, full.names=TRUE)
-	file_names = file_names[grep(".Rdata", file_names, fixed=TRUE)]
-	if (length(file_names)==1) {
-		load(file_names)
-		alpha = fit$purity
-		psi = fit$ploidy
-	} else {
-		alpha = NA
-		psi = NA
-	}
-	plot_log2_(x=CN, y=tmp, title = opt$sample_name, alpha=alpha, psi=psi)
-	dev.off()
-
-} else if (opt$type=="call-cna") {
-	
-	'prunesegments.cn' <- function(x, n=10)
-	{
-		cnm = matrix(NA, nrow=nrow(x), ncol=nrow(x))
-		for (j in 1:nrow(x)) {
-			cnm[,j] = abs(2^x[j,"Log2Ratio"] - 2^x[,"Log2Ratio"])
-		}
-		cnt = hclust(as.dist(cnm), "average")
-		cnc = cutree(tree=cnt, k=n)
-		for (j in unique(cnc)) {
-			indx = which(cnc==j)
-			if (length(indx)>2) {
-				mcl = mean(x[indx,"Log2Ratio"])
-				scl = sd(x[indx,"Log2Ratio"])
-				ind = which(x[indx,"Log2Ratio"]<(mcl+1.96*scl) & x[indx,"Log2Ratio"]>(mcl-1.96*scl))
-				x[indx[ind],"Log2Ratio"] = mean(x[indx[ind],"Log2Ratio"])
-			} else {
-				x[indx,"Log2Ratio"] = mean(x[indx,"Log2Ratio"])
-			}
-		}
-		return(x)
-	}
-	load(paste0("cnvkit/totalcopy/", opt$sample_name, ".RData"))
-	file_names = dir(path="facets/cncf", pattern=opt$sample_name, full.names=TRUE)
-	file_names = file_names[grep(".Rdata", file_names, fixed=TRUE)]
-	if (length(file_names)==1) {
-		load(file_names)
-		alpha = ifelse(is.na(fit$purity), 1, fit$purity)
-		psi = ifelse(is.na(fit$ploidy), 2, fit$ploid)
-	} else {
-		alpha = 1
-		psi = 2
-	}
-	tmp = prunesegments.cn(x=tmp, n=10)
-	qt = round((((2^(tmp[,"Log2Ratio"])) * (alpha*psi + 2*(1-alpha))) - 2*(1-alpha))/alpha)
-	qt[is.na(qt)] = 2
-	qt[is.infinite(qt)] = 2
-	cat5 = rep(0, length(qt))
-	if (round(psi)==1 | round(psi)==2) {
-		cat5t = c(0, 1, 3, 7)
-	} else if (round(psi)==3) {
-		cat5t = c(0, 1, 4, 9)
-	} else if (round(psi)==4) {
-		cat5t = c(0, 1, 5, 10)
-	} else if (round(psi)==5) {
-		cat5t = c(0, 2, 6, 12)
-	} else if (round(psi)>=6) {
-		cat5t = c(0, 2, 7, 15)
-	} else {
-		cat5t = c(0, 1, 3, 7)
-	}
-	cat5[qt <= cat5t[2]] = -1
-	cat5[qt <= cat5t[1]] = -2
-	cat5[qt >= cat5t[3]] = 1
-	cat5[qt >= cat5t[4]] = 2
-	tmp = cbind(tmp, "Cat5"=cat5)
-	save(CN, tmp, file=paste0("cnvkit/called/", opt$sample_name, ".RData"))
-	
-}
-
-warnings()
diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk
new file mode 100644
index 00000000..5e065c78
--- /dev/null
+++ b/copy_number/cnvkit.mk
@@ -0,0 +1,125 @@
+include modules/Makefile.inc
+include modules/genome_inc/b37.inc
+
+# Log directory for this module, stamped with NOW; `?=` keeps any value the caller already set.
+LOGDIR ?= log/cnv_kit.$(NOW)
+# Phony entry point: lists every per-sample and cohort-level output that the rules in this file build.
+cnv_kit : $(patsubst %,cnvkit/cnn/tumor/%.targetcoverage.cnn,$(TUMOR_SAMPLES)) \
+	  $(patsubst %,cnvkit/cnn/tumor/%.antitargetcoverage.cnn,$(TUMOR_SAMPLES)) \
+	  $(patsubst %,cnvkit/cnn/normal/%.targetcoverage.cnn,$(NORMAL_SAMPLES)) \
+	  $(patsubst %,cnvkit/cnn/normal/%.antitargetcoverage.cnn,$(NORMAL_SAMPLES)) \
+	  cnvkit/reference/combined_reference.cnr \
+	  $(patsubst %,cnvkit/cnr/%.cnr,$(TUMOR_SAMPLES)) \
+	  $(patsubst %,cnvkit/cnr/%.cnr,$(NORMAL_SAMPLES)) \
+	  $(patsubst %,cnvkit/segmented/%.txt,$(TUMOR_SAMPLES)) \
+	  $(patsubst %,cnvkit/plots/log2/%.pdf,$(TUMOR_SAMPLES)) \
+	  $(patsubst %,cnvkit/plots/segmented/%.pdf,$(TUMOR_SAMPLES)) \
+	  $(patsubst %,cnvkit/totalcopy/%.txt,$(TUMOR_SAMPLES)) \
+	  $(patsubst %,cnvkit/plots/totalcopy/%.pdf,$(TUMOR_SAMPLES)) \
+	  cnvkit/summary/total_copy.txt cnvkit/summary/log2_ratio.txt
+	  
+# BED intervals passed to `cnvkit.py coverage` below. NOTE(review): on-target is MSK-IMPACT-v3 but off-target is v4 -- confirm the version mismatch is intended.
+ONTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v3_cnvkit_ontarget.bed
+OFFTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v4_cnvkit_offtarget.bed
+define cnvkit-tumor-cnn
+# Rule template, arg 1 = tumor sample name: coverage of bam/<sample>.bam over the on-target intervals.
+cnvkit/cnn/tumor/$1.targetcoverage.cnn : bam/$1.bam
+	$$(call RUN,-c -n 4 -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && cnvkit.py coverage -p 4 -q 0 $$(<) $$(ONTARGET_FILE) -o $$(@)")
+
+# Coverage of the same BAM over the off-target intervals.
+cnvkit/cnn/tumor/$1.antitargetcoverage.cnn : bam/$1.bam
+	$$(call RUN,-c -n 4 -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && cnvkit.py coverage -p 4 -q 0 $$(<) $$(OFFTARGET_FILE) -o $$(@)")
+endef
+$(foreach smp,$(TUMOR_SAMPLES),\
+		$(eval $(call cnvkit-tumor-cnn,$(smp))))
+		
+define cnvkit-normal-cnn
+# Rule template, arg 1 = normal sample name: coverage of bam/<sample>.bam over the on-target intervals.
+cnvkit/cnn/normal/$1.targetcoverage.cnn : bam/$1.bam
+	$$(call RUN,-c -n 4 -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && cnvkit.py coverage -p 4 -q 0 $$(<) $$(ONTARGET_FILE) -o $$(@)")
+
+# Coverage of the same BAM over the off-target intervals.
+cnvkit/cnn/normal/$1.antitargetcoverage.cnn : bam/$1.bam
+	$$(call RUN,-c -n 4 -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && cnvkit.py coverage -p 4 -q 0 $$(<) $$(OFFTARGET_FILE) -o $$(@)")
+endef
+$(foreach smp,$(NORMAL_SAMPLES),\
+		$(eval $(call cnvkit-normal-cnn,$(smp))))
+
+# Pooled normal reference. Inputs are exactly the declared prerequisites rather than a directory glob, so stale or unlisted .cnn files cannot leak in.
+# NOTE(review): the reason for the 30 s sleep is not visible here -- presumably it waits for the coverage files to settle on a shared filesystem; confirm.
+cnvkit/reference/combined_reference.cnr : $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).targetcoverage.cnn) $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn)
+	$(call RUN,-n 1 -s 24G -m 32G -v $(CNVKIT_ENV),"set -o pipefail && sleep 30 && cnvkit.py reference $(^) -f $(REF_FASTA) --no-edge -o $(@)")
+
+define cnvkit-tumor-cnr
+# Rule template, arg 1 = tumor sample name: corrected ratios from target + antitarget coverage and the pooled reference.
+cnvkit/cnr/$1.cnr : cnvkit/cnn/tumor/$1.targetcoverage.cnn cnvkit/cnn/tumor/$1.antitargetcoverage.cnn cnvkit/reference/combined_reference.cnr
+	$$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && cnvkit.py fix $$(<) $$(<<) $$(<<<) -o $$(@)")
+
+# cnvkit.R option 1; the recipe never names its output, so the script presumably writes this PDF itself -- confirm in cnvkit.R.
+cnvkit/plots/log2/$1.pdf : cnvkit/cnr/$1.cnr
+	$$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \
+						     $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \
+						     --option 1 --sample_name $1")
+
+# cnvkit.R option 2; output path is likewise implied by the script rather than passed in.
+cnvkit/segmented/$1.txt : cnvkit/cnr/$1.cnr
+	$$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \
+						     $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \
+						     --option 2 --sample_name $1")
+
+# cnvkit.R option 3; depends only on the .cnr file, not on the segmented table.
+cnvkit/plots/segmented/$1.pdf : cnvkit/cnr/$1.cnr
+	$$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \
+						     $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \
+						     --option 3 --sample_name $1")
+# The << and <<< references above are not defined in this file; presumably modules/Makefile.inc maps them to the 2nd and 3rd prerequisite.
+endef
+$(foreach smp,$(TUMOR_SAMPLES),\
+		$(eval $(call cnvkit-tumor-cnr,$(smp))))
+		
+define cnvkit-normal-cnr
+# Rule template, arg 1 = normal sample name: corrected ratios for a normal against the pooled reference.
+cnvkit/cnr/$1.cnr : cnvkit/cnn/normal/$1.targetcoverage.cnn cnvkit/cnn/normal/$1.antitargetcoverage.cnn cnvkit/reference/combined_reference.cnr
+	$$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && cnvkit.py fix $$(<) $$(<<) $$(<<<) -o $$(@)")
+
+endef
+$(foreach smp,$(NORMAL_SAMPLES),\
+		$(eval $(call cnvkit-normal-cnr,$(smp))))
+
+
+define cnvkit-total-copy
+# Rule template, arg 1 = tumor sample, arg 2 = its matched normal; both are joined as tumor_normal for the FACETS file and --sample_name.
+# cnvkit.R option 4; the recipe never names its output, so the script presumably derives it from the sample name -- confirm in cnvkit.R.
+cnvkit/totalcopy/$1.txt : cnvkit/segmented/$1.txt facets/cncf/$1_$2.out
+	$$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \
+						    $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \
+						    --option 4 --sample_name $1_$2")
+
+# cnvkit.R option 5; needs the corrected ratios, the total-copy table and the FACETS output.
+cnvkit/plots/totalcopy/$1.pdf : cnvkit/cnr/$1.cnr cnvkit/totalcopy/$1.txt facets/cncf/$1_$2.out
+	$$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \
+						    $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \
+						    --option 5 --sample_name $1_$2")
+endef
+$(foreach pr,$(SAMPLE_PAIRS),\
+		$(eval $(call cnvkit-total-copy,$(tumor.$(pr)),$(normal.$(pr)))))
+		
+# Cohort summary over every tumor's total-copy table; cnvkit.R option 6 receives the whole TUMOR_SAMPLES list as one quoted argument.
+cnvkit/summary/total_copy.txt : $(patsubst %,cnvkit/totalcopy/%.txt,$(TUMOR_SAMPLES))
+	$(call RUN,-n 1 -s 24G -m 32G -v $(CNVKIT_ENV),"set -o pipefail && \
+							$(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \
+							--option 6 --sample_name '$(TUMOR_SAMPLES)'")
+							
+# Cohort summary over every sample's corrected ratios; cnvkit.R option 7 receives the whole SAMPLES list as one quoted argument.
+cnvkit/summary/log2_ratio.txt : $(patsubst %,cnvkit/cnr/%.cnr,$(SAMPLES))
+	$(call RUN,-n 1 -s 24G -m 32G -v $(CNVKIT_ENV),"set -o pipefail && \
+							$(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \
+							--option 7 --sample_name '$(SAMPLES)'")
+
+
+
+# Record the CNVkit version at parse time. Uses POSIX `> file 2>&1`: the bash-only `&>` backgrounds the command and truncates the file under /bin/sh.
+..DUMMY := $(shell mkdir -p version && python $(CNVKIT_ENV)/bin/cnvkit.py version > version/cnvkit.txt 2>&1)
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: cnv_kit
diff --git a/copy_number/cnvkitbinqc.R b/copy_number/cnvkitbinqc.R
deleted file mode 100644
index 7d3e430e..00000000
--- a/copy_number/cnvkitbinqc.R
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--normal_files", default = NA, type = 'character', help = "normal samples input file names"),
-				  make_option("--tumor_files", default = NA, type = 'character', help = "tumor samples input file names"),
-				  make_option("--out_file", default = NA, type = 'character', help = "output file name"))
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-in_file_normal = unlist(strsplit(x=opt$normal_files, split=" ", fixed=TRUE))
-in_file_tumor = unlist(strsplit(x=opt$tumor_files, split=" ", fixed=TRUE))
-out_file = opt$out_file
-
-depth_n = list()
-for (i in 1:length(in_file_normal)) {
-	print(i)
-	data = read.csv(file=in_file_normal[i], header=TRUE, sep="\t", stringsAsFactors=FALSE)
-	index = data[,"chromosome"] %in% as.character(1:22)
-	depth_n[[i]] = as.numeric(data[index,"depth"])
-}
-depth_n = do.call(cbind, depth_n)
-
-depth_t = list()
-for (i in 1:length(in_file_tumor)) {
-	print(i)
-	data = read.csv(file=in_file_tumor[i], header=TRUE, sep="\t", stringsAsFactors=FALSE)
-	index = data[,"chromosome"] %in% as.character(1:22)
-	depth_t[[i]] = as.numeric(data[index,"depth"])
-}
-depth_t = do.call(cbind, depth_t)
-
-bin_size = as.numeric(data[index,"end"]) - as.numeric(data[index,"start"])
-var_bin_n = apply(depth_n, 1, sd, na.rm=TRUE)
-var_bin_t = apply(depth_t, 1, sd, na.rm=TRUE)
-data = data.frame(bin_size, var_bin_n, var_bin_t)
-write.table(data, file=out_file, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
-
-ymin = min(var_bin_n, var_bin_t)
-ymax = max(var_bin_n, var_bin_t)
-
-pdf(file=gsub(".tsv", ".pdf", x=out_file, fixed=TRUE), width=7, height=7)
-par(mar = c(6.1, 6.5, 4.1, 1.1))
-plot(bin_size, var_bin_n, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "", log="y", ylim=c(ymin, ymax))
-points(x=bin_size, y=var_bin_n, col = "grey50", bg = "grey90", pch = 21, cex = 1, lwd = .1)
-axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15)
-axis(2, at = NULL, cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15)
-mtext(side = 1, text = "Bin size (bp)", line = 4, cex = 1.5)
-mtext(side = 2, text = "SD", line = 5, cex = 1.5)
-plot(bin_size, var_bin_t, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "", log="y", ylim=c(ymin, ymax))
-points(x=bin_size, y=var_bin_t, col = "black", bg = "steelblue", pch = 21, cex = 1, lwd = .1)
-axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15)
-axis(2, at = NULL, cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15)
-mtext(side = 1, text = "Bin size (bp)", line = 4, cex = 1.5)
-mtext(side = 2, text = "SD", line = 5, cex = 1.5)
-plot(var_bin_n, var_bin_t, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "", log="xy", xlim=c(ymin, ymax), ylim=c(ymin, ymax))
-points(x=var_bin_n, y=var_bin_t, col = "black", bg = "steelblue", pch = 21, cex = 1, lwd = .1)
-abline(a=0, b=1, col="goldenrod3", lwd=2)
-axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15)
-axis(2, at = NULL, cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15)
-mtext(side = 1, text = "Normal SD", line = 4, cex = 1.5)
-mtext(side = 2, text = "Tumor SD", line = 5, cex = 1.5)
-dev.off()
diff --git a/copy_number/cnvkitcoverage.mk b/copy_number/cnvkitcoverage.mk
deleted file mode 100644
index 696f395a..00000000
--- a/copy_number/cnvkitcoverage.mk
+++ /dev/null
@@ -1,30 +0,0 @@
-include modules/Makefile.inc
-include modules/genome_inc/b37.inc
-
-LOGDIR ?= log/cnvkit_coverage.$(NOW)
-PHONY += cnvkit cnvkit/cnn cnvkit/cnn/tumor cnvkit/cnn/normal
-
-cnvkit_coverage : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcoverage.cnn cnvkit/cnn/tumor/$(sample).antitargetcoverage.cnn) $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).targetcoverage.cnn cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn)
-
-define cnvkit-tumor-cnn
-cnvkit/cnn/tumor/%.targetcoverage.cnn : bam/%.bam
-	$$(call RUN,-c -n 4 -s 6G -m 8G,"cnvkit.py coverage -p 4 -q 0 $$(<) $$(ONTARGET_FILE) -o cnvkit/cnn/tumor/$$(*).targetcoverage.cnn")
-
-cnvkit/cnn/tumor/%.antitargetcoverage.cnn : bam/%.bam
-	$$(call RUN,-c -n 4 -s 6G -m 8G,"cnvkit.py coverage -p 4 -q 0 $$(<) $$(OFFTARGET_FILE) -o cnvkit/cnn/tumor/$$(*).antitargetcoverage.cnn")
-endef
- $(foreach sample,$(TUMOR_SAMPLES),\
-		$(eval $(call cnvkit-tumor-cnn,$(sample))))
-		
-define cnvkit-normal-cnn
-cnvkit/cnn/normal/%.targetcoverage.cnn : bam/%.bam
-	$$(call RUN,-c -n 4 -s 6G -m 8G,"cnvkit.py coverage -p 4 -q 0 $$(<) $$(ONTARGET_FILE) -o cnvkit/cnn/normal/$$(*).targetcoverage.cnn")
-
-cnvkit/cnn/normal/%.antitargetcoverage.cnn : bam/%.bam
-	$$(call RUN,-c -n 4 -s 6G -m 8G,"cnvkit.py coverage -p 4 -q 0 $$(<) $$(OFFTARGET_FILE) -o cnvkit/cnn/normal/$$(*).antitargetcoverage.cnn")
-endef
- $(foreach sample,$(NORMAL_SAMPLES),\
-		$(eval $(call cnvkit-normal-cnn,$(sample))))
-		
-.PHONY: $(PHONY)
-
diff --git a/copy_number/cnvkitfix.mk b/copy_number/cnvkitfix.mk
deleted file mode 100644
index c83aa1af..00000000
--- a/copy_number/cnvkitfix.mk
+++ /dev/null
@@ -1,18 +0,0 @@
-include modules/Makefile.inc
-include modules/genome_inc/b37.inc
-
-LOGDIR ?= log/cnvkit_fix.$(NOW)
-PHONY += cnvkit cnvkit/cnr
-
-cnvkit_fix : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr)
-
-define cnvkit-cnr
-cnvkit/cnr/%.cnr : cnvkit/cnn/tumor/%.targetcoverage.cnn cnvkit/cnn/tumor/%.antitargetcoverage.cnn cnvkit/reference/combined_reference.cnr
-	$$(call RUN,-c -s 6G -m 8G,"cnvkit.py fix $$(<) $$(<<) cnvkit/reference/combined_reference.cnr -o cnvkit/cnr/$$(*).cnr")
-	
-endef
- $(foreach sample,$(TUMOR_SAMPLES),\
-		$(eval $(call cnvkit-cnr,$(sample))))
-				
-.PHONY: $(PHONY)
-
diff --git a/copy_number/cnvkitheatmap.R b/copy_number/cnvkitheatmap.R
deleted file mode 100644
index 1513a473..00000000
--- a/copy_number/cnvkitheatmap.R
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("RColorBrewer"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--in_file", default = NA, type = 'character', help = "input file names"),
-				  make_option("--out_file", default = NA, type = 'character', help = "output file name"))
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-in_file = unlist(strsplit(x=opt$in_file, split=" ", fixed=TRUE))
-out_file = opt$out_file
-
-depth = list()
-for (i in 1:length(in_file)) {
-	print(i)
-	data = read.csv(file=in_file[i], header=TRUE, sep="\t", stringsAsFactors=FALSE)
-	index = data[,"chromosome"] %in% c(as.character(1:22), "X")
-	depth[[i]] = as.numeric(data[index,"depth"])
-}
-depth = do.call(cbind, depth)
-pdf(file=out_file, width=14, height=14)
-heatmap(x=depth, labRow=rep(" ", nrow(depth)), labCol=rep(" ", ncol(depth)), col=colorRampPalette(RColorBrewer::brewer.pal(10, "RdBu"))(256))
-dev.off()
-
-png(file=gsub(".pdf", ".png", out_file, fixed=TRUE), width=1440, height=1440)
-heatmap(x=depth, labRow=rep(" ", nrow(depth)), labCol=rep(" ", ncol(depth)), col=colorRampPalette(RColorBrewer::brewer.pal(10, "RdBu"))(256))
-dev.off()
diff --git a/copy_number/cnvkitheatmap.mk b/copy_number/cnvkitheatmap.mk
deleted file mode 100644
index cbbad3b1..00000000
--- a/copy_number/cnvkitheatmap.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/cnvkit_heatmap.$(NOW)
-PHONY += cnvkit cnvkit/heatmap
-
-CNVKIT_NORMAL_ON_TARGET ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).targetcoverage.cnn))
-CNVKIT_NORMAL_OFF_TARGET ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn))
-CNVKIT_TUMOR_ON_TARGET ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcoverage.cnn))
-CNVKIT_TUMOR_OFF_TARGET ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).antitargetcoverage.cnn))
-
-cnvkit : cnvkit/heatmap/normal_samples_ontarget.pdf cnvkit/heatmap/normal_samples_offtarget.pdf cnvkit/heatmap/tumor_samples_ontarget.pdf cnvkit/heatmap/tumor_samples_offtarget.pdf
-
-cnvkit/heatmap/normal_samples_ontarget.pdf : $(wildcard cnvkit/cnn/normal/$(NORMAL_SAMPLES).targetcoverage.cnn)
-	$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitheatmap.R --in_file '$(CNVKIT_NORMAL_ON_TARGET)' --out_file cnvkit/heatmap/normal_samples_ontarget.pdf")
-	
-cnvkit/heatmap/normal_samples_offtarget.pdf : $(wildcard cnvkit/cnn/normal/$(NORMAL_SAMPLES).antitargetcoverage.cnn)
-	$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitheatmap.R --in_file '$(CNVKIT_NORMAL_OFF_TARGET)' --out_file cnvkit/heatmap/normal_samples_offtarget.pdf")
-	
-cnvkit/heatmap/tumor_samples_ontarget.pdf : $(wildcard cnvkit/cnn/tumor/$(TUMOR_SAMPLES).targetcoverage.cnn)
-	$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitheatmap.R --in_file '$(CNVKIT_TUMOR_ON_TARGET)' --out_file cnvkit/heatmap/tumor_samples_ontarget.pdf")
-	
-cnvkit/heatmap/tumor_samples_offtarget.pdf : $(wildcard cnvkit/cnn/tumor/$(TUMOR_SAMPLES).antitargetcoverage.cnn)
-	$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitheatmap.R --in_file '$(CNVKIT_TUMOR_OFF_TARGET)' --out_file cnvkit/heatmap/tumor_samples_offtarget.pdf")
-				
-.PHONY: $(PHONY)
diff --git a/copy_number/cnvkitplot.R b/copy_number/cnvkitplot.R
deleted file mode 100644
index 971b6654..00000000
--- a/copy_number/cnvkitplot.R
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("copynumber"))
-suppressPackageStartupMessages(library("colorspace"))
-suppressPackageStartupMessages(library("ASCAT"))
-suppressPackageStartupMessages(library("GAP"))
-
-'plot_log2_' <- function(x, title = "")
-{
-   	par(mar=c(5, 5, 4, 2)+.1)
-   	data("CytoBand")
-   	end = NULL
-   	for (i in 1:23) {
-   		end = c(end, max(CytoBand[CytoBand[,1]==i,"End"]))
-   	}
-   	end = cumsum(end)
-   	start = c(1, end[1:22]+1)
-   	CytoBand = cbind(start, end)
-   	index = NULL
-   	for (i in 1:23) {
-   		index = c(index, seq(from = CytoBand[i, "start"], to=CytoBand[i, "end"], length=sum(x$chromosome==i)))
-   	}
-	plot(index, x$log2, type="p", pch=".", cex=1.95, col="grey80", axes=FALSE, frame=TRUE, xlab="", ylab="", main="", ylim=c(-4,5))
-  	axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1)
-	mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25)
-	abline(v=1, col="goldenrod3", lty=3, lwd=.5)
-	abline(h=0, col="red", lty=1, lwd=1)
-	for (j in 1:23) {
-		abline(v=CytoBand[j,"end"], col="goldenrod3", lty=3, lwd=.5)
-	}
-	axis(1, at = .5*(CytoBand[,"start"]+CytoBand[,"end"]), labels=c(1:22, "X"), cex.axis = 0.85, las = 1)
-	rect(xleft=1-1e10, xright=CytoBand[23,"end"]+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5)
-	title(main = title, line=-1, cex.main=.75, font.main=1)
-    box(lwd=1.5)
-}
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--in_file", default = NA, type = 'character', help = "input file name"))
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-outfile_on_target = gsub("cnr", "log2", gsub(".cnr", ".ontarget.pdf", opt$in_file, fixed=TRUE), fixed=TRUE)
-outfile_off_target = gsub("cnr", "log2", gsub(".cnr", ".offtarget.pdf", opt$in_file, fixed=TRUE), fixed=TRUE)
-
-data = read.table(file=opt$in_file, header=TRUE, sep="\t", comment.char="#", stringsAsFactors=FALSE)
-data = subset(data, data[,"depth"]!=0)
-
-if (nrow(data)==0) {
-	system(paste0("touch ", outfile_on_target))
-	system(paste0("touch ", outfile_off_target))
-} else {
-	data[,"chromosome"] = gsub(pattern="chr", replacement="", x=data[,"chromosome"], fixed=TRUE)
-	data[data[,"chromosome"]=="X", "chromosome"] = 23
-	data[data[,"chromosome"]=="Y", "chromosome"] = 24
-	data[,"chromosome"] = as.numeric(data[,"chromosome"])
-	data = subset(data, data[,"chromosome"]<=23)
-	
-	if (sum(data$gene=="-")>0) {
-		flag = 1
-	} else if (sum(data$gene=="Antitarget")>0) {
-		flag = 2
-	}
-	
-	if (flag==1) {
-		ontarget = subset(data, data$gene=="-")
-	} else if (flag==2) {
-		ontarget = subset(data, data$gene!="Antitarget")
-	}
-	
-	pdf(file=outfile_on_target, width=10, height=4.25)
-	plot_log2_(x=ontarget, title=gsub("cnvkit/cnr/", "", gsub(".cnr", "", opt$in_file, fixed=TRUE), fixed=TRUE))
-	dev.off()
-	
-	if (flag==1) {
-		offtarget = subset(data, data$gene!="-")
-	} else if (flag==2) {
-		offtarget = subset(data, data$gene=="Antitarget")
-	}
-	
-	tmp = offtarget[,c("chromosome", "start", "log2"),drop=FALSE]
-	tmp = winsorize(data=tmp, tau=3.5, k=25, verbose=FALSE, return.outliers=TRUE)
-	offtarget[tmp$wins.outliers[,3]!=0,"log2"] = NA
-	pdf(file=outfile_off_target, width=10, height=4.25)
-	plot_log2_(x=offtarget, title=gsub("cnvkit/cnr/", "", gsub(".cnr", "", opt$in_file, fixed=TRUE), fixed=TRUE))
-	dev.off()
-}
diff --git a/copy_number/cnvkitplot.mk b/copy_number/cnvkitplot.mk
deleted file mode 100644
index ba16ff8d..00000000
--- a/copy_number/cnvkitplot.mk
+++ /dev/null
@@ -1,16 +0,0 @@
-include modules/Makefile.inc
-include modules/genome_inc/b37.inc
-
-LOGDIR ?= log/cnvkit_plot.$(NOW)
-PHONY += cnvkit cnvkit/log2
-
-cnvkit_plot : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/log2/$(sample).ontarget.pdf cnvkit/log2/$(sample).offtarget.pdf)
-
-define cnvkit-plot
-cnvkit/log2/%.ontarget.pdf cnvkit/log2/%.offtarget.pdf : cnvkit/cnr/%.cnr
-	$$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 4G -m 6G,"$(RSCRIPT) modules/copy_number/cnvkitplot.R --in_file $$(<)")
-endef
- $(foreach sample,$(TUMOR_SAMPLES),\
-		$(eval $(call cnvkit-plot,$(sample))))
-				
-.PHONY: $(PHONY)
diff --git a/copy_number/cnvkitprcomp.R b/copy_number/cnvkitprcomp.R
deleted file mode 100644
index 0353609e..00000000
--- a/copy_number/cnvkitprcomp.R
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("RColorBrewer"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--normal_files", default = NA, type = 'character', help = "normal samples input file names"),
-				  make_option("--tumor_files", default = NA, type = 'character', help = "tumor samples input file names"),
-				  make_option("--out_file_normal", default = NA, type = 'character', help = "normal samples output file name"),
-				  make_option("--out_file_tumor", default = NA, type = 'character', help = "tumor samples output file name"))
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-in_file_normal = unlist(strsplit(x=opt$normal_files, split=" ", fixed=TRUE))
-normal_samples = gsub(".antitargetcoverage", "", x=gsub(".targetcoverage", "", x=gsub(pattern=".cnn", replacement="", x=gsub(pattern="cnvkit/cnn/normal/", replacement="", x=in_file_normal, fixed=TRUE), fixed=TRUE), fixed=TRUE), fixed=TRUE)
-in_file_tumor = unlist(strsplit(x=opt$tumor_files, split=" ", fixed=TRUE))
-tumor_samples = gsub(".antitargetcoverage", "", x=gsub(".targetcoverage", "", x=gsub(pattern=".cnn", replacement="", x=gsub(pattern="cnvkit/cnn/tumor/", replacement="", x=in_file_tumor, fixed=TRUE), fixed=TRUE), fixed=TRUE), fixed=TRUE)
-out_file_normal = opt$out_file_normal
-out_file_tumor = opt$out_file_tumor
-
-depth_n = list()
-for (i in 1:length(in_file_normal)) {
-	print(i)
-	data = read.csv(file=in_file_normal[i], header=TRUE, sep="\t", stringsAsFactors=FALSE)
-	index = data[,"chromosome"] %in% as.character(1:22)
-	depth_n[[i]] = as.numeric(data[index,"depth"])
-}
-depth_n = do.call(cbind, depth_n)
-
-depth_t = list()
-for (i in 1:length(in_file_tumor)) {
-	print(i)
-	data = read.csv(file=in_file_tumor[i], header=TRUE, sep="\t", stringsAsFactors=FALSE)
-	index = data[,"chromosome"] %in% as.character(1:22)
-	depth_t[[i]] = as.numeric(data[index,"depth"])
-}
-depth_t = do.call(cbind, depth_t)
-
-pca_n = prcomp(t(depth_n), center=TRUE, scale.=TRUE)
-pca_t = predict(object=pca_n, newdata=t(depth_t))
-x = c(pca_n$x[,1], pca_t[,1])
-y = c(pca_n$x[,2], pca_t[,2])
-bg = c(rep("grey90", nrow(pca_n$x)), rep("steelblue", nrow(pca_t)))
-col = c(rep("grey50", nrow(pca_n$x)), rep("black", nrow(pca_t)))
-pch = 21
-index = c(rep(TRUE, nrow(pca_n$x)), rep(FALSE, nrow(pca_t)))
-
-pdf(file=out_file_normal, width=9, height=9)
-par(mar = c(6.1, 6.5, 4.1, 1.1))
-plot(x=x, y=y, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "")
-points(x=x[index], y=y[index], col = col[index], bg = bg[index], pch = pch, cex = 1, lwd = .1)
-axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15)
-axis(2, at = NULL, cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15)
-mtext(side = 1, text = "PC 1", line = 4, cex = 1.5)
-mtext(side = 2, text = "PC 2", line = 4, cex = 1.5)
-dev.off()
-
-pdf(file=out_file_tumor, width=9, height=9)
-par(mar = c(6.1, 6.5, 4.1, 1.1))
-plot(x=x, y=y, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "")
-points(x=x[!index], y=y[!index], col = col[!index], bg = bg[!index], pch = pch, cex = 1, lwd = .1)
-axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15)
-axis(2, at = NULL, cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15)
-mtext(side = 1, text = "PC 1", line = 4, cex = 1.5)
-mtext(side = 2, text = "PC 2", line = 4, cex = 1.5)
-dev.off()
-
-pdf(file=gsub("tumor", "all", out_file_tumor, fixed=TRUE), width=9, height=9)
-par(mar = c(6.1, 6.5, 4.1, 1.1))
-plot(x=x, y=y, col = col, bg = bg, pch = pch, cex = 1, lwd = .1, axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "")
-axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15)
-axis(2, at = NULL, cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15)
-mtext(side = 1, text = "PC 1", line = 4, cex = 1.5)
-mtext(side = 2, text = "PC 2", line = 4, cex = 1.5)
-dev.off()
-
-data = rbind(pca_n$x, pca_t)
-rownames(data) = c(normal_samples, tumor_samples)
-colnames(data) = paste("PC", 1:ncol(data))
-file_name = paste0("cnvkit/pca/pc_", ifelse(grepl("offtarget", out_file_tumor, fixed=TRUE), "offtarget", "ontarget"), ".txt")
-write.table(data, file=file_name, sep="\t", col.names=TRUE, row.names=TRUE, quote=FALSE)
diff --git a/copy_number/cnvkitprcomp.mk b/copy_number/cnvkitprcomp.mk
deleted file mode 100644
index e787762f..00000000
--- a/copy_number/cnvkitprcomp.mk
+++ /dev/null
@@ -1,19 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/cnvkit_pca.$(NOW)
-PHONY += cnvkit cnvkit/pca
-
-CNVKIT_NORMAL_ON_TARGET ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).targetcoverage.cnn))
-CNVKIT_NORMAL_OFF_TARGET ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn))
-CNVKIT_TUMOR_ON_TARGET ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcoverage.cnn))
-CNVKIT_TUMOR_OFF_TARGET ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).antitargetcoverage.cnn))
-
-cnvkit : cnvkit/pca/normal_samples_ontarget.pdf cnvkit/pca/normal_samples_offtarget.pdf cnvkit/pca/tumor_samples_ontarget.pdf cnvkit/pca/tumor_samples_offtarget.pdf
-
-cnvkit/pca/normal_samples_ontarget.pdf cnvkit/pca/tumor_samples_ontarget.pdf : $(wildcard cnvkit/cnn/tumor/$(NORMAL_SAMPLES).targetcoverage.cnn) $(wildcard cnvkit/cnn/tumor/$(TUMOR_SAMPLES).targetcoverage.cnn)
-	$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitprcomp.R --normal_files '$(CNVKIT_NORMAL_ON_TARGET)' --tumor_files '$(CNVKIT_TUMOR_ON_TARGET)' --out_file_normal cnvkit/pca/normal_samples_ontarget.pdf --out_file_tumor cnvkit/pca/tumor_samples_ontarget.pdf")
-	
-cnvkit/pca/normal_samples_offtarget.pdf cnvkit/pca/tumor_samples_offtarget.pdf : $(wildcard cnvkit/cnn/tumor/$(NORMAL_SAMPLES).antitargetcoverage.cnn) $(wildcard cnvkit/cnn/tumor/$(TUMOR_SAMPLES).antitargetcoverage.cnn)
-	$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitprcomp.R --normal_files '$(CNVKIT_NORMAL_OFF_TARGET)' --tumor_files '$(CNVKIT_TUMOR_OFF_TARGET)' --out_file_normal cnvkit/pca/normal_samples_offtarget.pdf --out_file_tumor cnvkit/pca/tumor_samples_offtarget.pdf")
-				
-.PHONY: $(PHONY)
diff --git a/copy_number/cnvkitqc.R b/copy_number/cnvkitqc.R
deleted file mode 100644
index a8002d3a..00000000
--- a/copy_number/cnvkitqc.R
+++ /dev/null
@@ -1,140 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--normal_files", default = NA, type = 'character', help = "normal input files"),
-				  make_option("--tumor_files", default = NA, type = 'character', help = "tumor input files"),
-				  make_option("--out_file", default = NA, type = 'character', help = "output file"),
-				  make_option("--option", default = NA, type = 'character', help = "1-0 for ontarget or offtarget"))
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-normal_files = unlist(strsplit(x=opt$normal_files, split=" ", fixed=TRUE))
-normal_samples = gsub(pattern=".cnr", replacement="", x=gsub(pattern="cnvkit/cnr/", replacement="", x=normal_files, fixed=TRUE), fixed=TRUE)
-tumor_files = unlist(strsplit(x=opt$tumor_files, split=" ", fixed=TRUE))
-tumor_samples = gsub(pattern=".cnr", replacement="", x=gsub(pattern="cnvkit/cnr/", replacement="", x=tumor_files, fixed=TRUE), fixed=TRUE)
-out_file = opt$out_file
-
-'MAD' <- function(x)
-{
-	x = na.omit(x)
-	q2 = mad(x)
-	return(invisible(q2))
-}
-
-'MAPD' <- function(x)
-{
-	x = na.omit(x)
-	q2 = median(abs(x[1:(length(x)-1)] - x[2:length(x)]))
-	return(invisible(q2))
-}
-
-'MIQR' <- function(x)
-{
-	x = na.omit(x)
-	iq = stats::IQR(abs(x[1:(length(x)-1)] - x[2:length(x)]))
-	return(invisible(iq))
-}
-
-'scale.' <- function(x)
-{
-	y = (x-min(x))/(max(x)-min(x))
-	return(invisible(y))
-}
-
-'transparentRgb' <- function (col = "black", alpha = 85) 
-{
-    tmp = c(col2rgb(col), alpha, 255)
-    names(tmp) = c("red", "green", "blue", "alpha", "maxColorValue")
-    out = do.call("rgb", as.list(tmp))
-    return(invisible(out))
-}
-
-
-qc = matrix(NA, nrow=length(c(normal_samples, tumor_samples)), ncol=3, dimnames=list(c(normal_samples, tumor_samples), c("MAD", "MAPD", "IQR")))
-for (i in 1:length(normal_files)) {
-	print(i)
-	data = read.csv(file=normal_files[i], header=TRUE, sep="\t", stringsAsFactors=FALSE)
-	index = data[,"chromosome"] %in% 1:22 & data[,"gene"] == ifelse(opt$option==1, "-", "Antitarget")
-	qc[normal_samples[i],1] = MAD(data[index,"log2"])
-	qc[normal_samples[i],2] = MAPD(data[index,"log2"])
-	qc[normal_samples[i],3] = MIQR(data[index,"log2"])
-}
-for (i in 1:length(tumor_files)) {
-	print(i)
-	data = read.csv(file=tumor_files[i], header=TRUE, sep="\t", stringsAsFactors=FALSE)
-	index = data[,"chromosome"] %in% 1:22 & data[,"gene"] == ifelse(opt$option==1, "-", "Antitarget")
-	qc[tumor_samples[i],1] = MAD(data[index,"log2"])
-	qc[tumor_samples[i],2] = MAPD(data[index,"log2"])
-	qc[tumor_samples[i],3] = MIQR(data[index,"log2"])
-}
-data = qc
-colnames(data) = c("MAD", "MAPD", "IQR")
-data = cbind("SAMPLE_NAME"=c(normal_samples, tumor_samples), "SAMPLE_TYPE"=c(rep("N", length(normal_samples)), rep("T", length(tumor_samples))), data)
-write.table(data, file=out_file, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
-
-# MAPD
-file_name = paste0("cnvkit/qc/", ifelse(opt$option==1, "on", "off"), "target_mapd.pdf")
-x = as.numeric(data[data[,"SAMPLE_TYPE"]=="T", "MAPD"])
-y = as.numeric(data[data[,"SAMPLE_TYPE"]=="N", "MAPD"])
-dx = density(x, from=0, to=max(x,y))
-dx$y = scale.(dx$y)
-dy = density(y, from=0, to=max(x,y))
-dy$y = scale.(dy$y)
-pdf(file=file_name, width=7, height=7)
-par(mar = c(6.1, 6.5, 4.1, 1.1))
-plot(0, 0, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "", xlim=c(0, max(max(x, y), 1.5)), ylim=c(0,1.2))
-polygon(x=c(dx$x, rev(dx$x)), y=c(dx$y, rep(0, length(dx$y))), border="steelblue", col=transparentRgb("steelblue", 155), lwd=2)
-polygon(x=c(dy$x, rev(dy$x)), y=c(dy$y, rep(0, length(dy$y))), border="grey50", col=transparentRgb("grey50", 155), lwd=2)
-legend("topright", col=c("steelblue", "grey50"), pch=15, legend=c("Tumor", "Normal"), box.lwd=-1)
-axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15)
-axis(2, at = seq(0, 1, by=.2), labels = seq(0, 1, by=.2), cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15)
-mtext(side = 1, text = "MAPD", line = 4, cex = 1.5)
-mtext(side = 2, text = "Density", line = 4, cex = 1.5)
-dev.off()
-
-# MAD
-file_name = paste0("cnvkit/qc/", ifelse(opt$option==1, "on", "off"), "target_mad.pdf")
-x = as.numeric(data[data[,"SAMPLE_TYPE"]=="T", "MAD"])
-y = as.numeric(data[data[,"SAMPLE_TYPE"]=="N", "MAD"])
-dx = density(x, from=0, to=max(x,y))
-dx$y = scale.(dx$y)
-dy = density(y, from=0, to=max(x,y))
-dy$y = scale.(dy$y)
-pdf(file=file_name, width=7, height=7)
-par(mar = c(6.1, 6.5, 4.1, 1.1))
-plot(0, 0, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "", xlim=c(0, max(max(x, y), 1.5)), ylim=c(0,1.2))
-polygon(x=c(dx$x, rev(dx$x)), y=c(dx$y, rep(0, length(dx$y))), border="steelblue", col=transparentRgb("steelblue", 155), lwd=2)
-polygon(x=c(dy$x, rev(dy$x)), y=c(dy$y, rep(0, length(dy$y))), border="grey50", col=transparentRgb("grey50", 155), lwd=2)
-legend("topright", col=c("steelblue", "grey50"), pch=15, legend=c("Tumor", "Normal"), box.lwd=-1)
-axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15)
-axis(2, at = seq(0, 1, by=.2), labels = seq(0, 1, by=.2), cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15)
-mtext(side = 1, text = "MAD", line = 4, cex = 1.5)
-mtext(side = 2, text = "Density", line = 4, cex = 1.5)
-dev.off()
-
-# IQR
-file_name = paste0("cnvkit/qc/", ifelse(opt$option==1, "on", "off"), "target_iqr.pdf")
-x = as.numeric(data[data[,"SAMPLE_TYPE"]=="T", "IQR"])
-y = as.numeric(data[data[,"SAMPLE_TYPE"]=="N", "IQR"])
-dx = density(x, from=0, to=max(x,y))
-dx$y = scale.(dx$y)
-dy = density(y, from=0, to=max(x,y))
-dy$y = scale.(dy$y)
-pdf(file=file_name, width=7, height=7)
-par(mar = c(6.1, 6.5, 4.1, 1.1))
-plot(0, 0, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "", xlim=c(0, max(max(x, y), 1.5)), ylim=c(0,1.2))
-polygon(x=c(dx$x, rev(dx$x)), y=c(dx$y, rep(0, length(dx$y))), border="steelblue", col=transparentRgb("steelblue", 155), lwd=2)
-polygon(x=c(dy$x, rev(dy$x)), y=c(dy$y, rep(0, length(dy$y))), border="grey50", col=transparentRgb("grey50", 155), lwd=2)
-legend("topright", col=c("steelblue", "grey50"), pch=15, legend=c("Tumor", "Normal"), box.lwd=-1)
-axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15)
-axis(2, at = seq(0, 1, by=.2), labels = seq(0, 1, by=.2), cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15)
-mtext(side = 1, text = "IQR", line = 4, cex = 1.5)
-mtext(side = 2, text = "Density", line = 4, cex = 1.5)
-dev.off()
-
diff --git a/copy_number/cnvkitqc.mk b/copy_number/cnvkitqc.mk
deleted file mode 100644
index ab73e82f..00000000
--- a/copy_number/cnvkitqc.mk
+++ /dev/null
@@ -1,28 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/cnvkit_qc.$(NOW)
-PHONY += cnvkit cnvkit/qc
-
-CNVKIT_NORMAL ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnr/$(sample).cnr))
-CNVKIT_TUMOR ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr))
-CNVKIT_NORMAL_ON_TARGET ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).targetcoverage.cnn))
-CNVKIT_NORMAL_OFF_TARGET ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn))
-CNVKIT_TUMOR_ON_TARGET ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcoverage.cnn))
-CNVKIT_TUMOR_OFF_TARGET ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).antitargetcoverage.cnn))
-
-cnvkit : cnvkit/qc/qc_ontarget.tsv cnvkit/qc/qc_offtarget.tsv cnvkit/qc/bin_qc_ontarget.tsv cnvkit/qc/bin_qc_offtarget.tsv
-
-cnvkit/qc/qc_ontarget.tsv : $(wildcard cnvkit/cnr/$(SAMPLES).cnr)
-	$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 8G -m 16G,"$(RSCRIPT) modules/copy_number/cnvkitqc.R --normal_files '$(CNVKIT_NORMAL)' --tumor_files '$(CNVKIT_TUMOR)' --out_file cnvkit/qc/qc_ontarget.tsv --option 1")
-	
-cnvkit/qc/qc_offtarget.tsv : $(wildcard cnvkit/cnr/$(SAMPLES).cnr)
-	$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 8G -m 16G,"$(RSCRIPT) modules/copy_number/cnvkitqc.R --normal_files '$(CNVKIT_NORMAL)' --tumor_files '$(CNVKIT_TUMOR)' --out_file cnvkit/qc/qc_offtarget.tsv --option 0")
-	
-cnvkit/qc/bin_qc_ontarget.tsv : $(wildcard cnvkit/cnn/normal/$(NORMAL_SAMPLES).targetcoverage.cnn) $(wildcard cnvkit/cnn/tumor/$(TUMOR_SAMPLES).targetcoverage.cnn)
-	$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitbinqc.R --normal_files '$(CNVKIT_NORMAL_ON_TARGET)' --tumor_files '$(CNVKIT_TUMOR_ON_TARGET)' --out_file cnvkit/qc/bin_qc_ontarget.tsv")
-	
-cnvkit/qc/bin_qc_offtarget.tsv : $(wildcard cnvkit/cnn/normal/$(NORMAL_SAMPLES).antitargetcoverage.cnn) $(wildcard cnvkit/cnn/tumor/$(TUMOR_SAMPLES).antitargetcoverage.cnn)
-	$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitbinqc.R --normal_files '$(CNVKIT_NORMAL_OFF_TARGET)' --tumor_files '$(CNVKIT_TUMOR_OFF_TARGET)' --out_file cnvkit/qc/bin_qc_offtarget.tsv")
-
-
-.PHONY: $(PHONY)
diff --git a/copy_number/cnvkitreference.mk b/copy_number/cnvkitreference.mk
deleted file mode 100644
index f4932a9f..00000000
--- a/copy_number/cnvkitreference.mk
+++ /dev/null
@@ -1,13 +0,0 @@
-include modules/Makefile.inc
-include modules/genome_inc/b37.inc
-
-LOGDIR ?= log/cnvkit_reference.$(NOW)
-PHONY += cnvkit cnvkit/reference
-
-cnvkit_reference : cnvkit/reference/combined_reference.cnr
-
-cnvkit/reference/combined_reference.cnr : $(wildcard cnvkit/cnn/normal/$(NORMAL_SAMPLES).targetcoverage.cnn) $(wildcard cnvkit/cnn/normal/$(NORMAL_SAMPLES).antitargetcoverage.cnn)
-	$(call RUN,-n 1 -s 24G -m 32G,"cnvkit.py reference cnvkit/cnn/normal/*.cnn -f $(REF_FASTA) --no-edge -o cnvkit/reference/combined_reference.cnr")
-		
-.PHONY: $(PHONY)
-
diff --git a/copy_number/cnvkitsegment.mk b/copy_number/cnvkitsegment.mk
deleted file mode 100644
index 7c051d3d..00000000
--- a/copy_number/cnvkitsegment.mk
+++ /dev/null
@@ -1,23 +0,0 @@
-include modules/Makefile.inc
-include modules/genome_inc/b37.inc
-
-LOGDIR ?= log/cnvkit_segment.$(NOW)
-PHONY += cnvkit cnvkit/segmented cnvkit/totalcopy cnvkit/called
-
-cnvkit_segment : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/totalcopy/$(sample).RData) $(foreach sample,$(TUMOR_SAMPLES),cnvkit/segmented/$(sample).pdf) $(foreach sample,$(TUMOR_SAMPLES),cnvkit/called/$(sample).RData)
-
-define cnvkit-totalcopy
-cnvkit/segmented/%.pdf cnvkit/totalcopy/%.RData : cnvkit/cnr/%.cnr
-	$$(call RUN,-c -v $(ASCAT_ENV) -s 6G -m 12G,"mkdir -p cnvkit/segmented && \
-												 mkdir -p cnvkit/totalcopy && \
-												 $(RSCRIPT) modules/copy_number/cnvkit.R --type total-copy --sample_name $$(*)")
-												 
-cnvkit/called/%.RData : cnvkit/totalcopy/%.RData
-	$$(call RUN,-c -v $(ASCAT_ENV) -s 6G -m 12G,"mkdir -p cnvkit/called && \
-												 $(RSCRIPT) modules/copy_number/cnvkit.R --type call-cna --sample_name $$(*)")
-
-endef
- $(foreach sample,$(TUMOR_SAMPLES),\
-		$(eval $(call cnvkit-totalcopy,$(sample))))
-	
-.PHONY: $(PHONY)
diff --git a/copy_number/cnvkitsummary.R b/copy_number/cnvkitsummary.R
deleted file mode 100755
index cfa7bf5b..00000000
--- a/copy_number/cnvkitsummary.R
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("RColorBrewer"))
-suppressPackageStartupMessages(library("GenomicRanges"))
-suppressPackageStartupMessages(library("plyr"))
-suppressPackageStartupMessages(library("dplyr"))
-suppressPackageStartupMessages(library("stringr"))
-suppressPackageStartupMessages(library("tidyr"))
-suppressPackageStartupMessages(library("magrittr"))
-suppressPackageStartupMessages(library("foreach"))
-suppressPackageStartupMessages(library("rtracklayer"))
-suppressPackageStartupMessages(library("grid"))
-suppressPackageStartupMessages(library("rlist"))
-
-optList <- list(
-				make_option("--sample_names", default = NULL, help = "list of sample names")
-			   )
-
-parser <- OptionParser(usage = "%prog [options] [facets files]", option_list = optList)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-sample_names = unlist(strsplit(opt$sample_names, split=" ", fixed=TRUE))
-genes = read.csv(file="~/share/reference/annotation_gene_lists/annotation_impact_468.txt", header=TRUE, sep="\t", stringsAsFactors=FALSE) %>%
-		filter(Chromosome %in% as.character(c(1:22, "X", "Y"))) %>%
-		filter(!duplicated(Gene_Symbol)) %>%
-		arrange(as.integer(Chromosome), Start, End)
-
-genes_granges = genes %$%
-				GRanges(seqnames = Chromosome, ranges = IRanges(Start, End), Gene_Symbol = Gene_Symbol)
-mm = lapply(1:length(sample_names), function(i, sample_names, genes, genes_granges) {
-	cat(i, "of", length(sample_names), "\n")
-    load(paste0("cnvkit/called/", sample_names[i], ".RData"))
-	tmp[tmp[,"Chromosome"]==23,"Chromosome"] = "X"
-	tmp[tmp[,"Chromosome"]==24,"Chromosome"] = "Y"
-	tmp_granges = tmp %$% GRanges(seqnames = Chromosome, ranges = IRanges(Start, End))
-	mcols(tmp_granges) = tmp %>% select(Cat5)
-	fo = findOverlaps(tmp_granges, genes_granges)
-	x = mcols(genes_granges)[subjectHits(fo),]
-	y = mcols(tmp_granges)[queryHits(fo),]
-	df = data.frame("Gene_Symbol"=x, "Cat5"=y)
-	df = df %>%
-		 group_by(Gene_Symbol) %>%
-		 top_n(1, abs(Cat5))
-	z = as.numeric(df$Cat5)
-	names(z) = as.character(df$Gene_Symbol)
-	z = z[names(z) %in% genes[,1]]
-	res = rep(NA, nrow(genes))
-	names(res) = genes[,1]
-	res[names(z)] = z
-	return(res)
-}, sample_names, genes, genes_granges)
-bygene = do.call(cbind, mm)
-colnames(bygene) = sample_names
-bygene = cbind(genes, bygene) %>%
-	 	 arrange(as.integer(Chromosome), Start, End)
-
-save(bygene, file="cnvkit/summary/bygene.RData")
-write.table(bygene, file="cnvkit/summary/bygene.txt", sep="\t", col.names=TRUE, row.names=FALSE, na="", quote=FALSE)
diff --git a/copy_number/cnvkitsummary.mk b/copy_number/cnvkitsummary.mk
deleted file mode 100644
index 41aeeffb..00000000
--- a/copy_number/cnvkitsummary.mk
+++ /dev/null
@@ -1,13 +0,0 @@
-include modules/Makefile.inc
-include modules/genome_inc/b37.inc
-
-LOGDIR ?= log/cnvkit_summary.$(NOW)
-PHONY += cnvkit cnvkit/summary
-
-cnvkit_summary : cnvkit/summary/bygene.txt
-
-cnvkit/summary/bygene.txt : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/called/$(sample).RData)
-	$(call RUN,-c -s 24G -m 48G,"mkdir -p cnvkit/summary && \
-							 	 $(RSCRIPT) modules/copy_number/cnvkitsummary.R --sample_names '$(TUMOR_SAMPLES)'")
-												 
-.PHONY: $(PHONY)
diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk
new file mode 100644
index 00000000..dfd9dfa3
--- /dev/null
+++ b/copy_number/facets_suite.mk
@@ -0,0 +1,72 @@
+include modules/Makefile.inc
+# Log directory for this module; assigned with ?= so a value that is already set wins.
+LOGDIR ?= log/facets_suite.$(NOW)
+# Overridable defaults forwarded to the wrapper scripts in the rules below (--max-depth, --cval, --purity-cval, --min-nhet, --purity-min-nhet, --snp-window-size, --normal-depth).
+FACETS_MAX_DEPTH ?= 15000
+FACETS_CVAL ?= 50
+FACETS_PURITY_CVAL ?= 30
+FACETS_MIN_NHET ?= 15
+FACETS_PURITY_MIN_NHET ?= 10
+SNP_WINDOW_SIZE ?= 250
+NORMAL_DEPTH ?= 25
+# Aggregate goal: the target-restricted dbSNP VCF, one pileup and one taskcomplete marker per sample pair, and the summary table.
+facets_suite : facets_suite/targets_dbsnp.vcf \
+	       $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/$(pair).snp_pileup.gz) \
+	       $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/taskcomplete) \
+	       facets_suite/summary.txt
+# Intersect $(DBSNP) (-a) with $(TARGETS_FILE) (-b); per bedtools docs, -u reports each overlapping -a record once and -header keeps the -a header.
+facets_suite/targets_dbsnp.vcf : $(TARGETS_FILE)
+	$(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@
+	
+# Rule template (args: tumor, normal), instantiated per sample pair below. NOTE(review): $(<<) and $(<<<) are not GNU make built-ins; presumably modules/Makefile.inc defines them as the 2nd/3rd prerequisite - confirm.
+define snp-pileup
+facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/targets_dbsnp.vcf bam/$1.bam bam/$2.bam
+	$$(call RUN,-c -s 2G -m 4G -v $(FACETS_SUITE_ENV),"set -o pipefail && \
+							   snp-pileup-wrapper.R --verbose \
+							   -sp /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/bin/snp-pileup \
+							   --vcf-file $$(<) \
+							   --tumor-bam $$(<<) \
+							   --normal-bam $$(<<<) \
+							   --output-prefix facets_suite/$1_$2/$1_$2 \
+							   --pseudo-snps NULL \
+							   --max-depth $$(FACETS_MAX_DEPTH)")
+	
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call snp-pileup,$(tumor.$(pair)),$(normal.$(pair)))))
+# Rule template (args: tumor, normal): run run-facets-wrapper.R on the pair's pileup; the taskcomplete marker is written only if the wrapper exits successfully (&&).
+define run-facets
+facets_suite/$1_$2/taskcomplete : facets_suite/$1_$2/$1_$2.snp_pileup.gz
+	$$(call RUN,-c -s 4G -m 6G -v $(FACETS_SUITE_ENV),"set -o pipefail && \
+							   run-facets-wrapper.R --verbose \
+							   --counts-file $$(<) \
+							   --sample-id $1_$2 \
+							   --directory facets_suite/$1_$2/ \
+							   --everything \
+							   --genome hg19 \
+							   --cval $$(FACETS_CVAL) \
+							   --purity-cval $$(FACETS_PURITY_CVAL) \
+							   --min-nhet $$(FACETS_MIN_NHET) \
+							   --purity-min-nhet $$(FACETS_PURITY_MIN_NHET) \
+							   --snp-window-size $$(SNP_WINDOW_SIZE) \
+							   --normal-depth $$(NORMAL_DEPTH) \
+							   --seed 0 \
+							   --legacy-output True \
+							   --facets-lib-path /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/lib/R/library/ && \
+							   echo 'finished!' > $$(@)")
+	
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call run-facets,$(tumor.$(pair)),$(normal.$(pair)))))
+		
+# Runs after every pair's taskcomplete marker exists. NOTE(review): the recipe never names $@, so facets_suite.R --option 1 presumably writes facets_suite/summary.txt itself - confirm.
+facets_suite/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/taskcomplete)
+	$(call RUN, -c -n 1 -s 24G -m 48G -v $(INNOVATION_ENV),"set -o pipefail && \
+								$(RSCRIPT) $(SCRIPTS_DIR)/facets_suite.R --option 1 --sample_pairs '$(SAMPLE_PAIRS)'")
+					  
+# Parse-time side effect (:= with $(shell)): each time this makefile is read, create version/ and overwrite version/facets_suite.txt with the env's R --version output.
+..DUMMY := $(shell mkdir -p version; \
+	     $(FACETS_SUITE_ENV)/bin/R --version > version/facets_suite.txt)
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: facets_suite
diff --git a/copy_number/genomealtered.R b/copy_number/genomealtered.R
deleted file mode 100644
index c2e10564..00000000
--- a/copy_number/genomealtered.R
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--file_in", default = NA, type = 'character', help = "input file name"),
-				  make_option("--file_out", default = NA, type = 'character', help = "output file name"))
-
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-load(opt$file_in)
-alpha = ifelse(is.na(fit$purity), 1, fit$purity)
-psi = ifelse(is.na(fit$ploidy), 2, fit$ploidy)
-gamma = 1
-x = fit$cncf[,"cnlr.median"]
-absolute_copies = round(((((2^(x/gamma))*(alpha*psi+(1-alpha)*2)) - ((1-alpha)*2))/alpha))
-index = absolute_copies!=round(psi)
-if (sum(index, na.rm=TRUE)!=0) {
-	genome_footprint = sum(as.numeric(fit$cncf[,"end"]-fit$cncf[,"start"]), na.rm=TRUE)
-	genome_altered = sum(as.numeric(fit$cncf[index,"end"]-fit$cncf[index,"start"]), na.rm=TRUE)/genome_footprint
-} else {
-	genome_altered = 0
-}
-cat(paste0(gsub("facets/cncf/","", gsub(".Rdata", "", opt$file_in)), "\t", genome_altered), file = opt$file_out, append=FALSE)
-cat("\n", file = opt$file_out, append=TRUE)
-
-warnings()
diff --git a/copy_number/genomealtered.mk b/copy_number/genomealtered.mk
deleted file mode 100644
index 66402738..00000000
--- a/copy_number/genomealtered.mk
+++ /dev/null
@@ -1,18 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/genome_altered.$(NOW)
-PHONY += genome_stats
-
-genome_altered : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).fga)
-
-define fraction-genome-altered
-genome_stats/$1_$2.fga : facets/cncf/$1_$2.Rdata
-	$$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/genomealtered.R --file_in $$< --file_out genome_stats/$1_$2.fga")
-endef
-$(foreach pair,$(SAMPLE_PAIRS),\
-		$(eval $(call fraction-genome-altered,$(tumor.$(pair)),$(normal.$(pair)))))
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
-
diff --git a/copy_number/lstscore.R b/copy_number/lstscore.R
deleted file mode 100644
index 517eaf5d..00000000
--- a/copy_number/lstscore.R
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--file_in", default = NA, type = 'character', help = "input file name"),
-				  make_option("--file_out", default = NA, type = 'character', help = "output file name"))
-
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-chromStrToNum <- function(str) {
-	suppressWarnings(cNum <- as.numeric(str))
-	if (is.na(cNum) && str == "X" ) { 
-		cNum <- 23
-	} else if (is.na(cNum) && str == "Y") {
-		cNum <- 24 
-	}
-	return(invisible(cNum))
-}
-
-GetChrominfo <- function() {
-  f <- "modules/copy_number/hg19_chrominfo.txt"
-  chrom <- read.table(file=f)
-  chrom <- subset(chrom, grepl("^chr[0-9XY]{1,2}$", chrom[,1]))
-  f <- "modules/copy_number/hg19_gaps.txt"
-  gaps <- read.table(file=f)
-  centro <- subset(gaps, gaps[,8] == "centromere")
-  chrominfo <- merge(chrom[,1:2], centro[,2:4], by.x = 1, by.y = 1) 
-  chrominfo$centromere <- rowMeans(chrominfo[,3:4]) 
-  chrominfo <- chrominfo[,c(1,2,5,3,4)] 
-  colnames(chrominfo) <- c("chr", "size", "centromere", "centstart", "centend")
-  chrominfo[,1] <- as.character(chrominfo[,1])
-  chrominfo$chr <- sub("chr", "", chrominfo$chr)
-  chrominfo$chr <- sub("X", "23", chrominfo$chr)
-  chrominfo$chr <- sub("Y", "24", chrominfo$chr)
-  chrominfo[,1] <- as.numeric(chrominfo[,1])
-  chrominfo <- chrominfo[order(chrominfo$chr), ]  
-  rownames(chrominfo) <- as.character(chrominfo[,1])
-  chrominfo <- as.matrix(chrominfo)
-  return(invisible(chrominfo))
-}
-
-fix_facets_column_names <- function(dat) {
-	colnames(dat)[which(colnames(dat)=="chrom")] <- "chromosome"
-	colnames(dat)[which(colnames(dat)=="loc.start")] <- "startBP"
-	colnames(dat)[which(colnames(dat)=="loc.end")] <- "endBP"
-	colnames(dat)[which(colnames(dat)=="lcn.em")] <- "nB"
-	sz <- dat[,"endBP"] - dat[,"startBP"]
-	dat <- cbind(dat, size=sz)
-    nA <- dat[,"tcn.em"] - dat[,"nB"]
-    dat <- cbind(dat, nA=nA)
-	return(invisible(dat))
-}
-
-join_adjacent_segments <- function(dat) {
-	cur_segs <- dat
-	something_changed <- 1
-	while ( something_changed ) {
-		new_segs <- c()
-		something_changed <- 0
-		x <- 2
-		last_changed <- 0
-		while (x <= nrow(cur_segs)) {
-			last_changed <- 0
-			if ( 	(cur_segs[x-1,"nB"] == cur_segs[x,"nB"]) && 
-					(cur_segs[x-1,"nA"] == cur_segs[x,"nA"]) &&
-					(cur_segs[x-1,"chromosome"] == cur_segs[x,"chromosome"])
-			) {
-				t <- cur_segs[x-1,]
-				t["endBP"] <- cur_segs[x,"endBP"]
-				t["end"] <- cur_segs[x,"end"]
-				t["size"] <- t["endBP"] - t["startBP"]
-				something_changed <- 1
-				new_segs <- rbind(t, new_segs)
-				x <- x+2
-				last_changed <- 1
-			} else {
-				new_segs <- rbind(cur_segs[x-1,], new_segs)
-				x<-x+1
-			}
-		}
-		if (! last_changed ) {
-			new_segs <- rbind(cur_segs[x-1,],new_segs)
-		}
-		n <- nrow(new_segs)
-		new_segs <- new_segs[n:1,]
-		cur_segs <- new_segs
-	}	
-	return(invisible(cur_segs))
-}
-
-fix_facet_segs <- function(dat) {
-    i <- which(is.na(dat$nB))
-    if ( length(i) > 0 )  {
-        dat <- dat[-i, ]
-    }
-    dat <- join_adjacent_segments(dat)
-    return(invisible(dat))
-}
-
-chrom_arm_LST_score <- function(dat) {
-	score <- 0
-	segs <- c()
-	SIZE_THRESH <- 10e6
-	SPACE_THRESH <- 3e6
-	if ( nrow(dat) >= 2 ) {
-		for (x in 2:nrow(dat)) {
-			if ( 	(dat[x-1,"size"] >= SIZE_THRESH) && 
-					(dat[x,"size"] >= SIZE_THRESH) &&
-					( (dat[x,"startBP"] - dat[x-1,"endBP"]) <= SPACE_THRESH)
-			) {
-				score <- score +1
-				segs <- rbind(dat[x-1,], segs)
-			}
-		}
-	}
-	tmp <- list()
-	tmp$score <- score
-	tmp$segs <- segs
-	return(invisible(tmp))
-}
-
-lst_filter <- function(dat, size_thresh) {
-	i <- which(dat[,"size"] < size_thresh)
-	sz <- dat[i,"size"]
-	i <- i[order(sz)]
-	segs_removed <- 0
-	while (length(i) > 0) {
-		dat <- dat[-i[1], ]
-		dat <- join_adjacent_segments(dat)
-		i<- which(dat[,"size"] < size_thresh)
-		sz <- dat[i,"size"]
-		i <- i[order(sz)]	
-		segs_removed <- segs_removed + 1
-	}
-	return(invisible(dat))
-}
-
-score_LST <- function(dat, chromInfo) {
-	score <- 0
-	segs <- c()
-	dat <- lst_filter(dat, 3e6)
-	for (c in unique(dat[,"chromosome"]) ) {
-		i <- which(dat[,"chromosome"] == c)
-		csegs <- dat[i,]
-		cNum <- chromStrToNum(c)
-		i <- which(csegs[,"startBP"] <= chromInfo[cNum,"centstart"])
-		parm <- csegs[i,]
-		tmp <- chrom_arm_LST_score(parm)
-		score <- score + tmp$score
-		segs <- rbind(tmp$segs, segs)
-		i <- which(csegs[,"endBP"] >= chromInfo[cNum,"centend"])
-		qarm <- csegs[i,]
-		tmp <- chrom_arm_LST_score(qarm)
-		score <- score + tmp$score
-		segs <- rbind(tmp$segs, segs)
-	}
-	tmp <- list()
-	tmp$score <- score
-	tmp$segs <- segs
-	return(invisible(tmp))
-}
-
-dat = read.table(opt$file_in, sep="\t", header=TRUE, stringsAsFactor=FALSE)
-dat = fix_facets_column_names(dat)
-segs = fix_facet_segs(dat)
-chromInfo = GetChrominfo()
-lst = score_LST(segs, chromInfo)
-cat(paste0(gsub("facets/cncf/","", gsub(".cncf.txt", "", opt$file_in)), "\t", lst$score), file = opt$file_out, append=FALSE)
-cat("\n", file = opt$file_out, append=TRUE)
-
-warnings()
-
diff --git a/copy_number/lstscore.mk b/copy_number/lstscore.mk
deleted file mode 100644
index b8664c7d..00000000
--- a/copy_number/lstscore.mk
+++ /dev/null
@@ -1,17 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/lst_score.$(NOW)
-PHONY += genome_stats
-
-lst_score : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).lst)
-
-define lst-score
-genome_stats/$1_$2.lst : facets/cncf/$1_$2.txt
-	$$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/lstscore.R --file_in $$< --file_out genome_stats/$1_$2.lst")
-endef
-$(foreach pair,$(SAMPLE_PAIRS),\
-		$(eval $(call lst-score,$(tumor.$(pair)),$(normal.$(pair)))))
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk
new file mode 100644
index 00000000..faa0c457
--- /dev/null
+++ b/copy_number/medicc2.mk
@@ -0,0 +1,74 @@
+include modules/Makefile.inc
+# Log directory for this module; assigned with ?= so a value that is already set wins.
+LOGDIR ?= log/medicc2.$(NOW)
+# Aggregate goal (named "medicc", not "medicc2"): one .txt per tumor sample, plus a .txt, .tsv and _summary.tsv per sample set.
+medicc : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt) \
+	 $(foreach set,$(SAMPLE_SETS),medicc2/$(set)/$(set).txt) \
+	 $(foreach set,$(SAMPLE_SETS),medicc2/$(set)/$(set).tsv) \
+	 $(foreach set,$(SAMPLE_SETS),medicc2/$(set)/$(set)_summary.tsv)
+# Rule template (args: tumor, normal): medicc2.R --option 1 reads the pair's facets/cncf .Rdata and writes medicc2/<tumor>/<tumor>.txt; instantiated per sample pair below.
+define collect-copy-number
+medicc2/$1/$1.txt : facets/cncf/$1_$2.Rdata
+	$$(call RUN,-c -n 1 -s 1G -m 2G -v $(MEDICC_ENV),"set -o pipefail && \
+							  $(RSCRIPT) $(SCRIPTS_DIR)/medicc2.R \
+							  --option 1 \
+							  --tumor_sample_name $1 \
+							  --normal_sample_name $2 \
+							  --file_in $$(<) \
+							  --file_out $$(@)")
+	
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call collect-copy-number,$(tumor.$(pair)),$(normal.$(pair)))))
+
+# Rule template (arg: sample set): --option 2 builds the set .txt, --option 3 derives the .tsv. NOTE(review): same target pattern as the per-tumor rule (a set named like a tumor would collide), and prerequisites are all TUMOR_SAMPLES, not only the set's tumors - confirm both are intended.
+define aggregate-copy-number
+medicc2/$1/$1.txt : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt)
+	$$(call RUN,-c -n 1 -s 2G -m 4G -v $(MEDICC_ENV),"set -o pipefail && \
+							  $(RSCRIPT) $(SCRIPTS_DIR)/medicc2.R \
+							  --option 2 \
+							  --tumor_sample_name '$(tumors.$1)' \
+							  --normal_sample_name '$(normal.$1)' \
+							  --file_out $$(@)")
+
+medicc2/$1/$1.tsv : medicc2/$1/$1.txt
+	$$(call RUN,-c -n 1 -s 2G -m 4G -v $(MEDICC_ENV),"set -o pipefail && \
+							  $(RSCRIPT) $(SCRIPTS_DIR)/medicc2.R \
+							  --option 3 \
+							  --tumor_sample_name '$(tumors.$1)' \
+							  --normal_sample_name '$(normal.$1)' \
+							  --file_in $$(<) \
+							  --file_out $$(@)")
+
+endef
+$(foreach set,$(SAMPLE_SETS),\
+		$(eval $(call aggregate-copy-number,$(set))))
+		
+# Rule template (arg: sample set): run $(MEDICC) on the set's .tsv with medicc2/<set>/ as output directory. NOTE(review): the recipe never references $@, so the tool presumably writes <set>_summary.tsv itself; RUN -n 4 presumably matches --n-cores 4 - confirm.
+define r-medicc2
+medicc2/$1/$1_summary.tsv : medicc2/$1/$1.tsv
+	$$(call RUN,-c -n 4 -s 2G -m 4G -v $(MEDICC_ENV),"set -o pipefail && \
+							  $$(MEDICC) \
+							  $$(<) \
+							  medicc2/$1/ \
+							  --input-type tsv \
+							  --normal-name diploid \
+							  --total-copy-numbers \
+							  --input-allele-columns 'nAB' \
+							  --plot both \
+							  --maxcn 8 \
+							  --bootstrap-method 'segment-wise' \
+							  --bootstrap-nr 100 \
+							  --n-cores 4")
+
+endef
+$(foreach set,$(SAMPLE_SETS),\
+		$(eval $(call r-medicc2,$(set))))
+
+# Parse-time side effect (:= with $(shell)): each time this makefile is read, create version/ and rewrite version/medicc2.txt with R --version followed by the medicc2 --help output.
+..DUMMY := $(shell mkdir -p version; \
+	     $(MEDICC_ENV)/bin/R --version > version/medicc2.txt; \
+	     $(MEDICC_ENV)/bin/medicc2 --help >> version/medicc2.txt)
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: medicc
diff --git a/copy_number/myriadhrdscore.R b/copy_number/myriadhrdscore.R
deleted file mode 100644
index 392fa195..00000000
--- a/copy_number/myriadhrdscore.R
+++ /dev/null
@@ -1,185 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--file_in", default = NA, type = 'character', help = "input file name"),
-				  make_option("--file_out", default = NA, type = 'character', help = "output file name"))
-
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-chromStrToNum <- function(str) {
-	suppressWarnings(cNum <- as.numeric(str))
-	if (is.na(cNum) && str == "X" ) { 
-		cNum <- 23
-	} else if (is.na(cNum) && str == "Y") {
-		cNum <- 24 
-	}
-	return(invisible(cNum))
-}
-
-GetChrominfo <- function() {
-  f <- "modules/copy_number/hg19_chrominfo.txt"
-  chrom <- read.table(file=f)
-  chrom <- subset(chrom, grepl("^chr[0-9XY]{1,2}$", chrom[,1]))
-  f <- "modules/copy_number/hg19_gaps.txt"
-  gaps <- read.table(file=f)
-  centro <- subset(gaps, gaps[,8] == "centromere")
-  chrominfo <- merge(chrom[,1:2], centro[,2:4], by.x = 1, by.y = 1) 
-  chrominfo$centromere <- rowMeans(chrominfo[,3:4]) 
-  chrominfo <- chrominfo[,c(1,2,5,3,4)] 
-  colnames(chrominfo) <- c("chr", "size", "centromere", "centstart", "centend")
-  chrominfo[,1] <- as.character(chrominfo[,1])
-  chrominfo$chr <- sub("chr", "", chrominfo$chr)
-  chrominfo$chr <- sub("X", "23", chrominfo$chr)
-  chrominfo$chr <- sub("Y", "24", chrominfo$chr)
-  chrominfo[,1] <- as.numeric(chrominfo[,1])
-  chrominfo <- chrominfo[order(chrominfo$chr), ]  
-  rownames(chrominfo) <- as.character(chrominfo[,1])
-  chrominfo <- as.matrix(chrominfo)
-  return(invisible(chrominfo))
-}
-
-fix_facets_column_names <- function(dat) {
-	colnames(dat)[which(colnames(dat)=="chrom")] <- "chromosome"
-	colnames(dat)[which(colnames(dat)=="loc.start")] <- "startBP"
-	colnames(dat)[which(colnames(dat)=="loc.end")] <- "endBP"
-	colnames(dat)[which(colnames(dat)=="lcn.em")] <- "nB"
-	sz <- dat[,"endBP"] - dat[,"startBP"]
-	dat <- cbind(dat, size=sz)
-    nA <- dat[,"tcn.em"] - dat[,"nB"]
-    dat <- cbind(dat, nA=nA)
-	return(invisible(dat))
-}
-
-join_adjacent_segments <- function(dat) {
-	cur_segs <- dat
-	something_changed <- 1
-	while ( something_changed ) {
-		new_segs <- c()
-		something_changed <- 0
-		x <- 2
-		last_changed <- 0
-		while (x <= nrow(cur_segs)) {
-			last_changed <- 0
-			if ( 	(cur_segs[x-1,"nB"] == cur_segs[x,"nB"]) && 
-					(cur_segs[x-1,"nA"] == cur_segs[x,"nA"]) &&
-					(cur_segs[x-1,"chromosome"] == cur_segs[x,"chromosome"])
-			) {
-				t <- cur_segs[x-1,]
-				t["endBP"] <- cur_segs[x,"endBP"]
-				t["end"] <- cur_segs[x,"end"]
-				t["size"] <- t["endBP"] - t["startBP"]
-				something_changed <- 1
-				new_segs <- rbind(t, new_segs)
-				x <- x+2
-				last_changed <- 1
-			} else {
-				new_segs <- rbind(cur_segs[x-1,], new_segs)
-				x<-x+1
-			}
-		}
-		if (! last_changed ) {
-			new_segs <- rbind(cur_segs[x-1,],new_segs)
-		}
-		n <- nrow(new_segs)
-		new_segs <- new_segs[n:1,]
-		cur_segs <- new_segs
-	}	
-	return(invisible(cur_segs))
-}
-
-fix_facet_segs <- function(dat) {
-    i <- which(is.na(dat$nB))
-    if ( length(i) > 0 )  {
-        dat <- dat[-i, ]
-    }
-    dat <- join_adjacent_segments(dat)
-    return(invisible(dat))
-}
-
-chrom_arm_LST_score <- function(dat) {
-	score <- 0
-	segs <- c()
-	SIZE_THRESH <- 10e6
-	SPACE_THRESH <- 3e6
-	if ( nrow(dat) >= 2 ) {
-		for (x in 2:nrow(dat)) {
-			if ( 	(dat[x-1,"size"] >= SIZE_THRESH) && 
-					(dat[x,"size"] >= SIZE_THRESH) &&
-					( (dat[x,"startBP"] - dat[x-1,"endBP"]) <= SPACE_THRESH)
-			) {
-				score <- score +1
-				segs <- rbind(dat[x-1,], segs)
-			}
-		}
-	}
-	tmp <- list()
-	tmp$score <- score
-	tmp$segs <- segs
-	return(invisible(tmp))
-}
-
-lst_filter <- function(dat, size_thresh) {
-	i <- which(dat[,"size"] < size_thresh)
-	sz <- dat[i,"size"]
-	i <- i[order(sz)]
-	segs_removed <- 0
-	while (length(i) > 0) {
-		dat <- dat[-i[1], ]
-		dat <- join_adjacent_segments(dat)
-		i<- which(dat[,"size"] < size_thresh)
-		sz <- dat[i,"size"]
-		i <- i[order(sz)]	
-		segs_removed <- segs_removed + 1
-	}
-	return(invisible(dat))
-}
-
-score_myriad_HRD <- function(dat, thresh=15e6) {
-	chrDel <- NULL
-	hrdSegs <- NULL
-  	hrd_score <- 0
-	chrList <- unique(dat[,"chromosome"])
-	for (x in chrList) {
-		index <- which(dat[,"chromosome"] == x)
-      		totalnB <- sum(dat[index,"nB"], na.rm=TRUE)
-		if (totalnB == 0) {
-			chrDel <- c(x, chrDel)
-		}
-	}
-	for (x in 1:nrow(dat)) {
-      		if ( dat[x,"chromosome"] %in% chrDel ) {
-			next
-		}
-		if ( dat[x,"nB"] != 0 ) {
-			next
-		}
-		if (dat[x,"size"] < thresh) {
-			next
-		}
-		hrd_score <- hrd_score + 1
-		hrdSegs <- rbind(dat[x,], hrdSegs)
-	}
-	tmp <- list()
-	tmp$score = hrd_score
-	tmp$segs = hrdSegs
-	return(invisible(tmp))
-}
-
-
-dat = read.table(opt$file_in, sep="\t", header=TRUE, stringsAsFactor=FALSE)
-dat = fix_facets_column_names(dat)
-segs = fix_facet_segs(dat)
-chromInfo = GetChrominfo()
-mrs = score_myriad_HRD(segs)
-cat(paste0(gsub("facets/cncf/","", gsub(".cncf.txt", "", opt$file_in)), "\t", mrs$score), file = opt$file_out, append=FALSE)
-cat("\n", file = opt$file_out, append=TRUE)
-
-warnings()
diff --git a/copy_number/myriadhrdscore.mk b/copy_number/myriadhrdscore.mk
deleted file mode 100644
index 8d619938..00000000
--- a/copy_number/myriadhrdscore.mk
+++ /dev/null
@@ -1,17 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/myriad_score.$(NOW)
-PHONY += genome_stats
-
-myriad_score : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).mrs)
-
-define myriad-score
-genome_stats/$1_$2.mrs : facets/cncf/$1_$2.txt
-	$$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/myriadhrdscore.R --file_in $$< --file_out genome_stats/$1_$2.mrs")
-endef
-$(foreach pair,$(SAMPLE_PAIRS),\
-		$(eval $(call myriad-score,$(tumor.$(pair)),$(normal.$(pair)))))
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/copy_number/ntaiscore.R b/copy_number/ntaiscore.R
deleted file mode 100644
index bb35c010..00000000
--- a/copy_number/ntaiscore.R
+++ /dev/null
@@ -1,148 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--file_in", default = NA, type = 'character', help = "input file name"),
-				  make_option("--file_out", default = NA, type = 'character', help = "output file name"))
-
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-chromStrToNum <- function(str) {
-	suppressWarnings(cNum <- as.numeric(str))
-	if (is.na(cNum) && str == "X" ) { 
-		cNum <- 23
-	} else if (is.na(cNum) && str == "Y") {
-		cNum <- 24 
-	}
-	return(invisible(cNum))
-}
-
-GetChrominfo <- function() {
-  f <- "modules/copy_number/hg19_chrominfo.txt"
-  chrom <- read.table(file=f)
-  chrom <- subset(chrom, grepl("^chr[0-9XY]{1,2}$", chrom[,1]))
-  f <- "modules/copy_number/hg19_gaps.txt"
-  gaps <- read.table(file=f)
-  centro <- subset(gaps, gaps[,8] == "centromere")
-  chrominfo <- merge(chrom[,1:2], centro[,2:4], by.x = 1, by.y = 1) 
-  chrominfo$centromere <- rowMeans(chrominfo[,3:4]) 
-  chrominfo <- chrominfo[,c(1,2,5,3,4)] 
-  colnames(chrominfo) <- c("chr", "size", "centromere", "centstart", "centend")
-  chrominfo[,1] <- as.character(chrominfo[,1])
-  chrominfo$chr <- sub("chr", "", chrominfo$chr)
-  chrominfo$chr <- sub("X", "23", chrominfo$chr)
-  chrominfo$chr <- sub("Y", "24", chrominfo$chr)
-  chrominfo[,1] <- as.numeric(chrominfo[,1])
-  chrominfo <- chrominfo[order(chrominfo$chr), ]  
-  rownames(chrominfo) <- as.character(chrominfo[,1])
-  chrominfo <- as.matrix(chrominfo)
-  return(invisible(chrominfo))
-}
-
-fix_facets_column_names <- function(dat) {
-	colnames(dat)[which(colnames(dat)=="chrom")] <- "chromosome"
-	colnames(dat)[which(colnames(dat)=="loc.start")] <- "startBP"
-	colnames(dat)[which(colnames(dat)=="loc.end")] <- "endBP"
-	colnames(dat)[which(colnames(dat)=="lcn.em")] <- "nB"
-	sz <- dat[,"endBP"] - dat[,"startBP"]
-	dat <- cbind(dat, size=sz)
-    nA <- dat[,"tcn.em"] - dat[,"nB"]
-    dat <- cbind(dat, nA=nA)
-	return(invisible(dat))
-}
-
-join_adjacent_segments <- function(dat) {
-	cur_segs <- dat
-	something_changed <- 1
-	while ( something_changed ) {
-		new_segs <- c()
-		something_changed <- 0
-		x <- 2
-		last_changed <- 0
-		while (x <= nrow(cur_segs)) {
-			last_changed <- 0
-			if ( 	(cur_segs[x-1,"nB"] == cur_segs[x,"nB"]) && 
-					(cur_segs[x-1,"nA"] == cur_segs[x,"nA"]) &&
-					(cur_segs[x-1,"chromosome"] == cur_segs[x,"chromosome"])
-			) {
-				t <- cur_segs[x-1,]
-				t["endBP"] <- cur_segs[x,"endBP"]
-				t["end"] <- cur_segs[x,"end"]
-				t["size"] <- t["endBP"] - t["startBP"]
-				something_changed <- 1
-				new_segs <- rbind(t, new_segs)
-				x <- x+2
-				last_changed <- 1
-			} else {
-				new_segs <- rbind(cur_segs[x-1,], new_segs)
-				x<-x+1
-			}
-		}
-		if (! last_changed ) {
-			new_segs <- rbind(cur_segs[x-1,],new_segs)
-		}
-		n <- nrow(new_segs)
-		new_segs <- new_segs[n:1,]
-		cur_segs <- new_segs
-	}	
-	return(invisible(cur_segs))
-}
-
-fix_facet_segs <- function(dat) {
-    i <- which(is.na(dat$nB))
-    if ( length(i) > 0 )  {
-        dat <- dat[-i, ]
-    }
-    dat <- join_adjacent_segments(dat)
-    return(invisible(dat))
-}
-
-score_ntAI <- function(dat, chromInfo, min_size=1000, shrink=FALSE) {
-	index <- dat[,"chromosome"] %in% c("MT", "Y", "24")
-	dat <- dat[!index,]
-	index <- dat[,"size"] < min_size
-	dat <- dat[!index,]
-	if (shrink) {
-		dat <- join_adjacent_segments(dat)
-	}
-	chrList <- unique(dat[,"chromosome"])
-	ntAI_score <- 0
-	ntAI_segs <- NULL
-	for (x in chrList) {
-		index <- dat[,"chromosome"] == x
-		chr_segs <- dat[index,]
-		cNum <- chromStrToNum(x)
-		if (nrow(chr_segs) < 2 ) {
-			next
-		}
-		if ( (chr_segs[1,"nA"] != chr_segs[1,"nB"]) && (chromInfo[cNum,"centstart"] > chr_segs[1,"endBP"]) ) {
-			ntAI_score <- ntAI_score+1
-			ntAI_segs <- rbind(chr_segs[1,],ntAI_segs)
-		}
-		eSeg <- nrow(chr_segs)
-		if ( (chr_segs[eSeg, "nA"] != chr_segs[eSeg, "nB"]) && (chr_segs[eSeg,"startBP"] > chromInfo[cNum,"centend"]) ) {
-			ntAI_score <- ntAI_score+1
-			ntAI_segs <- rbind(chr_segs[eSeg,],ntAI_segs)
-		}
-	}
-	tmp <- list()
-	tmp$segs <- ntAI_segs
-	tmp$score <- ntAI_score
-	return(invisible(tmp))
-}
-
-dat = read.table(opt$file_in, sep="\t", header=TRUE, stringsAsFactor=FALSE)
-dat = fix_facets_column_names(dat)
-segs = fix_facet_segs(dat)
-chromInfo = GetChrominfo()
-ntai = score_ntAI(segs, chromInfo)
-cat(paste0(gsub("facets/cncf/","", gsub(".cncf.txt", "", opt$file_in)), "\t", ntai$score), file = opt$file_out, append=FALSE)
-cat("\n", file = opt$file_out, append=TRUE)
-
-warnings()
diff --git a/copy_number/ntaiscore.mk b/copy_number/ntaiscore.mk
deleted file mode 100644
index 2f8d751a..00000000
--- a/copy_number/ntaiscore.mk
+++ /dev/null
@@ -1,17 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/ntai_score.$(NOW)
-PHONY += genome_stats
-
-ntai_score : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).ntai)
-
-define ntai-score
-genome_stats/$1_$2.ntai : facets/cncf/$1_$2.txt
-	$$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/ntaiscore.R --file_in $$< --file_out genome_stats/$1_$2.ntai")
-endef
-$(foreach pair,$(SAMPLE_PAIRS),\
-		$(eval $(call ntai-score,$(tumor.$(pair)),$(normal.$(pair)))))
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/copy_number/plotFacets.R b/copy_number/plotFacets.R
index 5b3c848a..a21d116e 100644
--- a/copy_number/plotFacets.R
+++ b/copy_number/plotFacets.R
@@ -26,6 +26,8 @@ parser <- OptionParser(usage = "%prog [options] [facets Rdata file]", option_lis
 arguments <- parse_args(parser, positional_arguments = T)
 opt <- arguments$options
 
+OLD_STYLE = TRUE
+
 if (length(arguments$args) < 1) {
     cat("Need facets Rdata file\n")
     print_help(parser)
@@ -66,7 +68,11 @@ normalName <- facetsFile %>%
 			  sub('\\..*', '', .)
 
 pdf(file = str_c(opt$outPrefix, ".pdf"), width=10, height=4.25)
-plot_log2_(x=out2, y=fit, purity=fit$purity, ploidy=fit$ploidy, title = gsub("facets/plots/log2/", "", opt$outPrefix, fixed=TRUE))
+if (OLD_STYLE) {
+	plot_sample_lrr_(x=out2, fit=fit)
+} else {
+	plot_log2_(x=out2, y=fit, purity=fit$purity, ploidy=fit$ploidy, title = gsub("facets/plots/log2/", "", opt$outPrefix, fixed=TRUE))
+}
 dev.off()
 
 pdf(file = str_c(gsub("log2", "cncf", opt$outPrefix, fixed=TRUE), ".pdf"), width=10, height=7)
diff --git a/default_yaml/project_config.yaml b/default_yaml/project_config.yaml
index be2b012e..3be1e9e6 100644
--- a/default_yaml/project_config.yaml
+++ b/default_yaml/project_config.yaml
@@ -32,13 +32,6 @@ ann_pathogen: true
 # target panels
 targets_file: ~/share/reference/target_panels/
 
-# cnvkit default target panels
-# ontarget_file: ~/share/reference/target_panels/
-# offtarget_file: ~/share/reference/target_panels/
-
-# whole exome sequencing
-# exome: false
-
 # gatk options
 gatk_hard_filter_snps: true
 gatk_pool_snp_recal: false
diff --git a/qc/bamIntervalMetrics.mk b/qc/bamIntervalMetrics.mk
deleted file mode 100644
index 88930e8a..00000000
--- a/qc/bamIntervalMetrics.mk
+++ /dev/null
@@ -1,102 +0,0 @@
-# generate bam interval metrics per sample
-
-#NO_RM := true
-
-include modules/Makefile.inc
-include modules/variant_callers/gatk.inc
-# picard format intervals file, needs requires sam format header
-
-VPATH ?= bam
-
-LOGDIR ?= log/metrics.$(NOW)
-
-PLOT_HS_METRICS = $(RSCRIPT) modules/qc/plotHsMetrics.R
-NON_REF_FREQ = $(PERL) modules/qc/nonRefFreqFromPileup.pl
-NON_REF_FREQ_BIN_SIZE = 0.01
-
-SUMMARIZE_HS_METRICS = python modules/qc/summarize_hs_metrics.py
-SUMMARIZE_IDXSTATS = python modules/qc/summarize_idxstats.py
-
-.DELETE_ON_ERROR:
-
-.SECONDARY: 
-
-.PHONY: bam_interval_metrics hs_metrics amplicon_metrics interval_report #non_ref_metrics insert_size_metrics idxstats
-
-bam_interval_metrics : hs_metrics interval_report #non_ref_metrics idxstats
-
-#non_ref_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).interval_nonref_freq.tsv)
-
-hs_metrics : metrics/hs_metrics.tsv metrics/interval_hs_metrics.tsv metrics/hs_metrics.summary.tsv
-
-amplicon_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).amplicon_metrics.tsv)
-
-interval_report : metrics/interval_report/interval_report.timestamp
-
-#insert_size_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).insert_size_metrics.tsv)
-
-#idxstats : metrics/idxstats_summary.tsv $(foreach sample,$(SAMPLES),metrics/$(sample).idxstats)
-
-# interval metrics per sample
-metrics/%.hs_metrics.tsv metrics/%.interval_hs_metrics.tsv : bam/%.bam bam/%.bam.bai
-	$(call RUN,-s 10G -m 20G,"TMP=`mktemp`.intervals; \
-	$(SAMTOOLS) view -H $< | grep '^@SQ' > \$$TMP &&  grep -P \"\t\" $(TARGETS_FILE) | awk 'BEGIN {OFS = \"\t\"} { print \$$1$(,)\$$2+1$(,)\$$3$(,)\"+\"$(,)NR }' >> \$$TMP; \
-	$(CALC_HS_METRICS) INPUT=$< OUTPUT=metrics/$*.hs_metrics.tsv METRIC_ACCUMULATION_LEVEL=ALL_READS REFERENCE_SEQUENCE=$(REF_FASTA) PER_TARGET_COVERAGE=metrics/$*.interval_hs_metrics.tsv TARGET_INTERVALS=\$$TMP BAIT_SET_NAME=hs BAIT_INTERVALS=\$$TMP")
-
-# not sure how this differs from above, see picard doc
-metrics/%.amplicon_metrics.tsv metrics/%.interval_amplicon_metrics.tsv : bam/%.bam bam/%.bam.bai
-	$(call RUN,-s 10G -m 20G,"TMP=`mktemp`.intervals; \
-	$(SAMTOOLS) view -H $< | grep '^@SQ' > \$$TMP && grep -P \"\t\"  $(TARGETS_FILE) | awk 'BEGIN {OFS = \"\t\"} { print \$$1$(,)\$$2+1$(,)\$$3$(,)\"+\"$(,)NR }' >> \$$TMP; \
-	$(COLLECT_TARGETED_METRICS) INPUT=$< REFERENCE_SEQUENCE=$(REF_FASTA) OUTPUT=$@ AMPLICON_INTERVALS=\$$TMP TARGET_INTERVALS=\$$TMP METRIC_ACCUMULATION_LEVEL=ALL_READS PER_TARGET_COVERAGE=metrics/$*.interval_amplicon_metrics.tsv")
-
-# summarize interval metrics into one file
-metrics/interval_hs_metrics.tsv : $(foreach sample,$(SAMPLES),metrics/$(sample).interval_hs_metrics.tsv)
-	$(INIT) \
-	sed '/^#/d; /^$$/d' $< | cut -f 1-6 > $@.tmp; \
-	for metrics in $^; do \
-		samplename=$$(basename $${metrics%%.interval_hs_metrics.tsv}); \
-		sed '/^#/d; /^$$/d' $$metrics | cut -f 7,8 | sed "s/mean_coverage/$${samplename}_mean_coverage/; s/normalized_coverage/$${samplename}_normalized_coverage/" | paste $@.tmp - > $@; \
-		cp $@ $@.tmp; \
-	done; \
-	rm -f $@.tmp
-
-metrics/hs_metrics.summary.tsv : $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.tsv)
-	$(INIT) $(SUMMARIZE_HS_METRICS) --excel_file $(@:.tsv=.xlsx) --project_name $(PROJECT_NAME) $^ > $@ 2> $(LOG)
-
-metrics/hs_metrics.tsv : $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.tsv)
-	$(INIT) \
-		{ \
-		sed '/^$$/d; /^#/d; s/SAMPLE.*//; s/BAIT_SET/SAMPLE/; s/\s$$//' $< | head -1; \
-		for metrics in $^; do \
-			samplename=$$(basename $${metrics%%.hs_metrics.tsv}); \
-			sed "/^#/d; /^BAIT/d; /^\$$/d; s/^hs/$$samplename/; s/\t\+$$//" $$metrics; \
-		done; \
-		} > $@
-
-metrics/interval_report/interval_report.timestamp : metrics/hs_metrics.tsv
-	$(call RUN,-s 7G -m 10G,"$(PLOT_HS_METRICS) --outDir $(@D) $< && touch $@")
-
-#metrics/%.interval_nonref_freq.tsv : bam/%.bam
-#	$(call RUN,-s 8G -m 10G,"$(SAMTOOLS) mpileup -l $(TARGETS_FILE) -f $(REF_FASTA) $< | $(NON_REF_FREQ) -b $(NON_REF_FREQ_BIN_SIZE) > $@")
-
-#metrics/%.insert_size_metrics.tsv : bam/%.bam
-#	$(call RUN,-s 8G -m 10G,"$(call PICARD,CollectInsertSizeMetrics,8G) INPUT=$< OUTPUT=$@ \
-#		REFERENCE_SEQUENCE=$(REF_FASTA) HISTOGRAM_FILE=$(@:.tsv=.pdf)")
-
-#metrics/insert_size_metrics.tsv : $(foreach sample,$(SAMPLES),metrics/$(sample).insert_size_metrics.tsv)
-#	$(INIT) \
-#		{ \
-#		sed '/^$$/d; /^#/d; s/SAMPLE.*//; s/\s$$//; s/^/SAMPLE\t/' $< | head -1; \
-#		for metrics in $^; do \
-#			samplename=$$(basename $${metrics%%.insert_size_metrics.tsv}); \
-#			grep -A1 '^MEDIAN_INSERT_SIZE' $$metrics | sed "1d; s/^/$$samplename\t/; s/\t\+$$//";  \
-#		done; \
-#		} > $@
-
-#metrics/%.idxstats : bam/%.bam bam/%.bam.bai
-#	$(call RUN,,"samtools idxstats $< > $@")
-
-#metrics/idxstats_summary.tsv : $(foreach sample,$(SAMPLES),metrics/$(sample).idxstats)
-#	$(INIT) $(SUMMARIZE_IDXSTATS) --excel_file $(@:.tsv=.xlsx) --project_name $(PROJECT_NAME) --targets_file $(TARGETS_FILE) $^ > $@ 2> $(LOG)
-
-include modules/bam_tools/processBam.mk
diff --git a/qc/bamMetrics.mk b/qc/bamMetrics.mk
deleted file mode 100644
index be2f4fa3..00000000
--- a/qc/bamMetrics.mk
+++ /dev/null
@@ -1,50 +0,0 @@
-include modules/Makefile.inc
-include modules/variant_callers/gatk.inc
-
-LOGDIR ?= log/bam_metrics.$(NOW)
-PHONY += metrics
-
-COLLECT_METRICS = $(JAVA) -Xmx12G -jar $(PICARD_DIR)/CollectMultipleMetrics.jar VALIDATION_STRINGENCY=LENIENT
-COLLECT_WGS_METRICS = $(JAVA) -Xmx12G -jar $(PICARD_JAR) CollectWgsMetrics VALIDATION_STRINGENCY=LENIENT
-COLLECT_GC_METRICS = $(JAVA) -Xmx12G -jar $(PICARD_DIR)/CollectGcBiasMetrics.jar VALIDATION_STRINGENCY=LENIENT
-
-SUMMARIZE_IDXSTATS = python modules/qc/summarize_idxstats.py
-
-bam_metrics : summary_metrics gc flagstats wgs_metrics
-
-PHONY += flagstats
-flagstats : $(foreach sample,$(SAMPLES),metrics/$(sample).flagstats)
-PHONY += summary_metrics
-summary_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).alignment_summary_metrics)
-PHONY += wgs_metrics
-wgs_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).wgs_metrics) metrics/wgs_metrics_summary.tsv
-PHONY += dup
-dup : $(foreach sample,$(SAMPLES),metrics/$(sample).dup_metrics)
-PHONY += gc
-gc : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_bias_metrics)
-
-metrics/%.alignment_summary_metrics : bam/%.bam
-	$(call RUN,-s 18G -m 24G -w 7200,"$(COLLECT_METRICS) I=$< O=metrics/$(*).alignment_summary_metrics REFERENCE_SEQUENCE=$(REF_FASTA)")
-
-metrics/wgs_metrics_summary.tsv : $(foreach sample,$(SAMPLES),metrics/$(sample).wgs_metrics)
-	$(INIT) (grep GENOME_TERRITORY $< | sed 's/^/SAMPLE\t/'; for x in $(SAMPLES); do grep -A1 GENOME_TERRITORY metrics/$$x.wgs_metrics | sed 1d | sed "s/^/$$x\t/" ; done) > $@
-
-metrics/%.wgs_metrics : bam/%.bam
-	$(call RUN,-s 18G -m 24G -w 7200,"$(COLLECT_WGS_METRICS) I=$< O=$@ REFERENCE_SEQUENCE=$(REF_FASTA)")
-
-metrics/%.gc_bias_metrics : bam/%.bam
-	$(call RUN,-s 18G -m 24G -w 7200,"$(COLLECT_GC_METRICS) I=$< O=$@ CHART_OUTPUT=$(addsuffix .pdf,$@) REFERENCE_SEQUENCE=$(REF_FASTA)")
-
-metrics/%.flagstats : bam/%.bam
-	$(call RUN,-s 18G -m 24G -w 7200,"$(SAMTOOLS) flagstat $< > $@")
-	
-bam/%.markdup.bam metrics/%.dup_metrics : bam/%.bam
-	$(call RUN,-s 18G -m 24G -w 7200,"$(MARK_DUP) I=$< O=bam/$*.markdup.bam METRICS_FILE=metrics/$*.dup_metrics")
-
-metrics/dup_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).dup_metrics.txt)
-	$(INIT) grep '^LIBRARY' $< > $@ && \
-	for metrics in $^; do \
-	    grep -A1 '^LIBRARY' $$metrics | sed '1d' >> $@; \
-	done
-
-.PHONY: $(PHONY)
diff --git a/qc/bam_interval_metrics.mk b/qc/bam_interval_metrics.mk
new file mode 100644
index 00000000..b7d049ec
--- /dev/null
+++ b/qc/bam_interval_metrics.mk
@@ -0,0 +1,147 @@
+include modules/Makefile.inc
+
+LOGDIR ?= log/bam_interval_metrics.$(NOW)
+
+bam_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt) \
+	      summary/idx_metrics.txt \
+	      summary/aln_metrics.txt \
+	      summary/insert_metrics.txt \
+	      summary/oxog_metrics.txt \
+	      summary/hs_metrics.txt \
+	      summary/gc_metrics.txt \
+	      summary/gc_summary.txt
+
+PICARD = picard
+PICARD_MEM = 16G
+PICARD_OPTS = VALIDATION_STRINGENCY=LENIENT MAX_RECORDS_IN_RAM=4000000 TMP_DIR=$(TMPDIR)
+CALC_HS_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectHsMetrics $(PICARD_OPTS)
+COLLECT_ALIGNMENT_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectAlignmentSummaryMetrics $(PICARD_OPTS)
+COLLECT_INSERT_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectInsertSizeMetrics $(PICARD_OPTS)
+COLLECT_OXOG_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectOxoGMetrics $(PICARD_OPTS)
+COLLECT_GC_BIAS = $(PICARD) -Xmx$(PICARD_MEM) CollectGcBiasMetrics $(PICARD_OPTS)
+BAM_INDEX = $(PICARD) -Xmx$(PICARD_MEM) BamIndexStats $(PICARD_OPTS)
+
+BAITS_LIST = $(HOME)/share/lib/bed_files/targets/IMPACT505/b37/IMPACT505_b37_baits.list
+TARGETS_LIST ?= $(HOME)/share/lib/bed_files/targets/IMPACT505/b37/IMPACT505_b37_targets.list
+	      
+define idx-metrics
+metrics/$1.idx_stats.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \
+								 $$(BAM_INDEX) \
+								 INPUT=$$(<) \
+								 > $$(@)")
+
+endef
+$(foreach sample,$(SAMPLES),\
+ 		$(eval $(call idx-metrics,$(sample))))
+					    
+define aln-metrics
+metrics/$1.aln_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \
+								 $$(COLLECT_ALIGNMENT_METRICS) \
+								 REFERENCE_SEQUENCE=$$(REF_FASTA) \
+								 INPUT=$$(<) \
+								 OUTPUT=$$(@)")
+
+endef
+$(foreach sample,$(SAMPLES),\
+ 		$(eval $(call aln-metrics,$(sample))))
+
+define insert-metrics
+metrics/$1.insert_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \
+								 $$(COLLECT_INSERT_METRICS) \
+								 INPUT=$$(<) \
+								 OUTPUT=$$(@) \
+								 HISTOGRAM_FILE=metrics/$1.insert_metrics.pdf \
+								 MINIMUM_PCT=0.5")
+
+endef
+$(foreach sample,$(SAMPLES),\
+ 		$(eval $(call insert-metrics,$(sample))))
+
+define oxog-metrics
+metrics/$1.oxog_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \
+								 $$(COLLECT_OXOG_METRICS) \
+								 REFERENCE_SEQUENCE=$$(REF_FASTA) \
+								 INPUT=$$(<) \
+								 OUTPUT=$$(@)")
+
+endef
+$(foreach sample,$(SAMPLES),\
+ 		$(eval $(call oxog-metrics,$(sample))))
+
+define hs-metrics
+metrics/$1.hs_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \
+								 $$(CALC_HS_METRICS) \
+								 REFERENCE_SEQUENCE=$$(REF_FASTA) \
+								 INPUT=$$(<) \
+								 OUTPUT=$$(@) \
+								 BAIT_INTERVALS=$$(BAITS_LIST) \
+								 TARGET_INTERVALS=$$(TARGETS_LIST)")
+
+endef
+$(foreach sample,$(SAMPLES),\
+ 		$(eval $(call hs-metrics,$(sample))))
+
+define gc-metrics
+metrics/$1.gc_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \
+								 $$(COLLECT_GC_BIAS) \
+								 INPUT=$$(<) \
+								 OUTPUT=metrics/$1.gc_bias.txt \
+								 CHART_OUTPUT=metrics/$1.gc_metrics.pdf \
+								 REFERENCE_SEQUENCE=$$(REF_FASTA) \
+								 SUMMARY_OUTPUT=$$(@)")
+					   
+endef
+$(foreach sample,$(SAMPLES),\
+ 		$(eval $(call gc-metrics,$(sample))))
+		
+summary/idx_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \
+							       $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 1 --sample_names '$(SAMPLES)'")
+
+summary/aln_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \
+							       $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 2 --sample_names '$(SAMPLES)'")
+
+summary/insert_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \
+							       $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 3 --sample_names '$(SAMPLES)'")
+
+summary/oxog_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \
+							       $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 4 --sample_names '$(SAMPLES)'")
+
+summary/hs_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \
+							       $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 5 --sample_names '$(SAMPLES)'")
+
+summary/gc_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \
+							       $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 6 --sample_names '$(SAMPLES)'")
+					  
+summary/gc_summary.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt)
+	$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \
+								$(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 7 --sample_names '$(SAMPLES)'")
+
+
+..DUMMY := $(shell mkdir -p version; \
+	     echo "picard" >> version/bam_interval_metrics.txt; \
+	     $(PICARD) CollectAlignmentSummaryMetrics --version &>> version/bam_interval_metrics.txt; \
+	     $(PICARD) CollectInsertSizeMetrics --version &>> version/bam_interval_metrics.txt; \
+	     $(PICARD) CollectOxoGMetrics --version &>> version/bam_interval_metrics.txt; \
+	     $(PICARD) CollectHsMetrics --version &>> version/bam_interval_metrics.txt; \
+	     $(PICARD) CollectGcBiasMetrics --version &>> version/bam_interval_metrics.txt; \
+             R --version >> version/bam_interval_metrics.txt)
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: bam_metrics
diff --git a/qc/bam_metrics.mk b/qc/bam_metrics.mk
new file mode 100644
index 00000000..00377c43
--- /dev/null
+++ b/qc/bam_metrics.mk
@@ -0,0 +1,136 @@
+include modules/Makefile.inc
+
+LOGDIR ?= log/bam_metrics.$(NOW)
+
+bam_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt) \
+	      summary/idx_metrics.txt \
+	      summary/aln_metrics.txt \
+	      summary/insert_metrics.txt \
+	      summary/oxog_metrics.txt \
+	      summary/hs_metrics.txt \
+	      summary/gc_metrics.txt \
+	      summary/gc_summary.txt
+	      
+TARGETS_LIST ?= $(HOME)/share/lib/resource_files/MSK-IMPACT-v4.sorted.list
+	      
+define idx-metrics
+metrics/$1.idx_stats.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \
+					    $$(BAM_INDEX) \
+					    INPUT=$$(<) \
+					    > $$(@)")
+
+endef
+$(foreach sample,$(SAMPLES),\
+ 		$(eval $(call idx-metrics,$(sample))))
+					    
+define aln-metrics
+metrics/$1.aln_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \
+					    $$(COLLECT_ALIGNMENT_METRICS) \
+					    REFERENCE_SEQUENCE=$$(REF_FASTA) \
+					    INPUT=$$(<) \
+					    OUTPUT=$$(@)")
+
+endef
+$(foreach sample,$(SAMPLES),\
+ 		$(eval $(call aln-metrics,$(sample))))
+
+define insert-metrics
+metrics/$1.insert_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \
+					    $$(COLLECT_INSERT_METRICS) \
+					    INPUT=$$(<) \
+					    OUTPUT=$$(@) \
+					    HISTOGRAM_FILE=metrics/$1.insert_metrics.pdf \
+					    MINIMUM_PCT=0.5")
+
+endef
+$(foreach sample,$(SAMPLES),\
+ 		$(eval $(call insert-metrics,$(sample))))
+
+define oxog-metrics
+metrics/$1.oxog_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \
+					    $$(COLLECT_OXOG_METRICS) \
+					    REFERENCE_SEQUENCE=$$(REF_FASTA) \
+					    INPUT=$$(<) \
+					    OUTPUT=$$(@)")
+
+endef
+$(foreach sample,$(SAMPLES),\
+ 		$(eval $(call oxog-metrics,$(sample))))
+
+define hs-metrics
+metrics/$1.hs_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \
+					    $$(CALC_HS_METRICS) \
+					    REFERENCE_SEQUENCE=$$(REF_FASTA) \
+					    INPUT=$$(<) \
+					    OUTPUT=$$(@) \
+					    BAIT_INTERVALS=$$(TARGETS_LIST) \
+					    TARGET_INTERVALS=$$(TARGETS_LIST)")
+
+endef
+$(foreach sample,$(SAMPLES),\
+ 		$(eval $(call hs-metrics,$(sample))))
+
+define gc-metrics
+metrics/$1.gc_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \
+					    $$(COLLECT_GC_BIAS) \
+					    INPUT=$$(<) \
+					    OUTPUT=metrics/$1.gc_bias.txt \
+					    CHART_OUTPUT=metrics/$1.gc_metrics.pdf \
+					    REFERENCE_SEQUENCE=$$(REF_FASTA) \
+					    SUMMARY_OUTPUT=$$(@)")
+					   
+endef
+$(foreach sample,$(SAMPLES),\
+ 		$(eval $(call gc-metrics,$(sample))))
+		
+summary/idx_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 1 --sample_names '$(SAMPLES)'")
+
+summary/aln_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 2 --sample_names '$(SAMPLES)'")
+
+summary/insert_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 3 --sample_names '$(SAMPLES)'")
+
+summary/oxog_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 4 --sample_names '$(SAMPLES)'")
+
+summary/hs_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 5 --sample_names '$(SAMPLES)'")
+
+summary/gc_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 6 --sample_names '$(SAMPLES)'")
+					  
+summary/gc_summary.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt)
+	$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 7 --sample_names '$(SAMPLES)'")
+
+
+..DUMMY := $(shell mkdir -p version; \
+	     echo "picard" >> version/bam_metrics.txt; \
+	     $(PICARD) CollectAlignmentSummaryMetrics --version &>> version/bam_metrics.txt; \
+	     $(PICARD) CollectInsertSizeMetrics --version &>> version/bam_metrics.txt; \
+	     $(PICARD) CollectOxoGMetrics --version &>> version/bam_metrics.txt; \
+	     $(PICARD) CollectHsMetrics --version &>> version/bam_metrics.txt; \
+	     $(PICARD) CollectGcBiasMetrics --version &>> version/bam_metrics.txt; \
+             R --version >> version/bam_metrics.txt)
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: bam_metrics
diff --git a/qc/wgs_metrics.mk b/qc/wgs_metrics.mk
new file mode 100644
index 00000000..ded23e69
--- /dev/null
+++ b/qc/wgs_metrics.mk
@@ -0,0 +1,116 @@
+include modules/Makefile.inc
+
+LOGDIR ?= log/wgs_metrics.$(NOW)
+
+wgs_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics_summary.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).wgs_metrics.txt) \
+	      $(foreach sample,$(SAMPLES),metrics/$(sample).duplicate_metrics.txt) \
+	      summary/idx_metrics.txt \
+	      summary/aln_metrics.txt \
+	      summary/insert_metrics.txt \
+	      summary/oxog_metrics.txt \
+	      summary/gc_metrics.txt \
+	      summary/wgs_metrics.txt \
+	      summary/duplicate_metrics.txt
+	    
+SAMTOOLS_THREADS = 4
+SAMTOOLS_MEM_THREAD = 1G
+
+GATK_THREADS = 4
+GATK_MEM_THREAD = 2G
+
+define picard-metrics
+metrics/$1.idx_stats.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \
+							$$(BAM_INDEX) \
+							INPUT=$$(<) \
+							> $$(@)")
+									   
+metrics/$1.aln_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \
+							$$(COLLECT_ALIGNMENT_METRICS) \
+							REFERENCE_SEQUENCE=$$(REF_FASTA) \
+							INPUT=$$(<) \
+							OUTPUT=$$(@)")
+									   
+metrics/$1.insert_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \
+							$$(COLLECT_INSERT_METRICS) \
+							INPUT=$$(<) \
+							OUTPUT=$$(@) \
+							HISTOGRAM_FILE=metrics/$1.insert_metrics.pdf \
+							MINIMUM_PCT=0.05")
+									   
+metrics/$1.oxog_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \
+							$$(COLLECT_OXOG_METRICS) \
+							REFERENCE_SEQUENCE=$$(REF_FASTA) \
+							INPUT=$$(<) \
+							OUTPUT=$$(@)")
+					    
+metrics/$1.gc_metrics_summary.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \
+							$$(COLLECT_GC_BIAS) \
+							INPUT=$$(<) \
+							OUTPUT=metrics/$1.gc_metrics.txt \
+							CHART_OUTPUT=metrics/$1.gc_metrics.pdf \
+							REFERENCE_SEQUENCE=$$(REF_FASTA) \
+							SUMMARY_OUTPUT=$$(@)")
+					   
+metrics/$1.wgs_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \
+							$$(COLLECT_WGS_METRICS) \
+							INPUT=$$(<) \
+							OUTPUT=$$(@) \
+							REFERENCE_SEQUENCE=$$(REF_FASTA)")
+							
+metrics/$1.duplicate_metrics.txt : bam/$1.bam
+	$$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \
+							$$(COLLECT_DUP_METRICS) \
+							INPUT=$$(<) \
+							METRICS_FILE=$$(@)")
+
+endef
+$(foreach sample,$(SAMPLES),\
+	$(eval $(call picard-metrics,$(sample))))
+	
+summary/idx_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 1 --sample_names '$(SAMPLES)'")
+					  
+summary/aln_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 2 --sample_names '$(SAMPLES)'")
+
+summary/insert_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 3 --sample_names '$(SAMPLES)'")
+					  
+summary/oxog_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 4 --sample_names '$(SAMPLES)'")
+					  
+summary/gc_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics_summary.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 5 --sample_names '$(SAMPLES)'")
+					  
+summary/wgs_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).wgs_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 6 --sample_names '$(SAMPLES)'")
+					  
+summary/duplicate_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).duplicate_metrics.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 7 --sample_names '$(SAMPLES)'")
+
+#..DUMMY := $(shell mkdir -p version; \
+#	     $(SAMTOOLS) --version >> version/wgs_metrics.txt; \
+#	     echo "gatk3" >> version/wgs_metrics.txt; \
+#	     $(GATK) --version >> version/wgs_metrics.txt; \
+#	     echo "picard" >> version/wgs_metrics.txt)
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: wgs_metrics
diff --git a/rnaseq/cufflinks.mk b/rnaseq/cufflinks.mk
deleted file mode 100644
index 54e294fe..00000000
--- a/rnaseq/cufflinks.mk
+++ /dev/null
@@ -1,75 +0,0 @@
-# This module is used for running cufflinks
-# input: $(SAMPLES) 
-# Options: BAM_PHRED64 = true/false
-# Authors: Fong Chun Chan <fongchunchan@gmail.com>
-#
-include modules/Makefile.inc
-
-LOGDIR = log/cufflinks.$(NOW)
-
-
-NUM_CORES ?= 8
-CUFFLINKS = $(HOME)/share/usr/bin/cufflinks
-CUFFLINKS_OPTS = -b $(REF_FASTA) -u -g $(GENES_GTF) -p $(NUM_CORES) -u --no-update-check -v
-CUFFCOMPARE = $(HOME)/share/usr/bin/cuffcompare
-CUFFCOMPARE_OPTS = --no-update-check
-CUFFMERGE = $(HOME)/share/usr/bin/cuffmerge
-CUFFMERGE_OPTS = --no-update-check
-CUFFDIFF = $(HOME)/share/usr/bin/cuffdiff
-CUFFDIFF_OPTS = --no-update-check -v
-CUFFQUANT = $(HOME)/share/usr/bin/cuffquant
-CUFFQUANT_OPTS = --no-update-check -v
-CUFFNORM = $(HOME)/share/usr/bin/cuffnorm
-CUFFNORM_OPTS = --no-update-check -v
-CUFFCOMPARE_OPTS = --no-update-check -s $(REF_FASTA) -r $(GENES_GTF) -V -v
-
-PHENO_FILE ?= pheno.txt
-ifneq ($(wildcard $(PHENO_FILE)),)
-  A = $(shell sed '1d' $(PHENO_FILE) | cut -f1)
-  B = $(shell sed '1d' $(PHENO_FILE) | cut -f2)
-  $(foreach i,$(shell seq 1 $(words $(A))),$(eval pheno.$(word $i,$(B)) += $(word $i,$(A))))
-  PHENOTYPES = $(shell sed '1d' $(PHENO_FILE) | cut -f2 | sort | uniq)
-endif
-
-..DUMMY := $(shell mkdir -p version; $(CUFFLINKS) &> version/tophat.txt; echo "options: $(CUFFLINKS_OPTS)" >> version/cufflinks.txt)
-.SECONDARY:
-.DELETE_ON_ERROR:
-.PHONY : all_cufflinks cufflinks cuffcmp cuffmerge cuffdiff cuffnorm
-
-all_cufflinks : cufflinks cuffcmp cuffmerge cuffdiff cuffnorm
-cufflinks : $(foreach sample,$(SAMPLES),cufflinks/gtf/$(sample).transcripts.gtf)
-cuffcmp : cufflinks/cuffcmp/cc.stats
-cuffmerge : cufflinks/gtf/merged.gtf
-cuffdiff : cufflinks/cuffdiff/gene_exp.diff
-cuffnorm : cufflinks/cuffnorm/gene_exp.txt
-
-cufflinks/gtf/%.transcripts.gtf cufflinks/fpkm_tracking/%.isoforms.fpkm_tracking cufflinks/fpkm_tracking/%.genes.fpkm_tracking : bam/%.bam
-	$(call RUN,-n $(NUM_CORES) -s 2G -m 4G,"${CUFFLINKS} ${CUFFLINKS_OPTS} -o cufflinks/$* $<  && \
-		mkdir -p cufflinks/gtf cufflinks/fpkm_tracking && \
-		ln cufflinks/$*/transcripts.gtf cufflinks/gtf/$*.transcripts.gtf && \
-		ln cufflinks/$*/isoforms.fpkm_tracking cufflinks/fpkm_tracking/$*.isoforms.fpkm_tracking && \
-		ln cufflinks/$*/genes.fpkm_tracking cufflinks/fpkm_tracking/$*.genes.fpkm_tracking")
-
-cufflinks/cuffcmp/cc.stats : $(foreach sample,$(SAMPLES),cufflinks/gtf/$(sample).transcripts.gtf)
-	$(call RUN,-s 10G -m 20G,"$(CUFFCOMPARE) $(CUFFCOMPARE_OPTS) -o $(@:.stats=) $^")
-
-cufflinks/assembly_list.txt : $(foreach sample,$(SAMPLES),cufflinks/gtf/$(sample).transcripts.gtf)
-	$(INIT) echo "$^" | tr ' ' '\n' > $@
-
-cufflinks/gtf/merged.gtf : cufflinks/assembly_list.txt
-	$(call RUN,-n 8 -s 1G -m 2.5G,"$(CUFFMERGE) $(CUFFMERGE_OPTS) -o $(@D) -g $(GENES_GTF) -p 8 $<")
-
-cufflinks/cxb/%.cxb : cufflinks/gtf/merged.gtf bam/%.bam
-	$(call RUN,-n 4 -s 1G -m 2.5G,"mkdir -p cufflinks/$* && \
-	   	$(CUFFQUANT) $(CUFFQUANT_OPTS) -o cufflinks/$* -b $(REF_FASTA) -p 4 $^ && \
-		ln cufflinks/$*/abundances.cxb $@")
-
-cufflinks/cuffdiff/gene_exp.diff : cufflinks/gtf/merged.gtf $(foreach sample,$(SAMPLES),cufflinks/cxb/$(sample).cxb)
-	$(call RUN,-n 8 -s 1G -m 4G,"$(CUFFDIFF) $(CUFFDIFF_OPTS) -o $(@D) -p 8 $< \
-		$(foreach pheno,$(PHENOTYPES),$(subst $( ),$(,),$(foreach s,$(pheno.$(pheno)),cufflinks/cxb/$s.cxb))) \
-		-L $(subst $( ),$(,),$(PHENOTYPES))")
-
-cufflinks/cuffnorm/gene_exp.txt : cufflinks/gtf/merged.gtf $(foreach sample,$(SAMPLES),cufflinks/cxb/$(sample).cxb)
-	$(call RUN,-n 8 -s 1G -m 2G,"$(CUFFNORM) $(CUFFNORM_OPTS) -o $(@D) -p 8 $< \
-		$(foreach pheno,$(PHENOTYPES),$(subst $( ),$(,),$(foreach s,$(pheno.$(pheno)),cufflinks/cxb/$s.cxb))) \
-		-L $(subst $( ),$(,),$(PHENOTYPES))")
diff --git a/rnaseq/deseq.Rnw b/rnaseq/deseq.Rnw
deleted file mode 100644
index 8cc9f8de..00000000
--- a/rnaseq/deseq.Rnw
+++ /dev/null
@@ -1,138 +0,0 @@
-%%% Applies DESeq on a matrix of count data
-%%% Inputs: counts matrix and pheno design matrix
-
-\documentclass{article}
-\usepackage[margin=1in]{geometry}
-\usepackage{here}
-
-\title{DESeq Analysis}
-\author{Raymond Lim}
-
-\begin{document}
-
-\maketitle
-
-\SweaveOpts{cache=T, prefix.string=graphics/deSeq}
-
-
-<<init, echo = F, cache = F, results = hide>>=
-dir.create('graphics', showWarnings = F)
-options(width = 100)
-
-includeGraphic <- function(filename, caption = NULL, width = 1) {
-    if (is.null(caption)) {
-        cat("\\includegraphics[width=", width, "\\linewidth]{", filename, "}\n", sep = "")
-    } else {
-        cat("\\begin{figure}[h!]\n")
-        cat("\\includegraphics[width=", width, "\\linewidth]{", filename, "}\n", sep = "")
-        cat("\\caption{", caption, "}\n", sep = "")
-        cat("\\end{figure}\n")
-    }
-
-}
-
-includeGraphics <- function(filenames, width = 1, caption) {
-    cat("\\begin{figure}[h!]\n")
-    for (filename in filenames) {
-        includeGraphic(filename, width)
-    }
-    cat("\\caption{", caption, "}\n", sep = "")
-    cat("\\end{figure}\n")
-}
-@
-
-
-<<libs, echo = F, results = hide>>=
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("DESeq"))
-suppressPackageStartupMessages(library("GenomicFeatures"))
-suppressPackageStartupMessages(library("Rsamtools"))
-suppressPackageStartupMessages(library("xtable"))
-#library(multicore)
-@
-
-<<optParse, echo = F>>=
-optList <- list(
-                make_option("--condition", default = 'Condition', help = "Factor of interest in pheno file [default %default]"),
-                make_option("--refCondition", default = 'Normal', help = "Reference condition [default %default]"),
-                make_option("--altCondition", default = 'Variant', help = "Reference condition [default %default]"),
-                make_option("--outFile", default = NULL, help = "Output results to this file [optional]"));
-
-parser <- OptionParser(usage = "%prog [options] [counts file] [pheno file]", option_list = optList);
-
-arguments <- parse_args(parser, positional_arguments = T, arg = arguments);
-opt <- arguments$options;
-
-if (length(arguments$args) != 2) {
-    cat("Need pheno design file and counts data");
-    print_help(parser);
-    stop();
-} 
-@
-
-<<loadData, echo = F>>=
-phenoFile <- arguments$args[2];
-countsFile <- arguments$args[1];
-
-pheno <- read.table(phenoFile, header = T, sep = '\t', row.names = 1, check.names = F);
-pheno[, opt$condition] <- relevel(pheno[, opt$condition], opt$refCondition);
-
-counts <- read.table(countsFile, header = T, sep = '\t', na.strings = "", comment.char = "", stringsAsFactors = F, check.names = F);
-counts <- counts[!duplicated(counts[,1]), ]
-rownames(counts) <- counts[,1]
-counts <- counts[-1]
-
-if (!all(colnames(counts) %in% rownames(pheno))) {
-    cat("Design does not match data");
-}
-counts <- counts[, rownames(pheno)]
-
-cds <- newCountDataSet(counts, pheno[, opt$condition])
-@
-
-Estimate the effective library size:
-
-<<effectiveLibSize>>=
-cds <- estimateSizeFactors(cds)
-sizeFactors(cds)
-@
-
-Estimate dispersion/variance: 
-
-<<estimateDispersions>>=
-cds <- estimateDispersions(cds)
-
-str(fitInfo(cds))
-@
-
-<<diffEx>>=
-res <- nbinomTest(cds, levels(pData(cds)$condition)[1], levels(pData(cds)$condition)[2])
-@
-
-\begin{figure}
-<<MAplot, fig = T>>=
-plot(res$baseMean, res$log2FoldChange, log = "x", pch = 20, cex = .3, col = ifelse(res$padj < .1, "red", "black"), ylab = 'M', xlab = 'A')
-@
-    \caption{MA plot, normalised mean vs. log2 fold change}
-\end{figure}
-
-\begin{figure}
-<<pvalHist, fig = T>>=
-hist(res$pval, breaks = 100, col = 'skyblue', border = 'slateblue', main = "", xlab = 'p-value')
-@
-    \caption{Histogram of p-values}
-\end{figure}
-
-<<topGenes, results = tex>>=
-capt <- 'Top differentially expressed genes'
-print(xtable(head(res[order(res$padj), ], 20), caption = capt))
-@
-
-<<writeResults, echo = F>>=
-if (!is.null(opt$outFile)) {
-    write.table(res, file = opt$outFile, sep = '\t', quote = F, col.names=NA)
-}
-@
-
-\end{document}
-
diff --git a/rnaseq/deseq.mk b/rnaseq/deseq.mk
deleted file mode 100644
index bfec8c1e..00000000
--- a/rnaseq/deseq.mk
+++ /dev/null
@@ -1,23 +0,0 @@
-include modules/Makefile.inc
-include modules/variant_callers/gatk.inc
-
-LOGDIR = log/deseq.$(NOW)
-
-DESEQ_RNW = modules/rnaseq/deseq.Rnw
-SWEAVE = $(RSCRIPT) modules/scripts/Sweave.R
-
-DESEQ_CONDITION ?= condition
-DESEQ_REF_CONDITION ?= ref
-
-# pheno file: sample\tpheno with header
-PHENO_FILE ?= pheno.txt
-
-.DELETE_ON_ERROR: 
-.SECONDARY: 
-
-.PHONY : all
-
-deseq_results.txt : sumreads/geneCounts.txt
-	mkdir -p graphics; $(SWEAVE) $(DESEQ_RNW) --condition $(DESEQ_CONDITION) --refCondition $(DESEQ_REF_CONDITION) --outFile $@ $< $(PHENO_FILE)
-
-
diff --git a/rnaseq/dexseq.mk b/rnaseq/dexseq.mk
deleted file mode 100644
index 23b5668f..00000000
--- a/rnaseq/dexseq.mk
+++ /dev/null
@@ -1,18 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/exon_counts.$(NOW)
-PHONY += dexseq
-
-dexseq : $(foreach sample,$(TUMOR_SAMPLES),dexseq/$(sample).txt)
-
-define exon-count
-dexseq/%.txt : star/bam/%.star.sorted.filtered.bam
-	$$(call RUN,-c -s 8G -m 12G -w 1440,"source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate /home/${USER}/share/usr/anaconda-envs/dexseq && \
-								 /home/${USER}/share/usr/anaconda-envs/dexseq/lib/R/library/DEXSeq/python_scripts/dexseq_count.py -f bam -p yes -r pos /home/${USER}/share/reference/Ensembl/Homo_sapiens.GRCh37.75.gff $$< dexseq/$$*.txt")
-endef
-$(foreach sample,$(TUMOR_SAMPLES),\
-		$(eval $(call exon-count,$sample)))
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/rnaseq/immunedeconv.mk b/rnaseq/immunedeconv.mk
new file mode 100644
index 00000000..e112137b
--- /dev/null
+++ b/rnaseq/immunedeconv.mk
@@ -0,0 +1,39 @@
+# Immune-cell deconvolution module.
+#
+# Runs $(SCRIPTS_DIR)/immunedeconv.R on kallisto/tpm_by_gene.txt once per
+# --option value (1, 2 and 3) and writes one table per run under immunedeconv/.
+# NOTE(review): RUN, RSCRIPT, SCRIPTS_DIR, IMMUNE_ENV and NOW are not defined in
+# this file; presumably modules/Makefile.inc provides them -- confirm.
+include modules/Makefile.inc
+
+LOGDIR = log/immunedeconv.$(NOW)
+
+# Aggregate goal: all three result tables.
+immunedeconv : immunedeconv/quantiseq.txt \
+	       immunedeconv/mcpcounter.txt \
+	       immunedeconv/cibersort.txt
+
+# --option 1 (presumably quanTIseq, judging by the target name -- confirm).
+immunedeconv/quantiseq.txt : kallisto/tpm_by_gene.txt
+	$(call RUN, -c -n 1 -s 8G -m 16G -v $(IMMUNE_ENV),"set -o pipefail && \
+							   $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 1 --input_file $(<) --output_file $(@)")
+
+# --option 2 (presumably MCP-counter, judging by the target name -- confirm).
+immunedeconv/mcpcounter.txt : kallisto/tpm_by_gene.txt
+	$(call RUN, -c -n 1 -s 8G -m 16G -v $(IMMUNE_ENV),"set -o pipefail && \
+							   $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 2 --input_file $(<) --output_file $(@)")
+
+# --option 3 (presumably CIBERSORT, judging by the target name -- confirm).
+immunedeconv/cibersort.txt : kallisto/tpm_by_gene.txt
+	$(call RUN, -c -n 1 -s 8G -m 16G -v $(IMMUNE_ENV),"set -o pipefail && \
+							   $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 3 --input_file $(<) --output_file $(@)")
+
+# Record the R version once per parse. ">" (not ">>") keeps a single current
+# entry instead of appending a duplicate every time make reads this file.
+# NOTE(review): the path is hard-coded rather than built from IMMUNE_ENV --
+# confirm both name the same environment.
+..DUMMY := $(shell mkdir -p version; \
+	     ~/share/usr/env/r-immunedeconv-2.1.0/bin/R --version > version/immunedeconv.txt;)
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: immunedeconv
diff --git a/rnaseq/kallisto.mk b/rnaseq/kallisto.mk
new file mode 100644
index 00000000..6db1acf1
--- /dev/null
+++ b/rnaseq/kallisto.mk
@@ -0,0 +1,72 @@
+# kallisto quantification module.
+#
+# Per sample: merge the per-split FASTQs into one R1 and one R2 file, run
+# "kallisto quant" on the pair, then build kallisto/tpm_by_gene.txt from all
+# samples with summarize_sleuth.R.
+# NOTE(review): RUN, PIGZ, PIGZ_ENV, KALLISTO_ENV, RSCRIPT, SCRIPTS_DIR, SAMTOOLS,
+# SAMPLES, split.<sample> and fq.<split> are not defined in this file;
+# presumably modules/Makefile.inc provides them -- confirm.
+include modules/Makefile.inc
+
+LOGDIR = log/kallisto.$(NOW)
+
+# Aggregate goal: merged FASTQs, per-sample abundances and the summary table.
+kallisto : $(foreach sample,$(SAMPLES),kallisto/$(sample)/$(sample)_R1.fastq.gz) \
+	   $(foreach sample,$(SAMPLES),kallisto/$(sample)/$(sample)_R2.fastq.gz) \
+	   $(foreach sample,$(SAMPLES),kallisto/$(sample)/abundance.tsv) \
+	   kallisto/tpm_by_gene.txt
+
+# Overridable locations of the annotation table and the kallisto index.
+SLEUTH_ANNOT ?= $(HOME)/share/lib/resource_files/Hugo_ENST_ensembl75_fixed.txt
+KALLISTO_INDEX ?= $(HOME)/share/lib/ref_files/b37/ensembl_v75-0.43.0_kallisto_index
+
+# merge-fastq(sample, splits): rules for kallisto/<sample>/<sample>_R1.fastq.gz
+# and ..._R2.fastq.gz. Prerequisites are word 1 (R1) or word 2 (R2) of
+# fq.<split> for each split; the recipe decompresses them into one stream and
+# recompresses it with PIGZ into the target.
+define merge-fastq
+kallisto/$1/$1_R1.fastq.gz : $$(foreach split,$2,$$(word 1, $$(fq.$$(split))))
+	$$(call RUN,-c -n 12 -s 0.5G -m 1G -w 24:00:00 -v $(PIGZ_ENV),"set -o pipefail && \
+								       $$(PIGZ) -cd -p 12 $$(^) | $$(PIGZ) -c -p 12 > $$(@)")
+	
+kallisto/$1/$1_R2.fastq.gz : $$(foreach split,$2,$$(word 2, $$(fq.$$(split))))
+	$$(call RUN,-c -n 12 -s 0.5G -m 1G -w 24:00:00 -v $(PIGZ_ENV),"set -o pipefail && \
+								       $$(PIGZ) -cd -p 12 $$(^) | $$(PIGZ) -c -p 12 > $$(@)")
+endef
+$(foreach sample,$(SAMPLES),\
+		$(eval $(call merge-fastq,$(sample),$(split.$(sample)))))
+
+# fastq-to-kallisto(sample): rule running "kallisto quant" on the merged pair,
+# writing into kallisto/<sample>.
+# NOTE(review): "<<" is not a built-in GNU Make automatic variable; the recipe
+# presumably relies on modules/Makefile.inc defining it as the second
+# prerequisite (the R2 FASTQ) -- confirm.
+define fastq-to-kallisto
+kallisto/$1/abundance.tsv : kallisto/$1/$1_R1.fastq.gz kallisto/$1/$1_R2.fastq.gz
+	$$(call RUN,-c -n 12 -s 2G -m 3G -v $(KALLISTO_ENV),"set -o pipefail && \
+							     kallisto quant \
+							     -i $$(KALLISTO_INDEX) \
+							     -o kallisto/$1 \
+							     --bias -b 100 -t 12\
+							     --fusion $$(<) $$(<<)")
+
+endef
+$(foreach sample,$(SAMPLES),\
+		$(eval $(call fastq-to-kallisto,$(sample))))
+		
+# Summary table over all samples.
+# NOTE(review): the recipe does not pass $@ to summarize_sleuth.R; the script
+# presumably writes kallisto/tpm_by_gene.txt itself -- confirm.
+kallisto/tpm_by_gene.txt : $(foreach sample,$(SAMPLES),kallisto/$(sample)/abundance.tsv)
+	$(call RUN, -c -n 24 -s 1G -m 2G -v $(KALLISTO_ENV),"set -o pipefail && \
+							     $(RSCRIPT) $(SCRIPTS_DIR)/summarize_sleuth.R --annotation $(SLEUTH_ANNOT) --samples '$(SAMPLES)'")
+
+# Record tool versions at parse time: the first command overwrites
+# version/kallisto.txt, the following two append to it.
+..DUMMY := $(shell mkdir -p version; \
+	     $(SAMTOOLS) --version > version/kallisto.txt; \
+	     ~/share/usr/env/kallisto-0.46.2/bin/kallisto version >> version/kallisto.txt; \
+	     ~/share/usr/env/kallisto-0.46.2/bin/R --version >> version/kallisto.txt;)
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: kallisto
diff --git a/rnaseq/rpkm.mk b/rnaseq/rpkm.mk
deleted file mode 100644
index 34a6d9b5..00000000
--- a/rnaseq/rpkm.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-#Module calculates RPKM values.  Depends on the sumRNASeqReads.mk
-RPKM_RSCRIPT = ${RSCRIPT} ~/gascoyne/scripts/calculateRPKM.R
-
-rpkm/%.rpkm.txt : summarized_reads/%.summarized_reads.txt
-	SGE_RREQ="-N $(@F) -l mem_free=1G -q all.q -now n" \
-	$(MKDIR) $(@D)/logs;\
-	$(RPKM_RSCRIPT) ${TXDB_FILE} $< $@ > $(@D)/logs/$*.log 2>&1
diff --git a/rnaseq/sumRNASeqReads.mk b/rnaseq/sumRNASeqReads.mk
deleted file mode 100644
index 931cf949..00000000
--- a/rnaseq/sumRNASeqReads.mk
+++ /dev/null
@@ -1,44 +0,0 @@
-include modules/Makefile.inc
-include modules/variant_callers/gatk.inc
-
-LOGDIR = log/sum_reads.$(NOW)
-
-DEFAULT_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.6
-
-SUM_READS_RSCRIPT = ${RSCRIPT} modules/rnaseq/summarizeRNASeqReads.R
-SUM_EXONS_RSCRIPT = ${RSCRIPT} modules/rnaseq/summarizeRNASeqReadsByExon.R
-SUM_INTRONS_RSCRIPT = ${RSCRIPT} modules/rnaseq/summarizeRNASeqReadsByIntron.R
-SUM_READS_OPTS =
-
-.DELETE_ON_ERROR: 
-.SECONDARY: 
-
-.PHONY : all sumreads
-
-SUM_TYPE = byGene byExon
-
-all : $(foreach type,$(SUM_TYPE),$(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.$(type).txt)) sumreads/rpkm_by_gene.txt sumreads/rpkm_by_exon.txt sumreads/counts_by_gene.txt sumreads/counts_by_exon.txt
-
-sumreads/%.sumreads.byGene.txt : bam/%.bam bam/%.bam.bai
-	$(call RUN,-v $(DEFAULT_ENV) -s 24G -m 48G,"$(SUM_READS_RSCRIPT) --genome $(REF) --outFile $@ $(SUM_READS_OPTS) $<")
-
-sumreads/%.sumreads.byExon.txt : bam/%.bam bam/%.bam.bai
-	$(call RUN,-v $(DEFAULT_ENV) -s 24G -m 48G,"$(SUM_EXONS_RSCRIPT) --genome $(REF) --outFile $@ $(SUM_READS_OPTS) $<")
-
-sumreads/rpkm_by_gene.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.byGene.txt)
-	cut -f 2 $< > $@; \
-	for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 7 $$x | sed "s/exonRPKM/$$sample/" | paste $@ - > $@.tmp; mv $@.tmp $@; done
-
-sumreads/rpkm_by_exon.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.byExon.txt)
-	cut -f 1-2 $< > $@; \
-	for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 6 $$x | sed "s/exonRPKM/$$sample/" | paste $@ - > $@.tmp; mv $@.tmp $@; done
-
-sumreads/counts_by_gene.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.byGene.txt)
-	cut -f 2 $< > $@; \
-	for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 3 $$x | sed "s/countsByGene/$$sample/" | paste $@ - > $@.tmp; mv $@.tmp $@; done
-
-sumreads/counts_by_exon.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.byExon.txt)
-	cut -f 1-2 $< > $@; \
-	for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 4 $$x | sed "s/exonCount/$$sample/" | paste $@ - > $@.tmp; mv $@.tmp $@; done
-
-include modules/bam_tools/processBam.mk
diff --git a/rnaseq/sumreads.mk b/rnaseq/sumreads.mk
new file mode 100644
index 00000000..f8a6e0af
--- /dev/null
+++ b/rnaseq/sumreads.mk
@@ -0,0 +1,58 @@
+# Read-count / RPKM summarisation module.
+#
+# Per sample: summarise bam/<sample>.bam by gene and by exon. Across samples:
+# paste selected columns of the per-sample tables into four matrices
+# (RPKM and counts, each by gene and by exon).
+# NOTE(review): RUN, SUMREADS_ENV, SUM_READS_RSCRIPT and SUM_EXONS_RSCRIPT are
+# not defined in this file; presumably modules/Makefile.inc provides them --
+# confirm.
+include modules/Makefile.inc
+
+LOGDIR = log/sum_reads.$(NOW)
+
+# Aggregate goal: all per-sample tables plus the four merged matrices.
+sumreads : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.by_gene.txt) \
+	   $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.by_exon.txt) \
+	   sumreads/rpkm_by_gene.txt \
+	   sumreads/rpkm_by_exon.txt \
+	   sumreads/counts_by_gene.txt \
+	   sumreads/counts_by_exon.txt
+
+SUM_READS_OPTS =
+REF ?= b37
+
+# Per-sample tables; only the BAM ($<) is passed to the script, the index is a prerequisite only.
+sumreads/%.sumreads.by_gene.txt : bam/%.bam bam/%.bam.bai
+	$(call RUN,-v $(SUMREADS_ENV) -s 24G -m 48G,"$(SUM_READS_RSCRIPT) --genome $(REF) --outFile $@ $(SUM_READS_OPTS) $<")
+
+sumreads/%.sumreads.by_exon.txt : bam/%.bam bam/%.bam.bai
+	$(call RUN,-v $(SUMREADS_ENV) -s 24G -m 48G,"$(SUM_EXONS_RSCRIPT) --genome $(REF) --outFile $@ $(SUM_READS_OPTS) $<")
+
+# Merged matrices. Each recipe seeds the target with column 2 (by gene) or
+# columns 1-2 (by exon) of the first per-sample table, then pastes on one column
+# per sample, replacing the column's label text with the sample name (the file
+# name up to its first dot). Steps are chained with "&&" / "|| exit 1" so that a
+# failing cut, paste or mv fails the recipe and .DELETE_ON_ERROR removes the target.
+sumreads/rpkm_by_gene.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.by_gene.txt)
+	cut -f 2 $< > $@ && \
+	for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 7 $$x | sed "s/exonRPKM/$$sample/" | paste $@ - > $@.tmp && mv $@.tmp $@ || exit 1; done
+
+sumreads/rpkm_by_exon.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.by_exon.txt)
+	cut -f 1-2 $< > $@ && \
+	for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 6 $$x | sed "s/exonRPKM/$$sample/" | paste $@ - > $@.tmp && mv $@.tmp $@ || exit 1; done
+
+sumreads/counts_by_gene.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.by_gene.txt)
+	cut -f 2 $< > $@ && \
+	for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 3 $$x | sed "s/countsByGene/$$sample/" | paste $@ - > $@.tmp && mv $@.tmp $@ || exit 1; done
+
+sumreads/counts_by_exon.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.by_exon.txt)
+	cut -f 1-2 $< > $@ && \
+	for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 4 $$x | sed "s/exonCount/$$sample/" | paste $@ - > $@.tmp && mv $@.tmp $@ || exit 1; done
+
+# Record the R version once per parse; ">" keeps a single current entry
+# instead of appending a duplicate every time make reads this file.
+..DUMMY := $(shell mkdir -p version; \
+	     $(SUMREADS_ENV)/bin/R --version > version/sumreads.txt;)
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: sumreads
diff --git a/scripts/annotateSummaryVcf.R b/scripts/annotateSummaryVcf.R
new file mode 100755
index 00000000..ce3fc2ca
--- /dev/null
+++ b/scripts/annotateSummaryVcf.R
@@ -0,0 +1,67 @@
+#!/usr/bin/env Rscript
+
+# Reformats a tab-separated mutation summary table.
+#   --option 1: write the CHROM, POS, REF and ALT columns of --input to --output
+#               as a minimal VCF (ID ".", QUAL 100, FILTER "PASS", INFO ".").
+#   --option 2: replace the sample, depth, allele-count, CCF, LOH and HOTSPOT
+#               columns of --maf with values from --input and write to --output.
+# Any other --option value is an error.
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"),
+               make_option("--input", default = NA, type = 'character', help = "input file path"),
+	       make_option("--maf", default = NA, type = 'character', help = "input maf file path"),
+	       make_option("--output", default = NA, type = 'character', help = "output file path"))
+parser = OptionParser(usage = "%prog", option_list = optList)
+arguments = parse_args(parser, positional_arguments = T)
+opt = arguments$options
+
+# NA (missing or non-numeric --option) falls through to the final error branch.
+option = suppressWarnings(as.numeric(opt$option))
+
+if (isTRUE(option == 1)) {
+	smry = readr::read_tsv(file = opt$input, col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	       readr::type_convert() %>%
+	       dplyr::rename(`#CHROM` = CHROM) %>%
+	       dplyr::mutate(ID = ".",
+			     QUAL = 100,
+			     FILTER = "PASS",
+			     INFO = ".") %>%
+	       dplyr::select(`#CHROM`, POS, ID, REF, ALT, QUAL, FILTER, INFO)
+	# The format line is written first; write_tsv then appends the header row and the records.
+	cat("##fileformat=VCFv4.2\n", file = opt$output, append = FALSE)
+	readr::write_tsv(smry, path = opt$output, na = "NA", append = TRUE, col_names = TRUE)
+
+} else if (isTRUE(option == 2)) {
+	smry = readr::read_tsv(file = opt$input, col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	       readr::type_convert()
+	# Values from smry are assigned by row position.
+	# NOTE(review): assumes --maf has one row per --input row, in the same order -- confirm.
+	maf = readr::read_tsv(file = opt$maf, comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	      readr::type_convert() %>%
+	      dplyr::mutate(Tumor_Sample_Barcode = smry$TUMOR_SAMPLE,
+			    Matched_Norm_Sample_Barcode = smry$NORMAL_SAMPLE,
+			    Tumor_Sample_UUID = smry$TUMOR_SAMPLE,
+			    Matched_Norm_Sample_UUID = smry$NORMAL_SAMPLE,
+			    t_depth = smry$TUMOR_DP,
+			    t_ref_count = round((1-smry$TUMOR_MAF) * smry$TUMOR_DP),
+			    t_alt_count = round(smry$TUMOR_MAF*smry$TUMOR_DP),
+			    n_depth = smry$NORMAL_DP,
+			    n_ref_count = round((1-smry$NORMAL_MAF) * smry$NORMAL_DP),
+			    n_alt_count = round(smry$NORMAL_MAF*smry$NORMAL_DP),
+			    CCF = smry$ccf,
+			    LOH = smry$facetsLOHCall,
+			    HOTSPOT = smry$HOTSPOT)
+	readr::write_tsv(x = maf, path = opt$output)
+
+} else {
+	stop("unsupported --option '", opt$option, "': expected 1 or 2")
+}
diff --git a/scripts/backup.sh b/scripts/backup.sh
deleted file mode 100755
index 3c1894f9..00000000
--- a/scripts/backup.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-
-LOCK=~/.backup_lock
-LOGFILE=~/.backup.log
-if ! mkdir $LOCK 2> /dev/null; then
-    echo "backup script is already running"
-    exit 1
-fi
-
-TMP=`mktemp`;
-TOPDIR=/ifs/e63data/reis-filho
-if mountpoint -q "/mount/limr/zedshared/"; then
-    while [ 1 ]; do
-        echo "searching for files in $TOPDIR"
-        cd $TOPDIR
-        'ls' data/*/*/bam/*.bam* projects/*/bam/*.bam* data/*/wgs*/fastq/*.fastq.gz | \
-            rsync --verbose --stats --recursive -a --files-from=- --log-file=$LOGFILE --prune-empty-dirs ./ /mount/limr/zedshared
-        if [ "$?" = "0" ]; then
-            echo "rsync complete"
-            exit
-        else
-            echo "rsync failure, retrying in 1 minute..."
-            sleep 60
-        fi
-    done
-fi
-
-rmdir $LOCK
diff --git a/scripts/bam_metrics.R b/scripts/bam_metrics.R
new file mode 100755
index 00000000..a08eefaa
--- /dev/null
+++ b/scripts/bam_metrics.R
@@ -0,0 +1,108 @@
+#!/usr/bin/env Rscript
+
+# Collates per-sample metrics files (metrics/<sample><suffix>) into one
+# tab-separated table per metric under summary/.
+#   --option 1: .idx_stats.txt      -> summary/idx_metrics.txt
+#   --option 2: .aln_metrics.txt    -> summary/aln_metrics.txt
+#   --option 3: .insert_metrics.txt -> summary/insert_metrics.txt
+#   --option 4: .oxog_metrics.txt   -> summary/oxog_metrics.txt
+#   --option 5: .hs_metrics.txt     -> summary/hs_metrics.txt
+#   --option 6: .gc_metrics.txt     -> summary/gc_metrics.txt
+#   --option 7: .gc_bias.txt        -> summary/gc_summary.txt
+# --sample_names is a single space-separated string of sample names.
+# Any other --option value is an error.
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"),
+               make_option("--sample_names", default = NA, type = 'character', help = "sample names"))
+parser = OptionParser(usage = "%prog", option_list = optList)
+arguments = parse_args(parser, positional_arguments = T)
+opt = arguments$options
+
+# Read metrics/<sample_name><suffix> with every column as character; the caller
+# converts types afterwards. skip, n_max and col_names are passed to read_tsv.
+'read_metrics' <- function(sample_name, suffix, skip = 0, n_max = Inf, col_names = TRUE)
+{
+	readr::read_tsv(file = paste0("metrics/", sample_name, suffix),
+			skip = skip, n_max = n_max, col_names = col_names,
+			col_types = cols(.default = col_character()))
+}
+
+# Apply per_sample() to each sample name, row-bind the results and write them to output_file.
+'write_summary' <- function(sample_names, output_file, per_sample)
+{
+	x = lapply(sample_names, per_sample)
+	x = do.call(rbind, x)
+	write_tsv(x, path = output_file, na = "NA", append = FALSE, col_names = TRUE)
+}
+
+# NA (missing or non-numeric --option) falls through to the final error branch.
+option = suppressWarnings(as.numeric(opt$option))
+sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE))
+
+if (isTRUE(option == 1)) {
+	# Row 85 of each table is dropped before type conversion; the file has no header row.
+	write_summary(sample_names, "summary/idx_metrics.txt", function(sample_name) {
+		read_metrics(sample_name, ".idx_stats.txt", col_names = FALSE)[-85,,drop=FALSE] %>%
+		readr::type_convert() %>%
+		dplyr::select(CHROMOSOME = X1, LENGTH = X2, ALIGNED_READS = X3) %>%
+		dplyr::mutate(CHROMOSOME = gsub(pattern=" length=", replacement="", x=CHROMOSOME),
+			      ALIGNED_READS = gsub(pattern="Aligned= ", replacement="", x=ALIGNED_READS),
+			      SAMPLE_NAME = sample_name)
+	})
+
+} else if (isTRUE(option == 2)) {
+	write_summary(sample_names, "summary/aln_metrics.txt", function(sample_name) {
+		read_metrics(sample_name, ".aln_metrics.txt", skip = 6) %>%
+		readr::type_convert() %>%
+		dplyr::select(-SAMPLE, -READ_GROUP) %>%
+		dplyr::mutate(SAMPLE_NAME = sample_name)
+	})
+
+} else if (isTRUE(option == 3)) {
+	write_summary(sample_names, "summary/insert_metrics.txt", function(sample_name) {
+		read_metrics(sample_name, ".insert_metrics.txt", skip = 6, n_max = 1) %>%
+		readr::type_convert() %>%
+		dplyr::select(-SAMPLE, -READ_GROUP) %>%
+		dplyr::mutate(SAMPLE_NAME = sample_name)
+	})
+
+} else if (isTRUE(option == 4)) {
+	write_summary(sample_names, "summary/oxog_metrics.txt", function(sample_name) {
+		read_metrics(sample_name, ".oxog_metrics.txt", skip = 6) %>%
+		readr::type_convert() %>%
+		dplyr::rename(SAMPLE_NAME = SAMPLE_ALIAS)
+	})
+
+} else if (isTRUE(option == 5)) {
+	write_summary(sample_names, "summary/hs_metrics.txt", function(sample_name) {
+		read_metrics(sample_name, ".hs_metrics.txt", skip = 6, n_max = 1) %>%
+		readr::type_convert() %>%
+		dplyr::mutate(SAMPLE_NAME = sample_name)
+	})
+
+} else if (isTRUE(option == 6)) {
+	write_summary(sample_names, "summary/gc_metrics.txt", function(sample_name) {
+		read_metrics(sample_name, ".gc_metrics.txt", skip = 6) %>%
+		readr::type_convert() %>%
+		dplyr::mutate(SAMPLE_NAME = sample_name)
+	})
+
+} else if (isTRUE(option == 7)) {
+	write_summary(sample_names, "summary/gc_summary.txt", function(sample_name) {
+		read_metrics(sample_name, ".gc_bias.txt", skip = 6) %>%
+		readr::type_convert() %>%
+		dplyr::mutate(SAMPLE_NAME = sample_name)
+	})
+
+} else {
+	stop("unsupported --option '", opt$option, "': expected an integer from 1 to 7")
+}
diff --git a/scripts/chmod.sh b/scripts/chmod.sh
deleted file mode 100755
index ba5b0892..00000000
--- a/scripts/chmod.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-chmod ug+rwX -R /ifs/e63data/reis-filho/data &> /dev/null
-chmod ug+rwX -R /ifs/e63data/reis-filho/projects &> /dev/null
-chmod ug+rwX -R /ifs/e63data/reis-filho/reference &> /dev/null
-chmod o+rX -R /ifs/e63data/reis-filho/data &> /dev/null
-chmod o+rX -R /ifs/e63data/reis-filho/projects &> /dev/null
-chmod o+rX -R /ifs/e63data/reis-filho/reference &> /dev/null
diff --git a/scripts/classify_indel_pathogenicity_vcf.py b/scripts/classify_indel_pathogenicity_vcf.py
index fdb66ee9..274e9081 100644
--- a/scripts/classify_indel_pathogenicity_vcf.py
+++ b/scripts/classify_indel_pathogenicity_vcf.py
@@ -31,7 +31,7 @@ def query_mutation_taster(record):
     parser.add_argument('--qsub_queue', nargs='?', default='jrf.q,all.q', help='qsub queue')
     parser.add_argument('--num_provean_threads', nargs='?', default=4, type=int, help='number of provean threads')
     parser.add_argument('--run_local', action='store_true', default=False, help='run provean locally')
-    parser.add_argument('--no_remote', action='store_true', default=False, help='no remote queries: can only call potentially pathogenic')
+    parser.add_argument('--no_remote', action='store_true', default=True, help='no remote queries: can only call potentially pathogenic')
     parser.add_argument('--no_mt_provean', action='store_true', default=False, help='do not run mutation taster / provean')
     args = parser.parse_args()
 
diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R
new file mode 100644
index 00000000..1e7b0a6e
--- /dev/null
+++ b/scripts/cnvkit.R
@@ -0,0 +1,237 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("copynumber"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+args_list <- list(make_option("--option", default = NA, type = 'character', help = "type of analysis"),
+		  make_option("--sample_name", default = NA, type = 'character', help = "sample name"))
+parser <- OptionParser(usage = "%prog", option_list = args_list)
+arguments <- parse_args(parser, positional_arguments = T)
+opt <- arguments$options
+
+'plot_log2_ratio' <- function(x)  # Scatter of x$log2 against x$position on the current device; x also needs chromosome, start_chr and end_chr.
+{
+   	par(mar=c(5, 5, 4, 2)+.1)
+	plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-4,5))  # axes are drawn by hand below
+	y = x %>%  # one row per chromosome with its start/end offset, ordered 1..22 then X
+	    dplyr::group_by(chromosome) %>%
+	    dplyr::summarize(start = min(start_chr),
+			     end = max(end_chr)) %>%
+	    dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>%
+	    dplyr::arrange(chromosome)
+	points(x = c(y$start[1]-1E9, y$end[nrow(y)]), y = c(0, 0), type = "l", col = "grey20", lwd = 1.15)  # horizontal baseline at log2 ratio 0
+	axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = .035)  # blank-labelled ticks at chromosome boundaries
+	axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1)  # chromosome names at the midpoint of each chromosome
+	axis(2, at = c(-2, -1, 0, 1, 2), labels = c(-2, -1, 0, 1, 2), cex.axis = 1, las = 1)
+	mtext(side = 2, text = expression(Log[2]~"Ratio   "), line = 3.15, cex = 1)
+}
+
+'add_segmented' <- function(x)  # Overlay one horizontal line per row of x on the current plot; x needs Start_Position, End_Position and Log2_Ratio.
+{
+	for (i in seq_len(nrow(x))) {  # seq_len() gives no iterations for a zero-row table, whereas 1:nrow(x) would give c(1, 0)
+		points(x = c(x$Start_Position[i], x$End_Position[i]), y = rep(x$Log2_Ratio[i], 2), type = "l", col = "#e41a1c", lwd = 2.75)
+	}
+}
+
+'add_totalcopies' <- function(purity, ploidy, xmin, xmax)  # Dotted horizontal guide lines from xmin to xmax on the current plot, one per copy number below.
+{
+	for (i in c(1, 2, 4, 6, 10)) {  # copy numbers for which a guide line is considered
+		y = log2(((purity*i) + (1-purity)*2)/((purity*ploidy) + (1-purity)*2))  # NOTE(review): presumably the expected log2 ratio for i total copies at this purity/ploidy -- confirm
+		if (!is.na(y) & y<2) {  # skip undefined values and lines at or above 2
+			points(x = c(xmin, xmax), y = rep(y, 2), type = "l", col = "goldenrod3", lty = 3, lwd = 1)
+		}
+	}
+}
+
+if (as.numeric(opt$option) == 1) {
+	data = readr::read_tsv(file = paste0("cnvkit/cnr/", opt$sample_name, ".cnr"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	       readr::type_convert() %>%
+	       dplyr::filter(weight>.1) %>%
+	       dplyr::filter(chromosome != "Y") %>%
+	       dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE))
+	cytoband = data %>%
+		   dplyr::group_by(chromosome) %>%
+		   dplyr::summarize(start = min(start),
+				    end = max(end)) %>%
+		   dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>%
+		   dplyr::arrange(chromosome) %>%
+		   dplyr::mutate(end = cumsum(end))
+	start = rep(0, nrow(cytoband))
+	start[2:nrow(cytoband)] = cytoband$end[1:(nrow(cytoband)-1)] + cytoband$start[2:nrow(cytoband)]
+	cytoband$start = start
+	data = data %>%
+	       dplyr::left_join(cytoband %>%
+				dplyr::rename(start_chr = start,
+					      end_chr = end),
+			        by = "chromosome") %>%
+	       dplyr::mutate(start = start + start_chr,
+			     end = end + start_chr) %>%
+	       dplyr::mutate(position = .5*(start + end)) %>%
+	       dplyr::mutate(log2 = case_when(
+		       log2 > 6 ~ 0,
+		       log2 < (-4) ~ 0,
+		       TRUE ~ log2
+	       ))
+	
+	pdf(file = paste0("cnvkit/plots/log2/", opt$sample_name, ".pdf"), width = 8, height = 3.75)
+	plot_log2_ratio(x = data)
+	dev.off()
+
+} else if (as.numeric(opt$option) == 2) {
+	data = readr::read_tsv(file = paste0("cnvkit/cnr/", opt$sample_name, ".cnr"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	       readr::type_convert() %>%
+	       dplyr::filter(weight>.1) %>%
+	       dplyr::filter(chromosome != "Y")
+	smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad")
+	segmented = pcf(data = smoothed, kmin = 25, gamma = 75, normalize = FALSE, fast = FALSE) %>%
+		    dplyr::as_tibble() %>%
+		    dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm,
+				  Start_Position = start.pos, End_Position = end.pos,
+				  N = n.probes, Log2_Ratio = mean) %>%
+		    dplyr::mutate(Sample_Name = opt$sample_name)
+	readr::write_tsv(x = segmented, file = paste0("cnvkit/segmented/", opt$sample_name, ".txt"), col_names = TRUE, append = FALSE)
+	
+} else if (as.numeric(opt$option) == 3) {
+	data = readr::read_tsv(file = paste0("cnvkit/cnr/", opt$sample_name, ".cnr"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	       readr::type_convert() %>%
+	       dplyr::filter(weight>.1) %>%
+	       dplyr::filter(chromosome != "Y")
+	smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad")
+	segmented = pcf(data = smoothed, kmin = 25, gamma = 75, normalize = FALSE, fast = FALSE) %>%
+		    dplyr::as_tibble() %>%
+		    dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm,
+				  Start_Position = start.pos, End_Position = end.pos,
+				  N = n.probes, Log2_Ratio = mean) %>%
+		    dplyr::mutate(Sample_Name = opt$sample_name)
+	cytoband = data %>%
+		   dplyr::group_by(chromosome) %>%
+		   dplyr::summarize(start = min(start),
+				    end = max(end)) %>%
+		   dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>%
+		   dplyr::arrange(chromosome) %>%
+		   dplyr::mutate(end = cumsum(end))
+	start = rep(0, nrow(cytoband))
+	start[2:nrow(cytoband)] = cytoband$end[1:(nrow(cytoband)-1)] + cytoband$start[2:nrow(cytoband)]
+	cytoband$start = start
+	data = data %>%
+	       dplyr::left_join(cytoband %>%
+				dplyr::rename(start_chr = start,
+					      end_chr = end),
+			        by = "chromosome") %>%
+	       dplyr::mutate(start = start + start_chr,
+			     end = end + start_chr) %>%
+	       dplyr::mutate(position = start) %>%
+	       dplyr::mutate(log2 = case_when(
+		       log2 > 6 ~ 0,
+		       log2 < (-4) ~ 0,
+		       TRUE ~ log2
+	       ))
+	segmented = segmented %>%
+		    dplyr::left_join(cytoband %>%
+				     dplyr::rename(Chromosome = chromosome,
+						   start_chr = start,
+					      	   end_chr = end),
+			             by = "Chromosome") %>%
+	       	    dplyr::mutate(Start_Position = Start_Position + start_chr,
+				  End_Position = End_Position + start_chr)
+	
+	pdf(file = paste0("cnvkit/plots/segmented/", opt$sample_name, ".pdf"), width = 8, height = 3.75)
+	plot_log2_ratio(x = data)
+	add_segmented(x = segmented)
+	dev.off()
+
+} else if (as.numeric(opt$option) == 4) {
+	tumor_name = unlist(strsplit(x = opt$sample_name, split = "_", fixed = TRUE))[1]
+	normal_name = unlist(strsplit(x = opt$sample_name, split = "_", fixed = TRUE))[2]
+	data = readr::read_tsv(file = paste0("cnvkit/segmented/", tumor_name, ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	       readr::type_convert()
+	facets = readr::read_tsv(file = paste0("facets/cncf/", tumor_name, "_", normal_name, ".out"), col_names = FALSE, col_types = cols(.default = col_character())) %>%
+	         readr::type_convert()
+	purity = as.numeric(gsub(pattern = "# Purity = ", replacement = "", x = facets %>% dplyr::slice(10) %>% .[["X1"]], fixed = TRUE))
+	ploidy = as.numeric(gsub(pattern = "# Ploidy = ", replacement = "", x = facets %>% dplyr::slice(11) %>% .[["X1"]], fixed = TRUE))
+	data = data %>%
+	       dplyr::mutate(Total_Copy = ((2^(Log2_Ratio))*(purity*ploidy + (1-purity)*2) - (1-purity)*2)/purity)
+	readr::write_tsv(x = data, file = paste0("cnvkit/totalcopy/", tumor_name, ".txt"), col_names = TRUE, append = FALSE)
+	
+} else if (as.numeric(opt$option) == 5) {
+	tumor_name = unlist(strsplit(x = opt$sample_name, split = "_", fixed = TRUE))[1]
+	normal_name = unlist(strsplit(x = opt$sample_name, split = "_", fixed = TRUE))[2]
+	data = readr::read_tsv(file = paste0("cnvkit/cnr/", tumor_name, ".cnr"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	       readr::type_convert() %>%
+	       dplyr::filter(weight>.1) %>%
+	       dplyr::filter(chromosome != "Y")
+	segmented = readr::read_tsv(file = paste0("cnvkit/totalcopy/", tumor_name, ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	       	    readr::type_convert()
+	cytoband = data %>%
+		   dplyr::group_by(chromosome) %>%
+		   dplyr::summarize(start = min(start),
+				    end = max(end)) %>%
+		   dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>%
+		   dplyr::arrange(chromosome) %>%
+		   dplyr::mutate(end = cumsum(end))
+	start = rep(0, nrow(cytoband))
+	start[2:nrow(cytoband)] = cytoband$end[1:(nrow(cytoband)-1)] + cytoband$start[2:nrow(cytoband)]
+	cytoband$start = start
+	data = data %>%
+	       dplyr::left_join(cytoband %>%
+				dplyr::rename(start_chr = start,
+					      end_chr = end),
+			        by = "chromosome") %>%
+	       dplyr::mutate(start = start + start_chr,
+			     end = end + start_chr) %>%
+	       dplyr::mutate(position = start) %>%
+	       dplyr::mutate(log2 = case_when(
+		       log2 > 6 ~ 0,
+		       log2 < (-4) ~ 0,
+		       TRUE ~ log2
+	       ))
+	segmented = segmented %>%
+		    dplyr::left_join(cytoband %>%
+				     dplyr::rename(Chromosome = chromosome,
+						   start_chr = start,
+					      	   end_chr = end),
+			             by = "Chromosome") %>%
+	       	    dplyr::mutate(Start_Position = Start_Position + start_chr,
+				  End_Position = End_Position + start_chr)
+	
+	facets = readr::read_tsv(file = paste0("facets/cncf/", tumor_name, "_", normal_name, ".out"), col_names = FALSE, col_types = cols(.default = col_character())) %>%
+	         readr::type_convert()
+	purity = as.numeric(gsub(pattern = "# Purity = ", replacement = "", x = facets %>% dplyr::slice(10) %>% .[["X1"]], fixed = TRUE))
+	ploidy = as.numeric(gsub(pattern = "# Ploidy = ", replacement = "", x = facets %>% dplyr::slice(11) %>% .[["X1"]], fixed = TRUE))
+	
+	pdf(file = paste0("cnvkit/plots/totalcopy/", tumor_name, ".pdf"), width = 8, height = 3.75)
+	plot_log2_ratio(x = data)
+	add_segmented(x = segmented)
+	add_totalcopies(purity, ploidy, cytoband[1,"start"]-1E9, cytoband[nrow(cytoband),"end"])
+	dev.off()
+	
+} else if (as.numeric(opt$option) == 6) {
+	sample_names = unlist(strsplit(x = opt$sample_name, split = " ", fixed = TRUE))
+	data = list()
+	for (i in 1:length(sample_names)) {
+		data[[i]] = readr::read_tsv(file = paste0("cnvkit/totalcopy/", sample_names[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	       		    readr::type_convert()
+	}
+	data = do.call(bind_rows, data)
+	readr::write_tsv(x = data, file = "cnvkit/summary/total_copy.txt", col_names = TRUE, append = FALSE)
+	
+} else if (as.numeric(opt$option) == 7) {
+	sample_names = unlist(strsplit(x = opt$sample_name, split = " ", fixed = TRUE))
+	data = list()
+	for (i in 1:length(sample_names)) {
+		data[[i]] = readr::read_tsv(file = paste0("cnvkit/cnr/", sample_names[i], ".cnr"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	       		    readr::type_convert() %>%
+			    dplyr::select(chromosome, start, end, log2, weight) %>%
+			    dplyr::mutate(sample_name = sample_names[i])
+	}
+	data = do.call(bind_rows, data)
+	readr::write_tsv(x = data, file = "cnvkit/summary/log2_ratio.txt", col_names = TRUE, append = FALSE)
+	
+}
\ No newline at end of file
diff --git a/scripts/configure.py b/scripts/configure.py
index 8f216041..2a226a61 100755
--- a/scripts/configure.py
+++ b/scripts/configure.py
@@ -1,23 +1,18 @@
 #!/usr/bin/env python
-from __future__ import print_function
 
+from __future__ import print_function
 import yaml
 import argparse
 import collections
 
-""" convert yaml files to make include files
-"""
-
-
 def lowerBool(x):
     if isinstance(x, bool):
         return str(x).lower()
     else:
         return x
 
-
 def sample_yaml2mk(samples_file, out):
-    samples = yaml.load(open(args.samples_file, 'r'))
+    samples = yaml.full_load(open(args.samples_file, 'r'))
 
     tumors = set()
     normals = set()
@@ -95,7 +90,7 @@ def sample_yaml2mk(samples_file, out):
 
 def sample_attr_yaml2mk(sample_attr_file, out):
     print("\n# sample_attr_file", file=out)
-    sample_attr = yaml.load(open(sample_attr_file, 'r'))
+    sample_attr = yaml.full_load(open(sample_attr_file, 'r'))
     for attr, m in sample_attr.items():
         for k, v in m.items():
             print("{}.{} = {}".format(attr, k, v), file=out)
@@ -103,7 +98,7 @@ def sample_attr_yaml2mk(sample_attr_file, out):
 
 def sample_fastq_yaml2mk(sample_fastq_file, out):
     print("\n# sample_fastq_file", file=out)
-    sample_fastq = yaml.load(open(sample_fastq_file, 'r'))
+    sample_fastq = yaml.full_load(open(sample_fastq_file, 'r'))
     split_samples = set()
     for k, v in sample_fastq.items():
         for idx, fastq in enumerate(v):
@@ -122,17 +117,15 @@ def sample_fastq_yaml2mk(sample_fastq_file, out):
 
 def sample_merge_yaml2mk(sample_merge_file, out):
     print("\n# sample_merge_file", file=out)
-    sample_merge = yaml.load(open(args.sample_merge_file, 'r'))
+    sample_merge = yaml.full_load(open(sample_merge_file, 'r'))  # read the path passed in, not the global `args`
     print("MERGE_SAMPLES = {}".format(" ".join(list(sample_merge.keys()))), file=out)
     for k, v in sample_merge.items():
         print("merge.{} = {}".format(k, " ".join(v)), file=out)
 
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(prog='configure',
-                                     description='Convert project YAML file to Make')
-    parser.add_argument('--project_config_file', help='project yaml config file',
-                        default='project_config.yaml')
+    parser = argparse.ArgumentParser(prog='configure', description='Convert project YAML file to Make')
+    parser.add_argument('--project_config_file', help='project yaml config file', default='project_config.yaml')
     parser.add_argument('--samples_file', help='yaml samples file', default='samples.yaml')
     parser.add_argument('--sample_attr_file', help='yaml sample attr file', default='sample_attr.yaml')
     parser.add_argument('--sample_fastq_file', help='yaml sample fastq file mappings', default='sample.fastq.yaml')
@@ -142,7 +135,7 @@ def sample_merge_yaml2mk(sample_merge_file, out):
 
     of = open(args.out_file, 'w')
 
-    config = yaml.load(open(args.project_config_file, 'r'))
+    config = yaml.full_load(open(args.project_config_file, 'r'))
     for k, v in config.items():
         print("{} = {}".format(k.upper(), lowerBool(v)), file=of)
 
diff --git a/scripts/createSampleSets.pl b/scripts/create_sample_sets.pl
similarity index 92%
rename from scripts/createSampleSets.pl
rename to scripts/create_sample_sets.pl
index f32b680d..2bcc0a55 100644
--- a/scripts/createSampleSets.pl
+++ b/scripts/create_sample_sets.pl
@@ -1,5 +1,4 @@
 #!/usr/bin/env perl
-# parse samples file to get sample sets (space delimited, normal last)
 
 use strict;
 use warnings;
diff --git a/signatures/extract_signatures.R b/scripts/extract_signatures.R
similarity index 100%
rename from signatures/extract_signatures.R
rename to scripts/extract_signatures.R
diff --git a/scripts/facets_suite.R b/scripts/facets_suite.R
new file mode 100644
index 00000000..11d45470
--- /dev/null
+++ b/scripts/facets_suite.R
@@ -0,0 +1,29 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("magrittr"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+args_list <- list(make_option("--option", default = NA, type = 'character', help = "type of analysis"),
+		  make_option("--sample_pairs", default = NA, type = 'character', help = "sample pairs"))
+parser <- OptionParser(usage = "%prog", option_list = args_list)
+arguments <- parse_args(parser, positional_arguments = T)
+opt <- arguments$options
+
+if (as.numeric(opt$option) == 1) {
+	# Option 1: stack the gene-level table of every sample pair into facets_suite/summary.txt.
+	pair_ids = unlist(strsplit(as.character(opt$sample_pairs), split = " ", fixed = TRUE))
+	load_gene_level = function(pair_id) {
+		gene_level_file = paste0("facets_suite/", pair_id, "/", pair_id, ".gene_level.txt")
+		readr::read_tsv(file = gene_level_file, col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		readr::type_convert()
+	}
+	CN = do.call(rbind, lapply(pair_ids, load_gene_level))
+	readr::write_tsv(x = CN, path = "facets_suite/summary.txt", col_names = TRUE, append = FALSE)
+
+}
diff --git a/scripts/filter_sv.R b/scripts/filter_sv.R
new file mode 100644
index 00000000..bb9b87f6
--- /dev/null
+++ b/scripts/filter_sv.R
@@ -0,0 +1,50 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+optList = list(make_option("--input_file", default = NA, type = 'character', help = "Input VCF file"),
+               make_option("--output_file", default = NA, type = 'character', help = "Output VCF file"))
+parser = OptionParser(usage = "%prog", option_list = optList)
+arguments = parse_args(parser, positional_arguments = T)
+opt = arguments$options
+
+# Drop records whose INFO (column 8) contains SUPP_VEC=110, take the record ID from the 8th
+# ':'-separated field of column 12 and relabel ALT/SVTYPE for IDs containing DUP, DEL or INV.
+vcf = readr::read_tsv(file = as.character(opt$input_file), comment = "#", col_names = FALSE, col_types = cols(.default = col_character())) %>%
+      readr::type_convert() %>%
+      dplyr::filter(!grepl("SUPP_VEC=110", X8, fixed = TRUE)) %>%
+      # vapply() returns character(0) when no rows are left; unlist(lapply()) gave NULL, which deleted X3
+      dplyr::mutate(X3 = vapply(X12, function(x) { unlist(strsplit(x, split = ":", fixed = TRUE))[8] }, character(1), USE.NAMES = FALSE)) %>%
+      dplyr::mutate(X3 = gsub(pattern = "_", replacement = ":", x = X3, fixed = TRUE)) %>%
+      dplyr::mutate(X5 = case_when(
+	      grepl("DUP", X3, fixed = TRUE) ~ "<DUP:TANDEM>",
+	      grepl("DEL", X3, fixed = TRUE) ~ "<DEL>",
+	      grepl("INV", X3, fixed = TRUE) ~ "<INV>",
+	      TRUE ~ X5
+      )) %>%
+      dplyr::mutate(X8 = case_when(
+	      grepl("DUP", X3, fixed = TRUE) ~ gsub("SVTYPE=INV", "SVTYPE=DUP", X8),
+	      grepl("DEL", X3, fixed = TRUE) ~ gsub("SVTYPE=INV", "SVTYPE=DEL", X8),
+	      TRUE ~ X8
+      )) %>%
+      dplyr::rename(`#CHROM` = X1,
+		    POS = X2,
+		    ID = X3,
+		    REF = X4,
+		    ALT = X5,
+		    QUAL = X6,
+		    FILTER = X7,
+		    INFO = X8,
+		    FORMAT = X9,
+		    SVABA = X10,
+		    GRIDSS = X11,
+		    MANTA = X12)
+
+readr::write_tsv(x = vcf, path = as.character(opt$output_file), append = TRUE, col_names = TRUE) # NOTE(review): append = TRUE presumably expects the caller to have written the ## header lines first - confirm
diff --git a/scripts/get_basecounts.R b/scripts/get_basecounts.R
new file mode 100644
index 00000000..f65fae14
--- /dev/null
+++ b/scripts/get_basecounts.R
@@ -0,0 +1,29 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+args_list <- list(make_option("--option", default = NA, type = 'character', help = "Which option?"),
+		  make_option("--sample_name", default = NA, type = 'character', help = "sample name"))
+parser <- OptionParser(usage = "%prog", option_list = args_list)
+arguments <- parse_args(parser, positional_arguments = T)
+opt <- arguments$options
+
+if (as.numeric(opt$option) == 1) {
+	# Option 1: read gbc/<sample>.txt.gz for every sample, label its rows with the sample name and stack them.
+	sample_ids = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE))
+	read_counts = function(current_sample) {
+		readr::read_tsv(file = paste0("gbc/", current_sample, ".txt.gz"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		readr::type_convert() %>%
+		dplyr::mutate(sample_name = current_sample)
+	}
+	data = do.call(bind_rows, lapply(sample_ids, read_counts))
+	readr::write_tsv(x = data, path = "gbc/summary.txt", append = FALSE, col_names = TRUE)
+}
+
diff --git a/scripts/gzipLogs.sh b/scripts/gzipLogs.sh
deleted file mode 100644
index 05cdf2cc..00000000
--- a/scripts/gzipLogs.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/sh
-# gzip old log files
-LOCK=~/.gzip_lock
-if ! mkdir $LOCK 2> /dev/null; then
-    echo "log gzip script is already running"
-    exit 1
-fi
-find /ifs/e63data/reis-filho/data /ifs/e63data/reis-filho/projects/ -name '*.log' -mtime +5 -exec gzip {} \;
-rmdir $LOCK
diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R
new file mode 100644
index 00000000..d0ff2e03
--- /dev/null
+++ b/scripts/hr_detect.R
@@ -0,0 +1,237 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("signature.tools.lib"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+args_list <- list(make_option("--option", default = NA, type = 'character', help = "type of analysis"),
+		  make_option("--sample_name", default = NA, type = 'character', help = "sample name"))
+parser <- OptionParser(usage = "%prog", option_list = args_list)
+arguments <- parse_args(parser, positional_arguments = T)
+opt <- arguments$options
+
+if (as.numeric(opt$option) == 1) {
+	# Option 1: export this sample's MuTect calls from summary/tsv/all.tsv as a minimal VCF,
+	# hr_detect/<sample_name>/<sample_name>.snv.vcf. --sample_name is compared against
+	# "<TUMOR_SAMPLE>_<NORMAL_SAMPLE>".
+	snv_vcf_file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".snv.vcf")
+	vcf = readr::read_tsv(file = "summary/tsv/all.tsv", col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	      dplyr::filter(CHROM %in% c(1:22, "X")) %>%
+	      # X is recoded as 23 before type_convert() (presumably so that CHROM sorts numerically)
+	      dplyr::mutate(CHROM = case_when(
+		      CHROM == "X" ~ "23",
+		      TRUE ~ CHROM
+	      )) %>%
+	      readr::type_convert() %>%
+	      dplyr::arrange(CHROM, POS) %>%
+	      dplyr::mutate(TUMOR_NORMAL = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>%
+	      dplyr::filter(TUMOR_NORMAL == as.character(opt$sample_name)) %>%
+	      dplyr::filter(variantCaller == "mutect") %>%
+	      # require TUMOR_DP and NORMAL_DP of at least 10
+	      dplyr::filter(TUMOR_DP>=10 & NORMAL_DP>=10) %>%
+	      dplyr::mutate(CHROM = as.character(CHROM)) %>%
+	      dplyr::mutate(CHROM = ifelse(CHROM == "23", "X", CHROM)) %>%
+	      # QUAL, FILTER and INFO are constant placeholders
+	      dplyr::mutate(QUAL = 100, FILTER = "PASS", INFO = ".") %>%
+	      dplyr::select(`#CHROM` = CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO)
+	# The file is started with the VCF version line and the table is appended below it.
+	# `file =` is used because readr has deprecated the `path =` argument of write_tsv().
+	cat("##fileformat=VCFv4.1\n", file = snv_vcf_file, append = FALSE)
+	readr::write_tsv(x = vcf, file = snv_vcf_file, col_names = TRUE, append = TRUE)
+
+} else if (as.numeric(opt$option) == 2) {
+	# Option 2: export this sample's indel calls from summary/tsv/all.tsv as a minimal VCF,
+	# hr_detect/<sample_name>/<sample_name>.indel.vcf. --sample_name is compared against
+	# "<TUMOR_SAMPLE>_<NORMAL_SAMPLE>".
+	indel_vcf_file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".indel.vcf")
+	vcf = readr::read_tsv(file = "summary/tsv/all.tsv", col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	      dplyr::filter(CHROM %in% c(1:22, "X")) %>%
+	      # X is recoded as 23 before type_convert() (presumably so that CHROM sorts numerically)
+	      dplyr::mutate(CHROM = case_when(
+		      CHROM == "X" ~ "23",
+		      TRUE ~ CHROM
+	      )) %>%
+	      readr::type_convert() %>%
+	      dplyr::arrange(CHROM, POS) %>%
+	      dplyr::mutate(TUMOR_NORMAL = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>%
+	      dplyr::filter(TUMOR_NORMAL == as.character(opt$sample_name)) %>%
+	      # keep rows whose variantCaller field mentions both varscan and strelka
+	      dplyr::filter(grepl("varscan", variantCaller, fixed = TRUE)) %>%
+	      dplyr::filter(grepl("strelka", variantCaller, fixed = TRUE)) %>%
+	      # require TUMOR_DP and NORMAL_DP of at least 10
+	      dplyr::filter(TUMOR_DP>=10 & NORMAL_DP>=10) %>%
+	      dplyr::mutate(CHROM = as.character(CHROM)) %>%
+	      dplyr::mutate(CHROM = ifelse(CHROM == "23", "X", CHROM)) %>%
+	      # QUAL, FILTER and INFO are constant placeholders
+	      dplyr::mutate(QUAL = 100, FILTER = "PASS", INFO = ".") %>%
+	      dplyr::select(`#CHROM` = CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO)
+	# The file is started with the VCF version line and the table is appended below it.
+	# `file =` is used because readr has deprecated the `path =` argument of write_tsv().
+	cat("##fileformat=VCFv4.1\n", file = indel_vcf_file, append = FALSE)
+	readr::write_tsv(x = vcf, file = indel_vcf_file, col_names = TRUE, append = TRUE)
+
+} else if (as.numeric(opt$option) == 3) {
+	# Option 3: convert facets/cncf/<sample_name>.txt into the copy-number table
+	# hr_detect/<sample_name>/<sample_name>.cn.txt.
+	cn = readr::read_tsv(file = paste0("facets/cncf/", as.character(opt$sample_name), ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	     readr::type_convert() %>%
+	     dplyr::mutate(chrom = as.character(chrom)) %>%
+	     # chromosome 23 is reported as X
+	     dplyr::mutate(chrom = ifelse(chrom == "23", "X", chrom)) %>%
+	     dplyr::mutate(seg_no = seg,
+			   Chromosome = chrom,
+			   chromStart = loc.start,
+			   chromEnd = loc.end,
+			   total.copy.number.inNormal = 2,
+			   minor.copy.number.inNormal = 1,
+			   total.copy.number.inTumour = tcn.em,
+			   minor.copy.number.inTumour = lcn.em) %>%
+	     # a missing total copy number in the tumour is replaced by 2
+	     dplyr::mutate(total.copy.number.inTumour = case_when(
+		     		is.na(total.copy.number.inTumour) ~ 2,
+		     		TRUE ~ total.copy.number.inTumour
+	     )) %>%
+	     # NOTE(review): a missing minor copy number is also replaced by 2 although the normal value above is 1 - confirm
+	     dplyr::mutate(minor.copy.number.inTumour = case_when(
+		     		is.na(minor.copy.number.inTumour) ~ 2,
+		     		TRUE ~ minor.copy.number.inTumour
+	     )) %>%
+	     dplyr::select(seg_no, Chromosome, chromStart, chromEnd,
+			   total.copy.number.inNormal, minor.copy.number.inNormal, total.copy.number.inTumour, minor.copy.number.inTumour)
+	# `file =` is used because readr has deprecated the `path =` argument of write_tsv().
+
+	readr::write_tsv(x = cn, file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".cn.txt"), col_names = TRUE, append = FALSE)
+	
+} else if (as.numeric(opt$option) == 4) {
+	# Option 4: keep rearrangements with both ends on chromosomes 1-22 or X, spell out the svclass
+	# codes and write hr_detect/<sample_name>/<sample_name>.sv.bedpe (`file =` replaces deprecated `path =`).
+	sv = readr::read_tsv(file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	     readr::type_convert() %>%
+	     dplyr::filter(chrom1 %in% c(1:22, "X")) %>%
+	     dplyr::filter(chrom2 %in% c(1:22, "X")) %>%
+	     dplyr::mutate(svclass = case_when(
+		     svclass == "BND" ~ "translocation",
+		     svclass == "TRA" ~ "translocation",
+		     svclass == "DEL" ~ "deletion",
+		     svclass == "DUP" ~ "tandem-duplication",
+		     svclass == "INS" ~ "insertion",
+		     svclass == "INV" ~ "inversion",
+		     TRUE ~ svclass
+	     )) %>%
+	     dplyr::mutate(sample = as.character(opt$sample_name))
+
+	readr::write_tsv(x = sv, file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".sv.bedpe"), col_names = TRUE, append = FALSE)
+} else if (as.numeric(opt$option) %in% c(5, 6)) {
+	# Options 5 and 6 draw the same genome plot from the sample's SNV, indel, copy-number
+	# and rearrangement files under hr_detect/<sample_name>/. They differ only in the image
+	# format: option 5 writes png, option 6 writes svg.
+	plot_format = if (as.numeric(opt$option) == 5) "png" else "svg"
+	sample_id = as.character(opt$sample_name)
+	sample_dir = paste0("hr_detect/", sample_id, "/")
+
+	# Input files, all named hr_detect/<sample_name>/<sample_name><suffix>.
+	url_subs_file = paste0(sample_dir, sample_id, ".snv_repaired.vcf.bgz")
+	url_indels_file = paste0(sample_dir, sample_id, ".indel_repaired.vcf.bgz")
+	url_cn_file = paste0(sample_dir, sample_id, ".cn.txt")
+	url_sv_file = paste0(sample_dir, sample_id, ".sv.bedpe")
+
+	# Every argument except out_format is the same for both options; the plot is written
+	# into the sample's own directory.
+	genomePlot(subsVcf.file = url_subs_file,
+		   indelsVcf.file = url_indels_file,
+		   cnvsTab.file = url_cn_file,
+		   rearrBedpe.file = url_sv_file,
+		   sampleID = sample_id,
+		   genome.v = "hg19",
+		   file.ideogram = NULL,
+		   plot_title = NULL,
+		   no_copynumber = FALSE,
+		   no_rearrangements = FALSE,
+		   no_indels = FALSE,
+		   no_subs_legend = FALSE,
+		   out_format = plot_format,
+		   out_path = sample_dir,
+		   rearr_only_assembled = FALSE,
+		   base.per.unit = NULL)
+
+
+} else if (as.numeric(opt$option) == 7) {
+	# Option 7: run HRDetect_pipeline() on every sample listed (space-separated) in
+	# --sample_name and write its hrdetect_output table to hr_detect/hrdetect_smry.txt.
+	sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE))
+	# Build "hr_detect/<sample>/<sample><suffix>" for every sample, named by sample.
+	named_paths = function(suffix) {
+		paths = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, suffix) }))
+		names(paths) = sample_names
+		paths
+	}
+	hrdetect = HRDetect_pipeline(genome.v = "hg19",
+				     SNV_vcf_files = named_paths(".snv_repaired.vcf.bgz"),
+				     SV_bedpe_files = named_paths(".sv.bedpe"),
+				     Indels_vcf_files = named_paths(".indel_repaired.vcf.bgz"),
+				     CNV_tab_files = named_paths(".cn.txt"),
+				     SNV_signature_version = "COSMICv2",
+				     nparallel = 4)
+
+	readr::write_tsv(x = hrdetect$hrdetect_output %>%
+			     as.data.frame() %>%
+			     tibble::rownames_to_column(var = "sample_name") %>%
+			     dplyr::as_tibble(),
+			 file = "hr_detect/hrdetect_smry.txt", append = FALSE, col_names = TRUE)
+} else if (as.numeric(opt$option) == 8) {
+	# Option 8: fit SNV signatures for every sample listed (space-separated) in --sample_name
+	# and write each sample's exposure fractions for the selected signatures to
+	# hr_detect/signatures_smry.txt. Only the SNV files are passed to the fit.
+	sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE))
+	snv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".snv_repaired.vcf.bgz") }))
+	names(snv_files) = sample_names
+
+	res =  signatureFit_pipeline(genome.v = "hg19",
+				     SNV_vcf_files = snv_files,
+				     nparallel = 4)
+
+	# Signatures kept in the summary; tags_to_use[i] is the description of signatures_to_use[i].
+	signatures_to_use = c("SBS1", "SBS2", "SBS3", "SBS4", "SBS6", "SBS7a", "SBS7c", "SBS8",
+			      "SBS9", "SBS10a", "SBS10d", "SBS11", "SBS13", "SBS14", "SBS15",
+			      "SBS18", "SBS20", "SBS22", "SBS24", "SBS26", "SBS30", "SBS31",
+			      "SBS32", "SBS35", "SBS38", "SBS44", "SBS84", "SBS87", "SBS88",
+			      "SBS90", "SBS94", "SBS95", "SBS96", "SBS97", "SBS104", "SBS105",
+			      "SBS107", "SBS108", "SBS109", "SBS110", "SBS111", "SBS112",
+			      "SBS113", "SBS119", "SBS129", "SBS137")
+
+	tags_to_use = c("Deamination (Age)", "Deamination (APOBEC)", "HR deficiency", "Tobacco", "MMR deficiency",
+			"UV exposure", "UV exposure", "HR deficiency", "Lymphoma", "POLE deficiency", "POLD deficiency",
+			"Temozolomide-1,2-DMH", "Deamination (APOBEC)", "MMR deficiency (POLE deficiency)", "MMR deficiency",
+			"BER deficiency", "MMR deficiency (POLD deficiency)", "AAI", "Aflatoxin", "MMR deficiency",
+			"BER deficiency", "Platinum", "Azathioprine", "Platinum", "Similar to UV", "MMR deficiency",
+			"AID", "Deamination (Thiopurine)", "Colibactin", "Duocarmycin", "Similar to tobacco", "Deamination",
+			"Deamination", "MMR deficiency", "Platinum-related", "Deamination", "Similar to tobacco", "BER deficiency",
+			"Similar to tobacco", "Similar to AAI", "Platinum-related", "Platinum-related", "AAI", "Temozolomide-1,2-DMH",
+			"Similar to UV", "Similar to UV")
+
+	# Exposures are rescaled to sum to 1 per sample over the kept signatures. mutate() is used
+	# because summarise() results with several rows per group are deprecated in dplyr.
+	res = res$fitResults$exposures %>%
+	      as.data.frame() %>%
+	      tibble::rownames_to_column(var = "sample_name") %>%
+	      reshape2::melt(id.vars = "sample_name", variable.name = "signature", value.name = "exposure") %>%
+	      dplyr::filter(signature %in% signatures_to_use) %>%
+	      dplyr::mutate(exposure = case_when(
+		      is.na(exposure) ~ 0,
+		      TRUE ~ exposure
+	      )) %>%
+	      dplyr::group_by(sample_name) %>%
+	      dplyr::mutate(exposure = exposure/sum(exposure)) %>%
+	      dplyr::ungroup() %>%
+	      dplyr::arrange(sample_name) %>%
+	      dplyr::left_join(dplyr::tibble(signature = signatures_to_use,
+					     description = tags_to_use), by = "signature")
+	readr::write_tsv(x = res, file = "hr_detect/signatures_smry.txt", append = FALSE, col_names = TRUE)
+
+}
diff --git a/scripts/immunedeconv.R b/scripts/immunedeconv.R
new file mode 100644
index 00000000..b5497b5f
--- /dev/null
+++ b/scripts/immunedeconv.R
@@ -0,0 +1,57 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("immunedeconv"))
+
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+optList = list(make_option('--option', type = 'character', default = NA, help = 'Immune deconv algorithm'),
+               make_option('--input_file', type = 'character', default = NA, help = 'Expression input file'),
+	       make_option('--output_file', type = 'character', default = NA, help = 'Immune cell output file'))
+parser = OptionParser(usage = "%prog",  option_list=optList)
+arguments = parse_args(parser, positional_arguments = T)
+opt = arguments$options
+
+set_cibersort_binary("~/share/lib/resource_files/CIBERSORT/CIBERSORT.R")
+set_cibersort_mat("~/share/lib/resource_files/CIBERSORT/LM22.txt")
+
+# Run one immunedeconv method on the table in --input_file and write the result to
+# --output_file.
+#
+# The input needs a `gene_symbol` column. Rows are sorted by gene symbol; the remaining
+# columns become the expression matrix handed to deconvolute(), with the gene symbols
+# as row names.
+#
+# method: value passed as the `method` argument of immunedeconv::deconvolute().
+run_deconvolution = function(method) {
+	tpm_by_gene = readr::read_tsv(file = opt$input_file, col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		      readr::type_convert() %>%
+		      dplyr::arrange(gene_symbol)
+	gene_expr = tpm_by_gene %>%
+		    dplyr::select(-gene_symbol) %>%
+		    as.matrix()
+	rownames(gene_expr) = tpm_by_gene %>% .[["gene_symbol"]]
+	deconv_result = immunedeconv::deconvolute(gene_expression = gene_expr, method = method, scale_mrna = FALSE)
+	readr::write_tsv(x = deconv_result, file = opt$output_file, col_names = TRUE, append = FALSE)
+}
+
+# --option selects the method: 1 = quantiseq, 2 = mcp_counter, 3 = cibersort_abs.
+# Any other numeric value runs nothing. The three options share all other settings,
+# including scale_mrna = FALSE.
+if (as.numeric(opt$option)==1) {
+	run_deconvolution(method = "quantiseq")
+
+} else if (as.numeric(opt$option)==2) {
+	run_deconvolution(method = "mcp_counter")
+
+} else if (as.numeric(opt$option)==3) {
+	run_deconvolution(method = "cibersort_abs")
+
+}
+
diff --git a/scripts/initProject.pl b/scripts/init_project.pl
similarity index 77%
rename from scripts/initProject.pl
rename to scripts/init_project.pl
index 04ed5d37..06d15290 100644
--- a/scripts/initProject.pl
+++ b/scripts/init_project.pl
@@ -22,7 +22,3 @@
 unless (-e "summary_config.yaml") {
     copy("modules/default_yaml/summary_config.yaml", "summary_config.yaml") or die "Unable to create summary_config.yaml: $!";
 }
-
-# unless (-e "sample_attr.yaml") {
-#    copy("modules/default_yaml/sample_attr.yaml", "sample_attr.yaml") or die "Unable to create sample_attr.yaml: $!";
-# }
diff --git a/scripts/joinEff.pl b/scripts/join_eff.pl
similarity index 97%
rename from scripts/joinEff.pl
rename to scripts/join_eff.pl
index bc0b1a6b..2779407b 100644
--- a/scripts/joinEff.pl
+++ b/scripts/join_eff.pl
@@ -1,5 +1,4 @@
 #!/usr/bin/env perl
-# join EFF lines
 
 use strict;
 use List::MoreUtils qw(first_index indexes);
diff --git a/scripts/knit.R b/scripts/knit.R
index c63e70e4..f6c77bf3 100644
--- a/scripts/knit.R
+++ b/scripts/knit.R
@@ -12,7 +12,6 @@ input <- args[1]
 outPrefix <- args[2]
 args <- args[c(-1,-2)]
 
-#create output dirs
 figPath <- file.path(outPrefix, 'figure/')
 cachePath <- file.path(outPrefix, 'cache/')
 dir.create(figPath, showWarnings = F, recursive = T)
diff --git a/scripts/medicc2.R b/scripts/medicc2.R
new file mode 100644
index 00000000..4289f47c
--- /dev/null
+++ b/scripts/medicc2.R
@@ -0,0 +1,74 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("reshape2"))
+suppressPackageStartupMessages(library("copynumber"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+args_list <- list(make_option("--option", default = NA, type = 'character', help = "Which option?"),
+		  make_option("--tumor_sample_name", default = NA, type = 'character', help = "Tumor sample name"),
+		  make_option("--normal_sample_name", default = NA, type = 'character', help = "Normal sample name"),
+		  make_option("--file_in", default = NA, type = 'character', help = "Input file name including path"),
+		  make_option("--file_out", default = NA, type = 'character', help = "Output file name including path"))
+				  
+parser <- OptionParser(usage = "%prog", option_list = args_list)
+arguments <- parse_args(parser, positional_arguments = T)
+opt <- arguments$options
+
+if (as.numeric(opt$option) == 1) {
+	load(as.character(opt$file_in))
+	cn_df = out2$jointseg %>%
+		dplyr::as_tibble() %>%
+		dplyr::select(Chromosome = chrom,
+			      Position = maploc,
+			      Log2_Ratio = cnlr)
+	readr::write_tsv(x = cn_df, file = as.character(opt$file_out), col_names = TRUE, append = FALSE)
+
+} else if (as.numeric(opt$option) == 2) {
+	tumor_sample_names = unlist(strsplit(x = as.character(opt$tumor_sample_name), split = " ", fixed = TRUE))
+	normal_sample_name = unlist(strsplit(x = as.character(opt$normal_sample_name), split = " ", fixed = TRUE))
+	cn_df = list()
+	for (i in 1:length(tumor_sample_names)) {
+		data_ = readr::read_tsv(file = paste0("medicc2/", tumor_sample_names[i], "/", tumor_sample_names[i], ".txt"),
+					col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			readr::type_convert()
+		colnames(data_) = c("Chromosome", "Position", paste0(tumor_sample_names[i], "_Log2_Ratio"))
+		cn_df[[i]] = data_ %>%
+			     reshape2::melt(id.vars = c("Chromosome", "Position"))
+	}
+	cn_df = do.call(bind_rows, cn_df) %>%
+		reshape2::dcast(Chromosome + Position ~ variable, value.var = "value", fill = 0)
+	readr::write_tsv(x = cn_df, file = as.character(opt$file_out), col_names = TRUE, append = FALSE)
+	
+} else if (as.numeric(opt$option) == 3) {
+	tumor_sample_names = unlist(strsplit(x = as.character(opt$tumor_sample_name), split = " ", fixed = TRUE))
+	normal_sample_name = unlist(strsplit(x = as.character(opt$normal_sample_name), split = " ", fixed = TRUE))
+	cn_df = readr::read_tsv(file = as.character(opt$file_in), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		readr::type_convert() %>%
+		as.data.frame()
+	cn_smooth = copynumber::winsorize(data = cn_df, method = "mad", tau = 2.5, k = 40, verbose = FALSE)
+	cn_segmented = copynumber::multipcf(data = cn_smooth, gamma = 40, normalize = FALSE, fast = FALSE, verbose = FALSE)
+	
+	# Convert segmented log2 ratios into integer total copy numbers (nAB) using the purity/ploidy columns of the FACETS summary.
+	total_copies = cn_segmented %>%
+		       dplyr::select(c("chrom", "start.pos", "end.pos", contains("Log2_Ratio"))) %>%
+		       dplyr::rename(start = start.pos, end = end.pos) %>%
+		       reshape2::melt(id.vars = c("chrom", "start", "end")) %>%
+		       dplyr::select(sample_id = variable, chrom, start, end, nAB = value) %>%
+		       dplyr::mutate(sample_id = gsub(pattern = "_Log2_Ratio", replacement = "", x = sample_id, fixed = TRUE)) %>%
+		       dplyr::left_join(readr::read_tsv(file = "facets/summary/summary.tsv", col_names = TRUE, col_types = cols(.default = col_character())) %>%
+					dplyr::select(sample_id = tumorName, purity, ploidy),
+					by = "sample_id") %>%
+		       readr::type_convert() %>%
+		       dplyr::mutate(nAB = ((2^nAB)*((purity*ploidy) + (2*(1-purity))) - 2*(1-purity))/purity) %>%
+		       dplyr::mutate(nAB = pmax(round(nAB), 0)) %>%	# a copy number cannot be negative; pmax keeps NA as NA
+		       dplyr::select(-purity, -ploidy)
+	
+	readr::write_tsv(x = total_copies, file = as.character(opt$file_out), col_names = TRUE, append = FALSE)
+}
diff --git a/scripts/mimsi.R b/scripts/mimsi.R
new file mode 100644
index 00000000..aebd765a
--- /dev/null
+++ b/scripts/mimsi.R
@@ -0,0 +1,29 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("magrittr"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+args_list <- list(make_option("--option", default = NA, type = 'character', help = "type of analysis"),
+		  make_option("--sample_names", default = NA, type = 'character', help = "sample name"))
+parser <- OptionParser(usage = "%prog", option_list = args_list)
+arguments <- parse_args(parser, positional_arguments = T)
+opt <- arguments$options
+
+if (as.numeric(opt$option)==1) {
+	# Option 1: read mimsi/<sample>/<sample>.txt for every sample and stack them into mimsi/summary.txt.
+	sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE))
+	smry = list()
+	for (i in seq_along(sample_names)) {
+		smry[[i]] = readr::read_tsv(file = paste0("mimsi/", sample_names[i], "/", sample_names[i], ".txt"),
+					    col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			    readr::type_convert()
+	}
+	smry = do.call(rbind, smry)
+	readr::write_tsv(x = smry, file = "mimsi/summary.txt", append = FALSE, col_names = TRUE)	# `file` replaces the deprecated `path` argument
+}
diff --git a/scripts/monitorMySQL.sh b/scripts/monitorMySQL.sh
index 898c070b..6fb2b6cf 100755
--- a/scripts/monitorMySQL.sh
+++ b/scripts/monitorMySQL.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+
 UP=$(pgrep -u limr mysqld | wc -l);
 if [ "$UP" -ne 1 ];
 then
diff --git a/scripts/monitorGfServer.sh b/scripts/monitor_gfserver.sh
similarity index 99%
rename from scripts/monitorGfServer.sh
rename to scripts/monitor_gfserver.sh
index 6c552afd..376bb8df 100644
--- a/scripts/monitorGfServer.sh
+++ b/scripts/monitor_gfserver.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+
 UP=$(pgrep -u limr gfServer | wc -l);
 if [ "$UP" -ne 1 ];
 then
diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R
new file mode 100644
index 00000000..0c79350e
--- /dev/null
+++ b/scripts/pyclone_13.R
@@ -0,0 +1,268 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("ggplot2"))
+suppressPackageStartupMessages(library("fuzzyjoin"))
+suppressPackageStartupMessages(library("reshape2"))
+suppressPackageStartupMessages(library("ComplexHeatmap"))
+suppressPackageStartupMessages(library("RColorBrewer"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"),
+               make_option("--sample_set", default = NA, type = 'character', help = "sample set"),
+	       make_option("--normal_sample", default = NA, type = 'character', help = "normal sample"),
+	       make_option("--input_file", default = NA, type = 'character', help = "input file"),
+	       make_option("--output_file", default = NA, type = 'character', help = "output file"),
+	       make_option("--num_iter", default = NA, type = 'character', help = "mcmc iterations"))
+parser = OptionParser(usage = "%prog", option_list = optList)
+arguments = parse_args(parser, positional_arguments = T)
+opt = arguments$options
+
+if (as.numeric(opt$option) == 1) {
+	sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = "_"))
+	normal_sample = as.character(opt$normal_sample)
+	sample_set = setdiff(sample_set, normal_sample)
+	pyclone = list()
+	for (i in 1:length(sample_set)) {
+		sufam = readr::read_tsv(file = paste0("pyclone_13/", sample_set[i], "/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			readr::type_convert() %>%
+			dplyr::select(Chromosome = chrom,
+				      Position = pos,
+				      Reference_Allele = val_ref,
+				      Alternate_Allele = val_alt,
+				      t_depth = cov,
+				      t_alt_count = val_al_count) %>%
+		 	dplyr::mutate(t_ref_count = t_depth - t_alt_count) %>%
+			dplyr::mutate(mutation_id = paste0(Chromosome, ":", Position, ":", Reference_Allele, ":", Alternate_Allele),
+				      ref_counts = t_ref_count,
+				      var_counts = t_alt_count,
+				      normal_cn = 2)
+		
+		facets = readr::read_tsv(file = paste0("facets/cncf/", sample_set[i], "_", normal_sample, ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			 dplyr::mutate(chrom = ifelse(chrom == "23", "X", chrom)) %>%
+			 dplyr::mutate(Chromosome = chrom,
+				       Start_Position = loc.start,
+				       End_Position = loc.end,
+				       minor_cn = ifelse(is.na(lcn.em), "0", lcn.em),
+				       major_cn = tcn.em) %>%
+		 readr::type_convert() %>%
+		 dplyr::mutate(major_cn = major_cn - minor_cn) %>%
+		 dplyr::select(Chromosome, Start_Position, End_Position, minor_cn, major_cn)
+		 
+		pyclone[[i]] = sufam %>%
+		  	       dplyr::mutate(Chromosome = ifelse(Chromosome == "X", "23", Chromosome)) %>%
+			       dplyr::mutate(Start_Position = Position,
+					     End_Position = Position +1) %>%
+			       readr::type_convert() %>%
+			       fuzzyjoin::genome_left_join(facets %>%
+							   dplyr::mutate(Chromosome = ifelse(Chromosome == "X", "23", Chromosome)) %>%
+							   readr::type_convert(),
+							   by = c("Chromosome", "Start_Position", "End_Position")) %>%
+			       dplyr::mutate(sample_id = sample_set[i]) %>%
+			       dplyr::select(mutation_id, sample_id, ref_counts, var_counts, normal_cn, major_cn, minor_cn)
+		
+	}
+	pyclone = do.call(rbind, pyclone) %>%
+		  dplyr::filter(!is.na(ref_counts)) %>%
+		  dplyr::filter(!is.na(var_counts)) %>%
+		  dplyr::mutate(var_counts = ifelse(var_counts<=1, 0, var_counts)) %>%
+		  dplyr::filter(!is.na(major_cn)) %>%
+		  dplyr::filter(major_cn != 0) %>%
+		  dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn))
+	
+	smry = pyclone %>%
+	       dplyr::group_by(mutation_id) %>%
+	       dplyr::summarize(n_x = n(),
+			        n_y = sum(var_counts)) %>%
+	       dplyr::ungroup()
+	
+	pyclone = pyclone %>%
+		  dplyr::left_join(smry, by = "mutation_id") %>%
+		  dplyr::filter(n_x == length(sample_set)) %>%
+		  dplyr::filter(n_y > 0)
+	
+	for (i in 1:length(sample_set)) {
+		pyclone_ft = pyclone %>%
+			     dplyr::filter(sample_id == sample_set[i]) %>%
+			     dplyr::select(mutation_id, ref_counts, var_counts, normal_cn, minor_cn, major_cn)
+		readr::write_tsv(x = pyclone_ft, file = paste0("pyclone_13/", opt$sample_set, "/", sample_set[i], ".tsv"), append = FALSE, col_names = TRUE)
+	}
+	
+} else if (as.numeric(opt$option) == 2) {
+	sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = "_"))
+	normal_sample = as.character(opt$normal_sample)
+	sample_set = setdiff(sample_set, normal_sample)
+	params = list()
+	for (i in 1:length(sample_set)) {
+		params[[i]] = readr::read_tsv(file = paste0("facets/cncf/", sample_set[i], "_", normal_sample, ".out"), col_names = FALSE, col_types = cols(.default = col_character())) %>%
+			      readr::type_convert() %>%
+			      dplyr::filter(grepl("# Purity", X1)) %>%
+			      dplyr::mutate(X1 = gsub("# Purity = ", "", X1)) %>%
+			      readr::type_convert() %>%
+			      .[["X1"]]
+	}
+	cat(paste0("num_iters: ", format(as.numeric(opt$num_iter), scientific = FALSE, trim = TRUE), "\n\n"), file = as.character(opt$output_file), append = FALSE)	# fixed notation: paste0() alone renders 100000 as "1e+05"
+	cat("base_measure_params:\n", file = as.character(opt$output_file), append = TRUE)
+	cat("  alpha: 1\n", file = as.character(opt$output_file), append = TRUE)
+	cat("  beta: 1\n", file = as.character(opt$output_file), append = TRUE)
+	cat("\n", file = as.character(opt$output_file), append = TRUE)
+	cat("concentration:\n", file = as.character(opt$output_file), append = TRUE)
+	cat("  value: 1.0\n", file = as.character(opt$output_file), append = TRUE)
+	cat("\n", file = as.character(opt$output_file), append = TRUE)
+	cat("  prior:\n", file = as.character(opt$output_file), append = TRUE)
+	cat("    shape: 1.0\n", file = as.character(opt$output_file), append = TRUE)
+	cat("    rate: 1.0\n", file = as.character(opt$output_file), append = TRUE)
+	cat("\n", file = as.character(opt$output_file), append = TRUE)
+    	cat("density: pyclone_beta_binomial\n", file = as.character(opt$output_file), append = TRUE)
+	cat("\n", file = as.character(opt$output_file), append = TRUE)
+	cat("beta_binomial_precision_params:\n", file = as.character(opt$output_file), append = TRUE)
+	cat("  value: 1000\n", file = as.character(opt$output_file), append = TRUE)
+	cat("\n", file = as.character(opt$output_file), append = TRUE)
+	cat("  prior:\n", file = as.character(opt$output_file), append = TRUE)
+  	cat("    shape: 10\n", file = as.character(opt$output_file), append = TRUE)
+	cat("    rate: 10\n", file = as.character(opt$output_file), append = TRUE)
+	cat("\n", file = as.character(opt$output_file), append = TRUE)
+	cat("  proposal:\n", file = as.character(opt$output_file), append = TRUE)
+	cat("    precision: 0.1\n", file = as.character(opt$output_file), append = TRUE)
+	cat("\n", file = as.character(opt$output_file), append = TRUE)
+	cat("working_dir: pyclone_13/", file = as.character(opt$output_file), append = TRUE)
+	cat(as.character(opt$sample_set), file = as.character(opt$output_file), append = TRUE)
+	cat("\n\n", file = as.character(opt$output_file), append = TRUE)
+	cat("trace_dir: trace\n", file = as.character(opt$output_file), append = TRUE)
+	cat("init_method: connected\n", file = as.character(opt$output_file), append = TRUE)
+	cat("\n", file = as.character(opt$output_file), append = TRUE)
+	cat("samples:\n", file = as.character(opt$output_file), append = TRUE)
+	for (i in 1:length(sample_set)) {
+		cat(paste0("  ", sample_set[i], ":\n"), file = as.character(opt$output_file), append = TRUE)
+		cat(paste0("    mutations_file: pyclone_13/", as.character(opt$sample_set), "/", sample_set[i], ".yaml\n\n"), file = as.character(opt$output_file), append = TRUE)
+		cat("    tumour_content:\n", file = as.character(opt$output_file), append = TRUE)
+		cat(paste0("      value: ", params[[i]], "\n"), file = as.character(opt$output_file), append = TRUE)
+		cat("\n", file = as.character(opt$output_file), append = TRUE)
+		cat("    error_rate: 0.01\n", file = as.character(opt$output_file), append = TRUE)
+		cat("\n", file = as.character(opt$output_file), append = TRUE)
+	}
+} else if (as.numeric(opt$option) == 3) {
+	sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " "))
+	pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		  readr::type_convert() %>%
+		  dplyr::arrange(mutation_id)
+	
+	pyclone_ft = list()
+	index = 1
+	for (i in 1:(length(sample_set)-1)) {
+		for (j in (i+1):length(sample_set)) {
+			pyclone_ft[[index]] = pyclone %>%
+					      dplyr::filter(sample_id == sample_set[i]) %>%
+					      dplyr::select(mutation_id,
+							    cluster_id,
+							    sample_id_x = sample_id,
+							    cellular_prevalence_x = cellular_prevalence) %>%
+					      dplyr::full_join(pyclone %>%
+							       dplyr::filter(sample_id == sample_set[j]) %>%
+							       dplyr::select(mutation_id,
+									     sample_id_y = sample_id,
+									     cellular_prevalence_y = cellular_prevalence),
+							       by = "mutation_id") %>%
+					      readr::type_convert()
+			index = index + 1
+		}
+	}
+	pyclone_ft = do.call(bind_rows, pyclone_ft) %>%
+		     readr::type_convert() %>%
+		     dplyr::filter(!is.na(cellular_prevalence_x)) %>%
+		     dplyr::filter(!is.na(cellular_prevalence_y)) %>%
+		     dplyr::mutate(sample_id_x = factor(sample_id_x, levels = sample_set, ordered = TRUE)) %>%
+		     dplyr::mutate(sample_id_y = factor(sample_id_y, levels = sample_set, ordered = TRUE))
+	
+	smry_ = pyclone_ft %>%
+		dplyr::group_by(mutation_id) %>%
+		dplyr::summarize(cluster_id = unique(cluster_id)) %>%
+		dplyr::ungroup() %>%
+		dplyr::group_by(cluster_id) %>%
+		dplyr::summarize(n = n())
+	
+	pyclone_ft = pyclone_ft %>%
+		     dplyr::left_join(smry_, by = "cluster_id")
+	
+	colourCount = nrow(smry_)
+	getPalette = colorRampPalette(brewer.pal(9, "Set1"))
+		
+	plot_ = pyclone_ft %>%
+		ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id), size = n)) +
+		geom_point(stat = "identity", alpha = .75, shape = 21) +
+		scale_color_manual(values = getPalette(colourCount)) +
+		xlab("\n\nCCF (%)\n") +
+		ylab("\nCCF (%)\n\n") +
+		guides(color = guide_legend(title = "Cluster", override.aes = list(shape = 19)),
+		       size = guide_legend(title = "N")) +
+		facet_wrap(~sample_id_x+sample_id_y)
+	
+	pdf(file = as.character(opt$output_file), width = 18, height = 18)
+	print(plot_)
+	dev.off()
+	
+} else if (as.numeric(opt$option) == 4) {
+	sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " "))
+	pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		  readr::type_convert() %>%
+		  dplyr::mutate(sample_id = paste0(sample_id, "     "))
+	
+	pyclone_mt = pyclone %>%
+		     reshape2::dcast(formula = mutation_id~sample_id, value.var = "cellular_prevalence") %>%
+		     dplyr::left_join(pyclone %>%
+				      dplyr::select(mutation_id, cluster_id) %>%
+				      dplyr::filter(!duplicated(mutation_id)), by = "mutation_id")
+	
+	smry_ = pyclone %>%
+		dplyr::group_by(cluster_id) %>%
+		dplyr::summarize(cluster_mean = mean(cellular_prevalence)) %>%
+		dplyr::ungroup()
+	
+	pyclone_mt = pyclone_mt %>%
+		     dplyr::left_join(smry_, by = "cluster_id")
+	
+	index = pyclone_mt %>%
+		dplyr::select(-mutation_id, -cluster_id, -cluster_mean) %>%
+		apply(., 1, mean)
+	
+	pyclone_mt = pyclone_mt %>%
+		     dplyr::mutate(index = index) %>%
+		     dplyr::arrange(desc(cluster_mean), desc(cluster_id), desc(index))
+	
+	cp = c("#f0f0f0","#c6dbef","#9ecae1","#6baed6","#4292c6","#2171b5","#08519c","#08519c","#08306b","#08306b","#08306b")
+	ca = colorRampPalette(brewer.pal(9, "Set1"))(nrow(smry_))
+	names(ca) = smry_ %>% .[["cluster_id"]]
+	
+	ha = rowAnnotation(
+		`Cluster ID` = pyclone_mt %>% .[["cluster_id"]],
+		col = list(`Cluster ID` = ca),
+		simple_anno_size = unit(7, "mm")
+	)
+	
+	pdf(file = as.character(opt$output_file), width = 12, height = 18)
+	draw(Heatmap(matrix = pyclone_mt %>%
+		         dplyr::select(-mutation_id, -cluster_id, -cluster_mean, -index),
+		col = cp,
+		name = "CCF",
+		na_col = "#f0f0f0",
+		border = "white",
+		border_gp = gpar(lwd = 0),
+		cluster_rows = TRUE,
+		show_row_dend = FALSE,
+		cluster_row_slices = TRUE,
+		cluster_columns = TRUE,
+		show_column_dend = FALSE,
+		use_raster = FALSE,
+	        left_annotation = ha,
+	        row_split = pyclone_mt %>% .[["cluster_id"]],
+	        width = unit(20, "cm"),
+	        height = unit(40, "cm")))
+	dev.off()
+
+}
diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R
new file mode 100644
index 00000000..831ce0da
--- /dev/null
+++ b/scripts/pyclone_vi.R
@@ -0,0 +1,230 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("fuzzyjoin"))
+suppressPackageStartupMessages(library("ggplot2"))
+suppressPackageStartupMessages(library("reshape2"))
+suppressPackageStartupMessages(library("ComplexHeatmap"))
+suppressPackageStartupMessages(library("RColorBrewer"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"),
+               make_option("--sample_set", default = NA, type = 'character', help = "sample set"),
+	       make_option("--normal_sample", default = NA, type = 'character', help = "normal sample"),
+	       make_option("--input_file", default = NA, type = 'character', help = "input file"),
+	       make_option("--output_file", default = NA, type = 'character', help = "output file"))
+parser = OptionParser(usage = "%prog", option_list = optList)
+arguments = parse_args(parser, positional_arguments = T)
+opt = arguments$options
+
+if (as.numeric(opt$option) == 1) {
+	sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = "_"))
+	normal_sample = as.character(opt$normal_sample)
+	sample_set = setdiff(sample_set, normal_sample)
+	pyclone = list()
+	for (i in 1:length(sample_set)) {
+		sufam = readr::read_tsv(file = paste0("pyclone_vi/", sample_set[i], "/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			readr::type_convert() %>%
+			dplyr::select(Chromosome = chrom,
+				      Position = pos,
+				      Reference_Allele = val_ref,
+				      Alternate_Allele = val_alt,
+				      t_depth = cov,
+				      t_alt_count = val_al_count) %>%
+		 	dplyr::mutate(t_ref_count = t_depth - t_alt_count) %>%
+			dplyr::mutate(mutation_id = paste0(Chromosome, ":", Position, ":", Reference_Allele, ":", Alternate_Allele),
+				      ref_counts = t_ref_count,
+				      alt_counts = t_alt_count,
+				      normal_cn = 2)
+		
+		facets = readr::read_tsv(file = paste0("facets/cncf/", sample_set[i], "_", normal_sample, ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			 dplyr::mutate(chrom = ifelse(chrom == "23", "X", chrom)) %>%
+			 dplyr::mutate(Chromosome = chrom,
+				       Start_Position = loc.start,
+				       End_Position = loc.end,
+				       minor_cn = lcn.em,
+				       major_cn = tcn.em) %>%
+		 	 readr::type_convert() %>%
+			 dplyr::mutate(major_cn = ifelse(is.na(major_cn), 2, major_cn)) %>%
+			 dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), major_cn, minor_cn)) %>%
+		 	 dplyr::mutate(major_cn = major_cn - minor_cn) %>%
+			 dplyr::select(Chromosome, Start_Position, End_Position, minor_cn, major_cn)
+		 
+		pyclone[[i]] = sufam %>%
+		  	       dplyr::mutate(Chromosome = ifelse(Chromosome == "X", "23", Chromosome)) %>%
+			       dplyr::mutate(Start_Position = Position,
+					     End_Position = Position +1) %>%
+			       readr::type_convert() %>%
+			       fuzzyjoin::genome_left_join(facets %>%
+							   dplyr::mutate(Chromosome = ifelse(Chromosome == "X", "23", Chromosome)) %>%
+							   readr::type_convert(),
+							   by = c("Chromosome", "Start_Position", "End_Position")) %>%
+			       dplyr::mutate(sample_id = sample_set[i]) %>%
+			       dplyr::select(mutation_id, sample_id, ref_counts, alt_counts, normal_cn, major_cn, minor_cn)
+	
+		params = readr::read_tsv(file = paste0("facets/cncf/", sample_set[i], "_", normal_sample, ".out"), col_names = FALSE, col_types = cols(.default = col_character())) %>%
+			 readr::type_convert() %>%
+			 dplyr::filter(grepl("# Purity", X1)) %>%
+			 dplyr::mutate(X1 = gsub("# Purity = ", "", X1)) %>%
+			 readr::type_convert() %>%
+			 .[["X1"]]
+		
+		# Fall back to a purity of 0.1 when the FACETS .out file reports NA, so tumour_content is never NA.
+		params = ifelse(is.na(params), .1, params)
+		pyclone[[i]] = pyclone[[i]] %>%
+			       dplyr::mutate(tumour_content = params)
+	}
+	pyclone = do.call(rbind, pyclone) %>%
+		  dplyr::filter(!is.na(ref_counts)) %>%
+		  dplyr::filter(!is.na(alt_counts)) %>%
+		  dplyr::mutate(alt_counts = ifelse(alt_counts<=1, 0, alt_counts)) %>%
+		  dplyr::mutate(major_cn = ifelse(is.na(major_cn), 1, major_cn)) %>%
+		  dplyr::mutate(major_cn = ifelse(major_cn==0, 1, major_cn)) %>%
+		  dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn))
+	
+	smry = pyclone %>%
+	       dplyr::group_by(mutation_id) %>%
+	       dplyr::summarize(n_x = n(),
+			        n_y = sum(alt_counts)) %>%
+	       dplyr::ungroup()
+	
+	pyclone = pyclone %>%
+		  dplyr::left_join(smry, by = "mutation_id") %>%
+		  dplyr::filter(n_x == length(sample_set)) %>%
+		  dplyr::filter(n_y > 0)
+	
+	readr::write_tsv(x = pyclone, file = opt$output_file, append = FALSE, col_names = TRUE)
+	
+} else if (as.numeric(opt$option) == 2) {
+	sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " "))
+	pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		  readr::type_convert()
+	
+	pyclone_ft = list()
+	index = 1
+	for (i in 1:(length(sample_set)-1)) {
+		for (j in (i+1):length(sample_set)) {
+			pyclone_ft[[index]] = pyclone %>%
+					      dplyr::filter(sample_id == sample_set[i]) %>%
+					      dplyr::select(mutation_id,
+							    cluster_id,
+							    sample_id_x = sample_id,
+							    cellular_prevalence_x = cellular_prevalence) %>%
+					      dplyr::full_join(pyclone %>%
+							       dplyr::filter(sample_id == sample_set[j]) %>%
+							       dplyr::select(mutation_id,
+									     sample_id_y = sample_id,
+									     cellular_prevalence_y = cellular_prevalence),
+							       by = "mutation_id") %>%
+					      readr::type_convert()
+			index = index + 1
+		}
+	}
+	pyclone_ft = do.call(bind_rows, pyclone_ft) %>%
+		     readr::type_convert() %>%
+		     dplyr::filter(!is.na(cellular_prevalence_x)) %>%
+		     dplyr::filter(!is.na(cellular_prevalence_y)) %>%
+		     dplyr::mutate(sample_id_x = factor(sample_id_x, levels = sample_set, ordered = TRUE)) %>%
+		     dplyr::mutate(sample_id_y = factor(sample_id_y, levels = sample_set, ordered = TRUE))
+	
+	smry_ = pyclone_ft %>%
+		dplyr::group_by(mutation_id) %>%
+		dplyr::summarize(cluster_id = unique(cluster_id)) %>%
+		dplyr::ungroup() %>%
+		dplyr::group_by(cluster_id) %>%
+		dplyr::summarize(n = n())
+	
+	pyclone_ft = pyclone_ft %>%
+		     dplyr::left_join(smry_, by = "cluster_id")
+	
+	colourCount = nrow(smry_)
+	getPalette = colorRampPalette(brewer.pal(9, "Set1"))
+		
+	plot_ = pyclone_ft %>%
+		ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id), size = n)) +
+		geom_point(stat = "identity", alpha = .75, shape = 21) +
+		scale_color_manual(values = getPalette(colourCount)) +
+		xlab("\n\nCCF (%)\n") +
+		ylab("\nCCF (%)\n\n") +
+		guides(color = guide_legend(title = "Cluster"),
+		       size = guide_legend(title = "N")) +
+		facet_wrap(~sample_id_x+sample_id_y)
+	
+	pdf(file = as.character(opt$output_file), width = 18, height = 18)
+	print(plot_)
+	dev.off()
+	
+} else if (as.numeric(opt$option) == 3) {
+	sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " "))
+	pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		  readr::type_convert() %>%
+		  dplyr::mutate(sample_id = paste0(sample_id, "     "))
+	
+	pyclone_mt = pyclone %>%
+		     reshape2::dcast(formula = mutation_id~sample_id, value.var = "cellular_prevalence") %>%
+		     dplyr::left_join(pyclone %>%
+				      dplyr::select(mutation_id, cluster_id) %>%
+				      dplyr::filter(!duplicated(mutation_id)), by = "mutation_id")
+	
+	smry_ = pyclone %>%
+		dplyr::group_by(cluster_id) %>%
+		dplyr::summarize(cluster_mean = mean(cellular_prevalence)) %>%
+		dplyr::ungroup()
+	
+	pyclone_mt = pyclone_mt %>%
+		     dplyr::left_join(smry_, by = "cluster_id")
+	
+	index = pyclone_mt %>%
+		dplyr::select(-mutation_id, -cluster_id, -cluster_mean) %>%
+		apply(., 1, mean)
+	
+	pyclone_mt = pyclone_mt %>%
+		     dplyr::mutate(index = index) %>%
+		     dplyr::arrange(desc(cluster_mean), desc(cluster_id), desc(index))
+	
+	cp = c("#f0f0f0","#c6dbef","#9ecae1","#6baed6","#4292c6","#2171b5","#08519c","#08519c","#08306b","#08306b","#08306b")
+	ca = colorRampPalette(brewer.pal(9, "Set1"))(nrow(smry_))
+	names(ca) = smry_ %>% .[["cluster_id"]]
+	
+	ha = rowAnnotation(
+		`Cluster ID` = pyclone_mt %>% .[["cluster_id"]],
+		col = list(`Cluster ID` = ca),
+		simple_anno_size = unit(7, "mm")
+	)
+	
+	pdf(file = as.character(opt$output_file), width = 12, height = 18)
+	draw(Heatmap(matrix = pyclone_mt %>%
+		         dplyr::select(-mutation_id, -cluster_id, -cluster_mean, -index),
+		col = cp,
+		name = "CCF",
+		na_col = "#f0f0f0",
+		border = "white",
+		border_gp = gpar(lwd = 0),
+		cluster_rows = TRUE,
+		show_row_dend = FALSE,
+		cluster_row_slices = TRUE,
+		cluster_columns = TRUE,
+		show_column_dend = FALSE,
+		use_raster = FALSE,
+	        left_annotation = ha,
+	        row_split = pyclone_mt %>% .[["cluster_id"]],
+	        width = unit(20, "cm"),
+	        height = unit(40, "cm")))
+	dev.off()
+
+} else if (as.numeric(opt$option) == 4) {
+	sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " "))
+	pyclone = list()
+	for (i in 1:length(sample_set)) {
+		pyclone[[i]] = readr::read_tsv(file = paste0("pyclone_vi/", sample_set[i], "/summary/by_loci.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		  	       readr::type_convert()
+	}
+	pyclone = do.call(bind_rows, pyclone)
+	readr::write_tsv(x = pyclone, file = "pyclone_vi/summary.txt", append = FALSE, col_names = TRUE)
+}
diff --git a/scripts/qmake.pl b/scripts/qmake.pl
index 89738127..20461017 100755
--- a/scripts/qmake.pl
+++ b/scripts/qmake.pl
@@ -6,24 +6,21 @@
 use Cwd;
 
 my $cwd = getcwd;
-#my $fin_email_addrs = "qmake.finished\@raylim.mm.st charlottekyng+qmake.finished\@gmail.com";
-#my $err_email_addrs = "qmake.error\@raylim.mm.st charlottekyng+qmake.error\@gmail.com";
-#my $start_email_addrs = "qmake.start\@raylim.mm.st charlottekyng+qmake.start\@gmail.com";
-
 my $err_slack = "pipeline_error";
 my $fin_slack = "pipeline_finished";
 
 my %slack_map = (
-    limr => "raylim",
-    debruiji => "debruiji",
-    brownd7 => "brownd7",
-    selenicp => "selenicp",
-    lees19 => "lees19",
-    ferrandl => "ferrandl",
-    dacruzpa => "dacruzpa"
+    brownd7 => "W013UH0HWUF",
+    selenicp => "W0142HA5LNA",
+    dacruzpa => "W01BT68MSSD",
+    parejaf => "W01BLNUF7J8",
+    zhuy1 => "W013UH382P9",
+    peix => "W0147TPN3E1",
+    issabhas => "U01V8R1RKQU",
+    xiaoy => "U01C8MPBSH5",
+    giacomf1 => "U06SW7W6D44"
 );
 
-
 sub HELP_MESSAGE {
     print "Usage: qmake.pl -n [name] -m -r [numAttempts]\n";
     print "-m: e-mail notifications\n";
@@ -37,8 +34,13 @@ sub HELP_MESSAGE {
 
 sub slack {
     my ($slack_channel, $slack_message) = @_;
-    my $slack_url = "\$'https://jrflab.slack.com/services/hooks/slackbot?token=2TWPiY9Hu4EUteoECqCEfYAZ&channel=%23$slack_channel'";
-    system "curl --data ' $slack_message' $slack_url &> /dev/null";
+    # Webhook URLs are read from the environment; only the two pipeline channels are mapped.
+    my %slack_urls = (pipeline_error => $ENV{SLACK_URL_ERR}, pipeline_finished => $ENV{SLACK_URL_FIN});
+    my $slack_url = $slack_urls{$slack_channel};
+    # Skip the POST when no webhook is known (unmapped channel or unset variable)
+    # rather than invoking curl without a URL.
+    return unless defined $slack_url && length $slack_url;
+    system "curl -X POST -H 'Content-type: application/json' --data '{\"text\":\"$slack_message\"}' '$slack_url' > /dev/null 2>&1";
 }
 
 
@@ -73,10 +75,8 @@ sub slack {
 # makefile processing
 =pod
 my $orig_args = $args;
-
 $args =~ s;-f (\S+);"-f " . dirname($1) . "/." . basename($1) . ".tmp";e;
 my $optf = $1;
-
 my @makefiles;
 if (defined $optf) {
     push @makefiles, $optf;
@@ -88,9 +88,6 @@ sub slack {
     }
     push @makefiles, "Makefile";
 }
-
-
-
 do {
     my $makefile = glob(shift(@makefiles));
     
@@ -148,13 +145,17 @@ sub slack {
                 $mail_subject = "**FINAL** $mail_subject";
             }
             $mail_subject .= " Attempt " . ($n + 1) if $n > 0; 
+            #open(MAIL, "| mail -s '$mail_subject' $addrs");
+            #print MAIL "Return code: $retcode\n";
+            #print MAIL "$mail_msg";
+            #close MAIL;
         }
 
-        my $pipeline_channel_msg = "\@${slackname} $project_name :";
+        my $pipeline_channel_msg = "<\@${slackname}|cal> $project_name :";
         if ($opt{s} && ($retcode == 0 || $n == 0 || $n + 1 == $attempts)) {
             if ($retcode == 0) {
                 # op success
-                my $slack_msg = "*COMPLETE* $name :ok_hand:";
+                my $slack_msg = "*COMPLETE* $name :the_horns:";
                 &slack($fin_slack, "$pipeline_channel_msg $slack_msg");
                 &slack($opt{c}, $slack_msg) if $opt{c};
             } else {
@@ -162,7 +163,7 @@ sub slack {
                 my $slack_msg = "*FAILURE* $cwd/$logfile";
                 if ($n + 1 == $attempts) {
                     # final attempt
-                    $slack_msg = ":troll: $slack_msg";
+                    $slack_msg = ":-1: $slack_msg";
                     &slack($opt{c}, $slack_msg) if $opt{c};
                 }
                 &slack($err_slack, "$pipeline_channel_msg $slack_msg");
diff --git a/scripts/star_fish.R b/scripts/star_fish.R
new file mode 100644
index 00000000..b7f53b36
--- /dev/null
+++ b/scripts/star_fish.R
@@ -0,0 +1,107 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("Starfish"))
+
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"),
+               make_option("--sample_name", default = NA, type = 'character', help = "sample name"),
+	       make_option("--input_file", default = NA, type = 'character', help = "input file"),
+	       make_option("--output_file", default = NA, type = 'character', help = "output file"))
+parser = OptionParser(usage = "%prog", option_list = optList)
+arguments = parse_args(parser, positional_arguments = T)
+opt = arguments$options
+
+if (as.numeric(opt$option)==1) {
+	sample_name = as.character(opt$sample_name)
+	bed = readr::read_tsv(file = as.character(opt$input_file), col_names = FALSE, col_types = cols(.default = col_character())) %>%
+	      readr::type_convert() %>%
+	      dplyr::rename(chrom1 = X1,
+			    start1 = X2,
+			    end1 = X3,
+			    chrom2 = X4,
+			    start2 = X5,
+			    end2 = X6,
+			    sv_id = X7,
+			    pe_support = X8,
+			    strand1 = X9,
+			    strand2 = X10,
+			    svclass = X11) %>%
+	      dplyr::select(chrom1, pos1 = start1, chrom2, pos2 = start2, strand1, strand2, svtype = svclass) %>%
+	      dplyr::mutate(svtype = case_when(
+		      svtype == "INV" & strand1 == "+" & strand2 == "+" ~ "h2hINV",
+		      svtype == "INV" & strand1 == "-" & strand2 == "-" ~ "t2tINV",
+		      TRUE ~ svtype
+	      )) %>%
+	      dplyr::mutate(sample = sample_name)
+	readr::write_tsv(x = bed, file = as.character(opt$output_file), col_names = TRUE, append = FALSE)
+	
+} else if (as.numeric(opt$option)==2) {
+	sample_name = as.character(opt$sample_name)
+	data = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	       dplyr::select(chromosome = chrom,
+			     start = loc.start,
+			     end = loc.end,
+			     total_cn = tcn.em) %>%
+	       dplyr::mutate(sample = sample_name)
+	readr::write_tsv(x = data, file = as.character(opt$output_file), col_names = TRUE, append = FALSE)
+	
+} else if (as.numeric(opt$option)==3) {
+	sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE))
+	sv_df = cn_df = gd_df = list()
+	for (i in 1:length(sample_names)) {
+		sv_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], ".merged_sv.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			     readr::type_convert()
+		cn_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], ".merged_cn.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		     	     readr::type_convert()
+		gd_df[[i]] = dplyr::tibble(sample = sample_names[i], gender = "unknown") %>%
+		     	     readr::type_convert()
+	}
+	sv_df = do.call(rbind, sv_df)
+	cn_df = do.call(rbind, cn_df)
+	gd_df = do.call(rbind, gd_df)
+	starfish_link_out = starfish_link(sv_file = sv_df, prefix = "star_fish/summary/")
+	if (length(starfish_link_out) == 1) {
+		cat(starfish_link_out, file = as.character(opt$output_file), append = FALSE)
+	} else {
+		starfish_feature_out = starfish_feature(cgr = starfish_link_out$starfish_call,
+							complex_sv = starfish_link_out$interleave_tra_complex_sv,
+							cnv_file = cn_df %>%
+								   dplyr::mutate(chromosome = as.character(chromosome)) %>%
+								   dplyr::mutate(chromosome = case_when(
+									   chromosome == "23" ~ "X",
+									   TRUE ~ chromosome)),
+							gender_file = gd_df,
+							prefix = "star_fish/summary/",
+							genome_v = "hg19",
+							cnv_factor = "auto",
+							arm_del_rm = TRUE)
+		starfish_sig_out = starfish_sig(cluster_feature = starfish_feature_out$cluster_feature,
+					        prefix = "star_fish/summary/",
+					        cmethod = "class")
+		wd = getwd()
+		setwd("star_fish/summary/")
+		starfish_plot(sv_file = sv_df, cnv_file = cn_df, cgr = starfish_link_out$starfish_call, genome_v = "hg19")
+		setwd(wd)
+		cat("taskcomplete!!", file = as.character(opt$output_file), append = FALSE)
+	}
+
+} else if (as.numeric(opt$option)==4) {
+	df = readr::read_csv(file = "star_fish/summary/_pcawg_6signatures_class.csv", col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	     readr::type_convert() %>%
+	     dplyr::mutate(sample_name = unlist(lapply(cluster_id, function(x) { paste0(unlist(strsplit(x, "_", fixed = TRUE))[1:2], collapse="_")})))
+	readr::write_tsv(x = df, file = as.character(opt$output_file), append = FALSE, col_names = TRUE)
+	
+} else if (as.numeric(opt$option)==5) {
+	df = readr::read_csv(file = "star_fish/summary/_CGR_feature_matrix.csv", col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	     readr::type_convert() %>%
+	     dplyr::rename(sample_name = sample)
+	readr::write_tsv(x = df, file = as.character(opt$output_file), append = FALSE, col_names = TRUE)
+}
diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R
new file mode 100644
index 00000000..98fa3726
--- /dev/null
+++ b/scripts/sufam_gt.R
@@ -0,0 +1,165 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("fuzzyjoin"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"),
+               make_option("--sample_set", default = NA, type = 'character', help = "sample set"),
+	       make_option("--tumor_sample", default = NA, type = 'character', help = "tumor sample"),
+	       make_option("--normal_sample", default = NA, type = 'character', help = "normal sample"),
+	       make_option("--input_file", default = NA, type = 'character', help = "input file"),
+	       make_option("--output_file", default = NA, type = 'character', help = "output file"))
+parser = OptionParser(usage = "%prog", option_list = optList)
+arguments = parse_args(parser, positional_arguments = T)
+opt = arguments$options
+
+if (as.numeric(opt$option)==1) {
+	sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE))
+	normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE))
+	sample_set = setdiff(sample_set, normal_sample)
+	smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% # mutation summary -> one VCF record per unique variant
+	       readr::type_convert() %>%
+	       dplyr::filter(TUMOR_SAMPLE %in% sample_set) %>%
+	       dplyr::filter(NORMAL_SAMPLE == normal_sample) %>%
+	       dplyr::mutate(UUID = paste0(CHROM, ":", POS, "_", REF, ">", ALT)) %>%
+	       dplyr::filter(!duplicated(UUID)) %>% # keep the first occurrence of each CHROM:POS_REF>ALT
+	       dplyr::mutate(`#CHROM` = CHROM,
+			     POS = POS,
+			     ID = ".",
+			     REF = REF,
+			     ALT = ALT,
+			     QUAL = 100,
+			     FILTER = "PASS",
+			     INFO = ".") %>%
+	       dplyr::select(`#CHROM`, POS, ID, REF, ALT, QUAL, FILTER, INFO) %>% # FILTER was computed above but dropped here; VCF needs all 8 fixed columns in this order
+	       dplyr::mutate(`#CHROM` = as.character(`#CHROM`)) %>%
+	       dplyr::mutate(chr_n = case_when(
+		       `#CHROM` == "X" ~ "23",
+		       `#CHROM` == "Y" ~ "24",
+		       TRUE ~ `#CHROM`
+	       )) %>% # temporary sort key with X/Y mapped to 23/24
+	       readr::type_convert() %>%
+	       dplyr::arrange(chr_n) %>%
+	       dplyr::select(-chr_n)
+	cat("##fileformat=VCFv4.2\n", file = as.character(opt$output_file), append=FALSE)
+	readr::write_tsv(x = smry, path = as.character(opt$output_file), append = TRUE, col_names = TRUE)
+
+} else if (as.numeric(opt$option)==2) {
+	tumor_sample = unlist(strsplit(x = as.character(opt$tumor_sample), split = " ", fixed=TRUE))
+	normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE))
+	maf = readr::read_tsv(file = opt$input_file, comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	      readr::type_convert() %>%
+	      dplyr::mutate(chrom = Chromosome,
+			    loc.start = Start_Position,
+			    loc.end = End_Position) %>%
+	      dplyr::mutate(chrom = as.character(chrom))
+	facets = readr::read_tsv(file = paste0("facets/cncf/", tumor_sample, "_", normal_sample, ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		 dplyr::mutate(chrom = case_when(
+			 chrom == "23" ~ "X",
+			 TRUE ~ chrom
+		 )) %>%
+		 readr::type_convert() %>%
+		 dplyr::mutate(qt = tcn.em,
+			       q2 = tcn.em - lcn.em) %>%
+		 dplyr::select(chrom, loc.start, loc.end, qt, q2)
+	maf = maf %>%
+	      fuzzyjoin::genome_left_join(facets, by = c("chrom", "loc.start", "loc.end")) %>%
+	      dplyr::select(-chrom.x, -loc.start.x, -loc.end.x, -chrom.y, -loc.start.y, -loc.end.y)
+			
+	write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE)
+
+} else if (as.numeric(opt$option)==3) {
+	sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE))
+	normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE))
+	sample_set = setdiff(sample_set, normal_sample)
+	maf = list()
+	for (i in 1:length(sample_set)) {
+		sufam = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			readr::type_convert() %>%
+			dplyr::select(CHROM = chrom,
+				      POS = pos,
+				      REF = val_ref,
+				      ALT = val_alt,
+				      t_depth = cov,
+				      t_alt_count = val_al_count) %>%
+		 	dplyr::mutate(t_ref_count = t_depth - t_alt_count)
+			
+		maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], "_ann.maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		      	   readr::type_convert() %>%
+		      	   dplyr::select(-t_depth, -t_alt_count, -t_ref_count) %>%
+		      	   dplyr::bind_cols(sufam)
+	}
+	maf = do.call(bind_rows, maf)
+	write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE)
+
+} else if (as.numeric(opt$option)==4) {
+	sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE))
+	maf = list()
+	for (i in 1:length(sample_set)) {
+		maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character()))
+	}
+	maf = do.call(bind_rows, maf) %>%
+	      readr::type_convert()
+	smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	       dplyr::mutate(HOTSPOT = case_when(
+		       is.na(HOTSPOT) ~ FALSE,
+		       HOTSPOT == "True" ~ TRUE,
+		       HOTSPOT == "False" ~ FALSE,
+		       HOTSPOT == "TRUE" ~ TRUE,
+		       HOTSPOT == "FALSE" ~ FALSE
+	       )) %>%
+	       dplyr::mutate(HOTSPOT_INTERNAL = case_when(
+		       is.na(HOTSPOT_INTERNAL) ~ FALSE,
+		       HOTSPOT_INTERNAL == "True" ~ TRUE,
+		       HOTSPOT_INTERNAL == "False" ~ FALSE,
+		       HOTSPOT_INTERNAL == "TRUE" ~ TRUE,
+		       HOTSPOT_INTERNAL == "FALSE" ~ FALSE
+	       )) %>%
+	       dplyr::mutate(cmo_hotspot = case_when(
+		       is.na(cmo_hotspot) ~ FALSE,
+		       cmo_hotspot == "True" ~ TRUE,
+		       cmo_hotspot == "False" ~ FALSE,
+		       cmo_hotspot == "TRUE" ~ TRUE,
+		       cmo_hotspot == "FALSE" ~ FALSE
+	       )) %>%
+	       dplyr::mutate(is_Hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>%
+	       dplyr::mutate(facetsLOHCall = case_when(
+		       is.na(facetsLOHCall) ~ FALSE,
+		       facetsLOHCall == "True" ~ TRUE,
+		       facetsLOHCall == "False" ~ FALSE,
+		       facetsLOHCall == "TRUE" ~ TRUE,
+		       facetsLOHCall == "FALSE" ~ FALSE
+	       )) %>%
+	       dplyr::mutate(is_LOH = facetsLOHCall) %>%
+	       readr::type_convert()
+	maf = maf %>% # annotate every MAF row with the per-variant hotspot flag from the summary
+	      dplyr::left_join(smry %>%
+			       dplyr::group_by(CHROM, POS, REF, ALT) %>%
+	       		       dplyr::summarize(is_Hotspot = any(is_Hotspot)) %>% # any() always yields one row per variant; unique() gave several when samples disagreed
+			       dplyr::ungroup(),
+			       by = c("CHROM", "POS", "REF", "ALT"))
+	maf = maf %>%
+	      dplyr::left_join(smry %>%
+			       dplyr::select(CHROM, POS, REF, ALT, Tumor_Sample_Barcode = TUMOR_SAMPLE, is_LOH) %>%
+			       dplyr::mutate(is_present = TRUE),
+			       by = c("CHROM", "POS", "REF", "ALT", "Tumor_Sample_Barcode")) %>%
+	      dplyr::mutate(is_present = case_when(
+		      is.na(is_present) ~ FALSE,
+		      TRUE ~ is_present
+	      ))
+	write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE)
+
+} else if (as.numeric(opt$option)==5) {
+	maf = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+	      readr::type_convert() %>%
+	      dplyr::filter(is_present)
+	write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE)
+}
+
diff --git a/rnaseq/summarizeRNASeqReads.R b/scripts/summarize_rnaseqreads.R
similarity index 100%
rename from rnaseq/summarizeRNASeqReads.R
rename to scripts/summarize_rnaseqreads.R
diff --git a/rnaseq/summarizeRNASeqReadsByExon.R b/scripts/summarize_rnaseqreads_byexon.R
similarity index 100%
rename from rnaseq/summarizeRNASeqReadsByExon.R
rename to scripts/summarize_rnaseqreads_byexon.R
diff --git a/rnaseq/summarizeRNASeqReadsByIntron.R b/scripts/summarize_rnaseqreads_byintron.R
similarity index 100%
rename from rnaseq/summarizeRNASeqReadsByIntron.R
rename to scripts/summarize_rnaseqreads_byintron.R
diff --git a/scripts/summarize_sleuth.R b/scripts/summarize_sleuth.R
new file mode 100644
index 00000000..51f018b4
--- /dev/null
+++ b/scripts/summarize_sleuth.R
@@ -0,0 +1,34 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("sleuth"))
+
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+optList = list(make_option('--annotation', type = 'character', default = NA, help = 'path to annotation file'),
+               make_option('--samples', type = 'character', default = NA, help = 'list of samples names'))
+parser = OptionParser(usage = "%prog",  option_list=optList)
+arguments = parse_args(parser, positional_arguments = T)
+opt = arguments$options
+
+sample_names = unlist(strsplit(x=opt$samples, split=" ", fixed=TRUE))
+annotation = readr::read_tsv(file=opt$annotation, col_names=TRUE, col_types=cols(.default=col_character()))
+manifest = dplyr::tibble(sample = sample_names,
+			 condition = rep(1, length(sample_names)),
+			 path = paste0("kallisto/", sample_names))
+data = sleuth::sleuth_prep(sample_to_covariates = manifest,
+			   extra_bootstrap_summary = TRUE,
+			   read_bootstrap_tpm = TRUE,
+			   target_mapping = annotation,
+			   aggregation_column = "hugo",
+			   gene_mode = TRUE)
+res = as.data.frame(sleuth_to_matrix(data, "obs_norm", "tpm"))
+tpm_bygene = dplyr::tibble(gene_symbol = rownames(res)) %>%
+	     dplyr::bind_cols(dplyr::as_tibble(res))
+write_tsv(x=tpm_bygene, path="kallisto/tpm_by_gene.txt", append=FALSE, col_names=TRUE, quote_escape=FALSE)
diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R
new file mode 100644
index 00000000..3ec46f58
--- /dev/null
+++ b/scripts/sv_signature.R
@@ -0,0 +1,82 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("signature.tools.lib"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"),
+               make_option("--sample_name", default = NA, type = 'character', help = "sample name"),
+	       make_option("--input_file", default = NA, type = 'character', help = "input file"),
+	       make_option("--output_file", default = NA, type = 'character', help = "output file"))
+parser = OptionParser(usage = "%prog", option_list = optList)
+arguments = parse_args(parser, positional_arguments = T)
+opt = arguments$options
+
+if (as.numeric(opt$option)==1) {
+	sample_name = as.character(opt$sample_name)
+	sv_bedpe = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+		   readr::type_convert() %>%
+		   dplyr::mutate(sample = sample_name) %>%
+		   dplyr::select(-svclass)
+	res_list = bedpeToRearrCatalogue(sv_bedpe %>% data.frame())
+	catalogues_mutations = data.frame(row.names = rownames(res_list$rearr_catalogue), stringsAsFactors = FALSE)
+	bedpecolumns = c("chrom1", "start1", "end1", "chrom2", "start2", "end2" , "sample","svclass","id", "is.clustered", "length")
+	catalogues_mutations = cbind(catalogues_mutations,res_list$rearr_catalogue)
+	mtype_mutations = signature.tools.lib:::getTypeOfMutationsFromChannels(catalogues_mutations)
+	exposureFilterType = "fixedThreshold"
+	threshold_percent = 5
+	optimisation_method = "KLD"
+	useBootstrap = FALSE
+	nboot = 1000
+	threshold_p.value = 0.05
+	nparallel = 4
+	randomSeed = 1
+	fit = Fit(catalogues = catalogues_mutations,
+		  signatures = signature.tools.lib:::RefSigv1_rearr,
+		  exposureFilterType = exposureFilterType,
+		  threshold_percent = threshold_percent,
+		  method = optimisation_method,
+		  useBootstrap = useBootstrap,
+		  nboot = nboot,
+		  threshold_p.value = threshold_p.value,
+		  nparallel = nparallel,
+		  randomSeed = randomSeed,
+		  verbose = TRUE)
+	x = dplyr::tibble(feature_name = rownames(fit$catalogues),
+			  feature_count = as.vector(fit$catalogues[,1])) %>%
+	    dplyr::mutate(sample_name = sample_name)
+	readr::write_tsv(x = x, file = paste0(opt$output_file, "_features.txt"), col_names = TRUE, append = FALSE)
+	
+	x = dplyr::tibble(signature_name = colnames(fit$exposures),
+			  signature_exposure = as.vector(fit$exposures[1,])/sum(as.vector(fit$exposures[1,])) * 100) %>%
+	    dplyr::mutate(sample_name = sample_name)
+	readr::write_tsv(x = x, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE)
+	
+} else if (as.numeric(opt$option)==2) {
+	sample_name = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE))
+	signature_df = list()
+	for (i in 1:length(sample_name)) {
+		signature_df[[i]] = readr::read_tsv(file = paste0("sv_signature/", sample_name[i], "/", sample_name[i], ".merged_exposures.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+				    readr::type_convert()
+	}
+	signature_df = do.call(bind_rows, signature_df)
+	readr::write_tsv(x = signature_df, file = as.character(opt$output_file), col_names = TRUE, append = FALSE)
+	
+} else if (as.numeric(opt$option)==3) {
+	sample_name = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE))
+	signature_df = list()
+	for (i in 1:length(sample_name)) {
+		signature_df[[i]] = readr::read_tsv(file = paste0("sv_signature/", sample_name[i], "/", sample_name[i], ".merged_features.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>%
+				    readr::type_convert() %>%
+				    dplyr::mutate(feature_proportion = 100*feature_count / sum(feature_count)) %>%
+				    dplyr::select(feature_name, feature_count, feature_proportion, sample_name)
+	}
+	signature_df = do.call(bind_rows, signature_df)
+	readr::write_tsv(x = signature_df, file = as.character(opt$output_file), col_names = TRUE, append = FALSE)
+}
diff --git a/scripts/wgs_metrics.R b/scripts/wgs_metrics.R
new file mode 100755
index 00000000..48c35cb6
--- /dev/null
+++ b/scripts/wgs_metrics.R
@@ -0,0 +1,109 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"),
+               make_option("--sample_names", default = NA, type = 'character', help = "sample names"))
+parser = OptionParser(usage = "%prog", option_list = optList)
+arguments = parse_args(parser, positional_arguments = T)
+opt = arguments$options
+
+if (as.numeric(opt$option)==1) {
+	sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE))
+	metrics = list()
+	for (i in 1:length(sample_names)) { # read one metrics/<sample>.idx_stats.txt table per sample
+		metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".idx_stats.txt"),
+					       col_names = FALSE, col_types = cols(.default = col_character()))[-85,,drop=FALSE] %>% # NOTE(review): drops row 85 unconditionally — presumably a trailing non-contig line for one specific reference build; confirm
+			       readr::type_convert() %>%
+			       dplyr::select(CHROMOSOME = X1,
+					     LENGTH = X2,
+					     ALIGNED_READS = X3) %>%
+			       dplyr::mutate(CHROMOSOME = gsub(pattern=" length=", replacement="", x=CHROMOSOME), # strip the literal label text embedded in the values
+					     ALIGNED_READS = gsub(pattern="Aligned= ", replacement="", x=ALIGNED_READS),
+					     SAMPLE_NAME = sample_names[i])
+	}
+	metrics = do.call(rbind, metrics)
+	write_tsv(metrics, path="summary/idx_metrics.txt", na = "NA", append = FALSE, col_names = TRUE)
+	
+} else if (as.numeric(opt$option)==2) {
+	sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE))
+	metrics = list()
+	for (i in 1:length(sample_names)) {
+		metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".aln_metrics.txt"),
+					       skip = 6, col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			       readr::type_convert() %>%
+			       dplyr::select(-SAMPLE, -READ_GROUP) %>%
+			       dplyr::mutate(SAMPLE_NAME = sample_names[i])
+	}
+	metrics = do.call(rbind, metrics)
+	write_tsv(metrics, path="summary/aln_metrics.txt", na = "NA", append = FALSE, col_names = TRUE)
+
+} else if (as.numeric(opt$option)==3) {
+	sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE))
+	metrics = list()
+	for (i in 1:length(sample_names)) {
+		metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".insert_metrics.txt"),
+					       skip = 6, n_max = 1, col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			       readr::type_convert() %>%
+			       dplyr::select(-SAMPLE, -READ_GROUP) %>%
+			       dplyr::mutate(SAMPLE_NAME = sample_names[i])
+	}
+	metrics = do.call(rbind, metrics)
+	write_tsv(metrics, path="summary/insert_metrics.txt", na = "NA", append = FALSE, col_names = TRUE)
+
+} else if (as.numeric(opt$option)==4) {
+	sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE))
+	metrics = list()
+	for (i in 1:length(sample_names)) {
+		metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".oxog_metrics.txt"),
+					  skip = 6, col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			       readr::type_convert() %>%
+			       dplyr::rename(SAMPLE_NAME = SAMPLE_ALIAS)
+	}
+	metrics = do.call(rbind, metrics)
+	write_tsv(metrics, path="summary/oxog_metrics.txt", na = "NA", append = FALSE, col_names = TRUE)
+
+} else if (as.numeric(opt$option)==5) {
+	sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE))
+	metrics = list()
+	for (i in 1:length(sample_names)) {
+		metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".gc_metrics.txt"),
+					       skip = 6, col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			       readr::type_convert() %>%
+			       dplyr::mutate(SAMPLE_NAME = sample_names[i])
+	}
+	metrics = do.call(rbind, metrics)
+	write_tsv(metrics, path="summary/gc_metrics.txt", na = "NA", append = FALSE, col_names = TRUE)
+
+} else if (as.numeric(opt$option)==6) {
+	sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE))
+	metrics = list()
+	for (i in 1:length(sample_names)) {
+		metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".wgs_metrics.txt"),
+					       skip = 6, n_max = 1, col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			       readr::type_convert() %>%
+			       dplyr::mutate(SAMPLE_NAME = sample_names[i])
+	}
+	metrics = do.call(rbind, metrics)
+	write_tsv(metrics, path="summary/wgs_metrics.txt", na = "NA", append = FALSE, col_names = TRUE)
+
+} else if (as.numeric(opt$option)==7) {
+	sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE))
+	metrics = list()
+	for (i in 1:length(sample_names)) {
+		metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".duplicate_metrics.txt"),
+					       skip = 6, n_max = 1, col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			       readr::type_convert() %>%
+			       dplyr::mutate(SAMPLE_NAME = sample_names[i])
+	}
+	metrics = do.call(rbind, metrics)
+	write_tsv(metrics, path="summary/duplicate_metrics.txt", na = "NA", append = FALSE, col_names = TRUE)
+
+}
diff --git a/signatures/createNMFinput.m b/signatures/createNMFinput.m
deleted file mode 100644
index e61040b2..00000000
--- a/signatures/createNMFinput.m
+++ /dev/null
@@ -1,17 +0,0 @@
-function createNMFinput( mutationFile, sampleNameFile, typesFile, cancerType, inputFile)
-%create WTSI input 
-%   convert mutsig mutation matrix file and sample name file into input for
-%   WTSI mutation signature package
-
-originalGenomes = importdata(mutationFile)';
-
-fid = fopen(sampleNameFile);
-sampleNames = textscan(fid, '%s');
-fclose(fid);
-sampleNames = sampleNames{1};
-
-load(typesFile);
-
-save(inputFile, 'originalGenomes', 'subtypes', 'types', 'sampleNames', 'cancerType');
-quit
-end
diff --git a/signatures/deconstruct_sigs.mk b/signatures/deconstruct_sigs.mk
index c6721dc2..ba309ad8 100644
--- a/signatures/deconstruct_sigs.mk
+++ b/signatures/deconstruct_sigs.mk
@@ -1,24 +1,27 @@
 include modules/Makefile.inc
 
 LOGDIR = log/deconstruct_sigs.$(NOW)
-PHONY += deconstructsigs deconstructsigs/signatures deconstructsigs/plots/context
 
-deconstructsigs : $(foreach sample,$(TUMOR_SAMPLES),deconstructsigs/signatures/$(sample).RData) $(foreach sample,$(TUMOR_SAMPLES),deconstructsigs/plots/context/$(sample).pdf)
+deconstructsigs : $(foreach sample,$(TUMOR_SAMPLES),deconstructsigs/signatures/$(sample).RData) \
+		  $(foreach sample,$(TUMOR_SAMPLES),deconstructsigs/plots/context/$(sample).pdf)
 
 define extract-signatures
 deconstructsigs/signatures/%.RData : summary/tsv/mutation_summary.tsv
-	$$(call RUN,-s 4G -m 6G -v $(DECONSTRUCTSIGS_ENV),"$(RSCRIPT) modules/signatures/extract_signatures.R --sample_name $$(*)")
+	$$(call RUN,-s 4G -m 6G -v $(DECONSTRUCTSIGS_ENV),"set -o pipefail && \
+							   $(RSCRIPT) modules/signatures/extract_signatures.R \
+							   --sample_name $$(*)")
 	
 deconstructsigs/plots/context/%.pdf : deconstructsigs/signatures/%.RData
-	$$(call RUN,-s 4G -m 6G -v $(DECONSTRUCTSIGS_ENV),"mkdir -p  deconstructsigs/plots/context && \
-													   mkdir -p  deconstructsigs/plots/exposures && \
-													   $(RSCRIPT) modules/signatures/plot_signatures.R --sample_name $$(*)")
+	$$(call RUN,-s 4G -m 6G -v $(DECONSTRUCTSIGS_ENV),"set -o pipefail && \
+							   $(RSCRIPT) modules/signatures/plot_signatures.R \
+							   --sample_name $$(*)")
 
 endef
 $(foreach sample,$(TUMOR_SAMPLES),\
 		$(eval $(call extract-signatures,$(sample))))
 
-
-.DELETE_ON_ERROR:
+..DUMMY := $(shell mkdir -p version; \
+	     $(DECONSTRUCTSIGS_ENV)/bin/R --version > version/deconstruct_sigs.txt)
 .SECONDARY:
-.PHONY: $(PHONY)
+.DELETE_ON_ERROR:
+.PHONY: deconstructsigs
\ No newline at end of file
diff --git a/signatures/emu.mk b/signatures/emu.mk
deleted file mode 100644
index 7d48e555..00000000
--- a/signatures/emu.mk
+++ /dev/null
@@ -1,63 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR = log/emu.$(NOW)
-
-EMU_PREPARE = $(HOME)/usr/bin/EMu-prepare
-EMU_PREPARE_OPTS := --chr $(EMU_REF_DIR)
-ifdef EMU_TARGETS_FILE
-EMU_PREPARE_OPTS += --regions $(EMU_TARGETS_FILE)
-endif
-EMU = $(HOME)/usr/bin/EMu
-
-PLOT_EMU = $(RSCRIPT) modules/signatures/plot_emu_signatures.R
-
-NO_CNV ?= false
-
-.DELETE_ON_ERROR:
-.SECONDARY: 
-.PHONY: all
-
-ALL := emu/emu_results_bic.txt emu/report/index.html
-ifdef NUM_SPECTRA
-ALL += emu/emu_$(NUM_SPECTRA).timestamp
-endif
-
-all : $(ALL)
-
-ALL_TABLE ?= alltables/allTN.mutect_snps.tab.txt
-
-emu/mutations.txt : $(ALL_TABLE)
-	$(INIT) awk 'NR > 1 { sub("X", "23", $$3); sub("Y", "24", $$3); sub("MT", "25", $$3); print $$1 "_" $$2, $$3, $$4, $$6 ">" $$7 }' $< | cat - $(EMU_REF_MUTATIONS) > $@
-
-emu/cnv.txt : $(foreach pair,$(SAMPLE_PAIRS),freec/$(pair)/$(tumor.$(pair)).bam_CNVs)
-	$(INIT) rm -f $@; for x in $^; do \
-		sample=`echo $$x | sed 's:freec/::; s:/.*::'`; \
-		awk -v sample=$$sample 'NR > 1 { sub("chr", "", $$1); sub("X", "23" , $$1); sub("Y", "24", $$1); sub("MT", "25", $$1); print sample, $$1, $$2, $$3, $$4; }' $$x >> $@; \
-	done && cat $(EMU_REF_CNV) >> $@
-
-ifeq ($(NO_CNV),false)
-emu/mutations.txt.mut.matrix : emu/mutations.txt emu/cnv.txt
-	$(call RUN,-s 4G -m 8G,"$(EMU_PREPARE) $(EMU_PREPARE_OPTS) --cnv $(<<) --mut $< --pre $(@D) --regions $(EMU_TARGETS_FILE)")
-else
-emu/mutations.txt.mut.matrix : emu/mutations.txt
-	$(call RUN,-s 4G -m 8G,"$(EMU_PREPARE) $(EMU_PREPARE_OPTS) --chr $(EMU_REF_DIR) --mut $< --pre $(@D)")
-endif
-
-emu/emu_results_bic.txt : emu/mutations.txt.mut.matrix
-	$(call RUN,-s 4G -m 8G,"$(EMU) --mut $< --opp human-exome --pre emu/emu_results")
-
-RESULT_TIMESTAMPS = 
-ifdef NUM_SPECTRA
-emu/emu_$(NUM_SPECTRA).timestamp : emu/mutations.txt.mut.matrix
-	$(call RUN,-s 4G -m 8G,"$(EMU) --force $(NUM_SPECTRA) --mut $< --opp human-exome --pre emu/emu_results && touch $@")
-
-RESULT_TIMESTAMPS += emu/emu_$(NUM_SPECTRA).timestamp
-endif
-
-emu/samples.txt : 
-	$(INIT) echo "$(SAMPLE_PAIRS)" | sed 's/ /\n/g' > $@
-
-emu/report/index.html : emu/emu_results_bic.txt emu/samples.txt emu/mutations.txt $(RESULT_TIMESTAMPS)
-	$(call RUN,-s 4G -m 16G,"$(PLOT_EMU) --inPrefix $(<D)/emu_results --outDir $(@D) --sampleSubset $(<<) --mutations $(<<<) --samples $(<<<).samples")
-
-include modules/vcf_tools/vcftools.mk
diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk
new file mode 100644
index 00000000..23f335b9
--- /dev/null
+++ b/signatures/hr_detect.mk
@@ -0,0 +1,139 @@
+include modules/Makefile.inc
+
+LOGDIR ?= log/hr_detect.$(NOW)
+# SV size window handed to `SURVIVOR vcftobed` below. NOTE(review): MAX_SIZE exceeds the 64-bit integer range -- confirm how SURVIVOR parses it.
+MIN_SIZE = 1
+MAX_SIZE = 100000000000000000000
+
+# Top-level goal of this module.
+# The per-pair prerequisites are generated suffix by suffix (outer loop)
+# across every entry of SAMPLE_PAIRS (inner loop), so the expanded list is
+# in the same order as writing one foreach per suffix by hand.
+# The two cohort-wide summary tables come last.
+# Paths follow the layout hr_detect/<pair>/<pair>.<suffix>.
+hr_detect : $(foreach ext,\
+		merged.bed \
+		merged.bedpe \
+		snv.vcf snv.vcf.bgz snv.vcf.bgz.tbi \
+		snv_repaired.vcf snv_repaired.vcf.bgz snv_repaired.vcf.bgz.tbi \
+		indel.vcf indel.vcf.bgz indel.vcf.bgz.tbi \
+		indel_repaired.vcf indel_repaired.vcf.bgz indel_repaired.vcf.bgz.tbi \
+		cn.txt \
+		sv.bedpe \
+		png \
+		svg,\
+		$(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).$(ext))) \
+	    hr_detect/hrdetect_smry.txt \
+	    hr_detect/signatures_smry.txt
+# Per-pair rule template (argument 1 = tumor, argument 2 = normal at the call site). First rule: SURVIVOR converts the merged SV VCF to BED within the MIN_SIZE/MAX_SIZE window.
+define hr-detect
+hr_detect/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \
+							    SURVIVOR vcftobed \
+							    $$(<) \
+							    $(MIN_SIZE) \
+							    $(MAX_SIZE) \
+							    $$(@)")
+# Prepend a column-header line to the SURVIVOR BED output to obtain the BEDPE file.
+hr_detect/$1_$2/$1_$2.merged.bedpe : hr_detect/$1_$2/$1_$2.merged.bed
+	$$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \
+					 echo \"chrom1	start1	end1	chrom2	start2	end2	sv_id	pe_support	strand1	strand2	svclass\" > \
+					 $$(@) && \
+					 cat $$(<) >> $$(@)")
+# hr_detect.R --option 1: the recipe never names the target, so the script is expected to write the SNV VCF itself (confirm in the script).
+hr_detect/$1_$2/$1_$2.snv.vcf : summary/tsv/all.tsv
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \
+					   			     $(RSCRIPT) modules/scripts/hr_detect.R \
+								     --option 1 \
+								     --sample_name $1_$2")
+# bgzip-compress the SNV VCF.
+hr_detect/$1_$2/$1_$2.snv.vcf.bgz : hr_detect/$1_$2/$1_$2.snv.vcf
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \
+								bgzip -c $$(<) > $$(@)")
+# tabix-index the compressed SNV VCF; tabix writes the index next to its input, which is how the .tbi target appears.
+hr_detect/$1_$2/$1_$2.snv.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.snv.vcf.bgz
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \
+								tabix -p vcf $$(<)")
+# hr_detect.R --option 2: presumably the indel counterpart of option 1; the target is again not named in the recipe (confirm in the script).
+hr_detect/$1_$2/$1_$2.indel.vcf : summary/tsv/all.tsv
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \
+								     $(RSCRIPT) modules/scripts/hr_detect.R \
+								     --option 2 \
+								     --sample_name $1_$2")
+# bgzip-compress the indel VCF.
+hr_detect/$1_$2/$1_$2.indel.vcf.bgz : hr_detect/$1_$2/$1_$2.indel.vcf
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \
+								bgzip -c $$(<) > $$(@)")
+# tabix-index the compressed indel VCF.
+hr_detect/$1_$2/$1_$2.indel.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.indel.vcf.bgz
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \
+								tabix -p vcf $$(<)")
+# Round-trip the compressed SNV VCF through bcftools view; the .tbi is only a prerequisite, the recipe reads just the first one.
+hr_detect/$1_$2/$1_$2.snv_repaired.vcf : hr_detect/$1_$2/$1_$2.snv.vcf.bgz hr_detect/$1_$2/$1_$2.snv.vcf.bgz.tbi
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \
+								bcftools view $$(<) > $$(@)")
+# bgzip-compress the repaired SNV VCF.
+hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz : hr_detect/$1_$2/$1_$2.snv_repaired.vcf
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \
+								bgzip -c $$(<) > $$(@)")
+# tabix-index the repaired, compressed SNV VCF.
+hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \
+								tabix -p vcf $$(<)")
+# Round-trip the compressed indel VCF through bcftools view; as for SNVs, only the first prerequisite is read.
+hr_detect/$1_$2/$1_$2.indel_repaired.vcf : hr_detect/$1_$2/$1_$2.indel.vcf.bgz hr_detect/$1_$2/$1_$2.indel.vcf.bgz.tbi
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \
+								bcftools view $$(<) > $$(@)")
+# The repaired indel VCF is then compressed and indexed the same way as the repaired SNV VCF.
+# bgzip-compress the repaired indel VCF.
+hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz : hr_detect/$1_$2/$1_$2.indel_repaired.vcf
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \
+								bgzip -c $$(<) > $$(@)")
+# tabix-index the repaired, compressed indel VCF.
+hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \
+								tabix -p vcf $$(<)")
+# hr_detect.R --option 3: presumably derives the copy-number table from the FACETS cncf file (confirm in the script).
+hr_detect/$1_$2/$1_$2.cn.txt : facets/cncf/$1_$2.txt
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \
+								     $(RSCRIPT) modules/scripts/hr_detect.R \
+								     --option 3 \
+								     --sample_name $1_$2")
+# hr_detect.R --option 4: presumably reformats the merged BEDPE into the SV input table (confirm in the script).
+hr_detect/$1_$2/$1_$2.sv.bedpe : hr_detect/$1_$2/$1_$2.merged.bedpe
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \
+								     $(RSCRIPT) modules/scripts/hr_detect.R \
+								     --option 4 \
+								     --sample_name $1_$2")
+# hr_detect.R --option 5, then rename the genomePlot.png file found in the pair directory to the target name.
+hr_detect/$1_$2/$1_$2.png : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.cn.txt hr_detect/$1_$2/$1_$2.sv.bedpe
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \
+								    $(RSCRIPT) modules/scripts/hr_detect.R \
+								    --option 5 \
+								    --sample_name $1_$2 && \
+								    mv hr_detect/$1_$2/$1_$2.genomePlot.png $$(@)")
+# hr_detect.R --option 6, then rename the genomePlot.svg file found in the pair directory to the target name.
+hr_detect/$1_$2/$1_$2.svg : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.cn.txt hr_detect/$1_$2/$1_$2.sv.bedpe
+	$$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \
+								     $(RSCRIPT) modules/scripts/hr_detect.R \
+								     --option 6 \
+								     --sample_name $1_$2 && \
+								     mv hr_detect/$1_$2/$1_$2.genomePlot.svg $$(@)")
+
+endef
+# Instantiate the hr-detect template once per entry of SAMPLE_PAIRS, passing that pair's tumor and normal names.
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call hr-detect,$(tumor.$(pair)),$(normal.$(pair)))))
+# Cohort-level table from hr_detect.R --option 7. The recipe never references the target, so the script is expected to write it itself -- confirm in the script.
+hr_detect/hrdetect_smry.txt : $(foreach ext,sv.bedpe snv_repaired.vcf.bgz snv_repaired.vcf.bgz.tbi indel_repaired.vcf.bgz indel_repaired.vcf.bgz.tbi cn.txt,$(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).$(ext)))
+	$(call RUN, -c -n 4 -s 6G -m 9G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \
+					  			   $(RSCRIPT) modules/scripts/hr_detect.R --option 7 --sample_name '$(SAMPLE_PAIRS)'")
+# Cohort-level table from hr_detect.R --option 8. The recipe never references the target, so the script is expected to write it itself -- confirm in the script.
+hr_detect/signatures_smry.txt : $(foreach ext,sv.bedpe snv_repaired.vcf.bgz snv_repaired.vcf.bgz.tbi indel_repaired.vcf.bgz indel_repaired.vcf.bgz.tbi cn.txt,$(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).$(ext)))
+	$(call RUN, -c -n 4 -s 6G -m 9G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \
+					  			   $(RSCRIPT) modules/scripts/hr_detect.R --option 8 --sample_name '$(SAMPLE_PAIRS)'")
+# Record the R version at parse time. Uses POSIX `> file 2>&1` instead of the bash-only `&>`, which plain sh parses as "run in background, then truncate the file".
+..DUMMY := $(shell mkdir -p version && R --version > version/hr_detect.txt 2>&1)
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: hr_detect
diff --git a/signatures/mut_sig.mk b/signatures/mut_sig.mk
deleted file mode 100644
index b95fbaae..00000000
--- a/signatures/mut_sig.mk
+++ /dev/null
@@ -1,28 +0,0 @@
-include modules/Makefile.inc
-include modules/variant_callers/gatk.inc
-
-LOGDIR = log/mut_sig.$(NOW)
-
-VCF2VRANGES = $(RSCRIPT) modules/signatures/vcf_2_vranges.R
-KNIT = $(RSCRIPT) modules/scripts/knit.R
-ALEXANDROV_DATA = $(HOME)/share/reference/sanger_30_mutsig_prob.txt
-MUTSIG_REPORT = modules/signatures/mut_sig_report.Rmd
-MUTSIG_REPORT_OPTS = --name $(PROJECT_NAME) \
-					 --alexandrovData $(ALEXANDROV_DATA) \
-					 $(if $(TARGETS_FILE),--targetBed $(TARGETS_FILE))
-
-SNV_TYPE ?= mutect
-
-.SECONDARY:
-.DELETE_ON_ERROR:
-.PHONY: mutect_mutsig_reports
-
-mutect_mutsig_reports : mutsig_report/mutect/mutsig_report.timestamp
-
-mutsig_report/mutect/mutsig_report.timestamp : $(foreach pair,$(SAMPLE_PAIRS),mutsig_report/vrange/$(pair).$(SNV_TYPE).ft.VRanges.Rdata)
-	$(call RUN,-N mutect_mutsig_report -v $(MUTSIG_REPORT_ENV) -n 4 -s 3G -m 5G,"$(KNIT) $(MUTSIG_REPORT) $(@D) --ncores 4 --outDir $(@D) $(MUTSIG_REPORT_OPTS) $^ && touch $@")
-
-mutsig_report/vrange/%.VRanges.Rdata : vcf/%.vcf
-	$(call RUN,-v $(MUTSIG_REPORT_ENV) -s 7G -m 10G,"$(VCF2VRANGES) --genome $(REF) --outFile $@ $<")
-
-include modules/vcf_tools/vcftools.mk
diff --git a/signatures/mut_sig_report.Rmd b/signatures/mut_sig_report.Rmd
deleted file mode 100644
index 11db1d82..00000000
--- a/signatures/mut_sig_report.Rmd
+++ /dev/null
@@ -1,330 +0,0 @@
-```{r setup, include = F}
-options(useHTTPS = F)
-library("optparse");
-library("VariantAnnotation");
-library("reshape")
-library("boot")
-library("plyr")
-library("dplyr")
-library("ggplot2")
-library("RColorBrewer")
-library("reshape2")
-library("SomaticSignatures")
-library("doMC")
-library("foreach")
-library("NMF")
-library("gridExtra")
-library("gplots")
-library("magrittr")
-
-
-optList <- list(
-                make_option("--name", default = '', type = "character", action = "store", help = "report name"),
-                make_option("--ncores", default = 4, type = "integer", action = "store", help = "number of cores"),
-                make_option("--alexandrovData", default = '~/share/reference/Alexandrov_NMF_signatures.txt', type = "character", action = "store", help = "alexandrov nmf signatures"),
-                make_option("--genome", default = 'b37', help = "reference genome"),
-                make_option("--vrangeDir", default = NULL, type = "character", action = "store", help = "input directory"),
-                make_option("--outDir", default = NULL, type = "character", action = "store", help = "output directory"),
-                make_option("--targetBed", default = NULL, type = "character", action = "store", help = "target intervals in bed format"))
-
-parser <- OptionParser(usage = "%prog [options] [VRange file(s)]", option_list = optList);
-arguments <- parse_args(parser, positional_arguments = T, args = args);
-opt <- arguments$options;
-
-if (!is.null(opt$vrangeDir)) {
-    vrFiles <- list.files(path = opt$vrangeDir,
-                          pattern = '.*\\.VRanges\\.Rdata',
-                          full.names = T)
-}
-if (length(arguments$args) > 1) {
-    vrFiles <- arguments$args
-}
-if (length(vrFiles) < 1) {
-    cat("Need VRange file(s)\n");
-    print_help(parser);
-    stop();
-}
-
-outFile <- opt$outFile
-if (opt$genome == "b37" || opt$genome == "hg19") {
-    library("BSgenome.Hsapiens.UCSC.hg19");
-    library("TxDb.Hsapiens.UCSC.hg19.knownGene")
-    genome <- BSgenome.Hsapiens.UCSC.hg19
-    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
-    genomeName <- 'hg19'
-} else if (opt$genome == "mm10" || opt$genome == "GRCm38") {
-    library("TxDb.Mmusculus.UCSC.mm10.knownGene")
-    library("BSgenome.Mmusculus.UCSC.mm10");
-    genome <- BSgenome.Mmusculus.UCSC.mm10
-    txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
-    genomeName <- 'mm10'
-}
-
-txdb <- renameSeqlevels(txdb, sub('chr', '', seqlevels(txdb)))
-txByGenes <- transcriptsBy(txdb, 'gene')
-bases <- c("A", "C", "G", "T")
-
-if (!is.null(opt$targetBed)) {
-    bed <- ucsc(import(opt$targetBed))
-    k3t <- kmerFrequency(genome, 1e5, 3, bed)
-    k3wg <- kmerFrequency(genome, 1e5, 3)
-    norms <- k3wg[names(k3t)] / k3t
-}
-
-registerDoMC(opt$ncores)
-
-theme_set(theme_bw(base_size = 18))
-```
-
-# `r opt$name` Mutational Signature Report
----
-### Raymond Lim
-
-```{r createMotifMatrices, include = F, cache = F}
-vrs <- list()
-mm <- list()
-mmNorm <- list()
-mmTranscribed <- list()
-mmUntranscribed <- list()
-for (vrFile in vrFiles) {
-    s <- sub('\\..*', '', vrFile)
-    s <- sub('.*/', '', s)
-    load(vrFile)
-    vrs[[s]] <- vr
-}
-vrs <- VRangesList(vrs)
-allVr <- unlist(vrs)
-
-mm <- motifMatrix(allVr, normalize = F)
-mmNorm <- motifMatrix(allVr, normalize = T)
-x <- subset(allVr, allVr$transcribed)
-sampleNames(x) <- factor(as.factor(sampleNames(x)))
-mmTranscribed <- motifMatrix(x, normalize = T)
-x <- subset(allVr, !allVr$transcribed)
-sampleNames(x) <- factor(as.factor(sampleNames(x)))
-mmUntranscribed <- motifMatrix(x, normalize = T)
-if (!is.null(opt$targetBed)) {
-    mmNorm <- normalizeMotifs(mmNorm, norms)
-    mmTranscribed <- normalizeMotifs(mmTranscribed, norms)
-    mmUntranscribed <- normalizeMotifs(mmUntranscribed, norms)
-}
-```
-
-```{r writeMotifMatrices}
-if (!is.null(opt$outDir)) {
-    fn <- paste(opt$outDir, "/mm.tsv", sep = "")
-    write.table(mm, file = fn, quote = F, sep = '\t')
-    fn <- paste(opt$outDir, "/mm_transcribed.tsv", sep = "")
-    write.table(mmTranscribed, file = fn, quote = F, sep = '\t')
-    fn <- paste(opt$outDir, "/mm_untranscribed.tsv", sep = "")
-    write.table(mmUntranscribed, file = fn, quote = F, sep = '\t')
-    fn <- paste(opt$outDir, "/mm_norm.tsv", sep = "")
-    write.table(mmNorm, file = fn, quote = F, sep = '\t')
-}
-```
-
-```{r loadAlexandrovData}
-alexandrov <- read.table(opt$alexandrovData, sep = '\t', header = T, as.is = T)
-rownames(alexandrov) <- paste(sub('>', '', as.character(alexandrov$Substitution.Type)),
-                              ' ', subseq(as.character(alexandrov$Trinucleotide), 1, 1), '.',
-                              subseq(as.character(alexandrov$Trinucleotide), 3, 3), sep = '')
-
-alexandrovM <- alexandrov[rownames(mm), grepl('Signature', colnames(alexandrov))]
-
-```
-
-```{r nmf, fig.width = 10, fig.height = 10}
-solveNMF <- function(x, signatures){
-    coef <- fcnnls(x, signatures[rownames(x),, drop = F]) # reorder the rownames of the in matrix 
-    colsum <- apply(coef$x, 2, sum)
-    coef_x_scaled <- scale(coef$x, center = F, scale = colsum)
-    return(coef_x_scaled)
-}
-nmfCoefs <- solveNMF(as.matrix(alexandrovM), as.matrix(mm))
-
-if (!is.null(opt$outDir)) {
-    fn <- paste(opt$outDir, "/nnls_coefs.tsv", sep = "")
-    write.table(nmfCoefs, file = fn, quote = F, sep = '\t')
-}
-
-cols <- brewer.pal(9, 'Blues')
-if (ncol(nmfCoefs) > 2) {
-    heatmap.2(nmfCoefs, trace = 'none', margins = c(13, 8), cexCol = 0.8, col = cols, Rowv = F, Colv = F, dendrogram = 'none')
-}
-
-par(mar = c(10,5,5,5))
-for (s in colnames(nmfCoefs)) {
-    barplot(nmfCoefs[,s], main = s, las = 2)
-}
-```
-
-
-```{r pie, eval = F, echo = F}
-for (s in names(vrs)) {
-    vr <- vrs[[s]]
-    cols <- c("C>A" = "lightblue", "C>G" = "black", "C>T" = "red", "T>A" = "grey", "T>C" = "lightgreen",  "T>G" = "pink")
-    main <- paste(s, " (n = ", length(vr), ")", sep = '')
-    pie(table(vr$alteration), col = cols, main = main)
-    if (sum(vr$transcribed, na.rm = T) > 0 && sum(!vr$transcribed, na.rm = T) > 0) {
-        main <- paste('transcribed', s, " (n=", sum(vr$transcribed, na.rm = T), ")", sep = '')
-        pie(table(subset(vr, vr$transcribed)$alteration), col = cols, main = main)
-        main <- paste('untranscribed', s, " (n=", sum(!info(vcf)$transcribed, na.rm = T), ")", sep = '')
-        pie(table(subset(vr, !vr$transcribed)$alteration), col = cols, main = paste('untranscribed', main))
-    }
-}
-```
-
-
-```{r mutCountPlots, fig.height = 7, fig.width = 28}
-plotMutBarplot <- function(samp, mm) {
-    cols <- c("C>A" = "lightblue", "C>G" = "black", "C>T" = "red", "T>A" = "grey", "T>C" = "lightgreen",  "T>G" = "pink")
-    mdf <- melt(mm, varnames = c('motif', 'sample'))
-    mdf$alteration = sub("([ACGTN])([ACGTN]) .+", "\\1>\\2",
-                          mdf$motif)
-    mdf$context = sub("[ACGTN][ACGTN] (.+)", "\\1", mdf$motif)
-    tit <- paste(samp, ' (n = ', sum(mdf %>% filter(sample == samp) %$% value, na.rm = T), ")", sep = '')
-    mdf %>% filter(sample == samp) %>%
-        ggplot(aes(x = context, y = value, fill = alteration)) +
-        geom_bar(stat = 'identity') +
-        facet_grid(~ alteration, switch = 'x') +
-        xlab("") + ylab("") + ggtitle(tit) +
-        theme(axis.text.x = element_text(angle = 90, hjust = 1),
-              legend.position = 'none',
-              panel.border = element_blank(),
-              axis.line.x = element_line(color = 'black', size = 1),
-              axis.line.y = element_line(color = 'black', size = 1),
-              panel.grid = element_blank(),
-              strip.background = element_blank(),
-              strip.text.x = element_text(size = 20)) +
-        scale_fill_manual(values = cols)
-}
-
-plotMutBarplotStranded <- function(samp, mmTranscribed, mmUntranscribed) {
-    mdfTranscribed <- melt(mmTranscribed, varnames = c('motif', 'sample'))
-    mdfTranscribed$transcribed <- T
-    mdfUntranscribed <- melt(mmUntranscribed, varnames = c('motif', 'sample'))
-    mdfUntranscribed$transcribed <- F
-    mdf <- rbind(mdfTranscribed, mdfUntranscribed)
-    mdf$alteration = sub("([ACGTN])([ACGTN]) .+", "\\1>\\2",
-                          mdf$motif)
-    mdf$context = sub("[ACGTN][ACGTN] (.+)", "\\1", mdf$motif)
-    tit <- paste(samp, ' (n = ', sum(mdf %>% filter(sample == samp) %$% value, na.rm = T), ")", sep = '')
-    mdf %>% filter(sample == samp) %>%
-        ggplot(aes(x = context, y = value, fill = transcribed)) +
-        geom_bar(stat = 'identity', position = 'dodge') +
-        facet_grid(~ alteration, switch = 'x') +
-        theme(axis.text.x = element_text(angle = 90, hjust = 1),
-              panel.border = element_blank(),
-              axis.line.x = element_line(color = 'black', size = 1),
-              axis.line.y = element_line(color = 'black', size = 1),
-              panel.grid = element_blank(),
-              strip.background = element_blank(),
-              strip.text.x = element_text(size = 20)) +
-        scale_fill_manual(name = "", values = c('blue', 'red'),
-                          labels = c("Transcribed strand",
-                                     'Untranscribed strand')) + 
-        xlab("") + ylab("") + ggtitle(tit)
-}
-
-plotMutPiechart <- function(samp, mm) {
-    mdf <- melt(mm, varnames = c('motif', 'sample'))
-    mdf$alteration = sub("([ACGTN])([ACGTN]) .+", "\\1>\\2",
-                          mdf$motif)
-    mdf$context = sub("[ACGTN][ACGTN] (.+)", "\\1", mdf$motif)
-    cols <- c("C>A" = "lightblue", "C>G" = "black", "C>T" = "red", "T>A" = "grey", "T>C" = "lightgreen",  "T>G" = "pink")
-    mdf %>% filter(sample == samp) %>% group_by(alteration) %>% summarise(value = sum(value)) %>%
-        ggplot(aes(x = "", y = value, fill = alteration)) +
-        geom_bar(width = 1, stat = 'identity') + scale_fill_manual(values = cols) + 
-        coord_polar("y") + xlab("") + ylab("")
-}
-
-for (s in colnames(mm)) {
-    p1 <- plotMutBarplot(s, mm)
-    p2 <- plotMutPiechart(s, mm)
-    grid.arrange(p1, p2, ncol = 2, widths = c(4, 2))
-
-    p1 <- plotMutBarplot(s, mmNorm)
-    p2 <- plotMutPiechart(s, mmNorm)
-    grid.arrange(p1, p2, ncol = 2, widths = c(4, 2))
-}
-
-for (s in colnames(mm)) {
-    vr <- vrs[[s]]
-    if (sum(vr$transcribed, na.rm = T) > 0 && sum(!vr$transcribed, na.rm = T) > 0) {
-        p1 <- plotMutBarplotStranded(s, mmTranscribed, mmUntranscribed)
-        p2 <- plotMutPiechart(s, mmTranscribed)
-        p3 <- plotMutPiechart(s, mmUntranscribed)
-        lom <- matrix(c(1,1, 2,3), nrow = 2, ncol = 2)
-        grid.arrange(p1, p2, p3, layout_matrix = lom,  widths = c(4, 2))
-    }
-}
-
-```
-
-
-```{r bootPlot, fig.width = 12}
-bootFun <- function(x) {
-    baseMotif = subseq(as.character(x$motif), 4, 6)
-    subseq(baseMotif, 2, 2) = subseq(as.character(x$motif), 1, 1)
-    if (!is.null(opt$targetBed)) {
-        nval <- x$value * norms[baseMotif]
-    } else {
-        nval <- x$value
-    }
-    nval <- nval / sum(nval)
-    apply(alexandrovM, 2, function(y) fcnnls(nval, y)$x)
-}
-
-ranFun <- function(p, d) {
-    # create a vector of same # of mutations using original context probabilities
-    s <- sample.int(nrow(p), size = sum(p$value), replace = T, prob = p$value / sum(p$value))
-    pp <- p
-    tab <- table(p[s, "motif"])
-    pp[match(names(tab), pp$motif), "value"] <- tab
-    pp
-}
-
-mm <- motifMatrix(allVr, normalize = F)
-bootDf <- foreach(s = colnames(mm), .combine = 'rbind', .errorhandling = 'remove') %dopar% {
-    mdf <- melt(mm[, s, drop = F], varnames = c('motif', 'sample'))
-    mdf$alteration = sub("([ACGTN])([ACGTN]) .+", "\\1>\\2",
-                          mdf$motif)
-    mdf$context = sub("[ACGTN][ACGTN] (.+)", "\\1", mdf$motif)
-    boots <- boot(mdf, bootFun, R = 1000, ran.gen = ranFun, sim = 'parametric', parallel = 'multicore')
-    boots.sd <- apply(boots$t, 2, sd)
-    ci <- norm.ci(boots, index = 1:ncol(alexandrovM))
-    Df <- as.data.frame(boots$t0, row.names = 1:ncol(alexandrovM))
-    colnames(Df) <- 'bootCor'
-    Df$signature <- sub('Signature.', '', colnames(alexandrovM))
-    Df$bootSD <- boots.sd
-    Df$lowerCI95 <- ci[,2]
-    Df$upperCI95 <- ci[,3]
-    n <- colnames(alexandrovM)
-    Df$votes <- table(factor(n[apply(boots$t, 1, which.max)], levels = n))
-    Df$sample <- s
-    Df$nCalls <- sum(mm[, s])
-    Df$significant <- ! with(Df, bootCor > lowerCI95 & bootCor < upperCI95)
-    Df$rank = rank(-Df$votes, ties.method = 'max')
-    Df
-}
-
-fn <- paste(opt$outDir, "/vote_results.tsv", sep = "")
-write.table(bootDf, row.names = F, quote = F, sep = '\t', file = fn)
-
-maxBootDf <- bootDf %>% group_by(sample) %>% filter(votes == max(votes))
-fn <- paste(opt$outDir, "/max_vote_results.tsv", sep = "")
-write.table(maxBootDf, row.names = F, quote = F, sep = '\t', file = fn)
-
-for (s in colnames(mm)) {
-    bdf <- filter(bootDf, sample == s)
-    cols <- ifelse(bdf$signficant, 'red', 'grey')
-    n <- sub('Signature.', '', colnames(alexandrovM))
-    par(mfrow = c(2,1), mar = c(3,5,3,3))
-    barCenters <- barplot(bdf$bootCor, ylim = c(min(bdf$bootCor - bdf$bootSD), max(bdf$bootCor + bdf$bootSD)), names.arg = n, col = cols,
-                          main = s, ylab = 'Correlation')
-    segments(barCenters, bdf$bootCor - bdf$bootSD, barCenters, bdf$bootCor + bdf$bootSD, lwd = 1)
-    # vote barplot
-    barplot(bdf$votes, ylab = '# Votes', names.arg = bdf$signature)
-}
-```
-
diff --git a/signatures/nmfMutSig.mk b/signatures/nmfMutSig.mk
deleted file mode 100644
index 3b69c1ee..00000000
--- a/signatures/nmfMutSig.mk
+++ /dev/null
@@ -1,45 +0,0 @@
-# Run wtsi NMF mutation sig on tumour/normal data
-# Detect mutation signatures using mutect calls
-##### DEFAULTS ######
-
-include modules/Makefile.inc
-
-LOGDIR = log/nmf_mutsig.$(NOW)
-
-EMU_PREPARE = $(HOME)/usr/bin/EMu-prepare
-MATLABPATH := modules/mut_sigs
-ifeq ($(HOSTNAME),ika.cbio.mskcc.org)
-export MATLAB_BIN := /usr/local/MATLAB/R2013a/bin/matlab
-else
-export MATLAB_BIN := /usr/local/bin/matlab
-endif
-MATLAB = export MATLABPATH=$(MATLABPATH); $(MATLAB_BIN) -nodisplay -nosplash 
-
-NMF_DIR = $(HOME)/usr/nmf_mut_sig
-NMF_TYPES_FILE = $(NMF_DIR)/types.mat
-
-NMF_MIN_SIG = 1
-NMF_MAX_SIG = 4
-
-.DELETE_ON_ERROR:
-.SECONDARY: 
-.PHONY: all
-
-ALL := nmf_mutsig/mutations.txt.mut.matrix nmf_mutsig/results.mat nmf_mutsig/plot.timestamp
-
-all : $(ALL)
-
-nmf_mutsig/mutations.txt : alltables/allTN.mutect_snps.tab.txt
-	$(INIT) awk 'NR > 1 { sub("X", "23", $$3); sub("Y", "24", $$3); sub("MT", "25", $$3); print $$1 "_" $$2, $$3, $$4, $$6 ">" $$7 }' $< > $@
-
-nmf_mutsig/mutations.txt.mut.matrix : nmf_mutsig/mutations.txt
-	$(INIT) $(EMU_PREPARE) --chr $(EMU_REF_DIR) --mut $< --pre $(@D) --regions $(EMU_TARGETS_FILE)
-
-nmf_mutsig/input.mat : nmf_mutsig/mutations.txt.mut.matrix
-	$(INIT) $(MATLAB) -r "createNMFinput $< $(<:.mut.matrix=.samples) $(NMF_TYPES_FILE) $(PROJECT_NAME) $@"
-
-nmf_mutsig/results.mat : nmf_mutsig/input.mat
-	$(INIT) $(MATLAB) -r "runNMF $< $(@:.mat=) $(NMF_DIR) $(NMF_MIN_SIG) $(NMF_MAX_SIG)"
-
-nmf_mutsig/plot.timestamp : nmf_mutsig/results.mat
-	$(INIT) $(MATLAB) -r "plotNMF $(<:.mat=) $(NMF_DIR) $(NMF_MIN_SIG) $(NMF_MAX_SIG)" && touch $@
diff --git a/signatures/plotNMF.m b/signatures/plotNMF.m
deleted file mode 100644
index a1420ccb..00000000
--- a/signatures/plotNMF.m
+++ /dev/null
@@ -1,20 +0,0 @@
-function plotNMF( prefix, nmfDir, minNumSig, maxNumSig )
-% run NMF
-addpath(strcat(nmfDir, '/source/'));
-addpath(strcat(nmfDir, '/plotting/'));
-mkdir('temp');
-
-minNumSig = str2num(minNumSig);
-maxNumSig = str2num(maxNumSig);
-
-for totalSignatures = minNumSig : maxNumSig
-    tsPrefix = strcat(prefix, '_ts', num2str(totalSignatures));
-    inputFile = strcat(tsPrefix, '.mat');
-    S = load(inputFile);
-    plotSignaturesToFile(tsPrefix, S.processes, S.input, S.allProcesses, S.idx, S.processStabAvg);
-    plotSignaturesExposureInSamplesToFile(tsPrefix, S.exposures, S.input);
-end
-
-quit
-end
-
diff --git a/signatures/plot_emu_signatures.R b/signatures/plot_emu_signatures.R
deleted file mode 100644
index 6b08a32f..00000000
--- a/signatures/plot_emu_signatures.R
+++ /dev/null
@@ -1,121 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("VariantAnnotation"))
-suppressPackageStartupMessages(library("RColorBrewer"))
-suppressPackageStartupMessages(library("hwriter"))
-suppressPackageStartupMessages(library("org.Hs.eg.db"))
-
-options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-
-optList <- list(
-                make_option("--outDir", default = NULL, help = "output dir (required)"),
-                make_option("--mutations", default = NULL, help = "mutations file (required)"),
-                make_option("--samples", default = NULL, help = "samples file"),
-                make_option("--sampleSubset", default = NULL, help = "sample subset file: list of samples to plot contribution"),
-                make_option("--inPrefix", default = NULL, help = "EMu input prefix (required)"))
-
-parser <- OptionParser(usage = "%prog [options]", option_list = optList);
-
-arguments <- parse_args(parser, positional_arguments = T);
-opt <- arguments$options;
-
-if (is.null(opt$outDir)) {
-    cat("Need output dir\n");
-    print_help(parser);
-    stop();
-} else if (is.null(opt$inPrefix)) {
-    cat("Need EMu input prefix\n");
-    print_help(parser);
-    stop();
-} else if (is.null(opt$mutations)) {
-    cat("Need EMu mutations file\n");
-    print_help(parser);
-    stop();
-} else {
-    files <- arguments$args;
-}
-
-glob <- paste(opt$inPrefix, '*_ml_spectra.txt', sep = '')
-spectraFiles <- Sys.glob(glob)
-
-glob <- paste(opt$inPrefix, '*_map_activities.txt', sep = '')
-activityFiles <- Sys.glob(glob)
-
-glob <- paste(opt$inPrefix, '*_assigned.txt', sep = '')
-assignedFiles <- Sys.glob(glob)
-
-pg <- openPage('index.html', dirname = opt$outDir, title = 'EMu results')
-
-set.seed(002)
-palette(sample(rainbow(30)))
-
-for (fn in spectraFiles) {
-    spectra <- read.table(fn, sep = ' ')
-    spectra <- spectra[,-97] # remove empty col
-    for (i in 1:nrow(spectra)) {
-        ofn <- paste(opt$outDir, "/", basename(fn), sep = '')
-        ofn <- sub('\\.txt$', paste("_", i, '.pdf', sep = ''), ofn)
-        pdf(ofn, height = 8, width = 10)
-        par(cex = 1.5)
-        cols <- rep(c('LightBlue', 'Black', 'Red', 'Grey', 'Green', 'Magenta'), each = 16)
-        barplot(t(spectra[i,]) * 100, beside = T, col = cols, border = cols, xaxt = 'n', main = paste("Signature", i), col.main = i, ylab = "% of mutations")
-        labs <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G")
-        mtext(labs, side = 1, at = 1:6 * 16 - 7.5)
-        null <- dev.off()
-    }
-
-    for (i in 1:nrow(spectra)) {
-        ofn <- paste(opt$outDir, "/", basename(fn), sep = '')
-        ofn <- sub('\\.txt$', paste("_", i, '.png', sep = ''), ofn)
-        png(ofn, height = 500, width = 800, type = 'cairo-png')
-        par(cex = 2)
-        cols <- rep(c('LightBlue', 'Black', 'Red', 'Grey', 'Green', 'Magenta'), each = 16)
-        barplot(t(spectra[i,]) * 100, beside = T, col = cols, border = cols, xaxt = 'n', main = paste("Signature", i), ylab = "% of mutations", col.main = i)
-        labs <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G")
-        mtext(labs, side = 1, at = 1:6 * 16 - 7.5)
-        null <- dev.off()
-        hwriteImage(basename(ofn), pg, br = T)
-    }
-}
-
-samples <- scan(opt$samples, what = 'character')
-sampleSubset <- scan(opt$sampleSubset, what = 'character')
-
-mutations <- read.table(opt$mutations, sep = ' ')
-colnames(mutations) <- c('sample', 'chr', 'pos', 'snv')
-mutations <- subset(mutations, sample %in% sampleSubset)
-
-for (fn in assignedFiles) {
-    assigned <- read.table(fn, sep = ' ')
-    assigned <- as.matrix(assigned[,-ncol(assigned)])
-    rownames(assigned) <- samples
-    assigned <- assigned[sampleSubset, ]
-
-    ofn <- paste(opt$outDir, "/", basename(fn), sep = '')
-    ofn <- sub('\\.txt$', '.png', ofn)
-
-    tab <- table(factor(mutations$sample))
-    tab <- tab[sampleSubset]
-    oo <- order(tab)
-    assigned <- assigned[oo, ]
-
-    png(ofn, height = 1000, width = 1000, type = 'cairo-png')
-    par(mar = c(5, 10, 5, 1), cex = 1, mfrow = c(1, 2), cex = 1.5)
-    barplot(t(assigned / rowSums(assigned)), col = 1:5, space = 0, border = F, horiz = T, las = 2, xlab = "Contribution of signature")
-    par(mar = c(5,1,5,5))
-    barplot(tab[oo], las = 2, horiz = T, space = 0, border = F, xlab = "Number of Mutations", axisnames = F)
-    null <- dev.off()
-    hwriteImage(basename(ofn), pg, br = T)
-
-    ofn <- sub('\\.png$', '.pdf', ofn)
-    pdf(ofn, height = 12, width = 12)
-    par(mar = c(5, 10, 5, 1), cex = 1, mfrow = c(1, 2), cex = 1.5)
-    barplot(t(assigned / rowSums(assigned)), col = 1:5, space = 0, border = F, horiz = T, las = 2, xlab = "Contribution of signature")
-    par(mar = c(5,1,5,5))
-    barplot(tab[oo], las = 2, horiz = T, space = 0, border = F, xlab = "Number of Mutations", axisnames = F)
-    null <- dev.off()
-}
-
-
-closePage(pg)
diff --git a/signatures/plot_signatures.R b/signatures/plot_signatures.R
deleted file mode 100644
index 86972ef4..00000000
--- a/signatures/plot_signatures.R
+++ /dev/null
@@ -1,121 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("readr"))
-suppressPackageStartupMessages(library("deconstructSigs"))
-suppressPackageStartupMessages(library("dplyr"))
-suppressPackageStartupMessages(library("magrittr"))
-suppressPackageStartupMessages(library("ggplot2"))
-suppressPackageStartupMessages(library("RColorBrewer"))
-suppressPackageStartupMessages(library("Palimpsest"))
-suppressPackageStartupMessages(library("BSgenome.Hsapiens.UCSC.hg19"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(
-					make_option("--sample_name", default = NA, type = 'character', help = "tumor sample name")
-				  )
-				  
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-'plot96_mutation_spectrum' <- function (vcf, sample.col = "sample", mutcat3.col = "mutcat3",
-										ymax = NULL, averageProp = FALSE, plot.file = NULL)
-{
-    bases <- c("A", "C", "G", "T")
-    ctxt16 <- paste(rep(bases, each = 4), rep(bases, 4), sep = ".")
-    mt <- c("CA", "CG", "CT", "TA", "TC", "TG")
-    types96 <- paste(rep(mt, each = 16), rep(ctxt16, 6), sep = "_")
-    types96 <- sapply(types96, function(z) {
-        sub("\\.", substr(z, 1, 1), z)
-    })
-    context <- substr(types96, 4, 6)
-    nsamp <- length(unique(vcf[, sample.col]))
-    if (averageProp & nsamp > 1) {
-        tmp <- makeMutypeMatFromVcf(vcf, sample.col = "CHCID", 
-            mutcat.col = "mutcat3", mutypes = types96)
-        freq <- apply(tmp, 1, mean)
-    }
-    else {
-        freq <- sapply(types96, function(z) {
-            mean(vcf[, mutcat3.col] == z, na.rm = T)
-        })
-    }
-    if (!is.null(plot.file)) {
-        pdf(plot.file, width = 24, height = 5)
-    }
-    col96 <- c(rep("skyblue3", 16), rep("black", 16), rep("red", 
-        16), rep("grey", 16), rep("green", 16), rep("pink", 16))
-    labs <- c(rep("C>A", 16), rep("C>G", 16), rep("C>T", 16), 
-        rep("T>A", 16), rep("T>C", 16), rep("T>G", 16))
-    if (is.null(ymax)) {
-        ymax <- 100*ceiling(max(freq) * 100)/100
-        ymax <- ifelse(ymax>10, 30, 10)
-    }
-    bp <- barplot(freq*100, col = col96, border = col96, las = 2, 
-        width = 1, space = .35, yaxt = "n", xaxt = "n", ylim = c(0, 
-            ymax * 1.2))
-    title(ylab = "Fraction of mutations (%)", mgp = c(1, 1, 0), 
-        cex.lab = 1.6)
-    axis(1, at = bp, labels = context, pos = 0, las = 2, cex.axis = 1.5, 
-        tick = F, cex.axis = 1, lwd=-1)
-    if (ymax==40) {
-	    axis(2, at = c(0,10,20,30,40), labels=c(0,10,20,30,40), pos = 0, las = 1, cex.axis = 1.5)
-	} else if (ymax==30) {
-	    axis(2, at = c(0,5,10,15,20,25,30), labels=c(0,5,10,15,20,25,30), pos = 0, las = 1, cex.axis = 1.5)
-	} else if (ymax==20) {
-		axis(2, at = c(0,5,10,15,20), labels=c(0,5,10,15,20), pos = 0, las = 1, cex.axis = 1.5)
-	} else if (ymax==10) {
-		axis(2, at = c(0,2,4,6,8,10), labels=c(0,2,4,6,8,10), pos = 0, las = 1, cex.axis = 1.5)
-	}
-    for (i in seq(1, 81, by = 16)) {
-        rect(bp[i], par()$usr[4], bp[i + 15], par()$usr[4] - 
-            0.05 * diff(par()$usr[3:4]), col = col96[i], border = col96[i])
-        text((bp[i] + bp[i + 15])/2, par()$usr[4] + 0.09 * diff(par()$usr[3:4]), 
-            labels = labs[i], xpd = TRUE, cex = 2)
-    }
-    if (!is.null(plot.file)) {
-        dev.off()
-    }
-}
-
-load(file=paste0("deconstructsigs/signatures/", opt$sample_name, ".RData"))
-
-## barplot of base changes with 3' and 5' context
-colnames(mutation_summary) = c("Sample", "CHROM", "POS", "REF", "ALT")
-mutation_summary = cbind(mutation_summary, "Type"=rep("SNV", nrow(mutation_summary)))
-vcf = preprocessInput_snv(input_data = mutation_summary,
-                          ensgene = ensgene,
-                          reference_genome = BSgenome.Hsapiens.UCSC.hg19)
-patient_ids = unique(vcf$Sample)
-pdf(file=paste0("deconstructsigs/plots/context/", opt$sample_name, ".pdf"), width=18, height=5)
-plot96_mutation_spectrum(vcf, ymax=20, sample.col = "Sample",  plot.file = NULL)
-dev.off()
-
-## pie-charts of signatures
-palette = colorRampPalette(brewer.pal(9, "Set1"))
-cols = palette(30)
-names(cols) = 1:30
-
-df = data.frame(percentage = 100*as.numeric(extracted_signatures$weights[1,]),
-				signature_name = colnames(extracted_signatures$weights)) %>%
-				mutate(signature_name = as.numeric(gsub(pattern="Signature.", replacement="", signature_name))) %>%
-				arrange(signature_name) %>%
-				filter(percentage!=0) %>%
-				mutate(signature_name = factor(signature_name, ordered=TRUE, levels=sort(signature_name))) %>%
-				mutate(lab.ypos = cumsum(percentage) - 0.5*percentage)
-				
-plot.0  = ggplot(df, aes(x = "", y = percentage, fill = signature_name)) +
-		  geom_bar(width = 1, stat = "identity", color = "white") +
-		  scale_fill_manual(values=cols) +
-		  coord_polar("y", start = 0) +
-		  geom_text(aes(y = lab.ypos, label = paste0(signif(percentage,3), "%")), color = "white") +
-		  guides(fill=guide_legend(title="Signature")) +
-		  theme_void()
-		  
-pdf(file=paste0("deconstructsigs/plots/exposures/", opt$sample_name, ".pdf"), width=6, height=6)
-print(plot.0)
-dev.off()
diff --git a/signatures/runNMF.m b/signatures/runNMF.m
deleted file mode 100644
index 40d372d7..00000000
--- a/signatures/runNMF.m
+++ /dev/null
@@ -1,46 +0,0 @@
-function runNMF( inputFile, outputPrefix, nmfDir, minNumSig, maxNumSig )
-% run NMF 
-addpath(strcat(nmfDir, '/source/'));
-addpath(strcat(nmfDir, '/plotting/'));
-clc;
-
-mkdir('temp');
-
-minNumSig = str2num(minNumSig);
-maxNumSig = str2num(maxNumSig);
-
-%% Open matlabpool
-if ( matlabpool('size') == 0 )
-        matlabpool open; % opens the default matlabpool, if it is not already opened
-end
-
-%% Define parameters
-iterationsPerCore = 100;
-stability = zeros(maxNumSig, 1);
-reconstructionError = zeros(maxNumSig, 1);
-allOutputFile = strcat(outputPrefix, '.mat');
-
-for totalSignatures = minNumSig : maxNumSig
-    outputFile = strcat(outputPrefix, '_ts', num2str(totalSignatures), '.mat');
-
-    % Decipher the signatures of mutational processes from catalogues of mutations
-    [input allProcesses allExposures idx processes exposures processStab processStabAvg] = ...
-    decipherMutationalProcesses(iterationsPerCore, totalSignatures, inputFile, ...
-    [ outputFile ] );
-    % Record the stability and average Frobenius reconstruction error
-    stability(totalSignatures-minNumSig+1) = mean(processStabAvg);
-    reconstructionError(totalSignatures-minNumSig+1) = norm(input.originalGenomes - processes*exposures, 'fro');
-end
-
-%% Plotting the stability and average Frobenius reconstruction error
-try %% Some versions of MATLAB plotyy has a bug under linux with -nodisplay -nosplash -nodesktop options
-    plotSignatureStabilityAndReconstructionToFile(strcat(outputPrefix, '_stab_reconstruction.png'), minNumSig:maxNumSig, stability, reconstructionError, input);
-catch ME
-        %% Do not do anything - just ignore the plot in order to save the final output daya
-end
-
-%% Saving the data
-save(allOutputFile);
-
-quit
-end
diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk
new file mode 100644
index 00000000..3d24b30b
--- /dev/null
+++ b/signatures/star_fish.mk
@@ -0,0 +1,67 @@
+include modules/Makefile.inc
+
+LOGDIR ?= log/star_fish.$(NOW)
+# Size bounds handed to `SURVIVOR vcftobed` as its min/max arguments in the starfish-sv template.
+MIN_SIZE = 1
+MAX_SIZE = 10000000000000000000
+# Top-level goal: per-pair SV bed/bedpe and copy-number tables, then the cohort-level summary files.
+star_fish :  $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bed) \
+	     $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) \
+	     $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) \
+	     star_fish/summary/taskcomplete \
+	     star_fish/summary/exposures.txt \
+	     star_fish/summary/features.txt
+		
+define starfish-sv
+star_fish/$1_$2/$1_$2.merged_sv.bed : vcf/$1_$2.merged_sv.vcf
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \
+							    SURVIVOR vcftobed \
+							    $$(<) \
+							    $(MIN_SIZE) \
+							    $(MAX_SIZE) \
+							    $$(@)")
+							    
+star_fish/$1_$2/$1_$2.merged_sv.bedpe : star_fish/$1_$2/$1_$2.merged_sv.bed
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(STARFISH_ENV),"set -o pipefail && \
+							    $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \
+							    --option 1 \
+							    --sample_name $1_$2 \
+							    --input_file $$(<) \
+							    --output_file $$(@)")
+							    
+star_fish/$1_$2/$1_$2.merged_cn.txt : facets/cncf/$1_$2.txt
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(STARFISH_ENV),"set -o pipefail && \
+							    $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \
+							    --option 2 \
+							    --sample_name $1_$2 \
+							    --input_file $$(<) \
+							    --output_file $$(@)")
+
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call starfish-sv,$(tumor.$(pair)),$(normal.$(pair)))))
+		
+star_fish/summary/taskcomplete : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(STARFISH_ENV),"set -o pipefail && \
+							     $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \
+							     --option 3 \
+							     --sample_name '$(SAMPLE_PAIRS)' \
+							     --output_file $(@)")
+							     
+star_fish/summary/exposures.txt : star_fish/summary/taskcomplete
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(STARFISH_ENV),"set -o pipefail && \
+							     $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \
+							     --option 4 \
+							     --output_file $(@)")
+							     
+star_fish/summary/features.txt : star_fish/summary/taskcomplete
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(STARFISH_ENV),"set -o pipefail && \
+							     $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \
+							     --option 5 \
+							     --output_file $(@)")
+							     
+# Record the R version of the star_fish environment (stdout and stderr) at parse time; POSIX redirection replaces bash-only `&>`.
+..DUMMY := $(shell mkdir -p version; $(STARFISH_ENV)/bin/R --version > version/star_fish.txt 2>&1)
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: star_fish
diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk
new file mode 100644
index 00000000..74656371
--- /dev/null
+++ b/signatures/sv_signature.mk
@@ -0,0 +1,53 @@
+include modules/Makefile.inc
+
+LOGDIR ?= log/sv_signature.$(NOW)
+# Size bounds handed to `SURVIVOR vcftobed`. NOTE(review): MAX_SIZE exceeds the signed 64-bit range - confirm how SURVIVOR parses it.
+MIN_SIZE = 1
+MAX_SIZE = 100000000000000000000
+# Top-level goal: per-pair bed/bedpe files and exposures, then the cohort-level exposure and feature tables.
+signature_sv :  $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \
+		$(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \
+		$(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) \
+		sv_signature/summary/exposures.txt \
+		sv_signature/summary/features.txt
+		
+define signature-sv
+sv_signature/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \
+							    SURVIVOR vcftobed \
+							    $$(<) \
+							    $(MIN_SIZE) \
+							    $(MAX_SIZE) \
+							    $$(@)")
+							    
+sv_signature/$1_$2/$1_$2.merged.bedpe : sv_signature/$1_$2/$1_$2.merged.bed
+	$$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \
+					 echo \"chrom1	start1	end1	chrom2	start2	end2	sv_id	pe_support	strand1	strand2	svclass\" > \
+					 $$(@) && \
+					 cat $$(<) >> $$(@)")
+					 
+sv_signature/$1_$2/$1_$2.merged_exposures.txt : sv_signature/$1_$2/$1_$2.merged.bedpe
+	$$(call RUN,-c -n 4 -s 2G -m 4G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \
+								  $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \
+								  --option 1 \
+								  --sample_name $1_$2 \
+								  --input_file $$(<) \
+								  --output_file sv_signature/$1_$2/$1_$2.merged")
+
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call signature-sv,$(tumor.$(pair)),$(normal.$(pair)))))
+		
+sv_signature/summary/exposures.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \
+					  			    $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 2 --sample_name '$(SAMPLE_PAIRS)' --output_file $(@)")
+
+sv_signature/summary/features.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt)
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \
+					  			    $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 3 --sample_name '$(SAMPLE_PAIRS)' --output_file $(@)")
+
+# Record the SURVIVOR version output (stdout and stderr) at parse time; POSIX redirection replaces bash-only `&>`.
+..DUMMY := $(shell mkdir -p version; $(SURVIVOR_ENV)/bin/SURVIVOR --version > version/sv_signature.txt 2>&1)
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: signature_sv
diff --git a/signatures/vcf_2_vranges.R b/signatures/vcf_2_vranges.R
deleted file mode 100644
index 925ef565..00000000
--- a/signatures/vcf_2_vranges.R
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("VariantAnnotation"))
-suppressPackageStartupMessages(library("reshape"))
-suppressPackageStartupMessages(library("boot"))
-suppressPackageStartupMessages(library("plyr"))
-suppressPackageStartupMessages(library("dplyr"))
-suppressPackageStartupMessages(library("ggplot2"))
-suppressPackageStartupMessages(library("RColorBrewer"))
-suppressPackageStartupMessages(library("reshape2"))
-suppressPackageStartupMessages(library("SomaticSignatures"))
-suppressPackageStartupMessages(library("foreach"))
-
-optList <- list(
-                make_option("--genome", default = 'b37', help = "reference genome"),
-                make_option("--ignoreFilter", default = F, action = 'store_true', help = "ignore the filter column for vcf files"),
-                make_option("--outFile", default = NULL, type = "character", action = "store", help = "output directory")
-                )
-
-parser <- OptionParser(usage = "%prog [options] [vcf file(s)]", option_list = optList);
-arguments <- parse_args(parser, positional_arguments = T);
-opt <- arguments$options;
-
-if (length(arguments$args) != 1) {
-    cat("Need vcf file\n");
-    print_help(parser);
-    stop();
-}
-
-vcfFile <- arguments$args[1]
-outFile <- opt$outFile
-if (opt$genome == "b37" || opt$genome == "hg19") {
-    library("BSgenome.Hsapiens.UCSC.hg19");
-    library("TxDb.Hsapiens.UCSC.hg19.knownGene")
-    genome <- BSgenome.Hsapiens.UCSC.hg19
-    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
-    genomeName <- 'hg19'
-    chromosomes <- c(1:22, "X", "Y")
-    chromosomes <- c(chromosomes, paste('chr', chromosomes, sep = ''))
-} else if (opt$genome == "mm10" || opt$genome == "GRCm38") {
-    library("BSgenome.Mmusculus.UCSC.mm10");
-    library("TxDb.Mmusculus.UCSC.mm10.knownGene")
-    genome <- BSgenome.Mmusculus.UCSC.mm10
-    txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
-    genomeName <- 'mm10'
-    chromosomes <- c(1:19, "X", "Y")
-    chromosomes <- c(chromosomes, paste('chr', chromosomes, sep = ''))
-}
-
-txByGenes <- transcriptsBy(txdb, 'gene')
-
-temp <- tempfile()
-zipped <- bgzip(vcfFile, temp)
-idx <- indexTabix(temp, "vcf")
-cat('done\n')
-
-tab <- TabixFile(zipped, idx)
-open(tab)
-
-vcf <- readVcf(tab, genomeName)
-passIds <- which(rowRanges(vcf)$FILTER == "PASS")
-if (nrow(vcf) > 0 && sum(seqnames(vcf) %in% chromosomes) > 0 &&
-    sum(isSNV(vcf)) > 0 && (opt$ignoreFilter | length(passIds) > 0)) {
-    if (!opt$ignoreFilter) {
-        vcf <- vcf[passIds, ]
-    }
-    vcf <- vcf[isSNV(vcf) & seqnames(vcf) %in% chromosomes]
-    s <- sub('\\..*', '', vcfFile)
-    s <- sub('.*/', '', s)
-    vr <- VRanges(seqnames = seqnames(vcf),
-            ranges = ranges(vcf),
-            ref = as.character(ref(vcf)),
-            alt = sapply(alt(vcf), function(x) as.character(x[1])),
-            sampleNames = s)
-    seqlevels(vr) <- sub('^M$', 'MT', seqlevels(vr))
-    vr <- ucsc(vr)
-    vr <- mutationContext(vr, genome, unify = T)
-    vr$refalt <- paste(ref(vr), alt(vr), sep = '')
-
-    # query transcript ids
-    ol <- findOverlaps(vr, txByGenes)
-    subjectStrands <- sapply(txByGenes[subjectHits(ol)], function(x) paste(unique(as.character(strand(x))), collapse = ','))
-    queryStrands <- tapply(subjectStrands, queryHits(ol), function(x) paste(unique(x), collapse = ","))
-    vr$txStrand <- NA
-    vr$txStrand[as.integer(names(queryStrands))] <- queryStrands
-    vr$transcribed <- F
-    vr$transcribed[is.na(vr$txStrand)] <- NA
-    vr$transcribed[vr$refalt %in% c("GA", "GC", "GT", "AC", "AG", "AT") & grepl('\\+', vr$txStrand)] <- T
-    vr$transcribed[vr$refalt %in% c("CA", "CG", "CT", "TA", "TC", "TG") & grepl('-', vr$txStrand)] <- T
-    save(vr, file = opt$outFile)
-} else {
-    vr <- NULL
-    save(vr, file = opt$outFile)
-}
diff --git a/summary/delmh_summary.R b/summary/delmh_summary.R
new file mode 100644
index 00000000..3698f4d4
--- /dev/null
+++ b/summary/delmh_summary.R
@@ -0,0 +1,118 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("BSgenome.Hsapiens.UCSC.hg19"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
+}
+
+args_list <- list(make_option("--input_file", default = NA, type = 'character', help = "file name and path"))
+				  
+parser <- OptionParser(usage = "%prog", option_list = args_list)
+arguments <- parse_args(parser, positional_arguments = T)
+opt <- arguments$options
+
+all_vars = read_tsv(file=opt$input_file, col_types = cols(.default = col_character())) %>%
+		   type_convert()
+		   
+all_tumors = all_vars %>%
+			 .[["TUMOR_SAMPLE"]]
+
+all_normals = all_vars %>%
+			 .[["NORMAL_SAMPLE"]]
+
+all_patients = unique(paste0(all_tumors, "_", all_normals))
+
+all_vars = all_vars %>%
+	   filter(Variant_Classification=="Frame_Shift_Del" | Variant_Classification=="In_Frame_Del") %>%
+	   filter((grepl("varscan", variantCaller) & grepl("strelka", variantCaller)) |
+	   	  ((grepl("platypus", variantCaller) & grepl("scalpel", variantCaller)) & ((nchar(REF)-nchar(ALT))>4) & Variant_Classification!="In_Frame_Del") |
+	   	  ((grepl("platypus", variantCaller) & grepl("lancet", variantCaller)) & ((nchar(REF)-nchar(ALT))>4) & Variant_Classification!="In_Frame_Del"))
+
+patient_summary = data_frame(SAMPLE_UUID = all_patients)
+del_count = all_vars %>%
+			mutate(SAMPLE_UUID = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>%
+			dplyr::group_by(SAMPLE_UUID) %>%
+			dplyr::summarize(del_count = n())
+mean_delen = all_vars %>%
+			 mutate(del_len = nchar(REF)-1) %>%
+			 mutate(SAMPLE_UUID = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>%
+			 dplyr::group_by(SAMPLE_UUID) %>%
+			 dplyr::summarize(mean_delen = mean(del_len))
+median_delen = all_vars %>%
+			   mutate(del_len = nchar(REF)-1) %>%
+			   mutate(SAMPLE_UUID = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>%
+			   dplyr::group_by(SAMPLE_UUID) %>%
+			   dplyr::summarize(median_delen = median(del_len))
+deln4_count = all_vars %>%
+			  mutate(del_len = nchar(REF)-1) %>%
+			  mutate(SAMPLE_UUID = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>%
+			  dplyr::group_by(SAMPLE_UUID) %>%
+			  dplyr::summarize(deln4_count = sum(del_len>=4))
+			  
+'getSeqFrom' <- function(chr, start, end)
+{
+	# Plus-strand hg19 bases for chr:start-end, coerced to character and returned invisibly.
+	invisible(as.character(getSeq(x=BSgenome.Hsapiens.UCSC.hg19, names=chr, start=start, end=end, strand="+", as.character=TRUE)))
+}
+
+
+'checkHomLen' <- function(deleted, next50)
+{
+	# Length of the longest prefix of `deleted` that also starts `next50`
+	# (0 when even the first characters differ); the value is returned invisibly.
+	prefix_len = seq_len(nchar(deleted))
+	shared = substring(deleted, 1, prefix_len) == substring(next50, 1, prefix_len)
+	longest = 0
+	if (any(shared)) longest = max(prefix_len[shared])
+	return(invisible(longest))
+}
+
+## Microhomology scan: compare each deleted sequence with the n reference bases before and after it.
+hml_down = hml_up = numeric(nrow(all_vars))
+## seq_len() gives an empty loop when no deletion passed the filters; 1:nrow() would iterate over c(1, 0).
+for (i in seq_len(nrow(all_vars))) {
+	chr = paste0("chr", all_vars[i,"CHROM"])
+	start = as.numeric(all_vars[i,"POS"])+1		# first deleted base; assumes POS holds a VCF-style anchor base - TODO confirm
+	n = as.numeric(nchar(all_vars[i,"REF"]))-1	# number of bases fetched per window (REF length minus one)
+	deleted = getSeqFrom(chr = chr, start = start, end = start + n - 1)
+	prevn = getSeqFrom(chr = chr, start = start - n, end = start - 1)
+	nextn = getSeqFrom(chr = chr, start = start + n, end = start + 2*n - 1)
+	hml_down[i] = checkHomLen(deleted = deleted, next50 = prevn)
+	hml_up[i] = checkHomLen(deleted = deleted, next50 = nextn)
+}
+
+mh_3 = data_frame(SAMPLE_UUID = paste0(all_vars$TUMOR_SAMPLE, "_", all_vars$NORMAL_SAMPLE),
+				   del_len = nchar(all_vars$REF)-1,
+				   max_mhlen_5p = hml_down,
+				   max_mhlen_3p = hml_up,
+	  			   max_mhlen = apply(cbind(hml_down, hml_up), 1, max)) %>%
+	    filter(del_len >= 4) %>%
+	    mutate(is_3 = ifelse(max_mhlen>=3, 1, 0)) %>%
+	    dplyr::group_by(SAMPLE_UUID) %>%
+	    dplyr::summarize(deln4_mhlen_3_counts = sum(is_3))
+
+mhl_3 = data_frame(SAMPLE_UUID = paste0(all_vars$TUMOR_SAMPLE, "_", all_vars$NORMAL_SAMPLE),
+				   del_len = nchar(all_vars$REF)-1,
+				   max_mhlen_5p = hml_down,
+				   max_mhlen_3p = hml_up,
+	  			   max_mhlen = apply(cbind(hml_down, hml_up), 1, max)) %>%
+	    filter(del_len >= 4) %>%
+	    filter(max_mhlen >= 3) %>%
+	    dplyr::group_by(SAMPLE_UUID) %>%
+	    dplyr::summarize(deln4_mhlen_3_avg_deln = mean(del_len))				 
+
+patient_summary = left_join(patient_summary, del_count, by="SAMPLE_UUID") %>%
+				  left_join(mean_delen, by="SAMPLE_UUID") %>%
+				  left_join(median_delen, by="SAMPLE_UUID") %>%
+				  left_join(deln4_count, by="SAMPLE_UUID") %>%
+				  left_join(mh_3, by="SAMPLE_UUID") %>%
+				  left_join(mhl_3, by="SAMPLE_UUID") %>%
+				  mutate(delmh_prop = deln4_mhlen_3_counts/del_count) %>%
+				  mutate(delmh_del4n_prop = deln4_mhlen_3_counts/deln4_count)
+				  
+write_tsv(patient_summary, path="summary/tsv/delmh_summary.tsv")
diff --git a/summary/delmh_summary.mk b/summary/delmh_summary.mk
new file mode 100644
index 00000000..7b82afc9
--- /dev/null
+++ b/summary/delmh_summary.mk
@@ -0,0 +1,14 @@
+include modules/Makefile.inc
+# Builds the deletion-microhomology summary table from the mutation summary table.
+LOGDIR ?= log/delmh_summary.$(NOW)
+PHONY += delmh_summary
+# Convenience goal that resolves to the summary table below.
+delmh_summary : summary/tsv/delmh_summary.tsv
+# The R script is given only --input_file; it writes summary/tsv/delmh_summary.tsv itself (hard-coded in delmh_summary.R).
+summary/tsv/delmh_summary.tsv : summary/tsv/mutation_summary.tsv
+	$(call RUN,-n 1 -s 8G -m 8G,"set -o pipefail && \
+								 $(RSCRIPT) modules/summary/delmh_summary.R --input_file $(<)")
+	
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: $(PHONY)
diff --git a/summary/genomesummary.R b/summary/genomesummary.R
index 57c1940e..be20985e 100644
--- a/summary/genomesummary.R
+++ b/summary/genomesummary.R
@@ -1,13 +1,551 @@
 #!/usr/bin/env Rscript
 
-file_names = c("genome_altered.tsv", "lst_score.tsv", "myriad_score.tsv", "ntai_score.tsv")
-summary_scores = NULL
-for (i in 1:length(file_names)) {
-  data = read.csv(file=paste0("genome_stats/", file_names[i]), header=FALSE, sep="\t", stringsAsFactors=FALSE)
-  summary_scores = cbind(summary_scores, data[,2])
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("readr"))
+
+if (!interactive()) {
+    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
 }
-summary_scores = cbind(data[,1], summary_scores)
-colnames(summary_scores) = c("sample_names", gsub(".tsv", "", file_names))
-write.table(summary_scores, file="summary/tsv/genome_summary.tsv", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE)
 
-warnings()
+args_list <- list(make_option("--option", default = NA, type = 'character', help = "which analysis to do"),
+		  make_option("--sample_name", default = NA, type = 'character', help = "sample name"),
+		  make_option("--file_in", default = NA, type = 'character', help = "input file name"),
+		  make_option("--file_out", default = NA, type = 'character', help = "output file name"))
+parser <- OptionParser(usage = "%prog", option_list = args_list)
+arguments <- parse_args(parser, positional_arguments = T)
+opt <- arguments$options
+
+
+if (as.numeric(opt$option) == 1) {
+
+	load(opt$file_in)
+	alpha = ifelse(is.na(fit$purity), 1, fit$purity)
+	psi = ifelse(is.na(fit$ploidy), 2, fit$ploidy)
+	gamma = 1
+	x = fit$cncf[,"cnlr.median"]
+	absolute_copies = round(((((2^(x/gamma))*(alpha*psi+(1-alpha)*2)) - ((1-alpha)*2))/alpha))
+	index = absolute_copies!=round(psi)
+	if (sum(index, na.rm=TRUE)!=0) {
+		genome_footprint = sum(as.numeric(fit$cncf[,"end"]-fit$cncf[,"start"]), na.rm=TRUE)
+		genome_altered = sum(as.numeric(fit$cncf[index,"end"]-fit$cncf[index,"start"]), na.rm=TRUE)/genome_footprint
+	} else {
+		genome_altered = 0
+	}
+	x = dplyr::tibble(sample_name = as.character(opt$sample_name),
+			  genome_altered = genome_altered)
+	readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = TRUE)
+
+} else if (as.numeric(opt$option) == 2) {
+	
+	chromStrToNum <- function(str) {
+		# Map a chromosome label to a number: numeric labels convert directly,
+		# "X" becomes 23 and "Y" becomes 24; any other label stays NA. Returned invisibly.
+		chrom_index <- suppressWarnings(as.numeric(str))
+		if (is.na(chrom_index)) {
+			if (str == "X") chrom_index <- 23 else if (str == "Y") chrom_index <- 24
+		}
+		return(invisible(chrom_index))
+	}
+
+	GetChrominfo <- function() {
+	  f <- "modules/copy_number/hg19_chrominfo.txt"
+	  chrom <- read.table(file=f)
+	  chrom <- subset(chrom, grepl("^chr[0-9XY]{1,2}$", chrom[,1]))
+	  f <- "modules/copy_number/hg19_gaps.txt"
+	  gaps <- read.table(file=f)
+	  centro <- subset(gaps, gaps[,8] == "centromere")
+	  chrominfo <- merge(chrom[,1:2], centro[,2:4], by.x = 1, by.y = 1) 
+	  chrominfo$centromere <- rowMeans(chrominfo[,3:4]) 
+	  chrominfo <- chrominfo[,c(1,2,5,3,4)] 
+	  colnames(chrominfo) <- c("chr", "size", "centromere", "centstart", "centend")
+	  chrominfo[,1] <- as.character(chrominfo[,1])
+	  chrominfo$chr <- sub("chr", "", chrominfo$chr)
+	  chrominfo$chr <- sub("X", "23", chrominfo$chr)
+	  chrominfo$chr <- sub("Y", "24", chrominfo$chr)
+	  chrominfo[,1] <- as.numeric(chrominfo[,1])
+	  chrominfo <- chrominfo[order(chrominfo$chr), ]  
+	  rownames(chrominfo) <- as.character(chrominfo[,1])
+	  chrominfo <- as.matrix(chrominfo)
+	  return(invisible(chrominfo))
+	}
+
+	fix_facets_column_names <- function(dat) {
+		colnames(dat)[which(colnames(dat)=="chrom")] <- "chromosome"
+		colnames(dat)[which(colnames(dat)=="loc.start")] <- "startBP"
+		colnames(dat)[which(colnames(dat)=="loc.end")] <- "endBP"
+		colnames(dat)[which(colnames(dat)=="lcn.em")] <- "nB"
+		sz <- dat[,"endBP"] - dat[,"startBP"]
+		dat <- cbind(dat, size=sz)
+	    nA <- dat[,"tcn.em"] - dat[,"nB"]
+	    dat <- cbind(dat, nA=nA)
+		return(invisible(dat))
+	}
+
+	join_adjacent_segments <- function(dat) {
+		# Repeatedly merge neighbouring rows that share chromosome, nA and nB until a
+		# pass merges nothing; tables with fewer than two rows come back unchanged.
+		cur_segs <- dat
+		something_changed <- 1
+		while ( something_changed && nrow(cur_segs) > 1 ) {
+			new_segs <- c()
+			something_changed <- 0
+			x <- 2
+			while (x <= nrow(cur_segs)) {
+				if ( 	(cur_segs[x-1,"nB"] == cur_segs[x,"nB"]) && 
+						(cur_segs[x-1,"nA"] == cur_segs[x,"nA"]) &&
+						(cur_segs[x-1,"chromosome"] == cur_segs[x,"chromosome"])
+				) {
+					t <- cur_segs[x-1,]
+					t["endBP"] <- cur_segs[x,"endBP"]
+					t["end"] <- cur_segs[x,"end"]
+					t["size"] <- t["endBP"] - t["startBP"]
+					something_changed <- 1
+					new_segs <- rbind(t, new_segs)
+					x <- x+2
+				} else {
+					new_segs <- rbind(cur_segs[x-1,], new_segs)
+					x<-x+1
+				}
+			}
+			# Row x-1 is still pending whenever the scan stops at nrow+1; the former
+			# last_changed test lost the final row after a merge at x == nrow-1.
+			if (x == nrow(cur_segs) + 1) {
+				new_segs <- rbind(cur_segs[x-1,],new_segs)
+			}
+			new_segs <- new_segs[nrow(new_segs):1,]
+			cur_segs <- new_segs
+		}	
+		return(invisible(cur_segs))
+	}
+
+	fix_facet_segs <- function(dat) {
+	    i <- which(is.na(dat$nB))
+	    if ( length(i) > 0 )  {
+		dat <- dat[-i, ]
+	    }
+	    dat <- join_adjacent_segments(dat)
+	    return(invisible(dat))
+	}
+
+	chrom_arm_LST_score <- function(dat) {
+		# Count boundaries between consecutive rows where both segments have size >= 10e6 and
+		# startBP minus the previous endBP is <= 3e6; the left-hand row of each hit is collected.
+		min_seg_size <- 10e6
+		max_gap <- 3e6
+		n_breaks <- 0
+		flanking <- c()
+		x <- 2
+		while (x <= nrow(dat)) {
+			both_large <- (dat[x-1,"size"] >= min_seg_size) && (dat[x,"size"] >= min_seg_size)
+			if (both_large && ((dat[x,"startBP"] - dat[x-1,"endBP"]) <= max_gap)) {
+				n_breaks <- n_breaks + 1
+				flanking <- rbind(dat[x-1,], flanking)
+			}
+			x <- x + 1
+		}
+		result <- list()
+		result$score <- n_breaks
+		result$segs <- flanking
+		return(invisible(result))
+	}
+
+	lst_filter <- function(dat, size_thresh) {
+		# Remove segments smaller than size_thresh one at a time, smallest first, re-joining
+		# adjacent segments after every removal; the filtered table is returned invisibly.
+		small_first <- function(d) {
+			idx <- which(d[,"size"] < size_thresh)
+			idx[order(d[idx,"size"])]
+		}
+		pending <- small_first(dat)
+		while (length(pending) > 0) {
+			dat <- dat[-pending[1], ]
+			dat <- join_adjacent_segments(dat)
+			pending <- small_first(dat)
+		}
+		return(invisible(dat))
+	}
+
+	score_LST <- function(dat, chromInfo) {
+		score <- 0
+		segs <- c()
+		dat <- lst_filter(dat, 3e6)
+		for (c in unique(dat[,"chromosome"]) ) {
+			i <- which(dat[,"chromosome"] == c)
+			csegs <- dat[i,]
+			cNum <- chromStrToNum(c)
+			i <- which(csegs[,"startBP"] <= chromInfo[cNum,"centstart"])
+			parm <- csegs[i,]
+			tmp <- chrom_arm_LST_score(parm)
+			score <- score + tmp$score
+			segs <- rbind(tmp$segs, segs)
+			i <- which(csegs[,"endBP"] >= chromInfo[cNum,"centend"])
+			qarm <- csegs[i,]
+			tmp <- chrom_arm_LST_score(qarm)
+			score <- score + tmp$score
+			segs <- rbind(tmp$segs, segs)
+		}
+		tmp <- list()
+		tmp$score <- score
+		tmp$segs <- segs
+		return(invisible(tmp))
+	}
+
+	dat = read.table(opt$file_in, sep="\t", header=TRUE, stringsAsFactor=FALSE)
+	dat = fix_facets_column_names(dat)
+	segs = fix_facet_segs(dat)
+	chromInfo = GetChrominfo()
+	lst = score_LST(segs, chromInfo)
+	x = dplyr::tibble(sample_name = as.character(opt$sample_name),
+			  lst = lst$score)
+	readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = TRUE)
+	
+} else if (as.numeric(opt$option) == 3) {
+	
+	chromStrToNum <- function(str) {
+		# Map a chromosome label to a number: numeric labels convert directly,
+		# "X" becomes 23 and "Y" becomes 24; any other label stays NA. Returned invisibly.
+		chrom_index <- suppressWarnings(as.numeric(str))
+		if (is.na(chrom_index)) {
+			if (str == "X") chrom_index <- 23 else if (str == "Y") chrom_index <- 24
+		}
+		return(invisible(chrom_index))
+	}
+
+	GetChrominfo <- function() {
+	  f <- "modules/copy_number/hg19_chrominfo.txt"
+	  chrom <- read.table(file=f)
+	  chrom <- subset(chrom, grepl("^chr[0-9XY]{1,2}$", chrom[,1]))
+	  f <- "modules/copy_number/hg19_gaps.txt"
+	  gaps <- read.table(file=f)
+	  centro <- subset(gaps, gaps[,8] == "centromere")
+	  chrominfo <- merge(chrom[,1:2], centro[,2:4], by.x = 1, by.y = 1) 
+	  chrominfo$centromere <- rowMeans(chrominfo[,3:4]) 
+	  chrominfo <- chrominfo[,c(1,2,5,3,4)] 
+	  colnames(chrominfo) <- c("chr", "size", "centromere", "centstart", "centend")
+	  chrominfo[,1] <- as.character(chrominfo[,1])
+	  chrominfo$chr <- sub("chr", "", chrominfo$chr)
+	  chrominfo$chr <- sub("X", "23", chrominfo$chr)
+	  chrominfo$chr <- sub("Y", "24", chrominfo$chr)
+	  chrominfo[,1] <- as.numeric(chrominfo[,1])
+	  chrominfo <- chrominfo[order(chrominfo$chr), ]  
+	  rownames(chrominfo) <- as.character(chrominfo[,1])
+	  chrominfo <- as.matrix(chrominfo)
+	  return(invisible(chrominfo))
+	}
+
+	fix_facets_column_names <- function(dat) {
+		colnames(dat)[which(colnames(dat)=="chrom")] <- "chromosome"
+		colnames(dat)[which(colnames(dat)=="loc.start")] <- "startBP"
+		colnames(dat)[which(colnames(dat)=="loc.end")] <- "endBP"
+		colnames(dat)[which(colnames(dat)=="lcn.em")] <- "nB"
+		sz <- dat[,"endBP"] - dat[,"startBP"]
+		dat <- cbind(dat, size=sz)
+	    nA <- dat[,"tcn.em"] - dat[,"nB"]
+	    dat <- cbind(dat, nA=nA)
+		return(invisible(dat))
+	}
+
+	join_adjacent_segments <- function(dat) {
+		# Repeatedly merge neighbouring rows that share chromosome, nA and nB until a
+		# pass merges nothing; tables with fewer than two rows come back unchanged.
+		cur_segs <- dat
+		something_changed <- 1
+		while ( something_changed && nrow(cur_segs) > 1 ) {
+			new_segs <- c()
+			something_changed <- 0
+			x <- 2
+			while (x <= nrow(cur_segs)) {
+				if ( 	(cur_segs[x-1,"nB"] == cur_segs[x,"nB"]) && 
+						(cur_segs[x-1,"nA"] == cur_segs[x,"nA"]) &&
+						(cur_segs[x-1,"chromosome"] == cur_segs[x,"chromosome"])
+				) {
+					t <- cur_segs[x-1,]
+					t["endBP"] <- cur_segs[x,"endBP"]
+					t["end"] <- cur_segs[x,"end"]
+					t["size"] <- t["endBP"] - t["startBP"]
+					something_changed <- 1
+					new_segs <- rbind(t, new_segs)
+					x <- x+2
+				} else {
+					new_segs <- rbind(cur_segs[x-1,], new_segs)
+					x<-x+1
+				}
+			}
+			# Row x-1 is still pending whenever the scan stops at nrow+1; the former
+			# last_changed test lost the final row after a merge at x == nrow-1.
+			if (x == nrow(cur_segs) + 1) {
+				new_segs <- rbind(cur_segs[x-1,],new_segs)
+			}
+			new_segs <- new_segs[nrow(new_segs):1,]
+			cur_segs <- new_segs
+		}	
+		return(invisible(cur_segs))
+	}
+
+	fix_facet_segs <- function(dat) {  # drop rows whose nB is NA, then merge adjacent equal segments
+	    i <- which(is.na(dat$nB))
+	    if ( length(i) > 0 )  {  # guard: negative indexing with an empty index would drop every row
+		dat <- dat[-i, ]
+	    }
+	    dat <- join_adjacent_segments(dat)
+	    return(invisible(dat))
+	}
+
+	score_ntAI <- function(dat, chromInfo, min_size=1000, shrink=FALSE) {  # count chromosome-end segments with nA != nB that stop short of the centromere; returns list(segs, score)
+		index <- dat[,"chromosome"] %in% c("MT", "Y", "24")  # segments on MT, Y or 24 are excluded
+		dat <- dat[!index,]
+		index <- dat[,"size"] < min_size  # segments shorter than min_size are excluded
+		dat <- dat[!index,]
+		if (shrink) {  # optionally re-merge neighbours after filtering
+			dat <- join_adjacent_segments(dat)
+		}
+		chrList <- unique(dat[,"chromosome"])
+		ntAI_score <- 0
+		ntAI_segs <- NULL
+		for (x in chrList) {
+			index <- dat[,"chromosome"] == x
+			chr_segs <- dat[index,]
+			cNum <- chromStrToNum(x)  # chromStrToNum is not defined in this function - presumably defined earlier in this branch; cNum is used as a row index into chromInfo
+			if (nrow(chr_segs) < 2 ) {  # chromosomes with fewer than two segments are skipped
+				next
+			}
+			if ( (chr_segs[1,"nA"] != chr_segs[1,"nB"]) && (chromInfo[cNum,"centstart"] > chr_segs[1,"endBP"]) ) {  # first segment: nA != nB and it ends before centstart
+				ntAI_score <- ntAI_score+1
+				ntAI_segs <- rbind(chr_segs[1,],ntAI_segs)
+			}
+			eSeg <- nrow(chr_segs)
+			if ( (chr_segs[eSeg, "nA"] != chr_segs[eSeg, "nB"]) && (chr_segs[eSeg,"startBP"] > chromInfo[cNum,"centend"]) ) {  # last segment: nA != nB and it starts after centend
+				ntAI_score <- ntAI_score+1
+				ntAI_segs <- rbind(chr_segs[eSeg,],ntAI_segs)
+			}
+		}
+		tmp <- list()
+		tmp$segs <- ntAI_segs  # NULL when nothing was counted
+		tmp$score <- ntAI_score
+		return(invisible(tmp))
+	}
+
+	dat = read.table(opt$file_in, sep="\t", header=TRUE, stringsAsFactor=FALSE)  # tab-separated input with a header row
+	dat = fix_facets_column_names(dat)
+	segs = fix_facet_segs(dat)
+	chromInfo = GetChrominfo()
+	ntai = score_ntAI(segs, chromInfo)  # default min_size and shrink are used
+	x = dplyr::tibble(sample_name = as.character(opt$sample_name),
+			  ntai = ntai$score)
+	readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = TRUE)  # one-row table with columns sample_name and ntai
+	
+} else if (as.numeric(opt$option) == 4) {
+	
+	chromStrToNum <- function(str) {  # convert a chromosome label to a number; "X" maps to 23 and "Y" to 24
+		suppressWarnings(cNum <- as.numeric(str))  # non-numeric labels become NA, with the coercion warning silenced
+		if (is.na(cNum) && str == "X" ) { 
+			cNum <- 23
+		} else if (is.na(cNum) && str == "Y") {
+			cNum <- 24 
+		}
+		return(invisible(cNum))  # remains NA for any other non-numeric label
+	}
+
+	GetChrominfo <- function() {  # build a matrix of per-chromosome size and centromere coordinates from two hg19 tables read by relative path
+	  f <- "modules/copy_number/hg19_chrominfo.txt"
+	  chrom <- read.table(file=f)
+	  chrom <- subset(chrom, grepl("^chr[0-9XY]{1,2}$", chrom[,1]))  # keep names made of chr plus one or two characters from 0-9, X, Y
+	  f <- "modules/copy_number/hg19_gaps.txt"
+	  gaps <- read.table(file=f)
+	  centro <- subset(gaps, gaps[,8] == "centromere")  # rows whose 8th column equals "centromere"
+	  chrominfo <- merge(chrom[,1:2], centro[,2:4], by.x = 1, by.y = 1) # join on the first column of each subset
+	  chrominfo$centromere <- rowMeans(chrominfo[,3:4]) # mean of the two merged gap columns
+	  chrominfo <- chrominfo[,c(1,2,5,3,4)] # reorder columns to match the names assigned next
+	  colnames(chrominfo) <- c("chr", "size", "centromere", "centstart", "centend")
+	  chrominfo[,1] <- as.character(chrominfo[,1])
+	  chrominfo$chr <- sub("chr", "", chrominfo$chr)
+	  chrominfo$chr <- sub("X", "23", chrominfo$chr)
+	  chrominfo$chr <- sub("Y", "24", chrominfo$chr)
+	  chrominfo[,1] <- as.numeric(chrominfo[,1])  # chr is numeric from here on
+	  chrominfo <- chrominfo[order(chrominfo$chr), ]  # sort by numeric chromosome
+	  rownames(chrominfo) <- as.character(chrominfo[,1])
+	  chrominfo <- as.matrix(chrominfo)
+	  return(invisible(chrominfo))
+	}
+
+	fix_facets_column_names <- function(dat) {  # rename segment-table columns (presumably FACETS output - confirm) and append derived size and nA columns
+		colnames(dat)[which(colnames(dat)=="chrom")] <- "chromosome"
+		colnames(dat)[which(colnames(dat)=="loc.start")] <- "startBP"
+		colnames(dat)[which(colnames(dat)=="loc.end")] <- "endBP"
+		colnames(dat)[which(colnames(dat)=="lcn.em")] <- "nB"  # lcn.em is exposed as nB from here on
+		sz <- dat[,"endBP"] - dat[,"startBP"]  # segment length = endBP - startBP
+		dat <- cbind(dat, size=sz)
+	    nA <- dat[,"tcn.em"] - dat[,"nB"]  # nA = tcn.em - nB
+	    dat <- cbind(dat, nA=nA)
+		return(invisible(dat))  # returned invisibly; input must already contain tcn.em
+	}
+
+	join_adjacent_segments <- function(dat) {  # merge consecutive rows that share nA, nB and chromosome; returns the merged table invisibly
+		cur_segs <- dat
+		something_changed <- 1
+		while ( something_changed ) {  # repeat full passes until a pass merges nothing
+			new_segs <- c()  # rows are prepended, so this is built in reverse order
+			something_changed <- 0
+			x <- 2
+			last_changed <- 0
+			while (x <= nrow(cur_segs)) {  # x indexes the second row of the pair (x-1, x)
+				last_changed <- 0
+				if ( 	(cur_segs[x-1,"nB"] == cur_segs[x,"nB"]) && 
+						(cur_segs[x-1,"nA"] == cur_segs[x,"nA"]) &&
+						(cur_segs[x-1,"chromosome"] == cur_segs[x,"chromosome"])
+				) {
+					t <- cur_segs[x-1,]  # merged row keeps the remaining fields of the first segment
+					t["endBP"] <- cur_segs[x,"endBP"]
+					t["end"] <- cur_segs[x,"end"]
+					t["size"] <- t["endBP"] - t["startBP"]
+					something_changed <- 1
+					new_segs <- rbind(t, new_segs)
+					x <- x+2  # step past both merged rows
+					last_changed <- 1
+				} else {
+					new_segs <- rbind(cur_segs[x-1,], new_segs)
+					x<-x+1
+				}
+			}
+			if ( (! last_changed) || (x == nrow(cur_segs) + 1) ) {  # row x-1 is still pending; the second test keeps the final row when rows n-2 and n-1 were just merged (it used to be dropped)
+				new_segs <- rbind(cur_segs[x-1,],new_segs)
+			}
+			n <- nrow(new_segs)
+			new_segs <- new_segs[n:1,]  # undo the reverse build order
+			cur_segs <- new_segs
+		}	
+		return(invisible(cur_segs))
+	}
+
+	fix_facet_segs <- function(dat) {  # drop rows whose nB is NA, then merge adjacent equal segments
+	    i <- which(is.na(dat$nB))
+	    if ( length(i) > 0 )  {  # guard: negative indexing with an empty index would drop every row
+		dat <- dat[-i, ]
+	    }
+	    dat <- join_adjacent_segments(dat)
+	    return(invisible(dat))
+	}
+
+	chrom_arm_LST_score <- function(dat) {  # count consecutive row pairs where both sizes reach SIZE_THRESH and the gap between them is at most SPACE_THRESH; returns list(score, segs)
+		score <- 0
+		segs <- c()
+		SIZE_THRESH <- 10e6  # minimum size required of both rows in a pair (presumably base pairs - confirm)
+		SPACE_THRESH <- 3e6  # maximum startBP - endBP gap allowed between the two rows
+		if ( nrow(dat) >= 2 ) {  # a pair needs at least two rows
+			for (x in 2:nrow(dat)) {
+				if ( 	(dat[x-1,"size"] >= SIZE_THRESH) && 
+						(dat[x,"size"] >= SIZE_THRESH) &&
+						( (dat[x,"startBP"] - dat[x-1,"endBP"]) <= SPACE_THRESH)
+				) {
+					score <- score +1
+					segs <- rbind(dat[x-1,], segs)  # only the first row of each counted pair is recorded
+				}
+			}
+		}
+		tmp <- list()
+		tmp$score <- score
+		tmp$segs <- segs
+		return(invisible(tmp))
+	}
+
+	lst_filter <- function(dat, size_thresh) {  # repeatedly remove the smallest row with size below size_thresh, re-merging neighbours after every removal
+		i <- which(dat[,"size"] < size_thresh)
+		sz <- dat[i,"size"]
+		i <- i[order(sz)]  # candidates ordered so the smallest comes first
+		segs_removed <- 0
+		while (length(i) > 0) {
+			dat <- dat[-i[1], ]  # drop only the smallest candidate this round
+			dat <- join_adjacent_segments(dat)
+			i<- which(dat[,"size"] < size_thresh)  # recompute, since merging changes sizes and row positions
+			sz <- dat[i,"size"]
+			i <- i[order(sz)]	
+			segs_removed <- segs_removed + 1  # counter is updated but never read or returned
+		}
+		return(invisible(dat))
+	}
+
+	score_myriad_HRD <- function(dat, thresh=15e6) {  # count rows with nB == 0 and size >= thresh, ignoring chromosomes whose nB sums to 0; returns list(score, segs)
+		chrDel <- NULL
+		hrdSegs <- NULL
+		hrd_score <- 0
+		chrList <- unique(dat[,"chromosome"])
+		for (x in chrList) {  # first pass: collect chromosomes whose nB total is zero
+			index <- which(dat[,"chromosome"] == x)
+			totalnB <- sum(dat[index,"nB"], na.rm=TRUE)
+			if (totalnB == 0) {
+				chrDel <- c(x, chrDel)
+			}
+		}
+		for (x in seq_len(nrow(dat))) {  # seq_len gives no iterations for an empty table, where 1:nrow(dat) would visit rows 1 and 0 and fail on NA
+			if ( dat[x,"chromosome"] %in% chrDel ) {
+				next
+			}
+			if ( dat[x,"nB"] != 0 ) {
+				next
+			}
+			if (dat[x,"size"] < thresh) {
+				next
+			}
+			hrd_score <- hrd_score + 1
+			hrdSegs <- rbind(dat[x,], hrdSegs)
+		}
+		tmp <- list()
+		tmp$score = hrd_score
+		tmp$segs = hrdSegs  # NULL when nothing was counted
+		return(invisible(tmp))
+	}
+
+
+	dat = read.table(opt$file_in, sep="\t", header=TRUE, stringsAsFactor=FALSE)  # tab-separated input with a header row
+	dat = fix_facets_column_names(dat)
+	segs = fix_facet_segs(dat)
+	chromInfo = GetChrominfo()  # NOTE(review): assigned but not passed to score_myriad_HRD below
+	mrs = score_myriad_HRD(segs)  # default thresh is used
+	x = dplyr::tibble(sample_name = as.character(opt$sample_name),
+			  mrs = mrs$score)
+	readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = TRUE)  # one-row table with columns sample_name and mrs
+	
+} else if (as.numeric(opt$option)==5) {
+	
+	sample_names = unlist(strsplit(opt$sample_name, split = " ", fixed = TRUE))  # sample_name arrives as one space-separated string of names
+	x1 = list()  # per-sample tables read from genome_summary/genome_altered/
+	for (i in 1:length(sample_names)) {
+		x1[[i]] = readr::read_tsv(file = paste0("genome_summary/genome_altered/", sample_names[i], ".txt"),
+					    col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			    readr::type_convert()
+	}
+	x1 = do.call(bind_rows, x1)  # stack the per-sample rows into one table
+	
+	x2 = list()  # per-sample tables read from genome_summary/lst/
+	for (i in 1:length(sample_names)) {
+		x2[[i]] = readr::read_tsv(file = paste0("genome_summary/lst/", sample_names[i], ".txt"),
+					    col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			    readr::type_convert()
+	}
+	x2 = do.call(bind_rows, x2)
+	
+	x3 = list()  # per-sample tables read from genome_summary/ntai/
+	for (i in 1:length(sample_names)) {
+		x3[[i]] = readr::read_tsv(file = paste0("genome_summary/ntai/", sample_names[i], ".txt"),
+					    col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			    readr::type_convert()
+	}
+	x3 = do.call(bind_rows, x3)
+	
+	x4 = list()  # per-sample tables read from genome_summary/myriad_score/
+	for (i in 1:length(sample_names)) {
+		x4[[i]] = readr::read_tsv(file = paste0("genome_summary/myriad_score/", sample_names[i], ".txt"),
+					    col_names = TRUE, col_types = cols(.default = col_character())) %>%
+			    readr::type_convert()
+	}
+	x4 = do.call(bind_rows, x4)
+	
+	data = x1 %>%  # full joins keep a sample even when it is missing from one of the tables
+	       dplyr::full_join(x2, by = "sample_name") %>%
+	       dplyr::full_join(x3, by = "sample_name") %>%
+	       dplyr::full_join(x4, by = "sample_name")
+	
+	readr::write_tsv(x = data, path = as.character(opt$file_out), append = FALSE, col_names = TRUE)  # merged table, one row per sample_name
+}
diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk
index 45fbda98..35424cc8 100644
--- a/summary/genomesummary.mk
+++ b/summary/genomesummary.mk
@@ -1,25 +1,69 @@
 include modules/Makefile.inc
 
 LOGDIR ?= log/genome_summary.$(NOW)
-PHONY += genome_stats summary summary/tsv
 
-LST_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst))
-GENOME_ALTERED ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga))
-NTAI_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai))
-MYRIAD_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs))
 
-genome_summary : genome_stats/lst_score.tsv genome_stats/genome_altered.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv summary/tsv/genome_summary.tsv summary/genome_summary.xlsx
-
-genome_stats/lst_score.tsv genome_stats/genome_altered.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv summary/tsv/genome_summary.tsv : 
-	$(call RUN,-n 1 -s 4G -m 4G,"cat $(LST_SCORE) > genome_stats/lst_score.tsv && \
-				     			 cat $(GENOME_ALTERED) > genome_stats/genome_altered.tsv && \
-				     			 cat $(NTAI_SCORE) > genome_stats/ntai_score.tsv && \
-				     			 cat $(MYRIAD_SCORE) > genome_stats/myriad_score.tsv && \
-				     			 $(RSCRIPT) modules/summary/genomesummary.R")
-				     			 
-summary/genome_summary.xlsx : summary/tsv/genome_summary.tsv
-	$(call RUN,-n 1 -s 4G -m 4G,"python modules/summary/genome_summary_excel.py")
+genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) \
+		 $(foreach pair,$(SAMPLE_PAIRS),genome_summary/lst/$(pair).txt) \
+		 $(foreach pair,$(SAMPLE_PAIRS),genome_summary/ntai/$(pair).txt) \
+		 $(foreach pair,$(SAMPLE_PAIRS),genome_summary/myriad_score/$(pair).txt) \
+		 genome_summary/summary.txt # top-level goal: four per-pair tables for every sample pair plus the merged summary
+		 
+define fraction-genome-altered
+genome_summary/genome_altered/$1_$2.txt : facets/cncf/$1_$2.Rdata # rule template: runs genomesummary.R with option 1 on the pair Rdata file; arguments are the tumor and normal names
+	$$(call RUN,-n 1 -s 3G -m 6G,"set -o pipefail && \
+				      $(RSCRIPT) modules/summary/genomesummary.R \
+				      --option 1 \
+				      --sample_name $1_$2 \
+				      --file_in $$(<) \
+				      --file_out $$(@)")
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call fraction-genome-altered,$(tumor.$(pair)),$(normal.$(pair))))) # instantiate the template once per sample pair
+		
+define lst-score
+genome_summary/lst/$1_$2.txt : facets/cncf/$1_$2.txt # rule template: runs genomesummary.R with option 2 on the pair cncf table; arguments are the tumor and normal names
+	$$(call RUN,-n 1 -s 3G -m 6G,"set -o pipefail && \
+				      $(RSCRIPT) modules/summary/genomesummary.R \
+				      --option 2 \
+				      --sample_name $1_$2 \
+				      --file_in $$(<) \
+				      --file_out $$(@)")
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call lst-score,$(tumor.$(pair)),$(normal.$(pair))))) # instantiate the template once per sample pair
+		
+define ntai-score
+genome_summary/ntai/$1_$2.txt : facets/cncf/$1_$2.txt # rule template: runs genomesummary.R with option 3 (the branch that writes the ntai column); arguments are the tumor and normal names
+	$$(call RUN,-n 1 -s 3G -m 6G,"set -o pipefail && \
+				      $(RSCRIPT) modules/summary/genomesummary.R \
+				      --option 3 \
+				      --sample_name $1_$2 \
+				      --file_in $$(<) \
+				      --file_out $$(@)")
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call ntai-score,$(tumor.$(pair)),$(normal.$(pair))))) # instantiate the template once per sample pair
+		
+define myriad-score
+genome_summary/myriad_score/$1_$2.txt : facets/cncf/$1_$2.txt # rule template: runs genomesummary.R with option 4 (the branch that writes the mrs column); arguments are the tumor and normal names
+	$$(call RUN,-n 1 -s 3G -m 6G,"set -o pipefail && \
+				      $(RSCRIPT) modules/summary/genomesummary.R \
+				      --option 4 \
+				      --sample_name $1_$2 \
+				      --file_in $$(<) \
+				      --file_out $$(@)")
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call myriad-score,$(tumor.$(pair)),$(normal.$(pair))))) # instantiate the template once per sample pair
 
+genome_summary/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) $(foreach pair,$(SAMPLE_PAIRS),genome_summary/lst/$(pair).txt) $(foreach pair,$(SAMPLE_PAIRS),genome_summary/ntai/$(pair).txt) $(foreach pair,$(SAMPLE_PAIRS),genome_summary/myriad_score/$(pair).txt) # merge step: option 5 reads the four per-pair tables by name, so no file_in is passed
+	$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \
+				     $(RSCRIPT) modules/summary/genomesummary.R \
+				      --option 5 \
+				      --sample_name '$(SAMPLE_PAIRS)' \
+				      --file_out $(@)")
+							 
 .DELETE_ON_ERROR:
 .SECONDARY:
-.PHONY: $(PHONY)
+.PHONY: genome_summary
diff --git a/summary/mutationSummary.mk b/summary/mutationsummary.mk
similarity index 100%
rename from summary/mutationSummary.mk
rename to summary/mutationsummary.mk
diff --git a/summary/sufamsummary.R b/summary/sufamsummary.R
deleted file mode 100644
index ef96e778..00000000
--- a/summary/sufamsummary.R
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("openxlsx"))
-suppressPackageStartupMessages(library("readr"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--sample_sets", default = NA, type = 'character', help = "sample sets file names"))
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-sample_names = na.omit(unlist(strsplit(x=opt$sample_sets, split=" ", fixed=TRUE)))
-list_of_dfs = list()
-for (i in 1:length(sample_names)) {
-	sample_vars = read_tsv(file=paste0("sufam/", sample_names[i], ".tsv"))
-	col_names = colnames(sample_vars)
-	sample_vars = as.data.frame(sample_vars)
-	sample_vars[sample_vars=="" | sample_vars==" " | is.na(sample_vars)] = "NA"
-	colnames(sample_vars) = col_names
-	list_of_dfs[[i]] = sample_vars
-}
-names(list_of_dfs) = sample_names
-write.xlsx(list_of_dfs, file="summary/sufam_summary.xlsx")
diff --git a/sv_callers/fusioncatcher.mk b/sv_callers/fusioncatcher.mk
index 946627bb..e7e3fde7 100644
--- a/sv_callers/fusioncatcher.mk
+++ b/sv_callers/fusioncatcher.mk
@@ -6,7 +6,7 @@ LOGDIR = log/fusioncatcher.$(NOW)
 ##### MAKE INCLUDES #####
 include modules/Makefile.inc
 
-FUSIONCATCHER = $(HOME)/share/usr/fusioncatcher/bin/fusioncatcher
+FUSIONCATCHER = $(HOME)/share/usr/fusioncatcher/fusioncatcher_v0.99.2/fusioncatcher
 FUSIONCATCHER_OPTS = -d $(HOME)/share/usr/fusioncatcher/data/current --extract-buffer-size=35000000000
 
 .DELETE_ON_ERROR:
diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk
new file mode 100644
index 00000000..a1f5a470
--- /dev/null
+++ b/sv_callers/gridss_tumor_normal.mk
@@ -0,0 +1,63 @@
+include modules/Makefile.inc
+
+LOGDIR = log/gridss_tumor_normal.$(NOW)
+
+GRIDSS_CORES ?= 8
+GRIDSS_MEM_CORE ?= 6G
+GRIDSS_REF ?= $(HOME)/share/lib/ref_files/b37/human_g1k_v37.fasta
+GRIDSS_BLACKLIST ?= $(HOME)/share/lib/resource_files/gridss/example/ENCFF001TDO.bed
+GRIDSS ?= gridss
+GRIDSS_FILTER ?= gridss_somatic_filter
+GRIDSS_PON_DIR ?= $(HOME)/share/lib/resource_files/gridss/pon/
+
+gridss : $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv.vcf) \
+	 $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv_ft.vcf.bgz) \
+	 $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).gridss_sv.vcf) \
+	 $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/taskcomplete) # top-level goal: raw calls, filtered calls, exported VCF and cleanup marker for every sample pair
+
+define gridss-tumor-normal
+gridss/$1_$2/$1_$2.gridss_sv.vcf : bam/$1.bam bam/$2.bam # joint call inside the pair directory; the recipe passes the second-argument BAM first, then the first-argument BAM
+	$$(call RUN,-c -n $(GRIDSS_CORES) -s 4G -m $(GRIDSS_MEM_CORE) -v $(GRIDSS_ENV) -w 72:00:00,"set -o pipefail && \
+												    mkdir -p gridss/$1_$2 && \
+												    cd gridss/$1_$2 && \
+												    $$(GRIDSS) \
+												    -t $$(GRIDSS_CORES) \
+												    -r $$(GRIDSS_REF) \
+												    -o $1_$2.gridss_sv.vcf \
+												    -b $$(GRIDSS_BLACKLIST) \
+												    ../../bam/$2.bam \
+												    ../../bam/$1.bam")
+												    
+gridss/$1_$2/$1_$2.gridss_sv_ft.vcf.bgz : gridss/$1_$2/$1_$2.gridss_sv.vcf # filter step; the target carries a .bgz suffix although --output is given without it (presumably appended by the filter tool - confirm)
+	$$(call RUN,-c -n 1 -s 12G -m 18G -v $(GRIDSS_ENV),"set -o pipefail && \
+							    cd gridss/$1_$2 && \
+							    $$(GRIDSS_FILTER) \
+							    --pondir $$(GRIDSS_PON_DIR) \
+							    --input $1_$2.gridss_sv.vcf \
+							    --output $1_$2.gridss_sv_ft.vcf \
+							    --fulloutput $1_$2.gridss_sv_high_and_low_confidence_somatic.vcf \
+							    -n 1 \
+							    -t 2")
+
+vcf/$1_$2.gridss_sv.vcf : gridss/$1_$2/$1_$2.gridss_sv_ft.vcf.bgz # decompress the filtered calls into the vcf directory
+	$$(INIT) zcat $$(<) > $$(@)
+	
+gridss/$1_$2/taskcomplete : vcf/$1_$2.gridss_sv.vcf # cleanup marker: removes the working BAMs and indexes of this pair, then writes the marker file
+	$$(INIT) rm -f gridss/$1_$2/$1.bam.gridss.working/$1.bam.sv.bam && \
+		 rm -f gridss/$1_$2/$1.bam.gridss.working/$1.bam.sv.bam.bai && \
+		 rm -f gridss/$1_$2/$2.bam.gridss.working/$2.bam.sv.bam && \
+		 rm -f gridss/$1_$2/$2.bam.gridss.working/$2.bam.sv.bam.bai && \
+		 rm -f gridss/$1_$2/$1_$2.gridss_sv.vcf.assembly.bam.gridss.working/$1_$2.gridss_sv.vcf.assembly.bam.sv.bam && \
+		 rm -f gridss/$1_$2/$1_$2.gridss_sv.vcf.assembly.bam.gridss.working/$1_$2.gridss_sv.vcf.assembly.bam.sv.bam.bai && \
+		 echo 'complete!' > $$(@)
+
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call gridss-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) # instantiate the template once per sample pair
+
+
+..DUMMY := $(shell mkdir -p version; \
+	     echo 'gridss' > version/gridss_tumor_normal.txt)
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: gridss
diff --git a/sv_callers/manta.inc b/sv_callers/manta.inc
index 259fab2a..9def6aad 100644
--- a/sv_callers/manta.inc
+++ b/sv_callers/manta.inc
@@ -5,10 +5,10 @@ MANTA_HS_CONFIG = modules/sv_callers/manta_hs_config.py.ini
 MANTA_CONFIG = modules/sv_callers/manta_config.py.ini
 MANTA_HIGH_SENS ?= false
 CONFIG_MANTA_OPTS = --referenceFasta $(REF_FASTA) \
-					--config $(if $(findstring true,$(MANTA_HIGH_SENS)),\
-                            $(MANTA_HS_CONFIG),$(MANTA_CONFIG)) \
-                        $(if $(TARGETS_FILE),--exome) \
-                        $(if $(MANTA_REGION),--region $(MANTA_REGION))
+		    --config $(if $(findstring true,$(MANTA_HIGH_SENS)),\
+		    				    $(MANTA_HS_CONFIG),$(MANTA_CONFIG)) \
+		    $(if $(TARGETS_FILE),--exome) \
+		    $(if $(MANTA_REGION),--region $(MANTA_REGION))
 endif
 MANTA_INC = true
 
diff --git a/sv_callers/mantaTN.mk b/sv_callers/mantaTN.mk
deleted file mode 100644
index 7e5fd54f..00000000
--- a/sv_callers/mantaTN.mk
+++ /dev/null
@@ -1,39 +0,0 @@
-# run manta on tumour-normal matched pairs
-
-include modules/Makefile.inc
-include modules/sv_callers/manta.inc
-
-LOGDIR ?= log/manta.$(NOW)
-PHONY += manta manta_vcfs
-
-manta : manta_vcfs 
-
-manta_vcfs : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.eff.vcf vcf/$(pair).manta_indels.eff.vcf vcf/$(pair).manta_candidate_sv.eff.vcf)
-
-define manta-tumor-normal
-manta/$1_$2/runWorkflow.py : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai
-	$$(INIT) $$(CONFIG_MANTA) $$(CONFIG_MANTA_OPTS) --tumorBam $$< --normalBam $$(<<) --runDir $$(@D) 
-
-manta/$1_$2.manta_timestamp : manta/$1_$2/runWorkflow.py
-	$$(call RUN,-n 8 -s 2G -m 2G,"python $$< -m local -j 8 && touch $$@")
-
-manta/$1_$2/results/variants/somaticSV.vcf.gz : manta/$1_$2.manta_timestamp
-
-manta/$1_$2/results/variants/candidateSmallIndels.vcf.gz : manta/$1_$2.manta_timestamp
-
-manta/$1_$2/results/variants/candidateSV.vcf.gz : manta/$1_$2.manta_timestamp
-
-vcf/$1_$2.manta_indels.vcf : manta/$1_$2/results/variants/candidateSmallIndels.vcf.gz
-	$$(INIT) zcat $$< > $$@
-
-vcf/$1_$2.manta_sv.vcf : manta/$1_$2/results/variants/somaticSV.vcf.gz
-	$$(INIT) zcat $$< > $$@
-
-vcf/$1_$2.manta_candidate_sv.vcf : manta/$1_$2/results/variants/candidateSV.vcf.gz
-	$$(INIT) zcat $$< > $$@
-endef
-$(foreach pair,$(SAMPLE_PAIRS),$(eval $(call manta-tumor-normal,$(tumor.$(pair)),$(normal.$(pair)))))
-
-.PHONY: $(PHONY)
-
-include modules/vcf_tools/vcftools.mk
diff --git a/sv_callers/manta_tumor_normal.mk b/sv_callers/manta_tumor_normal.mk
new file mode 100644
index 00000000..1c24fa3e
--- /dev/null
+++ b/sv_callers/manta_tumor_normal.mk
@@ -0,0 +1,29 @@
+include modules/Makefile.inc
+include modules/sv_callers/manta.inc
+
+LOGDIR ?= log/manta_tumor_normal.$(NOW)
+
+manta : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.vcf) # top-level goal: one exported SV VCF per sample pair
+
+define manta-tumor-normal
+manta/$1_$2/runWorkflow.py : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai # configure step; the << variable is not a built-in automatic variable - presumably defined in an included file as the second prerequisite, confirm
+	$$(INIT) $$(CONFIG_MANTA) $$(CONFIG_MANTA_OPTS) --tumorBam $$(<) --normalBam $$(<<) --runDir $$(@D) 
+
+manta/$1_$2.manta_timestamp : manta/$1_$2/runWorkflow.py # run the generated workflow locally with 8 jobs and touch a stamp on success
+	$$(call RUN,-n 8 -s 2G -m 4G -w 72:00:00,"set -o pipefail && \
+						  python $$(<) -m local -j 8 && touch $$(@)")
+
+manta/$1_$2/results/variants/somaticSV.vcf.gz : manta/$1_$2.manta_timestamp # no recipe: presumably written by the workflow run that the stamp records - confirm
+
+vcf/$1_$2.manta_sv.vcf : manta/$1_$2/results/variants/somaticSV.vcf.gz # decompress the somatic SV calls into the vcf directory
+	$$(INIT) zcat $$(<) > $$(@)
+
+endef
+$(foreach pair,$(SAMPLE_PAIRS), \
+	$(eval $(call manta-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) # instantiate the template once per sample pair
+
+..DUMMY := $(shell mkdir -p version; \
+	     python --version > version/manta_tumor_normal.txt 2>&1) # parse-time version stamp; POSIX redirection captures stdout and stderr without relying on the bash-only &> operator
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: manta
diff --git a/sv_callers/svaba_tumor_normal.mk b/sv_callers/svaba_tumor_normal.mk
new file mode 100644
index 00000000..cbb03b34
--- /dev/null
+++ b/sv_callers/svaba_tumor_normal.mk
@@ -0,0 +1,42 @@
+include modules/Makefile.inc
+
+LOGDIR = log/svaba_tumor_normal.$(NOW)
+
+SVABA_CORES ?= 8
+SVABA_MEM_CORE ?= 6G
+SVABA_REF ?= $(REF_FASTA)
+SVABA_DBSNP ?= $(HOME)/share/lib/resource_files/svaba/dbsnp_indel.vcf
+SVABA_BLACKLIST ?= $(HOME)/share/lib/resource_files/svaba/wgs_blacklist_meres.bed
+SVABA ?= svaba
+
+svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf) # top-level goal: one exported SV VCF per sample pair
+
+define svaba-tumor-normal
+svaba/$1_$2.svaba.somatic.sv.vcf : bam/$1.bam bam/$2.bam # run step inside the svaba directory; first argument is passed as -t, second as -n, and the pair name as the -a output id
+	$$(call RUN,-c -n $(SVABA_CORES) -s 4G -m $(SVABA_MEM_CORE) -v $(SVABA_ENV) -w 72:00:00,"set -o pipefail && \
+												 mkdir -p svaba && \
+										 		 cd svaba && \
+												 $$(SVABA) run \
+												 -t ../bam/$1.bam \
+												 -n ../bam/$2.bam \
+												 -p $$(SVABA_CORES) \
+												 -D $$(SVABA_DBSNP) \
+												 -L 100000 \
+												 -x 25000 \
+												 -k $$(SVABA_BLACKLIST) \
+												 -a $1_$2 \
+												 -G $$(SVABA_REF)")
+
+vcf/$1_$2.svaba_sv.vcf : svaba/$1_$2.svaba.somatic.sv.vcf # copy the somatic SV calls into the vcf directory
+	$$(INIT) cat $$< > $$@
+
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call svaba-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) # instantiate the template once per sample pair
+
+
+..DUMMY := $(shell mkdir -p version; \
+	     $(SVABA) --help > version/svaba_tumor_normal.txt 2>&1) # parse-time help/version stamp; POSIX redirection captures stdout and stderr without relying on the bash-only &> operator
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: svaba
diff --git a/test/clonality/reportpyclone.R b/test/clonality/reportpyclone.R
deleted file mode 100644
index d9770bb5..00000000
--- a/test/clonality/reportpyclone.R
+++ /dev/null
@@ -1,148 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("readr"))
-suppressPackageStartupMessages(library("dplyr"))
-suppressPackageStartupMessages(library("magrittr"))
-suppressPackageStartupMessages(library("ggplot2"))
-
-optList = list(make_option("--sample_name", default = NULL, help = "tumor normal sample name"))
-
-parser = OptionParser(usage = "%prog [options] mutation_file", option_list = optList)
-arguments = parse_args(parser, positional_arguments = T)
-opt = arguments$options
-
-tumor_sample = unlist(strsplit(opt$sample_name, split="_", fixed=TRUE))[1]
-normal_sample = unlist(strsplit(opt$sample_name, split="_", fixed=TRUE))[2]
-
-in_file = list(
-	paste0("pyclone/", tumor_sample, "_", normal_sample, "/", tumor_sample,".tsv"),
-	paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/pyclone.tsv")
-)
-out_file = list(
-	paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/histogram_std_by_cid.pdf"),
-	paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/histogram_ccf_by_cid.pdf"),
-	paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/histogram_std_by_cn.pdf"),
-	paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/histogram_ccf_by_cn.pdf"),
-	paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/histogram_vaf_by_cn.pdf"),
-	paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/histogram_depth_by_cn.pdf"),
-	paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/scatter_vaf_depth_by_cn.pdf"),
-	paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/summary.tsv"),
-	paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/clusters.tsv")
-)
-
-mutation_summary = read_tsv(file=in_file[[1]], col_types = cols(.default = col_character())) %>%
-				   type_convert() %>%
-				   mutate(total_cn = factor(minor_cn+major_cn)) %>%
-				   mutate(DP = var_counts+ref_counts) %>%
-				   mutate(VAF = 100*var_counts/(var_counts+ref_counts))
-
-pyclone_summary = read_tsv(file=in_file[[2]], col_types = cols(.default = col_character()), col_names = c("mutation_id", "ccf", "std", "cluster_id")) %>%
-				  type_convert() %>%
-				  mutate(cluster_id = factor(cluster_id)) %>%
-				  mutate(ccf = as.numeric(ccf)) %>%
-				  mutate(std = as.numeric(std)) %>%
-				  slice(-1)
-			  
-mutation_summary = full_join(mutation_summary, pyclone_summary, by="mutation_id")
-
-plot.0 =  ggplot(mutation_summary, aes(x=std, fill=cluster_id)) +
-		  geom_histogram(alpha = .8) +
-		  theme_classic() +
-		  theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=9), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) +
-		  labs(x=expression(sigma), y="Frequency\n") +
-		  guides(fill=guide_legend(title=c("Cluster")))
-		  
-pdf(file=out_file[[1]], width=6, height=6)
-print(plot.0)
-dev.off()
-		 
-plot.0 =  ggplot(mutation_summary, aes(x=ccf, fill=cluster_id)) +
-		  geom_histogram(alpha = .8) +
-		  theme_classic() +
-		  theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=9), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) +
-		  labs(x="\nCCF\n", y="Frequency\n") +
-		  coord_cartesian(xlim=c(0,1)) +
-		  guides(fill=guide_legend(title=c("Cluster")))
-pdf(file=out_file[[2]], width=6, height=6)
-print(plot.0)
-dev.off()
-
-plot.0 =  ggplot(mutation_summary, aes(x=std, fill=total_cn)) +
-		  geom_histogram(alpha = .8) +
-		  theme_classic() +
-		  theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=8), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) +
-		  labs(x=expression(sigma), y="Frequency\n") +
-		  guides(fill=guide_legend(title=c("Copy number")))
-
-pdf(file=out_file[[3]], width=6, height=6)
-print(plot.0)
-dev.off()
-
-plot.0 =  ggplot(mutation_summary, aes(x=ccf, fill=total_cn)) +
-		  geom_histogram(alpha = .8) +
-		  theme_classic() +
-		  theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=8), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) +
-		  labs(x="\nCCF\n", y="Frequency\n") +
-		  coord_cartesian(xlim=c(0,1)) +
-		  guides(fill=guide_legend(title=c("Copy number")))
-
-pdf(file=out_file[[4]], width=6, height=6)
-print(plot.0)
-dev.off()
-
-plot.0 =  ggplot(mutation_summary, aes(x = VAF, fill=total_cn)) +
-		  geom_histogram(alpha = .8) +
-		  theme_classic() +
-		  theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=8), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) +
-		  labs(x="\nVAF(%)\n", y="Frequency\n") +
-		  coord_cartesian(xlim=c(0,100)) +
-		  guides(fill=guide_legend(title=c("Copy number")))
-		  
-pdf(file=out_file[[5]], width=6, height=6)
-print(plot.0)
-dev.off()
-
-plot.0 =  ggplot(mutation_summary, aes(x = DP, fill=total_cn)) +
-		  geom_histogram(alpha = .8) +
-		  theme_classic() +
-		  theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=8), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) +
-		  labs(x="\nDP\n", y="Frequency\n") +
-		  guides(fill=guide_legend(title=c("Copy number")))
-		  
-pdf(file=out_file[[6]], width=6, height=6)
-print(plot.0)
-dev.off()
-
-plot.0 =  ggplot(mutation_summary, aes(x = VAF, y = DP, fill=total_cn)) +
-		  geom_point(alpha=.85, size=2.5, shape=21) +
-		  theme_classic() +
-		  theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=8), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) +
-		  labs(x="\nVAF (%)\n", y="DP\n") +
-		  scale_x_log10() +
-		  annotation_logticks(side="b") +
-		  coord_cartesian(xlim=c(5,100)) +
-		  guides(fill=guide_legend(title=c("Copy number")))
-		  
-pdf(file=out_file[[7]], width=6, height=6)
-print(plot.0)
-dev.off()
-
-
-tmp = mutation_summary %>%
-	  group_by(cluster_id) %>%
-	  summarize(
-	  		n = n(),
-	  		mean_ccf = mean(ccf),
-	    	median_ccf = median(ccf),
-	    	std_ccf = sd(ccf),
-	    	min_ccf = min(ccf),
-	    	max_ccf = max(ccf),
-	    	mean_sd = mean(std),
-	    	median_sd = median(std),
-	    	std_sd = sd(std),
-	    	min_sd = min(std),
-	    	max_sd = max(std))
-	    	
-write_tsv(x=mutation_summary, path=out_file[[8]])
-write_tsv(x=tmp, path=out_file[[9]])
diff --git a/test/clonality/tsvtopyclone.R b/test/clonality/tsvtopyclone.R
deleted file mode 100644
index e46b636b..00000000
--- a/test/clonality/tsvtopyclone.R
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("readr"))
-suppressPackageStartupMessages(library("dplyr"))
-suppressPackageStartupMessages(library("magrittr"))
-suppressPackageStartupMessages(library("ggplot2"))
-
-optList = list(make_option("--sample_name", default = NULL, help = "tumor normal sample name"))
-
-parser = OptionParser(usage = "%prog [options] mutation_file", option_list = optList)
-arguments = parse_args(parser, positional_arguments = T)
-opt = arguments$options
-
-tumor_sample = unlist(strsplit(opt$sample_name, split="_", fixed=TRUE))[1]
-normal_sample = unlist(strsplit(opt$sample_name, split="_", fixed=TRUE))[2]
-
-mutation_summary = read_tsv(file="summary/tsv/mutation_summary.tsv", col_types = cols(.default = col_character())) %>%
-				   type_convert() %>%
-				   filter(TUMOR_SAMPLE==tumor_sample) %>%
- 				   filter(NORMAL_SAMPLE==normal_sample) %>%
- 				   filter(grepl("mutect", variantCaller, fixed=TRUE)) %>%
- 				   filter(NORMAL_MAF==0) %>%
- 				   filter(TUMOR_MAF>=.05) %>%
- 				   filter(TUMOR_DP<=500) %>%
- 				   filter(TUMOR_DP>=20) %>%
- 				   filter(NORMAL_DP<=500) %>%
- 				   filter(NORMAL_DP>=10) %>%
- 				   mutate(CHROM = as.numeric(ifelse(CHROM=="X", 23, CHROM))) %>%
- 				   mutate(CHROM = as.numeric(ifelse(CHROM=="Y", 24, CHROM))) %>%
- 				   filter(CHROM<=22) %>%
- 				   mutate(UUID = paste0(CHROM, ":", POS, "_", REF, "_", ALT))
-
-load(paste0("facets/cncf/", opt$sample_name, ".Rdata"))
-qt = q1 = rep(NA, nrow(mutation_summary))
-for (i in 1:nrow(mutation_summary)) {
-	x = mutation_summary$CHROM[i]
-	y = mutation_summary$POS[i]
-	indx = which(fit$cncf[,"chrom"]==x & (fit$cncf[,"start"]<=y & fit$cncf[,"end"]>=y))
-	if (length(indx)!=0) {
-		qt[i] = fit$cncf[indx,"tcn.em"]
-		q1[i] = fit$cncf[indx,"lcn.em"]
-	}
-}
-fsq = as.numeric(mutation_summary$TUMOR_MAF)
-n = as.numeric(mutation_summary$TUMOR_DP)
-mutation_id = as.character(mutation_summary$UUID)
-var_counts = round(fsq*n)
-ref_counts = round((1-fsq)*n)
-normal_cn = rep(2, nrow(mutation_summary))
-minor_cn = q1
-major_cn = qt-q1
-sample_summary = data.frame(mutation_id, ref_counts, var_counts, normal_cn, minor_cn, major_cn)
-index = apply(sample_summary, 1, function(x) {any(is.na(x))})
-sample_summary = sample_summary[!index,,drop=FALSE]
-index = sample_summary[,"major_cn"]==0
-sample_summary = sample_summary[!index,,drop=FALSE]
-write.table(sample_summary, paste0("pyclone/", opt$sample_name, "/", tumor_sample,".tsv"), sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE, append=FALSE)
-
-cat("num_iters: 100000\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = FALSE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("base_measure_params:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("  alpha: 1\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("  beta: 1\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("concentration:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("  value: 1.0\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("  prior:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("    shape: 1.0\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("    rate: 0.001\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("density: pyclone_beta_binomial\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("beta_binomial_precision_params:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("  value: 1000\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("  prior:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("    shape: 1.0\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("    rate: 0.0001\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("  proposal:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("    precision: 0.5\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat(paste0("working_dir: pyclone/",opt$sample_name, "\n"), file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("trace_dir: trace", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("init_method: connected\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("samples:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-
-cat(paste0("  ", tumor_sample, ":\n"), file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat(paste0("    mutations_file: ", tumor_sample, ".yaml\n"), file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("    tumour_content:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat(paste0("      value: ", ifelse(is.na(fit$purity), 1.0, signif(fit$purity, 2)),"\n"), file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-cat("    error_rate: 0.01", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE)
-system(paste0("source ~/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate ~/share/usr/anaconda-envs/PyClone-0.13.1 && PyClone build_mutations_file --in_file pyclone/",  opt$sample_name, "/", tumor_sample, ".tsv --out_file pyclone/", opt$sample_name, "/", tumor_sample, ".yaml  --prior parental_copy_number"))
diff --git a/test/copy_number/qdnaseqcopynumber.mk b/test/copy_number/qdnaseqcopynumber.mk
deleted file mode 100755
index 6a8a9e9c..00000000
--- a/test/copy_number/qdnaseqcopynumber.mk
+++ /dev/null
@@ -1,29 +0,0 @@
-include modules/Makefile.inc
-include modules/genome_inc/b37.inc
-
-LOGDIR ?= log/qdnaseq_copynumber.$(NOW)
-PHONY += qdnaseq qdnaseq/copynumber qdnaseq/copynumber/log2ratio qdnaseq/copynumber/segmented qdnaseq/copynumber/pcf
-
-qdnaseq_copynumber : $(foreach sample,$(SAMPLES),qdnaseq/copynumber/log2ratio/$(sample).pdf qdnaseq/copynumber/segmented/$(sample).RData qdnaseq/copynumber/pcf/$(sample).pdf)
-
-define qdnaseq-plot-log2ratio
-qdnaseq/copynumber/log2ratio/%.pdf : qdnaseq/bed/%.bed
-	$$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 10G -m 12G,"$(RSCRIPT) modules/test/copy_number/qdnaseqplot.R --sample $$(*) --type 'raw'")
-endef
- $(foreach sample,$(SAMPLES),\
-		$(eval $(call qdnaseq-plot-log2ratio,$(sample))))
-		
-define qdnaseq-segment-log2ratio
-qdnaseq/copynumber/segmented/%.RData : qdnaseq/bed/%.bed
-	$$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 12G -m 16G,"$(RSCRIPT) modules/test/copy_number/qdnaseqsegment.R --sample $$(*)")
-	
-qdnaseq/copynumber/pcf/%.pdf : qdnaseq/copynumber/segmented/%.RData
-	$$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 12G -m 16G,"$(RSCRIPT) modules/test/copy_number/qdnaseqplot.R --sample $$(*) --type 'bychromosome' --rho '$${qdnaseq_rho.$1}' --psi '$${qdnaseq_psi.$1}' --gamma '$${qdnaseq_gamma.$1}' && \
-																	 $(RSCRIPT) modules/test/copy_number/qdnaseqplot.R --sample $$(*) --type 'segmented' --rho '$${qdnaseq_rho.$1}' --psi '$${qdnaseq_psi.$1}' --gamma '$${qdnaseq_gamma.$1}'")
-
-endef
- $(foreach sample,$(SAMPLES),\
-		$(eval $(call qdnaseq-segment-log2ratio,$(sample))))
-		
-
-.PHONY: $(PHONY)
diff --git a/test/copy_number/qdnaseqextract.R b/test/copy_number/qdnaseqextract.R
deleted file mode 100755
index 696b84b8..00000000
--- a/test/copy_number/qdnaseqextract.R
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("QDNAseq"))
-suppressPackageStartupMessages(library("future"))
-
-future::plan("multiprocess")
-options(mc.cores=16L)
-
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-if (!dir.exists("qdnaseq/readcounts")) {
-	dir.create("qdnaseq/readcounts")
-}
-
-if (!dir.exists("qdnaseq/isobars")) {
-	dir.create("qdnaseq/isobars")
-}
-
-if (!dir.exists("qdnaseq/variance")) {
-	dir.create("qdnaseq/variance")
-}
-
-if (!dir.exists("qdnaseq/log2ratio")) {
-	dir.create("qdnaseq/log2ratio")
-}
-
-if (!dir.exists("qdnaseq/bed")) {
-	dir.create("qdnaseq/bed")
-}
-
-args_list = list(make_option("--sample", default = NA, type = 'character', help = "sample name"),
-				 make_option("--binsize", default = NA, type = 'character', help = "bin size"))
-				  
-parser = OptionParser(usage = "%prog", option_list = args_list)
-arguments = parse_args(parser, positional_arguments = T)
-opt = arguments$options
-
-if (is.na(as.numeric(opt$binsize))) {
-	opt$binsize = 30
-} else {
-	opt$binsize = as.numeric(opt$binsize)
-}
-
-bins = getBinAnnotations(binSize=opt$binsize, genome="hg19")
-readCounts = binReadCounts(bins=bins, bamfiles=paste0("bam/", opt$sample, ".bam"),
-						   isPaired=TRUE,
-       					   isProperPair=TRUE,
-        				   minMapq=30,
-        				   pairedEnds=TRUE,
-        				   chunkSize=TRUE)
-       
-# read counts versus genomic coordinates
-pdf(file=paste0("qdnaseq/readcounts/", opt$sample, ".pdf"), width=14, height=9)
-plot(readCounts, logTransform=TRUE, ylim=c(0, 20))
-highlightFilters(readCounts, logTransform=TRUE, residual=TRUE, blacklist=TRUE)
-dev.off()
-
-readCountsFiltered = applyFilters(readCounts, residual=TRUE, blacklist=TRUE)
-
-# %GC content versus mappability
-pdf(file=paste0("qdnaseq/isobars/", opt$sample, ".pdf"), width=7, height=7)
-isobarPlot(readCountsFiltered)
-dev.off()
-
-readCountsFiltered = estimateCorrection(readCountsFiltered)
-
-# noise (variance) versus bin coverage
-pdf(file=paste0("qdnaseq/variance/", opt$sample, ".pdf"), width=7, height=7)
-noisePlot(readCountsFiltered)
-dev.off()
-
-copyNumbers = correctBins(readCountsFiltered)
-copyNumbersNormalized = normalizeBins(copyNumbers)
-copyNumbersSmooth = smoothOutlierBins(copyNumbersNormalized)
-
-# log2 ratio versus genomic coordinates
-pdf(file=paste0("qdnaseq/log2ratio/", opt$sample, ".pdf"), width=14, height=9)
-plot(copyNumbersSmooth, ylim=c(-4,4))
-dev.off()
-
-# write log2 ratio to file
-exportBins(copyNumbersSmooth, file=paste0("qdnaseq/bed/", opt$sample, ".bed"), format="bed")
diff --git a/test/copy_number/qdnaseqextract.mk b/test/copy_number/qdnaseqextract.mk
deleted file mode 100755
index 7eab2d2c..00000000
--- a/test/copy_number/qdnaseqextract.mk
+++ /dev/null
@@ -1,22 +0,0 @@
-include modules/Makefile.inc
-include modules/genome_inc/b37.inc
-
-LOGDIR ?= log/qdnaseq_extract.$(NOW)
-PHONY += qdnaseq qdnaseq/readcounts qdnaseq/isobars qdnaseq/variance qdnaseq/log2ratio qdnaseq/bed
-
-qdnaseq_extract : $(foreach sample,$(SAMPLES),qdnaseq/readcounts/$(sample).pdf qdnaseq/isobars/$(sample).pdf qdnaseq/variance/$(sample).pdf qdnaseq/log2ratio/$(sample).pdf qdnaseq/bed/$(sample).bed)
-
-DEFAULT_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.6
-QDNASEQ_ENV = $(HOME)/share/usr/anaconda-envs/qdnaseq
-QDNASEQ_BINSIZE = 5
-
-define qdnaseq-log2ratio
-qdnaseq/readcounts/%.pdf qdnaseq/isobars/%.pdf qdnaseq/variance/%.pdf qdnaseq/log2ratio/%.pdf qdnaseq/bed/%.bed : bam/%.bam
-	$$(call RUN,-c -n 16 -s 2G -m 3G -w 7200 -v $$(DEFAULT_ENV),"source activate $$(QDNASEQ_ENV) && \
-																 $$(RSCRIPT) modules/test/copy_number/qdnaseqextract.R --sample $$(*) --binsize $(QDNASEQ_BINSIZE)")
-
-endef
- $(foreach sample,$(SAMPLES),\
-		$(eval $(call qdnaseq-log2ratio,$(sample))))
-	
-.PHONY: $(PHONY)
diff --git a/test/copy_number/qdnaseqplot.R b/test/copy_number/qdnaseqplot.R
deleted file mode 100755
index 86f8ef97..00000000
--- a/test/copy_number/qdnaseqplot.R
+++ /dev/null
@@ -1,172 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("copynumber"))
-suppressPackageStartupMessages(library("colorspace"))
-suppressPackageStartupMessages(library("ASCAT"))
-load("modules/copy_number/CytoBand.RData")
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list = list(make_option("--sample", default = NA, type = 'character', help = "tumor sample"),
-				 make_option("--type", default = NA, type = 'character', help = "type of plot"),
-				 make_option("--rho", default = NA, type = 'numeric', help = "tumor purity"),
-				 make_option("--psi", default = NA, type = 'numeric', help = "tumor ploidy"),
-				 make_option("--gamma", default = NA, type = 'numeric', help = "log2 ratio compression"))
-				  
-parser = OptionParser(usage = "%prog", option_list = args_list)
-arguments = parse_args(parser, positional_arguments = T)
-opt = arguments$options
-
-opt$rho = ifelse(is.na(as.numeric(opt$rho)), 1, as.numeric(opt$rho))
-opt$psi = ifelse(is.na(as.numeric(opt$psi)), 2, as.numeric(opt$psi))
-opt$gamma = ifelse(is.na(as.numeric(opt$gamma)), 1, as.numeric(opt$gamma))
-
-load("modules/copy_number/CytoBand.RData")
-
-'prunesegments.cn' <- function(x, n=10)
-{
-	cnm = matrix(NA, nrow=nrow(x), ncol=nrow(x))
-	for (j in 1:nrow(x)) {
-		cnm[,j] = abs(2^x[j,"Log2Ratio"] - 2^x[,"Log2Ratio"])
-	}
-	cnt = hclust(as.dist(cnm), "average")
-	cnc = cutree(tree=cnt, k=n)
-	for (j in unique(cnc)) {
-		indx = which(cnc==j)
-		if (length(indx)>2) {
-			mcl = mean(x[indx,"Log2Ratio"])
-			scl = sd(x[indx,"Log2Ratio"])
-			ind = which(x[indx,"Log2Ratio"]<(mcl+1.96*scl) & x[indx,"Log2Ratio"]>(mcl-1.96*scl))
-			x[indx[ind],"Log2Ratio"] = mean(x[indx[ind],"Log2Ratio"])
-		} else {
-			x[indx,"Log2Ratio"] = mean(x[indx,"Log2Ratio"])
-		}
-	}
-	return(invisible(x))
-}
-
-if (opt$type=="raw") {
-
-	infile = paste0("qdnaseq/bed/", opt$sample, ".bed")
-	outfile = paste0("qdnaseq/copynumber/log2ratio/", opt$sample, ".pdf")
-	data = read.table(file=infile, header=FALSE, sep="\t", skip=1, stringsAsFactors=FALSE)[,c(1,2,3,5),drop=FALSE]
-	colnames(data) = c("Chromosome", "Start", "End", "Log2Ratio")
-	pdf(file=outfile, width=10, height=4.25)
-	par(mar=c(5, 5, 4, 2)+.1)
-	end = NULL
-	for (j in 1:22) {
-		end = c(end, max(CytoBand$End[CytoBand$Chromosome==j]))
-	}
-	end = cumsum(end)
-	start = rep(0, 22)
-	start[2:22] = end[1:21]+1
-	for (j in 1:22) {
-		data[data[,"Chromosome"]==j,"Start"] = data[data[,"Chromosome"]==j,"Start"] + start[j]
-	}
-	col = rep("grey75", nrow(data))
-	plot(data[,"Start"], data[,"Log2Ratio"], type="p", pch=".", cex=1.95, col=col, axes=FALSE, frame=TRUE, xlab="", ylab="", main="", ylim=c(-4,5))
-	axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1)
-	mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25)
-	for (j in 1:22) {
-		v = start[j]
-		abline(v=v, col="goldenrod3", lty=3, lwd=1)
-	}
-	abline(v=max(data[,"Start"]), col="goldenrod3", lty=3, lwd=1)
-	abline(h=0, col="red")
-	axis(1, at = .5*(start+end), labels=c(1:22), cex.axis = 0.85, las = 1)
-    rect(xleft=1-1e10, xright=max(data[,"Start"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5)
-	title(main = opt$sample, line=-1, cex.main=.75, font.main=1)
-    box(lwd=1.5)
-	dev.off()
-	
-} else if (opt$type=="segmented") {
-
-	infile = paste0("qdnaseq/copynumber/segmented/", opt$sample, ".RData")
-	outfile = paste0("qdnaseq/copynumber/pcf/", opt$sample, ".pdf")
-	load(infile)
-	
-	segmented = prunesegments.cn(x=segmented, n=7)
-	end = NULL
-	for (j in 1:22) {
-		end = c(end, max(CytoBand$End[CytoBand$Chromosome==j]))
-	}
-	end = cumsum(end)
-	start = rep(0, 22)
-	start[2:22] = end[1:21]+1
-	for (j in 1:22) {
-		segmented[segmented[,"Chromosome"]==j,"Start"] = segmented[segmented[,"Chromosome"]==j,"Start"] + start[j]
-		segmented[segmented[,"Chromosome"]==j,"End"] = segmented[segmented[,"Chromosome"]==j,"End"] + start[j]
-		data[data[,"Chromosome"]==j,"Start"] = data[data[,"Chromosome"]==j,"Start"] + start[j]
-	}
-	col = "grey75"
-	pdf(file=outfile, width=10, height=4.25)
-	par(mar=c(5, 5, 4, 2)+.1)
-	plot(data[,"Start"], data[,"Log2Ratio"], type="p", pch=".", cex=1.95, col=col, axes=FALSE, frame=TRUE, xlab="", ylab="", main="", ylim=c(-4,5))
- 	axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1)
- 	for (j in 1:nrow(segmented)) {
- 		lines(x=c(segmented[j,"Start"], segmented[j,"End"]), y=rep(segmented[j,"Log2Ratio"],2), lty=1, lwd=2.75, col="red")
- 	} 	
- 	mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25)
-	for (j in 1:22) {
-		v = start[j]
-		abline(v=v, col="goldenrod3", lty=3, lwd=1)
-	}
-	abline(v=max(data[,"Start"]), col="goldenrod3", lty=3, lwd=1)
-	abline(h=0, col="red")
-	axis(1, at = .5*(start+end), labels=c(1:22), cex.axis = 0.85, las = 1)
-    rect(xleft=1-1e10, xright=max(data[,"Start"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5)
-	title(main = opt$sample, line=-1, cex.main=.75, font.main=1)
-	for (k in c(1,2,3,4,6,9)) {
-		abline(h=(opt$gamma*log2(((opt$rho)*k + (1-opt$rho)*2)/((opt$rho)*opt$psi + (1-opt$rho)*2))), col="brown", lty=3, cex=.5)
-		mtext(text=k, side=4, line=.5, at=(opt$gamma*log2(((opt$rho)*k + (1-opt$rho)*2)/((opt$rho)*opt$psi + (1-opt$rho)*2))), las=2, cex=.5, col="brown")
-	}
-	box(lwd=1.5)
-	dev.off()
-
-} else if (opt$type=="bychromosome") {
-
-	infile = paste0("qdnaseq/copynumber/segmented/", opt$sample, ".RData")
-	if (!dir.exists("qdnaseq/copynumber/bychr/")) {
-		dir.create("qdnaseq/copynumber/bychr/")
-	}
-	if (!dir.exists(paste0("qdnaseq/copynumber/bychr/", opt$sample, "/"))) {
-		dir.create(paste0("qdnaseq/copynumber/bychr/", opt$sample, "/"))
-	}
-	load(infile)
-	segmented = prunesegments.cn(x=segmented, n=7)
-	for (ii in 1:22) {
-		pdf(file=paste0("qdnaseq/copynumber/bychr/", opt$sample, "/", ii, ".pdf"))
-		zz = split.screen(figs=matrix(c(0,1,.15,1, 0.065,.975,0.1,.4), nrow=2, ncol=4, byrow=TRUE))
-		screen(zz[1])
-		par(mar = c(6.1, 6, 4.1, 3))
-		start = 1
-		end = max(CytoBand[CytoBand[,"Chromosome"]==ii,"End"])
-		plot(1, 1, type="n", xlim=c(start,end), ylim=c(-4,4), xlab="", ylab="", main="", frame.plot=FALSE, axes=FALSE)
-		index = data[,"Chromosome"]==ii
-		points(data[index,"Start"], data[index,"Log2Ratio"], type="p", pch=".", cex=1.15, col="grey75")
-		tmp = subset(segmented, segmented[,"Chromosome"]==ii)
-		for (i in 1:nrow(tmp)) {
-			points(c(tmp[i,"Start"], tmp[i,"End"]), rep(tmp[i,"Log2Ratio"],2), type="l", col="red", lwd=4)
-		}
-		for (i in 1:(nrow(tmp)-1)) {
-			points(c(tmp[i,"End"], tmp[i+1,"Start"]), c(tmp[i,"Log2Ratio"],tmp[i+1,"Log2Ratio"]), type="l", col="red", lwd=1)
-		}
-		abline(h=0, lwd=1)
-		axis(2, at = c(-4,-2,0,2,4), labels=c("-4","-2","0","2", "4"), cex.axis = 1.25, las = 1, lwd=1.5, lwd.ticks=1.35)
-		mtext(side = 2, text = expression("Log"[2]~"Ratio"), line = 4, cex = 1.5)
-		for (k in c(1,2,3,4,6,9)) {
-			abline(h=(opt$gamma*log2(((opt$rho)*k + (1-opt$rho)*2)/((opt$rho)*opt$psi + (1-opt$rho)*2))), col="darkorange", lty=3)
-			mtext(text=k, side=4, line=.5, at=(opt$gamma*log2(((opt$rho)*k + (1-opt$rho)*2)/((opt$rho)*opt$psi + (1-opt$rho)*2))), las=2, cex=.75, col="darkorange")
-		}
-		box(lwd=2)
-		screen(zz[2])
-		arg = copynumber:::getPlotParameters(type = "sample", nSeg = 10, cr = 3 * 3, sampleID = "dummy", plot.ideo = TRUE, xaxis = TRUE, assembly = "hg19")
-		copynumber:::plotIdeogram(chrom=ii, TRUE, cyto.data = arg$assembly, cex = .75, unit = "bp")
-		close.screen(all.screens=TRUE)
-		dev.off()
-	}
-	
-}
diff --git a/test/copy_number/qdnaseqsegment.R b/test/copy_number/qdnaseqsegment.R
deleted file mode 100644
index 459cdaf0..00000000
--- a/test/copy_number/qdnaseqsegment.R
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("copynumber"))
-suppressPackageStartupMessages(library("colorspace"))
-suppressPackageStartupMessages(library("ASCAT"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--sample", default = NA, type = 'character', help = "sample name"))
-				  
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-infile = paste0("qdnaseq/bed/", opt$sample, ".bed")
-outfile = paste0("qdnaseq/copynumber/segmented/", opt$sample, ".RData")
-data = read.table(file=infile, header=FALSE, sep="\t", skip=1, stringsAsFactors=FALSE)[,c(1,2,3,5),drop=FALSE]
-colnames(data) = c("Chromosome", "Start", "End", "Log2Ratio")
-segmented = pcf(data=winsorize(data=data[,c("Chromosome", "Start", "Log2Ratio"),drop=FALSE], method="mad", tau=2.5, k=25, verbose=FALSE), kmin = 100, gamma = 150, fast=FALSE, verbose=FALSE)[,2:7,drop=FALSE]
-colnames(segmented) = c("Chromosome", "Arm", "Start", "End", "N", "Log2Ratio")
-save(data, segmented, file=outfile)
diff --git a/test/phylogeny/bootstrapmedicc.R b/test/phylogeny/bootstrapmedicc.R
deleted file mode 100755
index 41d94e00..00000000
--- a/test/phylogeny/bootstrapmedicc.R
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--sample_set", default = NA, type = 'character', help = "sample names set"))
-				  
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-if (!dir.exists(paste0("medicc/boot/allele_specific/", opt$sample_set))) {
-	dir.create(paste0("medicc/boot/allele_specific/", opt$sample_set))
-}
-
-load(paste0("medicc/aspcf/", opt$sample_set, ".RData"))
-q1 = qt-q2
-index = !apply(q2, 1, function(x) { any(is.na(x)) }) & !apply(q1, 1, function(x) { any(is.na(x)) })
-q2 = q2[index,,drop=FALSE]
-q1 = q1[index,,drop=FALSE]
-tmp = tmp[index,,drop=FALSE]
-q2[q2>4] = 4
-q1[q1>4] = 4
-
-if (ncol(q2)<3) {
-	q1x = q1
-	colnames(q1x) = paste0(colnames(q1), "_pad00")
-	q1 = cbind(q1, q1x)
-	q2x = q2
-	colnames(q2x) = paste0(colnames(q2), "_pad00")
-	q2 = cbind(q2, q2x)
-}
-
-set.seed(0)
-for (ii in 1:100) {
-	n = nchar(ii)
-	if (n==1) {
-		n = paste0("00", ii)
-	} else if (n==2) {
-		n = paste0("0", ii)
-	} else {
-		n = ii
-	}
-	index = order(sample(x=1:nrow(tmp), size=nrow(tmp), replace=TRUE))
-	q2_b = q2[index,,drop=FALSE]
-	q1_b = q1[index,,drop=FALSE]
-	tmp_b = tmp[index,,drop=FALSE]
-	desc = cbind(paste0("chrom", unique(tmp_b[,"Chromosome"])),
-			 	 paste0("major_chr", unique(tmp_b[,"Chromosome"]), ".fasta"),
-			 	 paste0("minor_chr", unique(tmp_b[,"Chromosome"]), ".fasta"))
-	if (!dir.exists(paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n))) {
-		dir.create(paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n))
-	}
-	write.table(desc, file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/desc.txt"), sep=" ", col.names=FALSE, row.names=FALSE, quote=FALSE, append=FALSE)
-	for (i in unique(tmp[,"Chromosome"])) {
-		cat(">diploid\n", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/major_chr", i, ".fasta"), append=FALSE)
-		cat(paste0(rep(1, sum(tmp[,"Chromosome"]==i)), collapse=""), "\n", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/major_chr", i, ".fasta"), append=TRUE)
-		for (j in 1:ncol(q2_b)) {
-			cat(paste0(">", gsub("-", "_", colnames(q2_b)[j]), "\n"), file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/major_chr", i, ".fasta"), append=TRUE)
-			cat(paste0(q2_b[tmp[,"Chromosome"]==i,j], collapse=""), "\n", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/major_chr", i, ".fasta"), append=TRUE)
-		}
-		cat(">diploid\n", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/minor_chr", i, ".fasta"), append=FALSE)
-		cat(paste0(rep(1, sum(tmp[,"Chromosome"]==i)), collapse=""), "\n", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/minor_chr", i, ".fasta"), append=TRUE)
-		for (j in 1:ncol(q1_b)) {
-			cat(paste0(">", gsub("-", "_", colnames(q1_b)[j]), "\n"), file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/minor_chr", i, ".fasta"), append=TRUE)
-			cat(paste0(q1_b[tmp[,"Chromosome"]==i,j], collapse=""), "\n", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/minor_chr", i, ".fasta"), append=TRUE)
-		}
-	}
-	if (ii==100) {
-		cat("done!", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/init.timestamp"))
-	}
-}
diff --git a/test/phylogeny/combinesamples.R b/test/phylogeny/combinesamples.R
deleted file mode 100644
index 776a708b..00000000
--- a/test/phylogeny/combinesamples.R
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(
-					make_option("--sample_set", default = NA, type = 'character', help = "sample names set"),
-				  	make_option("--normal_samples", default = NA, type = 'character', help = "normal samples"),
-				  	make_option("--type", default = NA, type = 'character', help = "allele specific or total copy")
-				 )
-				  
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-all_samples = na.omit(unlist(strsplit(opt$sample_set, split="_", fixed=TRUE)))
-normal_samples = na.omit(unlist(strsplit(opt$normal_samples, split=" ", fixed=TRUE)))
-normal_samples = normal_samples[normal_samples %in% all_samples]
-tumor_samples = all_samples[!(all_samples %in% normal_samples)]
-
-if (opt$type=="allele_specific") {
-
-	CN = list()
-	for (i in 1:length(tumor_samples)) {
-		load(paste0("facets/cncf/", tumor_samples[i], "_", normal_samples, ".Rdata"))
-		CN[[i]] = out2$jointseg[,c("chrom", "maploc", "cnlr", "vafT", "het"),drop=FALSE]
-		colnames(CN[[i]]) = c("Chromosome", "Position", "Log2Ratio", "BAF", "Genotype")
-	}
-	index = lapply(CN, function(x) {paste0(x[,1], ":", x[,2])})
-	featureNames = unique(unlist(index))
-	for (i in 1:length(index)) {
-		featureNames = intersect(featureNames, index[[i]])
-	}
-	chr = as.numeric(unlist(lapply(strsplit(featureNames, ":", fixed=TRUE), function(x) { x[1] })))
-	pos = as.numeric(unlist(lapply(strsplit(featureNames, ":", fixed=TRUE), function(x) { x[2] })))
-	index = order(pos, decreasing=FALSE)
-	chr = chr[index]
-	pos = pos[index]
-	index = order(chr, decreasing=FALSE)
-	chr = chr[index]
-	pos = pos[index]
-	featureNames = paste0(chr, ":", pos)
-	for (i in 1:length(CN)) {
-		rownames(CN[[i]]) = paste0(CN[[i]][,1], ":", CN[[i]][,2])
-		CN[[i]] = CN[[i]][featureNames,,drop=FALSE]
-	}
-	Log2Ratio = do.call(cbind, lapply(CN, function(x) { return(x[,"Log2Ratio"]) } ))
-	BAF = do.call(cbind, lapply(CN, function(x) { return(x[,"BAF"]) } ))
-	Genotype = do.call(cbind, lapply(CN, function(x) { return(x[,"Genotype"]) } ))
-	annotation = data.frame(Chromosome=chr,
-							Position=pos)
-	colnames(Log2Ratio) = colnames(BAF) = tumor_samples
-	save(Log2Ratio, BAF, Genotype, annotation, file=paste0("medicc/allele_specific/mad/", opt$sample_set, ".RData"))
-	
-} else if (opt$type=="total_copy") {
-
-	CN = list()
-	for (i in 1:length(tumor_samples)) {
-		load(paste0("facets/cncf/", tumor_samples[i], "_", normal_samples, ".Rdata"))
-		CN[[i]] = out2$jointseg[,c("chrom", "maploc", "cnlr"),drop=FALSE]
-		colnames(CN[[i]]) = c("Chromosome", "Position", "Log2Ratio")
-	}
-	index = lapply(CN, function(x) {paste0(x[,1], ":", x[,2])})
-	featureNames = unique(unlist(index))
-	for (i in 1:length(index)) {
-		featureNames = intersect(featureNames, index[[i]])
-	}
-	chr = as.numeric(unlist(lapply(strsplit(featureNames, ":", fixed=TRUE), function(x) { x[1] })))
-	pos = as.numeric(unlist(lapply(strsplit(featureNames, ":", fixed=TRUE), function(x) { x[2] })))
-	index = order(pos, decreasing=FALSE)
-	chr = chr[index]
-	pos = pos[index]
-	index = order(chr, decreasing=FALSE)
-	chr = chr[index]
-	pos = pos[index]
-	featureNames = paste0(chr, ":", pos)
-	for (i in 1:length(CN)) {
-		rownames(CN[[i]]) = paste0(CN[[i]][,1], ":", CN[[i]][,2])
-		CN[[i]] = CN[[i]][featureNames,,drop=FALSE]
-	}
-	Log2Ratio = do.call(cbind, lapply(CN, function(x) { return(x[,"Log2Ratio"]) } ))
-	annotation = data.frame(Chromosome=chr,
-							Position=pos)
-	colnames(Log2Ratio) = tumor_samples
-	save(Log2Ratio, annotation, file=paste0("medicc/total_copy/mad/", opt$sample_set, ".RData"))
-	
-}
\ No newline at end of file
diff --git a/test/phylogeny/initmedicc.R b/test/phylogeny/initmedicc.R
deleted file mode 100755
index 22984e30..00000000
--- a/test/phylogeny/initmedicc.R
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(
-					make_option("--sample_set", default = NA, type = 'character', help = "sample names set"),
-					make_option("--type", default = NA, type = 'character', help = "allele specific or total copy")
-				 )
-				  
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-if (opt$type=="allele_specific") {
-
-	load(paste0("medicc/allele_specific/aspcf/", opt$sample_set, ".RData"))
-	q1 = qt-q2
-	index = !apply(q2, 1, function(x) { any(is.na(x)) }) & !apply(q1, 1, function(x) { any(is.na(x)) })
-	q2 = q2[index,,drop=FALSE]
-	q1 = q1[index,,drop=FALSE]
-	tmp = tmp[index,,drop=FALSE]
-	q2[q2>4] = 4
-	q1[q1>4] = 4
-	
-	if (ncol(q2)<3) {
-		q1x = q1
-		colnames(q1x) = paste0(colnames(q1), "_pad00")
-		q1 = cbind(q1, q1x)
-		q2x = q2
-		colnames(q2x) = paste0(colnames(q2), "_pad00")
-		q2 = cbind(q2, q2x)
-	}
-	
-	desc = cbind(paste0("chrom", unique(tmp[,"Chromosome"])),
-				 paste0("major_chr", unique(tmp[,"Chromosome"]), ".fasta"),
-				 paste0("minor_chr", unique(tmp[,"Chromosome"]), ".fasta"))
-	write.table(desc, file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/desc.txt"), sep=" ", col.names=FALSE, row.names=FALSE, quote=FALSE, append=FALSE)
-	for (i in unique(tmp[,"Chromosome"])) {
-		cat(">diploid\n", file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=FALSE)
-		cat(paste0(rep(1, sum(tmp[,"Chromosome"]==i)), collapse=""), "\n", file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=TRUE)
-		for (j in 1:ncol(q2)) {
-			cat(paste0(">", gsub("-", "_", colnames(q2)[j]), "\n"), file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=TRUE)
-			cat(paste0(q2[tmp[,"Chromosome"]==i,j], collapse=""), "\n", file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=TRUE)
-		}
-	
-	
-		cat(">diploid\n", file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=FALSE)
-		cat(paste0(rep(1, sum(tmp[,"Chromosome"]==i)), collapse=""), "\n", file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=TRUE)
-		for (j in 1:ncol(q1)) {
-			cat(paste0(">", gsub("-", "_", colnames(q1)[j]), "\n"), file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=TRUE)
-			cat(paste0(q1[tmp[,"Chromosome"]==i,j], collapse=""), "\n", file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=TRUE)
-		}
-	}
-} else if (opt$type=="total_copy") {
-	
-	load(paste0("medicc/total_copy/mpcf/", opt$sample_set, ".RData"))
-	ploidy = round(apply(((tmp[,"End"]-tmp[,"Start"])*qt)/sum(tmp[,"End"]-tmp[,"Start"]), 2, sum))
-	ploidy[ploidy>=4] = 4
-	ploidy[ploidy<=2] = 2
-	if (length(unique(ploidy))>1) {
-		index = which(ploidy==4)
-		
-		qt_4n = ceiling(apply(qt[,index,drop=FALSE], 1, mean)/2)*2
-		qt_4n[qt_4n==0 & apply(qt[,index,drop=FALSE], 1, mean)!=0] = 1
-		qt_2n = round(qt_4n/2)
-		qt_2n[qt_2n==0 & apply(qt[,index,drop=FALSE], 1, mean)!=0] = 1
-		qt = cbind(qt, diploid_ancestor=qt_2n, tetraploid_ancestor=qt_4n)
-		
-		q2_4n = ceiling(apply(q2[,index,drop=FALSE], 1, mean)/2)*2
-		q2_4n[q2_4n==0 & apply(q2[,index,drop=FALSE], 1, mean)!=0] = 1
-		q2_2n = round(q2_4n/2)
-		q2_2n[q2_2n==0 & apply(q2[,index,drop=FALSE], 1, mean)!=0] = 1	
-		q2 = cbind(q2, diploid_ancestor=q2_2n, tetraploid_ancestor=q2_4n)
-		
-	}
-	
-	q1 = qt-q2
-	index = !apply(q2, 1, function(x) { any(is.na(x)) }) & !apply(q1, 1, function(x) { any(is.na(x)) })
-	q2 = q2[index,,drop=FALSE]
-	q1 = q1[index,,drop=FALSE]
-	tmp = tmp[index,,drop=FALSE]
-	
-	q2[q2>4] = 4
-	q1[q1>4] = 4
-	
-	if (ncol(q2)<3) {
-		q1x = q1
-		colnames(q1x) = paste0(colnames(q1), "_pad00")
-		q1 = cbind(q1, q1x)
-		q2x = q2
-		colnames(q2x) = paste0(colnames(q2), "_pad00")
-		q2 = cbind(q2, q2x)
-	}
-	
-	desc = cbind(paste0("chrom", unique(tmp[,"Chromosome"])),
-				 paste0("major_chr", unique(tmp[,"Chromosome"]), ".fasta"),
-				 paste0("minor_chr", unique(tmp[,"Chromosome"]), ".fasta"))
-	write.table(desc, file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/desc.txt"), sep=" ", col.names=FALSE, row.names=FALSE, quote=FALSE, append=FALSE)
-	for (i in unique(tmp[,"Chromosome"])) {
-		cat(">diploid\n", file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=FALSE)
-		cat(paste0(rep(1, sum(tmp[,"Chromosome"]==i)), collapse=""), "\n", file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=TRUE)
-		for (j in 1:ncol(q2)) {
-			cat(paste0(">", gsub("-", "_", colnames(q2)[j]), "\n"), file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=TRUE)
-			cat(paste0(q2[tmp[,"Chromosome"]==i,j], collapse=""), "\n", file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=TRUE)
-		}
-	
-	
-		cat(">diploid\n", file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=FALSE)
-		cat(paste0(rep(1, sum(tmp[,"Chromosome"]==i)), collapse=""), "\n", file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=TRUE)
-		for (j in 1:ncol(q1)) {
-			cat(paste0(">", gsub("-", "_", colnames(q1)[j]), "\n"), file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=TRUE)
-			cat(paste0(q1[tmp[,"Chromosome"]==i,j], collapse=""), "\n", file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=TRUE)
-		}
-	}
-}
diff --git a/test/phylogeny/plotmedicc.R b/test/phylogeny/plotmedicc.R
deleted file mode 100755
index 417f4b98..00000000
--- a/test/phylogeny/plotmedicc.R
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("ape"))
-suppressPackageStartupMessages(library("foreach"))
-suppressPackageStartupMessages(library("parallel"))
-suppressPackageStartupMessages(library("doMC"))
-suppressPackageStartupMessages(library("stringr"))
-suppressPackageStartupMessages(library("phytools"))
-
-registerDoMC(12)
-
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(
-					make_option("--sample_set", default = NA, type = 'character', help = "sample names set"),
-					make_option("--type", default = NA, type = 'character', help = "allele specific or total copy")
-				  )
-
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-if (opt$type=="allele_specific") {
-
-	phylo_tree = read.tree(file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/tree_final.new"))
-	tip_labels = phylo_tree$tip.label
-	index = grep("pad00", tip_labels)
-	if (length(index)!=0) {
-		phylo_tree = drop.tip(phy=phylo_tree, tip=tip_labels[index], trim.internal=TRUE, rooted=FALSE)
-	}
-	phylo_tree = root(phylo_tree, outgroup="diploid")
-
-	pdf(file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/tree_final.pdf"), height=7, width=7)
-	plotTree(tree=phylo_tree, color="#8CC63F", lwd=3, offset=1)
-	edgelabels(text=paste0(phylo_tree$edge.length, " "), cex=.75)
-	dev.off()
-	
-} else if (opt$type=="total_copy") {
-	
-	phylo_tree = read.tree(file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/tree_final.new"))
-	tip_labels = phylo_tree$tip.label
-	index = grep("pad00", tip_labels)
-	if (length(index)!=0) {
-		phylo_tree = drop.tip(phy=phylo_tree, tip=tip_labels[index], trim.internal=TRUE, rooted=FALSE)
-	}
-	phylo_tree = root(phylo_tree, outgroup="diploid")
-
-	pdf(file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/tree_final.pdf"), height=7, width=7)
-	plotTree(tree=phylo_tree, color="#8CC63F", lwd=3, offset=1)
-	edgelabels(text=paste0(phylo_tree$edge.length, " "), cex=.75)
-	dev.off()
-	
-}
diff --git a/test/phylogeny/plotratchet.R b/test/phylogeny/plotratchet.R
deleted file mode 100755
index ac34dc2c..00000000
--- a/test/phylogeny/plotratchet.R
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("ape"))
-suppressPackageStartupMessages(library("foreach"))
-suppressPackageStartupMessages(library("parallel"))
-suppressPackageStartupMessages(library("doMC"))
-suppressPackageStartupMessages(library("stringr"))
-suppressPackageStartupMessages(library("phytools"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(
-					make_option("--sample_set", default = NA, type = 'character', help = "sample names set")
-				  )
-
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-load(paste0("pratchet/", opt$sample_set, "/tree_final.RData"))
-
-pdf(file=paste0("pratchet/", opt$sample_set, "/tree_final.pdf"), height=7, width=7)
-plot.phylo(x=phy_tree_w_bl, edge.color="#8CC63F", edge.width=3, label.offset=1)
-nodelabels(node=1:phy_tree_w_bl$Nnode+Ntip(phy_tree_w_bl),
-		   pie = cbind(as.numeric(phy_tree_w_bl$node.label),100-as.numeric(phy_tree_w_bl$node.label)),
-		   piecol = c("goldenrod3","grey85"),
-		   cex = 1)
-edgelabels(text=paste0(phy_tree_w_bl$edge.length, " "), cex=.75)
-dev.off()
diff --git a/test/phylogeny/pratchet.R b/test/phylogeny/pratchet.R
deleted file mode 100755
index 46a6af76..00000000
--- a/test/phylogeny/pratchet.R
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("ape"))
-suppressPackageStartupMessages(library("phangorn"))
-suppressPackageStartupMessages(library("readr"))
-suppressPackageStartupMessages(library("dplyr"))
-suppressPackageStartupMessages(library("magrittr"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(
-					make_option("--sample_set", default = NA, type = 'character', help = "sample names set"),
-					make_option("--normal_samples", default = NA, type = 'character', help = "normal samples")
-				 )
-
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-all_samples = na.omit(unlist(strsplit(opt$sample_set, split="_", fixed=TRUE)))
-normal_samples = na.omit(unlist(strsplit(opt$normal_samples, split=" ", fixed=TRUE)))
-normal_samples = normal_samples[normal_samples %in% all_samples]
-tumor_samples = all_samples[!(all_samples %in% normal_samples)]
-
-mutation_summary = read_tsv(file=paste0("sufam/", opt$sample_set, ".tsv"), col_types = cols(.default = col_character()))  %>%
- 				   type_convert()
-
-mutation_binary = as.data.frame(mutation_summary[,paste0("CALL_", c(tumor_samples, normal_samples)),drop=FALSE])
-colnames(mutation_binary) = gsub("CALL_", "", colnames(mutation_binary))
-
-phy_data = as.phyDat(mutation_binary, type="USER", levels=c(0,1))
-phy_tree = pratchet(data=phy_data)
-phy_tree_w_bl = acctran(tree=phy_tree, data=phy_data)
-phy_tree_w_bl = root(phy_tree_w_bl, outgroup=normal_samples)
-
-'bootstrap_data' <- function(x, N=100)
-{
-	y = list()
-	for (i in 1:N) {
-		index = sample(1:nrow(x), size=nrow(x), replace=TRUE)
-		y[[i]] = x[index,,drop=FALSE]
-	}
-	return(y)
-}
-
-
-phy_tree_w_bl_boot = list()
-mutation_binary_boot = bootstrap_data(x=mutation_binary)
-for (i in 1:length(mutation_binary_boot)) {
-	phy_data = as.phyDat(mutation_binary_boot[[i]], type="USER", levels=c(0,1))
-	phy_tree = pratchet(data=phy_data)
-	phy_tree_w_bl_boot[[i]] = acctran(tree=phy_tree, data=phy_data)
-	phy_tree_w_bl_boot[[i]] = root(phy_tree_w_bl_boot[[i]], outgroup=normal_samples)
-}
-
-class(phy_tree_w_bl) = "phylo"
-class(phy_tree_w_bl_boot) = "multiPhylo"
-node_labels = prop.clades(phy_tree_w_bl, phy_tree_w_bl_boot, rooted=TRUE)
-phy_tree_w_bl$node.label = node_labels
-save(list=ls(all=TRUE), file=paste0("pratchet/", opt$sample_set, "/tree_final.RData"))
diff --git a/test/phylogeny/segmentsamples.R b/test/phylogeny/segmentsamples.R
deleted file mode 100755
index 253f6bd0..00000000
--- a/test/phylogeny/segmentsamples.R
+++ /dev/null
@@ -1,220 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("copynumber"))
-suppressPackageStartupMessages(library("colorspace"))
-suppressPackageStartupMessages(library("ASCAT"))
-
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(
-					make_option("--sample_set", default = NA, type = 'character', help = "sample names set"),
-					make_option("--normal_samples", default = NA, type = 'character', help = "normal samples"),
-					make_option("--gamma", default = NA, type = 'character', help = "segmentation parameter gamma"),
-					make_option("--nlog2", default = NA, type = 'character', help = "number of clusters in Log2 ratio"),
-					make_option("--nbaf", default = NA, type = 'character', help = "number of clusters in BAF"),
-					make_option("--type", default = NA, type = 'character', help = "allele specific or total copy")
-				 )
-				  
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-all_samples = na.omit(unlist(strsplit(opt$sample_set, split="_", fixed=TRUE)))
-normal_samples = na.omit(unlist(strsplit(opt$normal_samples, split=" ", fixed=TRUE)))
-normal_samples = normal_samples[normal_samples %in% all_samples]
-tumor_samples = all_samples[!(all_samples %in% normal_samples)]
-
-if (opt$type=="allele_specific") {
-
-	load(paste0("medicc/allele_specific/mad/", opt$sample_set, ".RData"))
-	gamma = ifelse(is.na(as.numeric(opt$gamma)), 50, as.numeric(opt$gamma))
-	nlog2 = ifelse(is.na(as.numeric(opt$nlog2)), 10, as.numeric(opt$nlog2))
-	nbaf = ifelse(is.na(as.numeric(opt$nbaf)), 15, as.numeric(opt$nbaf))
-	index = apply(Genotype, 1, function(x) {sum(x==1)==length(x)})
-	Log2Ratio = Log2Ratio[index,,drop=FALSE]
-	BAF = BAF[index,,drop=FALSE]
-	annotation = annotation[index,,drop=FALSE]
-	colnames(Log2Ratio) = paste0("Log2Ratio_", colnames(Log2Ratio))
-	colnames(BAF) = paste0("BAF_", colnames(BAF))
-	index = BAF>.5
-	BAF[index] = 1 - BAF[index]
-	CN_and_BAF = cbind(annotation, Log2Ratio, BAF)
-	tmp = NULL
-	for (i in 1:23) {
-		cn_and_baf = subset(CN_and_BAF, CN_and_BAF[,"Chromosome"]==i)
-		x = try(multipcf(data=winsorize(data=cn_and_baf, method="mad", tau=2.5, k=15, verbose=FALSE), gamma=gamma, normalize=FALSE, fast=FALSE, verbose=FALSE), silent=TRUE)
-		if (!("try-error" %in% is(x))) {
-			colnames(x)[1:5] = c("Chromosome", "Arm", "Start", "End", "N")
-			tmp = rbind(tmp, x)
-		}
-	}
-	CN_and_BAF = subset(CN_and_BAF, CN_and_BAF[,"Chromosome"] %in% tmp[,"Chromosome"])
-	qt = q2 = matrix(NA, nrow=nrow(tmp), ncol=length(tumor_samples))
-	colnames(qt) = colnames(q2) = tumor_samples
-	for (i in 1:length(tumor_samples)) {
-		ascat = new.env()
-		load(paste0("ascat/ascat/", tumor_samples[i], "_", normal_samples, ".RData"), envir=ascat)
-
-		'prunesegments.cn' <- function(x, n=10)
-		{
-			cnm = matrix(NA, nrow=length(x), ncol=length(x))
-			for (j in 1:length(x)) {
-				cnm[,j] = abs(2^x[j] - 2^x)
-			}
-			cnt = hclust(as.dist(cnm), "average")
-			cnc = cutree(tree=cnt, k=n)
-			for (j in unique(cnc)) {
-				indx = which(cnc==j)
-				if (length(indx)>2) {
-					mcl = mean(x[indx])
-					scl = sd(x[indx])
-					ind = which(x[indx]<(mcl+1.96*scl) & x[indx]>(mcl-1.96*scl))
-					x[indx[ind]] = mean(x[indx[ind]])
-				} else {
-					x[indx] = mean(x[indx])
-				}
-			}
-			return(x)
-		}
-
-		'prunesegments.baf' <- function(x, n=10)
-		{
-			cnm = matrix(NA, nrow=length(x), ncol=length(x))
-			for (j in 1:length(x)) {
-				cnm[,j] = abs(2^x[j] - 2^x)
-			}
-			cnt = hclust(as.dist(cnm), "average")
-			cnc = cutree(tree=cnt, k=n)
-			for (j in unique(cnc)) {
-				indx = which(cnc==j)
-				if (length(indx)>2) {
-					mcl = mean(x[indx])
-					scl = sd(x[indx])
-					ind = which(x[indx]<(mcl+1.96*scl) & x[indx]>(mcl-1.96*scl))
-					x[indx[ind]] = mean(x[indx[ind]])
-				} else {
-					x[indx] = mean(x[indx])
-				}
-			}
-			return(x)
-		}
-		tmp[,paste0("Log2Ratio_", tumor_samples[i])] = prunesegments.cn(x=tmp[,paste0("Log2Ratio_", tumor_samples[i])], n=nlog2)
-		tmp[,paste0("BAF_", tumor_samples[i])] = prunesegments.baf(x=tmp[,paste0("BAF_", tumor_samples[i])], n=nbaf)
-	
-		Tumor_LogR = as.numeric(CN_and_BAF[,paste0("Log2Ratio_", tumor_samples[i])])
-		Tumor_BAF = as.numeric(CN_and_BAF[,paste0("BAF_", tumor_samples[i])])
-		Tumor_LogR_segmented = rep(tmp[,paste0("Log2Ratio_", tumor_samples[i])], times=tmp[,"N"])
-		Tumor_BAF_segmented = rep(tmp[,paste0("BAF_", tumor_samples[i])], times=tmp[,"N"])
-		SNPpos = CN_and_BAF[,c("Chromosome", "Position"), drop=FALSE]
-		names(Tumor_LogR) = names(Tumor_BAF) = names(Tumor_LogR_segmented) = names(Tumor_BAF_segmented) = rownames(SNPpos) = paste0("chr", CN_and_BAF[,"Chromosome"], ":", CN_and_BAF[,"Position"])
-		colnames(SNPpos) = c("chrs", "pos")
-		ch = list()
-		j = 1
-		for (j in 1:length(unique(CN_and_BAF[,"Chromosome"]))) {
-			index = which(CN_and_BAF[,"Chromosome"]==(unique(CN_and_BAF[,"Chromosome"]))[j])
-			ch[[j]] = index
-			j = j + 1
-		}
-		chr = ch
-		chrs = unique(CN_and_BAF[,"Chromosome"])
-		gender = "2323"
-		sexchromosomes = c(23, 24)
-		tmp2 = list(Tumor_LogR=Tumor_LogR,
-					Tumor_BAF=Tumor_BAF,
-					Tumor_LogR_segmented=Tumor_LogR_segmented,
-					Tumor_BAF_segmented=Tumor_BAF_segmented,
-					SNPpos=SNPpos,
-					chromosomes=ch,
-					chrnames=chrs,
-					gender=gender,
-					sexchromosomes=sexchromosomes)
-	
-		tmp3 = try(runASCAT(lrr=tmp2$Tumor_LogR,
-							baf=tmp2$Tumor_BAF,
-							lrrsegmented=tmp2$Tumor_LogR_segmented,
-							bafsegmented=tmp2$Tumor_BAF_segmented,
-							gender=tmp2$gender,
-							SNPpos=tmp2$SNPpos,
-							chromosomes=tmp2$chromosomes,
-							chrnames=tmp2$chrnames,
-							sexchromosomes=tmp2$sexchromosomes,
-							failedqualitycheck=FALSE,
-							distance = paste0("medicc/allele_specific/ascat/", tumor_samples[i], "_", normal_samples, ".pdf"),
-							copynumberprofile = NULL,
-							nonroundedprofile = NULL, 
-							aberrationreliability = NULL,
-							gamma = 1, rho_manual = ascat$tmp3$rho, psi_manual = ascat$tmp3$psi, y_limit = 3, circos = NA))
-						
-		if (!("try-error" %in% is(tmp3))) {
-			chr = SNPpos[tmp3$seg_raw[,1],1]
-			pos = SNPpos[tmp3$seg_raw[,1],2]
-			qt[tmp[,1] %in% chr & tmp[,3] %in% pos,tumor_samples[i]] = tmp3$seg_raw[,"nA"] + tmp3$seg_raw[,"nB"]
-			q2[tmp[,1] %in% chr & tmp[,3] %in% pos,tumor_samples[i]] = apply(tmp3$seg_raw[,c("nA", "nB"),drop=FALSE], 1, max, na.rm=TRUE)
-		}
-	}
-	save(list=ls(all=TRUE), file=paste0("medicc/allele_specific/aspcf/", opt$sample_set, ".RData"))
-	
-} else if (opt$type=="total_copy") {
-
-	load(paste0("medicc/total_copy/mad/", opt$sample_set, ".RData"))
-	gamma = ifelse(is.na(as.numeric(opt$gamma)), 150, as.numeric(opt$gamma))
-	nlog2 = ifelse(is.na(as.numeric(opt$nlog2)), 10, as.numeric(opt$nlog2))
-	colnames(Log2Ratio) = paste0("Log2Ratio_", colnames(Log2Ratio))
-	CN_and_BAF = cbind(annotation, Log2Ratio)
-	tmp = NULL
-	for (i in 1:23) {
-		cn_and_baf = subset(CN_and_BAF, CN_and_BAF[,"Chromosome"]==i)
-		x = try(multipcf(data=winsorize(data=cn_and_baf, method="mad", tau=2.5, k=15, verbose=FALSE), gamma=gamma, normalize=FALSE, fast=FALSE, verbose=FALSE), silent=TRUE)
-		if (!("try-error" %in% is(x))) {
-			colnames(x)[1:5] = c("Chromosome", "Arm", "Start", "End", "N")
-			tmp = rbind(tmp, x)
-		}
-	}
-	CN_and_BAF = subset(CN_and_BAF, CN_and_BAF[,"Chromosome"] %in% tmp[,"Chromosome"])
-	qt = q2 = matrix(NA, nrow=nrow(tmp), ncol=length(tumor_samples))
-	colnames(qt) = colnames(q2) = tumor_samples
-	for (i in 1:length(tumor_samples)) {
-		ascat = new.env()
-		load(paste0("ascat/ascat/", tumor_samples[i], "_", normal_samples, ".RData"), envir=ascat)
-
-		'prunesegments.cn' <- function(x, n=10)
-		{
-			cnm = matrix(NA, nrow=length(x), ncol=length(x))
-			for (j in 1:length(x)) {
-				cnm[,j] = abs(2^x[j] - 2^x)
-			}
-			cnt = hclust(as.dist(cnm), "average")
-			cnc = cutree(tree=cnt, k=n)
-			for (j in unique(cnc)) {
-				indx = which(cnc==j)
-				if (length(indx)>2) {
-					mcl = mean(x[indx])
-					scl = sd(x[indx])
-					ind = which(x[indx]<(mcl+1.96*scl) & x[indx]>(mcl-1.96*scl))
-					x[indx[ind]] = mean(x[indx[ind]])
-				} else {
-					x[indx] = mean(x[indx])
-				}
-			}
-			return(x)
-		}
-		
-		'absolute.cn' <- function(rho, psi, gamma=1, x)
-		{
-			rho = ifelse(is.na(rho), 1, rho)
-			psi = ifelse(is.na(psi), 2, psi)
-			return(invisible(((((2^(x/gamma))*(rho*psi+(1-rho)*2)) - ((1-rho)*2))/rho)))
-		}
-
-		tmp[,paste0("Log2Ratio_", tumor_samples[i])] = prunesegments.cn(x=tmp[,paste0("Log2Ratio_", tumor_samples[i])], n=nlog2)
-		purity = ifelse(is.na(ascat$tmp3$rho), 1, ascat$tmp3$rho)
-		ploidy = ifelse(is.na(ascat$tmp3$psi), 1, ascat$tmp3$psi)
-		qt[,tumor_samples[i]] = ifelse(round(absolute.cn(rho=purity, psi=ploidy, x=tmp[,paste0("Log2Ratio_", tumor_samples[i])]))<0, 0, round(absolute.cn(rho=purity, psi=ploidy, x=tmp[,paste0("Log2Ratio_", tumor_samples[i])])))
-		q2[,tumor_samples[i]] = ceiling(qt[,tumor_samples[i]]/2)
-	}
-	save(list=ls(all=TRUE), file=paste0("medicc/total_copy/mpcf/", opt$sample_set, ".RData"))
-}
diff --git a/test/workflows/cnvkit.mk b/test/workflows/cnvkit.mk
deleted file mode 100644
index 042067fb..00000000
--- a/test/workflows/cnvkit.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-include modules/Makefile.inc
-include modules/genome_inc/b37.inc
-
-LOGDIR ?= log/cnvkit.$(NOW)
-PHONY += cnvkit cnvkit/cnn cnvkit/cnn/tumor cnvkit/cnn/normal cnvkit/reference cnvkit/cnr cnvkit/log2 cnvkit/segmented cnvkit/called cnvkit/summary
-
-CNV_KIT_WORKFLOW += cnvkit_coverage
-CNV_KIT_WORKFLOW += cnvkit_reference
-CNV_KIT_WORKFLOW += cnvkit_fix
-CNV_KIT_WORKFLOW += cnvkit_plot
-CNV_KIT_WORKFLOW += cnvkit_segment
-CNV_KIT_WORKFLOW += cnvkit_summary
-
-cnv_kit_workflow : $(CNV_KIT_WORKFLOW)
-
-include modules/copy_number/cnvkitcoverage.mk
-include modules/copy_number/cnvkitreference.mk
-include modules/copy_number/cnvkitfix.mk
-include modules/copy_number/cnvkitplot.mk
-include modules/copy_number/cnvkitsegment.mk
-include modules/copy_number/cnvkitsummary.mk
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/test/workflows/copynumber_summary.mk b/test/workflows/copynumber_summary.mk
deleted file mode 100644
index ff3ed56b..00000000
--- a/test/workflows/copynumber_summary.mk
+++ /dev/null
@@ -1,23 +0,0 @@
-include modules/Makefile.inc
-include modules/config.inc
-
-LOGDIR = log/copynumber_summary.$(NOW)
-PHONY += genome_stats summary summary/tsv
-
-CN_SUMMARY_WORKFLOW += genome_altered
-CN_SUMMARY_WORKFLOW += lst_score
-CN_SUMMARY_WORKFLOW += ntai_score
-CN_SUMMARY_WORKFLOW += myriad_score
-CN_SUMMARY_WORKFLOW += genome_summary
-
-cn_summary_workflow : $(CN_SUMMARY_WORKFLOW)
-
-include modules/copy_number/genomealtered.mk
-include modules/copy_number/lstscore.mk
-include modules/copy_number/ntaiscore.mk
-include modules/copy_number/myriadhrdscore.mk
-include modules/summary/genomesummary.mk
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/test/workflows/cravat_annotation.mk b/test/workflows/cravat_annotation.mk
deleted file mode 100644
index d83dfcf4..00000000
--- a/test/workflows/cravat_annotation.mk
+++ /dev/null
@@ -1,21 +0,0 @@
-include modules/Makefile.inc
-include modules/config.inc
-
-LOGDIR = log/cravat_annotation.$(NOW)
-PHONY += gatk cravat summary summary/tsv
-
-ANNOTATION_WORKFLOW += gatk_vcfs
-ANNOTATION_WORKFLOW += cravat_annotate
-ANNOTATION_WORKFLOW += cravat_summary
-
-cravat_annotation_workflow : $(ANNOTATION_WORKFLOW)
-
-include modules/variant_callers/gatk.mk
-include modules/vcf_tools/cravat_annotation.mk
-include modules/summary/cravat_summary.mk
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
-
-
diff --git a/test/workflows/fetchimpact.mk b/test/workflows/fetchimpact.mk
deleted file mode 100644
index 4e03d3e1..00000000
--- a/test/workflows/fetchimpact.mk
+++ /dev/null
@@ -1,16 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/fetchimpact.$(NOW)
-PHONY += unprocessed_bam
-
-fetch_impact : $(foreach sample,$(SAMPLES),unprocessed_bam/$(sample).bam)
-
-define fetch-impact
-unprocessed_bam/%.bam :
-	$$(call RUN,-c -s 4G -m 12G,"scp luna.mskcc.org:/ifs/dmpshare/share/irb12_245/$$(*).bam unprocessed_bam/$$(*).bam")
-	
-endef
- $(foreach sample,$(SAMPLES),\
-		$(eval $(call fetch-impact,$(sample))))
-
-.PHONY : $(PHONY)
diff --git a/test/workflows/medicc.mk b/test/workflows/medicc.mk
deleted file mode 100644
index f2c4ad37..00000000
--- a/test/workflows/medicc.mk
+++ /dev/null
@@ -1,79 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/medicc.$(NOW)
-
-ALLELE_SPECIFIC_COPY ?= false
-
-ifeq ($(ALLELE_SPECIFIC_COPY),true)
-
-PHONY += medicc medicc/allele_specific medicc/allele_specific/mad medicc/allele_specific/ascat medicc/allele_specific/aspcf medicc/allele_specific/medicc
-
-medicc : $(foreach set,$(SAMPLE_SETS),medicc/allele_specific/medicc/$(set)/tree_final.new) $(foreach set,$(SAMPLE_SETS),medicc/allele_specific/medicc/$(set)/tree_final.pdf)
-
-define allele-specific-medicc
-medicc/allele_specific/mad/%.RData : $(wildcard $(foreach pair,$(SAMPLE_PAIRS),facets/cncf/$(pair).Rdata))
-	$$(call RUN,-c -s 8G -m 12G -v $(ASCAT_ENV),"mkdir -p medicc/allele_specific && \
-												 mkdir -p medicc/allele_specific/mad && \
-												 $(RSCRIPT) modules/test/phylogeny/combinesamples.R --sample_set $$* --normal_samples '$(NORMAL_SAMPLES)' --type allele_specific")
-
-medicc/allele_specific/aspcf/%.RData : medicc/allele_specific/mad/%.RData
-	$$(call RUN,-c -s 8G -m 12G -v $(ASCAT_ENV),"mkdir -p medicc/allele_specific/ascat && \
-												 mkdir -p medicc/allele_specific/aspcf && \
-												 $(RSCRIPT) modules/test/phylogeny/segmentsamples.R --sample_set $$* --normal_samples '$(NORMAL_SAMPLES)' --gamma '$${mpcf_gamma}' --nlog2 '$${mpcf_nlog2}' --nbaf '$${mpcf_nbaf}'  --type allele_specific")
-
-medicc/allele_specific/medicc/%/desc.txt : medicc/allele_specific/aspcf/%.RData
-	$$(call RUN,-c -s 8G -m 12G -v $(ASCAT_ENV),"mkdir -p medicc/allele_specific/medicc && \
-												 mkdir -p medicc/allele_specific/medicc/$$* && \
-												 $(RSCRIPT) modules/test/phylogeny/initmedicc.R --sample_set $$* --type allele_specific")
-
-medicc/allele_specific/medicc/%/tree_final.new : medicc/allele_specific/medicc/%/desc.txt
-	$$(call RUN,-c -s 8G -m 12G -v $(MEDICC_ENV),"source $(MEDICC_VAR) && \
-												  $(MEDICC_BIN)/medicc.py medicc/allele_specific/medicc/$$*/desc.txt medicc/allele_specific/medicc/$$* -v")
-												  
-medicc/allele_specific/medicc/%/tree_final.pdf : medicc/allele_specific/medicc/%/tree_final.new
-	$$(call RUN,-c -n 12 -s 1G -m 2G -v $(PHYLO_ENV),"$(RSCRIPT) modules/test/phylogeny/plotmedicc.R --sample_set $$(*) --type allele_specific")
-
-endef
-$(foreach set,$(SAMPLE_SETS),\
-		$(eval $(call allele-specific-medicc,$(set))))
-		
-else
-
-PHONY += medicc medicc/total_copy medicc/total_copy/mad medicc/total_copy/mpcf medicc/total_copy/medicc
-
-medicc : $(foreach set,$(SAMPLE_SETS),medicc/total_copy/medicc/$(set)/tree_final.new) $(foreach set,$(SAMPLE_SETS),medicc/total_copy/medicc/$(set)/tree_final.pdf)
-
-define total-copy-medicc
-medicc/total_copy/mad/%.RData : $(wildcard $(foreach pair,$(SAMPLE_PAIRS),facets/cncf/$(pair).Rdata))
-	$$(call RUN,-c -s 8G -m 12G -v $(ASCAT_ENV),"mkdir -p medicc/total_copy && \
-												 mkdir -p medicc/total_copy/mad && \
-												 $(RSCRIPT) modules/test/phylogeny/combinesamples.R --sample_set $$* --normal_samples '$(NORMAL_SAMPLES)' --type total_copy")
-												 
-medicc/total_copy/mpcf/%.RData : medicc/total_copy/mad/%.RData
-	$$(call RUN,-c -s 8G -m 12G -v $(ASCAT_ENV),"mkdir -p medicc/total_copy/mpcf && \
-												 $(RSCRIPT) modules/test/phylogeny/segmentsamples.R --sample_set $$* --normal_samples '$(NORMAL_SAMPLES)' --gamma '$${mpcf_gamma}' --nlog2 '$${mpcf_nlog2}' --type total_copy")
-												 
-medicc/total_copy/medicc/%/desc.txt : medicc/total_copy/mpcf/%.RData
-	$$(call RUN,-c -s 8G -m 12G -v $(ASCAT_ENV),"mkdir -p medicc/total_copy/medicc && \
-												 mkdir -p medicc/total_copy/medicc/$$* && \
-												 $(RSCRIPT) modules/test/phylogeny/initmedicc.R --sample_set $$* --type total_copy")
-												 
-medicc/total_copy/medicc/%/tree_final.new : medicc/total_copy/medicc/%/desc.txt
-	$$(call RUN,-c -s 8G -m 12G -v $(MEDICC_ENV),"source $(MEDICC_VAR) && \
-												  $(MEDICC_BIN)/medicc.py medicc/total_copy/medicc/$$*/desc.txt medicc/total_copy/medicc/$$* -t -v && \
-												  cp medicc/total_copy/medicc/$$*/tree_fitch_nc.xml medicc/total_copy/medicc/$$*/tree_final.xml && \
-												  cp medicc/total_copy/medicc/$$*/tree_fitch_nc.graph medicc/total_copy/medicc/$$*/tree_final.graph && \
-												  cp medicc/total_copy/medicc/$$*/tree_fitch_nc.new medicc/total_copy/medicc/$$*/tree_final.new")
-												  
-medicc/total_copy/medicc/%/tree_final.pdf : medicc/total_copy/medicc/%/tree_final.new
-	$$(call RUN,-c -n 12 -s 1G -m 2G -v $(PHYLO_ENV),"$(RSCRIPT) modules/test/phylogeny/plotmedicc.R --sample_set $$(*) --type total_copy")
-
-endef
-$(foreach set,$(SAMPLE_SETS),\
-		$(eval $(call total-copy-medicc,$(set))))
-
-endif
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/test/workflows/mspyclone.mk b/test/workflows/mspyclone.mk
deleted file mode 100644
index df313959..00000000
--- a/test/workflows/mspyclone.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/ms_pyclone.$(NOW)
-PHONY += pyclone sufam summary pyclone
-
-PYCLONE_WORKFLOW += sufam_multisample
-PYCLONE_WORKFLOW += setup_pyclone
-PYCLONE_WORKFLOW += run_pyclone
-PYCLONE_WORKFLOW += plot_pyclone
-
-pyclone_workflow : $(PYCLONE_WORKFLOW)
-
-include modules/variant_callers/sufammultisample.mk
-include modules/clonality/setuppyclone.mk
-include modules/clonality/runpyclone.mk
-include modules/clonality/plotpyclone.mk
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/test/workflows/pratchet.mk b/test/workflows/pratchet.mk
deleted file mode 100644
index 5b56f83c..00000000
--- a/test/workflows/pratchet.mk
+++ /dev/null
@@ -1,24 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/pratchet.$(NOW)
-PHONY += pratchet
-
-pratchet : $(foreach set,$(SAMPLE_SETS),pratchet/$(set)/tree_final.RData) $(foreach set,$(SAMPLE_SETS),pratchet/$(set)/tree_final.pdf)
-
-define parsimony-ratchet
-pratchet/%/tree_final.RData : sufam/%.tsv
-	$$(call RUN,-c -s 8G -m 12G -v $(PHANGORN_ENV),"mkdir -p pratchet && \
-								 					mkdir -p pratchet/$$* && \
-								 					$(RSCRIPT) modules/test/phylogeny/pratchet.R --sample_set $$* --normal_samples '$(NORMAL_SAMPLES)'")
-
-pratchet/%/tree_final.pdf : pratchet/%/tree_final.RData
-	$$(call RUN,-c -s 4G -m 6G -v $(PHYLO_ENV),"$(RSCRIPT) modules/test/phylogeny/plotratchet.R --sample_set $$(*)")
-
-endef
-$(foreach set,$(SAMPLE_SETS),\
-		$(eval $(call parsimony-ratchet,$(set))))
-		
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/test/workflows/qdnaseq.mk b/test/workflows/qdnaseq.mk
deleted file mode 100644
index 81a86893..00000000
--- a/test/workflows/qdnaseq.mk
+++ /dev/null
@@ -1,17 +0,0 @@
-include modules/Makefile.inc
-include modules/genome_inc/b37.inc
-
-LOGDIR ?= log/qdnaseq.$(NOW)
-PHONY += qdnaseq qdnaseq/copynumber qdnaseq/copynumber/log2ratio qdnaseq/copynumber/segmented qdnaseq/copynumber/pcf
-
-QDNA_SEQ_WORKFLOW += qdnaseq_extract
-QDNA_SEQ_WORKFLOW += qdnaseq_copynumber
-
-qdna_seq_workflow : $(QDNA_SEQ_WORKFLOW)
-
-include modules/test/copy_number/qdnaseqextract.mk
-include modules/test/copy_number/qdnaseqcopynumber.mk
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/test/workflows/sspyclone.mk b/test/workflows/sspyclone.mk
deleted file mode 100644
index c0ac7432..00000000
--- a/test/workflows/sspyclone.mk
+++ /dev/null
@@ -1,37 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/ss_pyclone.$(NOW)
-PHONY += pyclone
-
-pyclone : $(foreach pair,$(SAMPLE_PAIRS),pyclone/$(pair)/report/summary.tsv)
-
-MAX_CLUSTER ?= 5
-
-define make-pyclone
-pyclone/$1_$2/config.yaml : summary/tsv/mutation_summary.tsv
-	$$(call RUN, -s 16G -m 24G,"mkdir -p pyclone/$1_$2 && \
-								mkdir -p pyclone/$1_$2/report && \
-							    $(RSCRIPT) modules/test/clonality/tsvtopyclone.R --sample_name $1_$2")
-							    
-pyclone/$1_$2/trace/alpha.tsv.bz2 : pyclone/$1_$2/config.yaml
-	$$(call RUN,-s 16G -m 24G -w 7200,"mkdir -p pyclone/$1_$2 && \
-									   mkdir -p pyclone/$1_$2/trace && \
-									   source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate /home/${USER}/share/usr/anaconda-envs/PyClone-0.13.1 && \
-							 		   PyClone run_analysis --config_file pyclone/$1_$2/config.yaml --seed 0")
-							 		  
-pyclone/$1_$2/report/pyclone.tsv : pyclone/$1_$2/trace/alpha.tsv.bz2
-	$$(call RUN,-s 16G -m 24G -w 7200,"source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate /home/${USER}/share/usr/anaconda-envs/PyClone-0.13.1 && \
-							 		   PyClone build_table --config_file pyclone/$1_$2/config.yaml --out_file pyclone/$1_$2/report/pyclone.tsv --max_cluster $(MAX_CLUSTER) --table_type old_style --burnin 50000")
-							 		   
-pyclone/$1_$2/report/summary.tsv : pyclone/$1_$2/report/pyclone.tsv
-	$$(call RUN, -s 24G -m 48G,"mkdir -p pyclone/$1_$2 && \
-								mkdir -p pyclone/$1_$2/report && \
-							    $(RSCRIPT) modules/test/clonality/reportpyclone.R --sample_name $1_$2")
-							    
-endef
-$(foreach pair,$(SAMPLE_PAIRS),\
-		$(eval $(call make-pyclone,$(tumor.$(pair)),$(normal.$(pair)))))
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/test/workflows/viral_detection.mk b/test/workflows/viral_detection.mk
deleted file mode 100644
index 89f34fdf..00000000
--- a/test/workflows/viral_detection.mk
+++ /dev/null
@@ -1,23 +0,0 @@
-include modules/Makefile.inc
-include modules/config.inc
-
-LOGDIR = log/viral_detection.$(NOW)
-PHONY += unmapped_reads
-
-VIRUS_WORKFLOW += extract_unmapped
-VIRUS_WORKFLOW += bam_to_fasta
-VIRUS_WORKFLOW += blast_reads
-VIRUS_WORKFLOW += krona_classify
-
-viral_detection_workflow : $(VIRUS_WORKFLOW)
-
-include modules/fastq_tools/extractReads.mk
-include modules/fastq_tools/bamtoFasta.mk
-include modules/fastq_tools/blastReads.mk
-include modules/virus/krona_classify.mk
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
-
-
diff --git a/variant_callers/combinesamples.R b/variant_callers/combinesamples.R
deleted file mode 100644
index 5c54d37a..00000000
--- a/variant_callers/combinesamples.R
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--sample_set", default = NA, type = 'character', help = "sample names set"))
-				  
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-sample_names = unlist(strsplit(opt$sample_set, split="_", fixed=TRUE))
-
-all_vars = read.csv(file="summary/tsv/mutation_summary.tsv", header=TRUE, sep="\t", stringsAsFactors=FALSE)
-tmp_vars = all_vars[all_vars$TUMOR_SAMPLE %in% sample_names,,drop=FALSE]
-keys = paste0(tmp_vars$CHROM, ":", tmp_vars$POS, ":", tmp_vars$REF, ":", tmp_vars$ALT)
-ukeys = unique(keys)
-vars = NULL
-for (i in 1:length(ukeys)) {
-	index = which(keys==ukeys[i])
- 	Chromosome = tmp_vars[index[1],"CHROM"]
-	Position = tmp_vars[index[1],"POS"]
-	Ref = tmp_vars[index[1],"REF"]
-	Alt = tmp_vars[index[1],"ALT"]
-	Variant_Caller = tmp_vars[index[1],"variantCaller"]
-	Gene_Symbol = tmp_vars[index[1],"SYMBOL"]
-	Variant_Classification = tmp_vars[index[1],"Variant_Classification"]
-	HGVSp_Short = tmp_vars[index[1],"HGVSp_Short"]
-	Fuentes = tmp_vars[index[1],"fuentes"]
-	dgd = tmp_vars[index[1],"dgd"]
-	OncoKB_Level = tmp_vars[index[1],"oncoKB_level"]
-	OncoKB_Cancer_Type = tmp_vars[index[1],"oncoKB_cancer_type"]
-	Cancer_Gene_Census = tmp_vars[index[1],"cancer_gene_census"]
-	Kandoth = tmp_vars[index[1],"kandoth"]
-	Lawrence = tmp_vars[index[1],"lawrence"]
-	Hap_Insuf = tmp_vars[index[1],"hap_insuf"]
-	ExAC_AF = tmp_vars[index[1],"ExAC_AF"]
-	MutationTaster = tmp_vars[index[1],"MutationTaster_pred"]
-	PROVEAN = tmp_vars[index[1],"PROVEAN_pred"]
-	FATHMM = tmp_vars[index[1],"FATHMM_pred"]
-	BRCA_Chasm = tmp_vars[index[1],"BRCA_chasm_pred"]
-	Parssnp = tmp_vars[index[1],"parssnp_pred"]
-	Pathogenicity = tmp_vars[index[1],"pathogenicity"]
-	HOTSPOT = tmp_vars[index[1],"HOTSPOT"]
-	HOTSPOT_INTERNAL = tmp_vars[index[1],"HOTSPOT_INTERNAL"]
-	CMO_HOTSPOT = tmp_vars[index[1],"cmo_hotspot"]
-	vars = rbind(vars, c("Chromosome"=Chromosome,
-					   	 "Position"=Position,
-					     "Ref"=Ref,
-					     "Alt"=Alt,
-					     "Variant_Caller"=Variant_Caller,
-					     "Gene_Symbol"=Gene_Symbol,
-					     "Variant_Classification"=Variant_Classification,
-					     "HGVSp"=HGVSp_Short,
-					     "Fuentes"=Fuentes,
-					     "dgd"=dgd,
-					     "OncoKB_Level"=OncoKB_Level,
-					     "OncoKB_Cancer_Type"=OncoKB_Cancer_Type,
-					     "Cancer_Gene_Census"=Cancer_Gene_Census,
-					     "Kandoth"=Kandoth,
-					     "Lawrence"=Lawrence,
-					     "Hap_Insuf"=Hap_Insuf,
-					     "ExAC"=ExAC_AF,
-					     "MutationTaster"=MutationTaster,
-					     "PROVEAN"=PROVEAN,
-					     "FATHMM"=FATHMM,
-					     "BRCA_Chasm"=BRCA_Chasm,
-					     "Parssnp"=Parssnp,
-					     "Pathogenicity"=Pathogenicity,
-					     "HOTSPOT"=HOTSPOT,
-					     "HOTSPOT_INTERNAL"=HOTSPOT_INTERNAL,
-					     "HOTSPOT_CMO"=CMO_HOTSPOT))
-}
-
-normal_name = tmp_vars[1,"NORMAL_SAMPLE"]
-
-VAF = DEPTH = LOH = CALLS = matrix(NA, nrow=length(ukeys), ncol=length(sample_names), dimnames=list(ukeys, sample_names))
-for (j in 1:nrow(tmp_vars)) {
-	sample_name = tmp_vars[j,"TUMOR_SAMPLE"]
-	ukey = paste0(tmp_vars$CHROM[j], ":", tmp_vars$POS[j], ":", tmp_vars$REF[j], ":", tmp_vars$ALT[j])
-	VAF[ukey,sample_name] = tmp_vars[j,"TUMOR_MAF"]
-	VAF[ukey,normal_name] = tmp_vars[j,"NORMAL_MAF"]
-	DEPTH[ukey,sample_name] = tmp_vars[j,"TUMOR_DP"]
-	DEPTH[ukey,normal_name] = tmp_vars[j,"NORMAL_DP"]
-	LOH[ukey,sample_name] = tmp_vars[j,"facetsLOHCall"]
-	CALLS[ukey,sample_name] = 1
-}
-colnames(VAF) = paste0("MAF_", colnames(VAF))
-colnames(DEPTH) = paste0("DP_", colnames(DEPTH))
-colnames(LOH) = paste0("LOH_", colnames(LOH))
-colnames(CALLS) = paste0("CALL_", colnames(CALLS))
-CALLS[is.na(CALLS)] = 0
-vars = cbind(vars, VAF, DEPTH, LOH, CALLS)
-mutect = grepl("mutect", vars[,"Variant_Caller"])
-main_indels = grepl("varscan", vars[,"Variant_Caller"]) & grepl("strelka", vars[,"Variant_Caller"])
-other_indels = ((grepl("platypus", vars[,"Variant_Caller"]) & grepl("scalpel", vars[,"Variant_Caller"])) |
-			   (grepl("platypus", vars[,"Variant_Caller"]) & grepl("lancet", vars[,"Variant_Caller"]))) &
-			   (nchar(vars[,"Ref"])>3 | nchar(vars[,"Alt"])>3) &
-			   !grepl("In_Frame", vars[,"Variant_Classification"])
-index = mutect | main_indels | other_indels
-vars = vars[index,,drop=FALSE]
-index = vars[,"Variant_Classification"] %in% c("Frame_Shift_Del", "Frame_Shift_Ins", "In_Frame_Del", "In_Frame_Ins", "Missense_Mutation", "Nonsense_Mutation", "Nonstop_Mutation", "Splice_Site")
-vars = vars[index,,drop=FALSE]
-
-write.table(vars, file=paste0("sufam/", opt$sample_set, ".txt"), col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE)
diff --git a/variant_callers/combinesamplesf.R b/variant_callers/combinesamplesf.R
deleted file mode 100644
index 0e90be09..00000000
--- a/variant_callers/combinesamplesf.R
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--sample_set", default = NA, type = 'character', help = "sample names set"))
-				  
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-sample_names = unlist(strsplit(opt$sample_set, split="_", fixed=TRUE))
-
-all_vars = read.csv(file="summary/tsv/mutation_summary.tsv", header=TRUE, sep="\t", stringsAsFactors=FALSE)
-tmp_vars = all_vars[all_vars$TUMOR_SAMPLE %in% sample_names,,drop=FALSE]
-keys = paste0(tmp_vars$CHROM, ":", tmp_vars$POS, ":", tmp_vars$REF, ":", tmp_vars$ALT)
-ukeys = unique(keys)
-vars = NULL
-for (i in 1:length(ukeys)) {
-	index = which(keys==ukeys[i])
- 	Chromosome = tmp_vars[index[1],"CHROM"]
-	Position = tmp_vars[index[1],"POS"]
-	Ref = tmp_vars[index[1],"REF"]
-	Alt = tmp_vars[index[1],"ALT"]
-	Variant_Caller = tmp_vars[index[1],"variantCaller"]
-	Gene_Symbol = tmp_vars[index[1],"SYMBOL"]
-	Variant_Classification = tmp_vars[index[1],"Variant_Classification"]
-	HGVSp_Short = tmp_vars[index[1],"HGVSp_Short"]
-	Fuentes = tmp_vars[index[1],"fuentes"]
-	dgd = tmp_vars[index[1],"dgd"]
-	OncoKB_Level = tmp_vars[index[1],"oncoKB_level"]
-	OncoKB_Cancer_Type = tmp_vars[index[1],"oncoKB_cancer_type"]
-	Cancer_Gene_Census = tmp_vars[index[1],"cancer_gene_census"]
-	Kandoth = tmp_vars[index[1],"kandoth"]
-	Lawrence = tmp_vars[index[1],"lawrence"]
-	Hap_Insuf = tmp_vars[index[1],"hap_insuf"]
-	ExAC_AF = tmp_vars[index[1],"ExAC_AF"]
-	MutationTaster = tmp_vars[index[1],"MutationTaster_pred"]
-	PROVEAN = tmp_vars[index[1],"PROVEAN_pred"]
-	FATHMM = tmp_vars[index[1],"FATHMM_pred"]
-	BRCA_Chasm = tmp_vars[index[1],"BRCA_chasm_pred"]
-	Parssnp = tmp_vars[index[1],"parssnp_pred"]
-	Pathogenicity = tmp_vars[index[1],"pathogenicity"]
-	HOTSPOT = tmp_vars[index[1],"HOTSPOT"]
-	HOTSPOT_INTERNAL = tmp_vars[index[1],"HOTSPOT_INTERNAL"]
-	CMO_HOTSPOT = tmp_vars[index[1],"cmo_hotspot"]
-	vars = rbind(vars, c("Chromosome"=Chromosome,
-					   	 "Position"=Position,
-					     "Ref"=Ref,
-					     "Alt"=Alt,
-					     "Variant_Caller"=Variant_Caller,
-					     "Gene_Symbol"=Gene_Symbol,
-					     "Variant_Classification"=Variant_Classification,
-					     "HGVSp"=HGVSp_Short,
-					     "Fuentes"=Fuentes,
-					     "dgd"=dgd,
-					     "OncoKB_Level"=OncoKB_Level,
-					     "OncoKB_Cancer_Type"=OncoKB_Cancer_Type,
-					     "Cancer_Gene_Census"=Cancer_Gene_Census,
-					     "Kandoth"=Kandoth,
-					     "Lawrence"=Lawrence,
-					     "Hap_Insuf"=Hap_Insuf,
-					     "ExAC"=ExAC_AF,
-					     "MutationTaster"=MutationTaster,
-					     "PROVEAN"=PROVEAN,
-					     "FATHMM"=FATHMM,
-					     "BRCA_Chasm"=BRCA_Chasm,
-					     "Parssnp"=Parssnp,
-					     "Pathogenicity"=Pathogenicity,
-					     "HOTSPOT"=HOTSPOT,
-					     "HOTSPOT_INTERNAL"=HOTSPOT_INTERNAL,
-					     "HOTSPOT_CMO"=CMO_HOTSPOT))
-}
-
-normal_name = tmp_vars[1,"NORMAL_SAMPLE"]
-
-VAF = DEPTH = LOH = CALLS = matrix(NA, nrow=length(ukeys), ncol=length(sample_names), dimnames=list(ukeys, sample_names))
-for (j in 1:nrow(tmp_vars)) {
-	sample_name = tmp_vars[j,"TUMOR_SAMPLE"]
-	ukey = paste0(tmp_vars$CHROM[j], ":", tmp_vars$POS[j], ":", tmp_vars$REF[j], ":", tmp_vars$ALT[j])
-	VAF[ukey,sample_name] = tmp_vars[j,"TUMOR_MAF"]
-	VAF[ukey,normal_name] = tmp_vars[j,"NORMAL_MAF"]
-	DEPTH[ukey,sample_name] = tmp_vars[j,"TUMOR_DP"]
-	DEPTH[ukey,normal_name] = tmp_vars[j,"NORMAL_DP"]
-	LOH[ukey,sample_name] = tmp_vars[j,"facetsLOHCall"]
-	CALLS[ukey,sample_name] = 1
-}
-colnames(VAF) = paste0("MAF_", colnames(VAF))
-colnames(DEPTH) = paste0("DP_", colnames(DEPTH))
-colnames(LOH) = paste0("LOH_", colnames(LOH))
-colnames(CALLS) = paste0("CALL_", colnames(CALLS))
-CALLS[is.na(CALLS)] = 0
-vars = cbind(vars, VAF, DEPTH, LOH, CALLS)
-mutect = grepl("mutect", vars[,"Variant_Caller"])
-main_indels = grepl("varscan", vars[,"Variant_Caller"]) & grepl("strelka", vars[,"Variant_Caller"])
-other_indels = ((grepl("platypus", vars[,"Variant_Caller"]) & grepl("scalpel", vars[,"Variant_Caller"])) |
-			   (grepl("platypus", vars[,"Variant_Caller"]) & grepl("lancet", vars[,"Variant_Caller"]))) &
-			   (nchar(vars[,"Ref"])>3 | nchar(vars[,"Alt"])>3) &
-			   !grepl("In_Frame", vars[,"Variant_Classification"])
-index = mutect | main_indels | other_indels
-vars = vars[index,,drop=FALSE]
-index = vars[,"Variant_Classification"] %in% c("Frame_Shift_Del", "Frame_Shift_Ins", "In_Frame_Del", "In_Frame_Ins", "Missense_Mutation", "Nonsense_Mutation", "Nonstop_Mutation", "Splice_Site")
-vars = vars[index,,drop=FALSE]
-
-blacklist = read.csv(file="summary/tsv/mouse_summary.tsv", header=TRUE, sep="\t", stringsAsFactors=FALSE)
-indx = grep("AD", colnames(blacklist))
-index = matrix(0, nrow=nrow(blacklist), ncol=length(indx))
-for (i in 1:length(indx)) {
-	index[blacklist[,indx[i]]!=0,i] = 1
-}
-index = apply(index, 1, sum)>0
-all_id = paste0(vars[,"Chromosome"], ":", vars[,"Position"], "_", vars[,"Ref"], ">", vars[,"Alt"])
-blacklist_id = paste0(blacklist[index,"Chromosome"], ":", blacklist[index,"Position"], "_", blacklist[,"Reference_Allele"], ">", blacklist[,"Alternate_Allele"])
-keep_id = which(!(all_id %in% blacklist_id))
-vars = vars[keep_id,,drop=FALSE]
-
-write.table(vars, file=paste0("sufam/", opt$sample_set, ".txt"), col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE)
diff --git a/variant_callers/genotypehotspots.mk b/variant_callers/genotypehotspots.mk
deleted file mode 100644
index 6d425f0e..00000000
--- a/variant_callers/genotypehotspots.mk
+++ /dev/null
@@ -1,22 +0,0 @@
-include modules/Makefile.inc
-
-SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev
-SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000'
-
-LOGDIR ?= log/genotype_hotspots.$(NOW)
-PHONY += hotspot
-
-genotype_hotspots : $(foreach sample,$(SAMPLES),hotspot/$(sample).txt)
-
-define genotype-hotspots
-hotspot/%.txt : bam/%.bam
-	$$(call RUN,-v $$(SUFAM_ENV) -c -s 2G -m 4G -w 2880,"sufam --sample_name $$(*) $$(SUFAM_OPTS) $$(REF_FASTA) modules/reference/hotspots/hotspot-dedup.vcf bam/$$(*).bam > hotspot/$$(*).txt")
-	
-endef
- $(foreach sample,$(SAMPLES),\
-		$(eval $(call genotype-hotspots,$(sample))))
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
-
diff --git a/variant_callers/genotypepdx.R b/variant_callers/genotypepdx.R
deleted file mode 100644
index 2c84bf14..00000000
--- a/variant_callers/genotypepdx.R
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env Rscript
-
-all_vars = read.csv(file="summary/tsv/mutation_summary.tsv", header=TRUE, sep="\t", stringsAsFactors=FALSE)
-CHROM = all_vars[,"CHROM"]
-POS = all_vars[,"POS"]
-ID = all_vars[,"ID"]
-REF = all_vars[,"REF"]
-ALT = all_vars[,"ALT"]
-QUAL = FILTER = rep(".", nrow(all_vars))
-INFO = paste0(all_vars[,"SYMBOL"], all_vars[,"HGVSp_Short"])
-vcf = data.frame(CHROM, POS, ID, REF, ALT, QUAL, INFO)
-
-cat("#", file="sufam/pdx.vcf", append=FALSE)
-write.table(vcf, file="sufam/pdx.vcf", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE, append=TRUE)
diff --git a/variant_callers/genotypepdx.mk b/variant_callers/genotypepdx.mk
deleted file mode 100644
index c599b82d..00000000
--- a/variant_callers/genotypepdx.mk
+++ /dev/null
@@ -1,29 +0,0 @@
-include modules/Makefile.inc
-
-SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev
-SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000'
-MOUSE_SAMPLES = $(sample_category.mouse)
-
-LOGDIR ?= log/genotype_pdx.$(NOW)
-PHONY += sufam summary
-
-genotype_pdx : $(foreach sample,$(sample_category.mouse),sufam/$(sample).txt) sufam/pdx.vcf summary/mouse_summary.xlsx
-
-sufam/pdx.vcf : summary/tsv/mutation_summary.tsv
-	$(call RUN, -c -s 8G -m 16G,"$(RSCRIPT) modules/variant_callers/genotypepdx.R")
-
-define genotype-pdx
-sufam/%.txt : bam/%.bam sufam/pdx.vcf
-	$$(call RUN,-v $$(SUFAM_ENV) -c -s 2G -m 4G -w 2880,"sufam --sample_name $$(*) $$(SUFAM_OPTS) $$(REF_FASTA) sufam/pdx.vcf bam/$$(*).bam > sufam/$$(*).txt")
-	
-endef
- $(foreach sample,$(sample_category.mouse),\
-		$(eval $(call genotype-pdx,$(sample))))
-		
-summary/mouse_summary.xlsx : $(wildcard $(foreach sample,$(sample_category.mouse),sufam/$(sample).txt))
-	$(call RUN,-n 1 -s 4G -m 4G,"$(RSCRIPT) modules/summary/mousesummary.R --sample_names '$(MOUSE_SAMPLES)' --out_file summary/tsv/mouse_summary.tsv && \
-								 python modules/summary/mouse_summary_excel.py")
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/variant_callers/get_basecounts.mk b/variant_callers/get_basecounts.mk
new file mode 100644
index 00000000..331d9aa2
--- /dev/null
+++ b/variant_callers/get_basecounts.mk
@@ -0,0 +1,52 @@
+# Count bases per sample with $(GBC) at the sites in
+# vcf/dataSilentNoPoleNotTertPromot.vcf, then summarise across samples.
+include modules/Makefile.inc
+
+LOGDIR ?= log/get_basecount.$(NOW)
+
+# Values forwarded to $(GBC) as --maq, --baq and --cov.
+MAPQ := 0
+BAQ := 0
+COV := 0
+
+getbasecount : $(SAMPLES:%=gbc/%.txt.gz) \
+	       gbc/summary.txt
+
+# $1 = sample name; emits the rule producing gbc/$1.txt.gz from bam/$1.bam.
+# NOTE(review): $$(<<) is presumably the second prerequisite (the VCF), as
+# defined in modules/Makefile.inc -- confirm.
+define get-basecount
+gbc/$1.txt.gz : bam/$1.bam vcf/dataSilentNoPoleNotTertPromot.vcf
+	$$(call RUN,-n 6 -s 3G -m 6G,"set -o pipefail && \
+				      $(GBC) --fasta $(REF_FASTA) \
+				      --bam $$(<) \
+				      --vcf $$(<<) \
+				      --output $$(@) \
+				      --thread 6 \
+				      --sort_output \
+				      --compress_output \
+				      --maq $(MAPQ) \
+				      --baq $(BAQ) \
+				      --cov $(COV) \
+				      --filter_duplicate 0 \
+				      --filter_improper_pair 0 \
+				      --filter_qc_failed 1 \
+				      --filter_indel 0 \
+				      --filter_non_primary 1")
+
+endef
+$(foreach sample,$(SAMPLES),$(eval $(call get-basecount,$(sample))))
+
+# Cohort summary; the R script receives every sample name in one quoted argument.
+gbc/summary.txt : $(SAMPLES:%=gbc/%.txt.gz)
+	$(call RUN,-n 1 -s 24G -m 32G,"set -o pipefail && \
+				       $(RSCRIPT) $(SCRIPTS_DIR)/get_basecounts.R \
+				       --option 1 \
+				       --sample_name '$(SAMPLES)'")
+
+# Parse-time side effect: capture whatever $(GBC) prints when run without
+# arguments into version/get_basecount.txt.
+..DUMMY := $(shell mkdir -p version; \
+	     ${GBC} &> version/get_basecount.txt;)
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: getbasecount
diff --git a/variant_callers/somatic/hla_summary.R b/variant_callers/somatic/hla_summary.R
index 5d3ef8f0..e08590db 100644
--- a/variant_callers/somatic/hla_summary.R
+++ b/variant_callers/somatic/hla_summary.R
@@ -1,25 +1,59 @@
 suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("magrittr"))
 
 options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
 
-optList <- list(
-                make_option("--sample_names", default = "NA", help = "tumor normal sample pair names")
-                )
+optList <- list(make_option("--option", default = "NA", help = "which option?"),
+                make_option("--sample_names", default = "NA", help = "sample names"))
 
 parser <- OptionParser(usage = "%prog [options]", option_list = optList)
 arguments <- parse_args(parser, positional_arguments = T)
 opt <- arguments$options
 
 sample_names = unlist(strsplit(opt$sample_names, split=" ", fixed=TRUE))
-hla_genotypes = list()
-for (i in 1:length(sample_names)) {
-	data = read.csv(file=paste0("hla_polysolver/", sample_names[i], "/winners.hla.txt"), header=FALSE, sep="\t", stringsAsFactors=FALSE)
-	gen_1 = t(data[,2,drop=FALSE])
-	gen_2 = t(data[,3,drop=FALSE])
-	colnames(gen_1) = paste0(c("HLA-A", "HLA-B", "HLA-C"), "_1")
-	colnames(gen_2) = paste0(c("HLA-A", "HLA-B", "HLA-C"), "_2")
-	hla_genotypes[[i]] = cbind(gen_1, gen_2)
+
+# Validate --option up front: the "NA" default (or any non-numeric value) would otherwise reach `if` as NA and abort with an opaque error.
+option = suppressWarnings(as.numeric(opt$option))
+if (is.na(option) || !(option %in% 1:3)) {
+	stop("--option must be 1 (HLA genotypes), 2 (mutect calls) or 3 (strelka indel calls); got '", opt$option, "'")
+}
+
+# Read one tab-separated file per entry of `files` with every column as character, then let readr guess the column types.
+read_tables = function(files, col_names) {
+	lapply(files, function(f) {
+		readr::read_tsv(file = f, col_names = col_names, col_types = cols(.default = col_character())) %>%
+		readr::type_convert()
+	})
+}
+
+# Stack hla_polysolver/<name>/<name><suffix> over all sample_names, split `individual` on "_" into
+# tumor_name (first field) and normal_name (second field), and write the combined table to out_file.
+summarize_calls = function(suffix, out_file) {
+	somatic_vars = do.call(rbind, read_tables(paste0("hla_polysolver/", sample_names, "/", sample_names, suffix), col_names = TRUE))
+	# The two name columns are only added when there is at least one row; an empty table is written as read.
+	if (nrow(somatic_vars)>0) {
+		ids = strsplit(somatic_vars$individual, split = "_", fixed = TRUE)
+		somatic_vars = somatic_vars %>%
+			       dplyr::mutate(tumor_name = vapply(ids, function(x) { x[1] }, character(1))) %>%
+			       dplyr::mutate(normal_name = vapply(ids, function(x) { x[2] }, character(1)))
+	}
+	readr::write_tsv(x = somatic_vars, path = out_file, col_names = TRUE, append = FALSE)
+}
+
+if (option==1) {
+	# winners.hla.txt has no header: name its three columns and tag every row with the entry it came from.
+	hla_genotypes = read_tables(paste0("hla_polysolver/", sample_names, "/winners.hla.txt"), col_names = FALSE)
+	for (i in seq_along(sample_names)) {
+		hla_genotypes[[i]] = hla_genotypes[[i]] %>%
+				     dplyr::rename(hla = X1, major_allele = X2, minor_allele = X3) %>%
+				     dplyr::mutate(sample_name = sample_names[i])
+	}
+	hla_genotypes = do.call(rbind, hla_genotypes)
+	readr::write_tsv(x = hla_genotypes, path = "hla_polysolver/summary/hla_summary.txt", col_names = TRUE, append = FALSE)
+} else if (option==2) {
+	summarize_calls(".mutect.unfiltered.annotated", "hla_polysolver/summary/mutect_summary.txt")
+} else {
+	summarize_calls(".strelka_indels.unfiltered.annotated", "hla_polysolver/summary/strelka_summary.txt")
 }
-hla_genotypes = do.call(rbind, hla_genotypes)
-hla_genotypes = cbind("SAMPLE_NAMES"=sample_names, hla_genotypes)
-write.table(hla_genotypes, file="hla_polysolver/summary/genotype_summary.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE)
diff --git a/variant_callers/somatic/macs2TN.mk b/variant_callers/somatic/macs2TN.mk
deleted file mode 100644
index 259e4e65..00000000
--- a/variant_callers/somatic/macs2TN.mk
+++ /dev/null
@@ -1,23 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/macs2TN.$(NOW)
-PHONY += macs2 macs2/broadpeaks macs2/narrowpeaks
-
-macs2TN : $(foreach pair,$(SAMPLE_PAIRS),macs2/$(pair).timestamp)
-
-define macs2-case-control
-macs2/broadpeaks/$1_$2.task.complete : bam/$1.bam bam/$2.bam
-	$$(call RUN,-c -s 8G -m 12G,"macs2 callpeak -t $$< -c $$(<<) -f BAM -g hs --keep-dup all --broad --outdir macs2/broadpeaks -n $1_$2 -B --verbose 2 --nomodel -p 0.1 && echo $$< $$(<<) > macs2/broadpeaks/$1_$2.task.complete")
-	
-macs2/narrowpeaks/$1_$2.task.complete : bam/$1.bam bam/$2.bam
-	$$(call RUN,-c -s 8G -m 12G,"macs2 callpeak -t $$< -c $$(<<) -f BAM -g hs --keep-dup all --outdir macs2/narrowpeaks -n $1_$2 -B --verbose 2 --nomodel -p 0.1 && echo $$< $$(<<) > macs2/narrowpeaks/$1_$2.task.complete")
-
-macs2/$1_$2.timestamp : macs2/broadpeaks/$1_$2.task.complete macs2/narrowpeaks/$1_$2.task.complete
-	$$(call RUN,-c -s 1G -m 1G,"echo $$< $$(<<) > macs2/$1_$2.timestamp")
-endef
-$(foreach pair,$(SAMPLE_PAIRS),\
-		$(eval $(call macs2-case-control,$(tumor.$(pair)),$(normal.$(pair)))))
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
diff --git a/variant_callers/somatic/mimsi.mk b/variant_callers/somatic/mimsi.mk
new file mode 100644
index 00000000..59585ecb
--- /dev/null
+++ b/variant_callers/somatic/mimsi.mk
@@ -0,0 +1,41 @@
+# mimsi per tumor/normal pair, plus a summary table over all pairs.
+include modules/Makefile.inc
+
+LOGDIR ?= log/mimsi.$(NOW)
+
+MICROSATELLITES_LIST = $(HOME)/share/lib/resource_files/mimsi/microsatellites_impact_only.list
+MODEL = $(HOME)/share/lib/resource_files/mimsi/mi_msi_v0_4_0_200x.model
+
+# One result file per pair; shared by the top-level goal and the summary rule.
+mimsi_results = $(foreach pair,$(SAMPLE_PAIRS),mimsi/$(pair)/$(pair).txt)
+
+mimsi: $(mimsi_results) \
+       mimsi/summary.txt
+
+# $1 = tumor sample, $2 = normal sample. The recipe moves BATCH_results.txt
+# out of the save location onto the rule's target.
+define mimsi-tumor-normal
+mimsi/$1_$2/$1_$2.txt : bam/$1.bam bam/$2.bam
+	$$(call RUN,-c -n 8 -s 1G -m 2G -v $(MIMSI_ENV),"set -o pipefail && \
+							 mkdir -p mimsi/$1_$2/ && \
+							 analyze \
+							 --tumor-bam $$(<) \
+							 --normal-bam $$(<<) \
+							 --case-id $1 \
+							 --norm-case-id $2 \
+							 --microsatellites-list $$(MICROSATELLITES_LIST) \
+							 --save-location mimsi/$1_$2/ \
+							 --model $$(MODEL) \
+							 --save && \
+							 mv mimsi/$1_$2/BATCH_results.txt $$(@)")
+
+endef
+$(foreach pair,$(SAMPLE_PAIRS),$(eval $(call mimsi-tumor-normal,$(tumor.$(pair)),$(normal.$(pair)))))
+
+mimsi/summary.txt : $(mimsi_results)
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \
+							       $(RSCRIPT) $(SCRIPTS_DIR)/mimsi.R --option 1 --sample_names '$(SAMPLE_PAIRS)'")
+
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: mimsi
diff --git a/variant_callers/somatic/msisensor.mk b/variant_callers/somatic/msisensor.mk
index b61e0298..42f5f108 100644
--- a/variant_callers/somatic/msisensor.mk
+++ b/variant_callers/somatic/msisensor.mk
@@ -2,30 +2,29 @@ include modules/Makefile.inc
 
 LOGDIR ?= log/msisensor.$(NOW)
 
-MSISENSOR_OPTS ?= -d $(REF_MSI) $(if $(TARGETS_FILE),-e $(TARGETS_FILE))
+msisensor: $(foreach pair,$(SAMPLE_PAIRS),msisensor/$(pair).msi) \
+	   msisensor/msi.tsv
 
-PHONY += msisensor
-
-.DELETE_ON_ERROR:
-.SECONDARY: 
-.PHONY : $(PHONY)
-
-msisensor: msisensor/msi.tsv
+MICROSATELLITES_LIST = $(HOME)/share/lib/resource_files/MSIsensor/microsatellites.list
+MSI_REGIONS = $(HOME)/share/lib/resource_files/MSIsensor/msiregions.bed
 
 define msisensor-tumor-normal
-msisensor/$1_$2.msi : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai
-	$$(call RUN,-c -n 8 -s 1G -m 1.2G,"source ~/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate ~/share/usr/anaconda-envs/msisensor && \
-									   msisensor msi $$(MSISENSOR_OPTS) -n $$(<<) -t $$< -b 8 -o $$@")
+msisensor/$1_$2.msi : bam/$1.bam bam/$2.bam
+	$$(call RUN,-c -n 8 -s 1G -m 2G -v $(MSISENSOR_ENV),"set -o pipefail && \
+							     msisensor msi $$(MSISENSOR_OPTS) \
+							     -d $$(MICROSATELLITES_LIST) \
+							     -e $$(MSI_REGIONS) \
+							     -n $$(<<) \
+							     -t $$(<) \
+							     -b 8 \
+							     -o $$(@)")
 endef
-$(foreach pair,$(SAMPLE_PAIRS),$(eval $(call msisensor-tumor-normal,$(tumor.$(pair)),$(normal.$(pair)))))
+$(foreach pair,$(SAMPLE_PAIRS),\
+	$(eval $(call msisensor-tumor-normal,$(tumor.$(pair)),$(normal.$(pair)))))
 
 msisensor/msi.tsv : $(foreach pair,$(SAMPLE_PAIRS),msisensor/$(pair).msi)
 	$(INIT) (head -1 $< | sed 's/^/sample\t/'; for x in $^; do sed "1d; s/^/$$(basename $$x)\t/" $$x; done | sed 's/_.*msi//' ) > $@
 
-bam/%.ds.bam : metrics/hs_metrics.tsv bam/%.bam
-	   $(call RUN,-s 4G -m 6G,\
-		   "ds=\`py 'round(500 / pandas.read_table(\"$<\", index_col=0).ix[\"$*\", \"MEAN_TARGET_COVERAGE\"], 2)'\`; \
-		   if [ \$$(echo \"\$$ds >= 1\" | bc) -eq 1 ]; then ln -s \$$(readlink -f $(<<)) $@; else \
-		   samtools view -hb -s \$$ds $(<<) > $@; fi")
-
-include modules/bam_tools/processBam.mk
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY: msisensor
diff --git a/variant_callers/somatic/polysolver.mk b/variant_callers/somatic/polysolver.mk
index 3c4d3dd8..ce302f5e 100644
--- a/variant_callers/somatic/polysolver.mk
+++ b/variant_callers/somatic/polysolver.mk
@@ -1,44 +1,75 @@
 include modules/Makefile.inc
 
 LOGDIR ?= log/hla_polysolver.$(NOW)
-PHONY += hla_polysolver hla_polysolver/summary
 
-hla_polysolver : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).taskcomplete) hla_polysolver/summary/genotype_summary.txt
+
+hla_polysolver : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/winners.hla.txt) \
+		 $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/hla.intervals) \
+		 $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).mutect.unfiltered.annotated) \
+		 $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated) \
+		 hla_polysolver/summary/hla_summary.txt \
+		 hla_polysolver/summary/mutect_summary.txt \
+		 hla_polysolver/summary/strelka_summary.txt
+		 
 
 define hla-polysolver
 hla_polysolver/$1_$2/winners.hla.txt : bam/$1.bam bam/$2.bam
-	$$(call RUN,-n 8 -s 2G -m 4G, "source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate \
-								   /home/${USER}/share/usr/anaconda-envs/hla-polysolver && \
-								   export CONDA_PREFIX=/home/${USER}/share/usr/anaconda-envs/hla-polysolver && \
-								   export PERL5LIB=/home/${USER}/share/usr/anaconda-envs/hla-polysolver/lib/perl5/5.22.0 && \
-								   if [ ! -d hla_polysolver/$1_$2 ]; then mkdir hla_polysolver/$1_$2; fi && \
-								   shell_call_hla_type bam/$2.bam Unknown 1 hg19 STDFQ 0 hla_polysolver/$1_$2")
+	$$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 72:00:00, "set -o pipefail && \
+									   export CONDA_PREFIX=$$(POLYSOLVER_ENV) && \
+									   export PERL5LIB=$$(POLYSOLVER_ENV)/lib/perl5/5.22.0 && \
+									   shell_call_hla_type \
+									   $$(<<) \
+									   Unknown \
+									   1 \
+									   hg19 \
+									   STDFQ \
+									   0 \
+									   hla_polysolver/$1_$2")
 								 	  
-hla_polysolver/$1_$2/hla.intervals : hla_polysolver/$1_$2/winners.hla.txt
-	$$(call RUN,-n 8 -s 2G -m 4G, "source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate \
-								   /home/${USER}/share/usr/anaconda-envs/hla-polysolver && \
-								   export CONDA_PREFIX=/home/${USER}/share/usr/anaconda-envs/hla-polysolver && \
-								   export PERL5LIB=/home/${USER}/share/usr/anaconda-envs/hla-polysolver/lib/perl5/5.22.0 && \
-								   shell_call_hla_mutations_from_type bam/$2.bam bam/$1.bam hla_polysolver/$1_$2/winners.hla.txt hg19 STDFQ hla_polysolver/$1_$2")
+hla_polysolver/$1_$2/hla.intervals : bam/$1.bam bam/$2.bam hla_polysolver/$1_$2/winners.hla.txt
+	$$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 72:00:00, "set -o pipefail && \
+									   export CONDA_PREFIX=$$(POLYSOLVER_ENV) && \
+									   export PERL5LIB=$$(POLYSOLVER_ENV)/lib/perl5/5.22.0 && \
+									   shell_call_hla_mutations_from_type \
+									   $$(<<) \
+									   $$(<) \
+									   $$(<<<) \
+									   hg19 \
+									   STDFQ \
+									   hla_polysolver/$1_$2")
 								 	 
-hla_polysolver/$1_$2/$1_$2.mutect.unfiltered.annotated hla_polysolver/$1_$2/$1_$2.strelka_indels.unfiltered.annotated : hla_polysolver/$1_$2/hla.intervals
-	$$(call RUN,-n 8 -s 2G -m 4G, "source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate \
-								   /home/${USER}/share/usr/anaconda-envs/hla-polysolver && \
-								   export CONDA_PREFIX=/home/${USER}/share/usr/anaconda-envs/hla-polysolver && \
-								   export PERL5LIB=/home/${USER}/share/usr/anaconda-envs/hla-polysolver/lib/perl5/5.22.0 && \
-								   shell_annotate_hla_mutations $1_$2 hla_polysolver/$1_$2")
+hla_polysolver/$1_$2/$1_$2.mutect.unfiltered.annotated : hla_polysolver/$1_$2/hla.intervals
+	$$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 72:00:00, "set -o pipefail && \
+									   export CONDA_PREFIX=$$(POLYSOLVER_ENV) && \
+									   export PERL5LIB=$$(POLYSOLVER_ENV)/lib/perl5/5.22.0 && \
+									   shell_annotate_hla_mutations \
+									   $1_$2 \
+									   hla_polysolver/$1_$2")
 
-hla_polysolver/$1_$2/$1_$2.taskcomplete : hla_polysolver/$1_$2/$1_$2.mutect.unfiltered.annotated hla_polysolver/$1_$2/$1_$2.strelka_indels.unfiltered.annotated
-	$$(call RUN,-n 1 -s 1G -m 1G,"touch hla_polysolver/$1_$2/$1_$2.taskcomplete")
+hla_polysolver/$1_$2/$1_$2.strelka_indels.unfiltered.annotated : hla_polysolver/$1_$2/$1_$2.mutect.unfiltered.annotated
+	
 
 endef
 $(foreach pair,$(SAMPLE_PAIRS),\
 		$(eval $(call hla-polysolver,$(tumor.$(pair)),$(normal.$(pair)))))
 		
-hla_polysolver/summary/genotype_summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).taskcomplete)
-	$(call RUN,-c -s 12G -m 24G,"mkdir -p hla_polysolver/summary && \
-							 	 $(RSCRIPT) modules/variant_callers/somatic/hla_summary.R --sample_names '$(SAMPLE_PAIRS)'")
+hla_polysolver/summary/hla_summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).mutect.unfiltered.annotated) $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated)
+	$(call RUN,-c -s 12G -m 24G,"set -o pipefail && \
+				     $(RSCRIPT) modules/variant_callers/somatic/hla_summary.R --option 1 --sample_names '$(SAMPLE_PAIRS)'")
 
-.DELETE_ON_ERROR:
+hla_polysolver/summary/mutect_summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).mutect.unfiltered.annotated) $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated)
+	$(call RUN,-c -s 12G -m 24G,"set -o pipefail && \
+				     $(RSCRIPT) modules/variant_callers/somatic/hla_summary.R --option 2 --sample_names '$(SAMPLE_PAIRS)'")
+
+hla_polysolver/summary/strelka_summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).mutect.unfiltered.annotated) $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated)
+	$(call RUN,-c -s 12G -m 24G,"set -o pipefail && \
+				     $(RSCRIPT) modules/variant_callers/somatic/hla_summary.R --option 3 --sample_names '$(SAMPLE_PAIRS)'")
+
+
+..DUMMY := $(shell mkdir -p version; \
+	     $(POLYSOLVER_ENV)/bin/shell_call_hla_type --help &> version/hla_polysolver.txt; \
+	     $(POLYSOLVER_ENV)/bin/shell_call_hla_mutations_from_type --help &>> version/hla_polysolver.txt; \
+	     $(POLYSOLVER_ENV)/bin/shell_annotate_hla_mutations --help &>> version/hla_polysolver.txt)
 .SECONDARY:
-.PHONY: $(PHONY)
+.DELETE_ON_ERROR:
+.PHONY: hla_polysolver
diff --git a/variant_callers/somatic/varscanTN.mk b/variant_callers/somatic/varscanTN.mk
index b22b0e1f..560f0719 100644
--- a/variant_callers/somatic/varscanTN.mk
+++ b/variant_callers/somatic/varscanTN.mk
@@ -1,47 +1,40 @@
-# Run VarScan on tumour-normal matched pairs
-# Detect point mutations
-##### DEFAULTS ######
+include modules/Makefile.inc
 
 LOGDIR ?= log/varscanTN.$(NOW)
 
-##### MAKE INCLUDES #####
-include modules/Makefile.inc
-
 IGNORE_FP_FILTER ?= true
-
+VALIDATION ?= false
 FP_FILTER = $(PERL) $(HOME)/share/usr/bin/fpfilter.pl
 BAM_READCOUNT = $(HOME)/share/usr/bin/bam-readcount
-
 VARSCAN_TO_VCF = $(PERL) modules/variant_callers/somatic/varscanTNtoVcf.pl
-
 MIN_MAP_QUAL ?= 1
-VALIDATION ?= false
 MIN_VAR_FREQ ?= $(if $(findstring false,$(VALIDATION)),0.05,0.000001)
 
-#VARSCAN
 VARSCAN_MEM = $(JAVA7) -Xmx$1 -jar $(VARSCAN_JAR)
 VARSCAN = $(call VARSCAN_MEM,8G)
 VARSCAN_OPTS = $(if $(findstring true,$(VALIDATION)),--validation 1 --strand-filter 0) --min-var-freq $(MIN_VAR_FREQ)
-
 VARSCAN_SOURCE_ANN_VCF = python modules/vcf_tools/annotate_source_vcf.py --source varscan
-
 VPATH ?= bam
-
 VARSCAN_VARIANT_TYPES = varscan_indels varscan_snps
 
-PHONY += varscan varscan_vcfs varscan_mafs
-varscan : varscan_vcfs #varscan_mafs
-varscan_vcfs : $(foreach type,$(VARSCAN_VARIANT_TYPES),$(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).$(type).vcf))
-varscan_mafs : $(foreach type,$(VARSCAN_VARIANT_TYPES),$(foreach pair,$(SAMPLE_PAIRS),maf/$(pair).$(type).maf))
-
-
-%.Somatic.txt : %.txt
-	$(call RUN,-s 5G -m 8G,"$(call VARSCAN_MEM,4G) somaticFilter $< && $(call VARSCAN_MEM,4G) processSomatic $< && rename .txt.Somatic .Somatic.txt $** && rename .txt.Germline .Germline.txt $** && rename .txt.LOH .LOH.txt $** && rename .txt.hc .hc.txt $**")
+varscan : $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).varscan_timestamp)) \
+	  $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).snp.txt)) \
+	  $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).indel.txt)) \
+	  $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).snp.txt) \
+	  $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).indel.txt) \
+	  $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).snp.Somatic.txt) \
+	  $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).indel.Somatic.txt) \
+	  $(foreach pair,$(SAMPLE_PAIRS),varscan/vcf/$(pair).snp.Somatic.vcf) \
+	  $(foreach pair,$(SAMPLE_PAIRS),varscan/vcf/$(pair).indel.Somatic.vcf) \
+	  $(foreach type,$(VARSCAN_VARIANT_TYPES),$(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).$(type).vcf))
 
 define varscan-somatic-tumor-normal-chr
 varscan/chr_tables/$1_$2.$3.varscan_timestamp : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai
 	if [[ $$$$($$(SAMTOOLS) view $$< $3 | head -1 | wc -l) -gt 0 ]]; then \
-		$$(call RUN,-s 9G -m 12G,"$$(VARSCAN) somatic \
+		$$(call RUN,-s 9G -m 12G -w 72:00:00,"set -o pipefail && \
+		rm -rf varscan/chr_tables/$1_$2.$3.snp.txt && \
+		rm -rf varscan/chr_tables/$1_$2.$3.indel.txt && \
+		$$(VARSCAN) somatic \
 		<($$(SAMTOOLS) mpileup -A -r $3 -q $$(MIN_MAP_QUAL) -f $$(REF_FASTA) $$(word 2,$$^)) \
 		<($$(SAMTOOLS) mpileup -A -r $3 -q $$(MIN_MAP_QUAL) -f $$(REF_FASTA) $$<) \
 		$$(VARSCAN_OPTS) \
@@ -54,14 +47,16 @@ varscan/chr_tables/$1_$2.$3.varscan_timestamp : bam/$1.bam bam/$2.bam bam/$1.bam
 	fi
 
 varscan/chr_tables/$1_$2.$3.indel.txt : varscan/chr_tables/$1_$2.$3.varscan_timestamp
+
 varscan/chr_tables/$1_$2.$3.snp.txt : varscan/chr_tables/$1_$2.$3.varscan_timestamp
 
 varscan/chr_tables/$1_$2.$3.%.fp_pass.txt : varscan/chr_tables/$1_$2.$3.%.txt bamrc/$1.$3.bamrc.gz
 	$$(call RUN,-s 8G -m 55G,"$$(VARSCAN) fpfilter $$< <(zcat $$(<<)) --output-file $$@")
+
 endef
 $(foreach chr,$(CHROMOSOMES), \
 	$(foreach pair,$(SAMPLE_PAIRS), \
-	$(eval $(call varscan-somatic-tumor-normal-chr,$(tumor.$(pair)),$(normal.$(pair)),$(chr)))))
+		$(eval $(call varscan-somatic-tumor-normal-chr,$(tumor.$(pair)),$(normal.$(pair)),$(chr)))))
 
 define merge-varscan-pair-type
 varscan/tables/$1.$2.txt : $$(foreach chr,$$(CHROMOSOMES),\
@@ -69,16 +64,34 @@ varscan/tables/$1.$2.txt : $$(foreach chr,$$(CHROMOSOMES),\
 	varscan/chr_tables/$1.$$(chr).$2.txt,\
 	varscan/chr_tables/$1.$$(chr).$2.fp_pass.txt))
 	$$(INIT) head -1 $$< > $$@ && for x in $$^; do sed 1d $$$$x >> $$@; done
+
+endef
+$(foreach pair,$(SAMPLE_PAIRS), \
+	$(foreach type,snp indel, \
+		$(eval $(call merge-varscan-pair-type,$(pair),$(type)))))
+	
+define filter-varscan-pair-type
+varscan/tables/$1.$2.Somatic.txt : varscan/tables/$1.$2.txt
+	$$(call RUN,-s 5G -m 8G,"set -o pipefail && \
+				$$(VARSCAN) somaticFilter $$(<) && \
+				$$(VARSCAN) processSomatic $$(<) && \
+				cp varscan/tables/$1.$2.txt.Somatic varscan/tables/$1.$2.Somatic.txt")
+
 endef
 $(foreach pair,$(SAMPLE_PAIRS), \
-	$(foreach type,snp indel,$(eval $(call merge-varscan-pair-type,$(pair),$(type)))))
+	$(foreach type,snp indel, \
+		$(eval $(call filter-varscan-pair-type,$(pair),$(type)))))
 
 define convert-varscan-tumor-normal
-varscan/vcf/$1_$2.%.vcf : varscan/tables/$1_$2.%.txt
-	$$(call RUN,-s 4G -m 8G,"$$(VARSCAN_TO_VCF) -f $$(REF_FASTA) -t $1 -n $2 $$< | $$(VCF_SORT) $$(REF_DICT) - > $$@")
+varscan/vcf/$1_$2.$3.Somatic.vcf : varscan/tables/$1_$2.$3.Somatic.txt
+	$$(call RUN,-s 4G -m 8G,"set -o pipefail && \
+				 $$(VARSCAN_TO_VCF) -f $$(REF_FASTA) -t $1 -n $2 $$(<) | $$(VCF_SORT) $$(REF_DICT) - > $$(@)")
+
+
 endef
 $(foreach pair,$(SAMPLE_PAIRS), \
-	$(eval $(call convert-varscan-tumor-normal,$(tumor.$(pair)),$(normal.$(pair)))))
+	$(foreach type,snp indel, \
+		$(eval $(call convert-varscan-tumor-normal,$(tumor.$(pair)),$(normal.$(pair)),$(type)))))
 
 vcf/%.varscan_indels.vcf : varscan/vcf/%.indel.Somatic.vcf
 	$(INIT) $(VARSCAN_SOURCE_ANN_VCF) < $< > $@
@@ -86,15 +99,8 @@ vcf/%.varscan_indels.vcf : varscan/vcf/%.indel.Somatic.vcf
 vcf/%.varscan_snps.vcf : varscan/vcf/%.snp.Somatic.vcf
 	$(INIT) $(VARSCAN_SOURCE_ANN_VCF) < $< > $@
 
-define bamrc-chr
-bamrc/%.$1.bamrc.gz : bam/%.bam
-	$$(call RUN,-s 8G -m 12G,"$$(BAM_READCOUNT) -f $$(REF_FASTA) $$< $1 | gzip > $$@ 2> /dev/null")
-endef
-$(foreach chr,$(CHROMOSOMES),$(eval $(call bamrc-chr,$(chr))))
-
 include modules/variant_callers/gatk.mk
 
 .DELETE_ON_ERROR:
 .SECONDARY: 
-.PHONY: $(PHONY)
-
+.PHONY: varscan
diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk
new file mode 100644
index 00000000..1b58f247
--- /dev/null
+++ b/variant_callers/sufam_gt.mk
@@ -0,0 +1,106 @@
+# SUFAM genotyping: build per-sample VCF/TXT/MAF files under sufam/, combine
+# them per sample set, then write the cohort-wide mutation summaries.
+include modules/Makefile.inc
+
+LOGDIR ?= log/sufam_gt.$(NOW)
+
+SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev
+SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000'
+
+# Top-level goal: every per-sample, per-set and summary output.
+sufam_gt : $(TUMOR_SAMPLES:%=sufam/%.vcf) \
+	   $(TUMOR_SAMPLES:%=sufam/%.txt) \
+	   $(TUMOR_SAMPLES:%=sufam/%.maf) \
+	   $(TUMOR_SAMPLES:%=sufam/%_ann.maf) \
+	   $(SAMPLE_SETS:%=sufam/%.maf) \
+	   sufam/mutation_summary.maf \
+	   sufam/mutation_summary_ft.maf
+
+# sufam-gt: per-sample rule template; $1 is the tumor sample name.
+#   sufam/$1.vcf     <- summary/tsv/all.tsv   (sufam_gt.R --option 1)
+#   sufam/$1.txt     <- sufam run on the VCF and bam/$1.bam
+#   sufam/$1.maf     <- $(VCF2MAF) run on the VCF
+#   sufam/$1_ann.maf <- sufam/$1.maf          (sufam_gt.R --option 2)
+# NOTE(review): $(<<) is not a built-in automatic variable; it is presumably
+# defined by modules/Makefile.inc as the second prerequisite - confirm.
+define sufam-gt
+sufam/$1.vcf : summary/tsv/all.tsv
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(INNOVATION_ENV),"set -o pipefail && \
+							      $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \
+							      --option 1 \
+							      --sample_set '$(set.$1)' \
+							      --normal_sample '$(normal.$1)' \
+							      --input_file $$(<) \
+							      --output_file $$(@)")
+
+sufam/$1.txt : sufam/$1.vcf bam/$1.bam
+	$$(call RUN,-c -n 1 -s 2G -m 3G -v $(SUFAM_ENV),"set -o pipefail && \
+					 		 sufam \
+							 --sample_name $1 \
+							 $$(SUFAM_OPTS) \
+							 $$(REF_FASTA) \
+							 $$(<) \
+							 $$(<<) \
+							 > $$(@)")
+
+sufam/$1.maf : sufam/$1.vcf
+	$$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \
+							$$(VCF2MAF) \
+							--input-vcf $$< \
+							--tumor-id $1 \
+							--filter-vcf $$(EXAC_NONTCGA) \
+							--ref-fasta $$(REF_FASTA) \
+							--vep-path $$(VEP_PATH) \
+							--vep-data $$(VEP_DATA) \
+							--tmp-dir `mktemp -d` \
+							--output-maf $$(@)")
+
+sufam/$1_ann.maf : sufam/$1.maf
+	$$(call RUN,-c -n 1 -s 2G -m 3G -v $(INNOVATION_ENV),"set -o pipefail && \
+							      $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \
+							      --option 2 \
+							      --tumor_sample $1 \
+							      --normal_sample '$(normal.$1)' \
+							      --input_file $$(<) \
+							      --output_file $$(@)")
+
+endef
+$(foreach sample,$(TUMOR_SAMPLES),$(eval $(call sufam-gt,$(sample))))
+
+# combine-maf: per-set rule template; $1 is the sample-set name. The set MAF
+# depends on every tumor sample's sufam/*.txt and sufam/*_ann.maf (--option 3).
+define combine-maf
+sufam/$1.maf : $(TUMOR_SAMPLES:%=sufam/%.txt) $(TUMOR_SAMPLES:%=sufam/%_ann.maf)
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(INNOVATION_ENV),"set -o pipefail && \
+							      $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \
+							      --option 3 \
+							      --sample_set '$(set.$1)' \
+							      --normal_sample '$(normal.$1)' \
+							      --output_file $$(@)")
+
+endef
+$(foreach set,$(SAMPLE_SETS),$(eval $(call combine-maf,$(set))))
+
+# Cohort-wide summary: runs on all.tsv once every set MAF exists (--option 4).
+sufam/mutation_summary.maf : summary/tsv/all.tsv $(SAMPLE_SETS:%=sufam/%.maf)
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \
+							       $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \
+							       --option 4 \
+							       --sample_set '$(SAMPLE_SETS)' \
+							       --input_file $(<) \
+							       --output_file $(@)")
+
+# Second pass of sufam_gt.R over the cohort summary (--option 5).
+sufam/mutation_summary_ft.maf : sufam/mutation_summary.maf
+	$(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \
+							       $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \
+							       --option 5 \
+							       --input_file $(<) \
+							       --output_file $(@)")
+
+# Record the R version at parse time.
+..DUMMY := $(shell mkdir -p version; \
+	     R --version > version/sufam_gt.txt)
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: sufam_gt
diff --git a/variant_callers/sufammultisample.mk b/variant_callers/sufammultisample.mk
deleted file mode 100644
index cb0157c8..00000000
--- a/variant_callers/sufammultisample.mk
+++ /dev/null
@@ -1,42 +0,0 @@
-include modules/Makefile.inc
-
-LOGDIR ?= log/sufam_multisample.$(NOW)
-PHONY += sufam summary
-
-sufam_multisample : $(foreach set,$(SAMPLE_SETS),sufam/$(set).tsv) summary/sufam_summary.xlsx
-
-ifeq ($(PDX),true)
-
-define combine-samples-pdx
-sufam/%.txt : summary/tsv/mutation_summary.tsv
-	$$(call RUN,-c -s 4G -m 6G,"$(RSCRIPT) modules/variant_callers/combinesamplesf.R --sample_set $$*")
-
-sufam/%.tsv : sufam/%.txt
-	$$(call RUN,-c -s 4G -m 6G,"$(RSCRIPT) modules/variant_callers/updatesamples.R --sample_set $$*")
-	
-endef
-$(foreach set,$(SAMPLE_SETS),\
-		$(eval $(call combine-samples-pdx,$(set))))
-		
-else 
-
-define combine-samples
-sufam/%.txt : summary/tsv/mutation_summary.tsv
-	$$(call RUN,-s 4G -m 6G,"$(RSCRIPT) modules/variant_callers/combinesamples.R --sample_set $$*")
-
-sufam/%.tsv : sufam/%.txt
-	$$(call RUN,-s 4G -m 6G,"$(RSCRIPT) modules/variant_callers/updatesamples.R --sample_set $$*")
-	
-endef
-$(foreach set,$(SAMPLE_SETS),\
-		$(eval $(call combine-samples,$(set))))
-		
-endif
-
-summary/sufam_summary.xlsx : $(wildcard $(foreach set,$(SAMPLE_SETS),sufam/$(set).tsv))
-	$(call RUN,-s 12G -m 16G,"export R_LIBS='~/share/usr/anaconda-envs/jrflab-modules-0.1.4/lib/R/library:~/share/usr/lib64/R/library' && \
-							  $(RSCRIPT) modules/summary/sufamsummary.R --sample_sets '$(SAMPLE_SETS)'")
-
-.DELETE_ON_ERROR:
-.SECONDARY:
-.PHONY: $(PHONY)
\ No newline at end of file
diff --git a/variant_callers/updatesamples.R b/variant_callers/updatesamples.R
deleted file mode 100644
index d9be2876..00000000
--- a/variant_callers/updatesamples.R
+++ /dev/null
@@ -1,190 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-suppressPackageStartupMessages(library("CNtu"))
-suppressPackageStartupMessages(library("readr"))
-
-if (!interactive()) {
-    options(warn = -1, error = quote({ traceback(); q('no', status = 1) }))
-}
-
-args_list <- list(make_option("--sample_set", default = NA, type = 'character', help = "sample names set"))
-				  
-parser <- OptionParser(usage = "%prog", option_list = args_list)
-arguments <- parse_args(parser, positional_arguments = T)
-opt <- arguments$options
-
-sample_names = unlist(strsplit(opt$sample_set, split="_", fixed=TRUE))
-
-vars = read_tsv(file=paste0("sufam/", opt$sample_set, ".txt"))
-col_names = colnames(vars)
-vars = as.data.frame(vars)
-colnames(vars) = col_names
-
-#====================================
-# sufam
-#====================================
-chr = vars$Chromosome
-pos = vars$Position
-id = rep(".", nrow(vars))
-ref = vars$Ref
-alt = vars$Alt
-qual = rep(100, nrow(vars))
-filter = rep("PASS", nrow(vars))
-info = rep(".", nrow(vars))
-vcf = cbind(chr, pos, id, ref, alt, qual, filter, info)
-colnames(vcf) = c("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO")
-write.table(vcf, file=paste0("sufam/", opt$sample_set, ".vcf"), sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
- 
-#====================================
-# dp and maf
-#====================================
-for (i in 1:length(sample_names)) {
- 	if (!file.exists(paste0("sufam/", sample_names[i], ".mat"))) {
- 		system(paste0("source ~/share/usr/anaconda/bin/activate ~/share/usr/anaconda-envs/sufam-dev && sufam ~/share/reference/GATK_bundle/2.3/human_g1k_v37.fa sufam/", opt$sample_set, ".vcf bam/", sample_names[i], ".bam > sufam/", sample_names[i], ".mat"))
- 	}
- 	tmp = read.csv(file=paste0("sufam/", sample_names[i], ".mat"), header=TRUE, sep="\t", stringsAsFactors=FALSE)
- 	index = paste0("DP_", sample_names[i])
- 	vars[,index] = tmp[,"cov"]
- 	index = paste0("MAF_", sample_names[i])
- 	vars[,index] = tmp[,"val_maf"]
-}
- 
-#====================================
-# qt and q2
-#====================================
-q_t = q_2 = NULL
-for (i in 1:length(sample_names)) {
-	file_names = dir(path="ascat/ascat", pattern=".RData", full.names=TRUE)
-	index = grep(sample_names[i], file_names, fixed=TRUE)
-	if (length(index)==1) {
-		load(file_names[index])
-		Chromosomes = tmp2$SNPpos[tmp3$seg[,"start"],1]
-		Chromosomes[Chromosomes==23] = "X"
-		Start = tmp2$SNPpos[tmp3$seg[,"start"],2]
-		End = tmp2$SNPpos[tmp3$seg[,"end"],2]
-		qt = tmp3$seg[,"nA"] + tmp3$seg[,"nB"]
-		q2 = apply(tmp3$seg[,c("nA","nB")], 1, max)
-		index = rep(NA, nrow(vars))
-		for (j in 1:nrow(vars)) {
-			indx = which(Chromosomes==vars[j,"Chromosome"] & Start<=vars[j,"Position"] & End>=vars[j,"Position"])
-			if (length(indx)!=0) {
-				index[j] = indx
-			} else {
-				index[j] = NA
-			}
-		}
-		q_t = cbind(q_t, qt[index])
-		q_2 = cbind(q_2, q2[index])
-	} else {
-		q_t = cbind(q_t, rep(2, nrow(vars)))
-		q_2 = cbind(q_2, rep(1, nrow(vars)))
-	}
-}
-q_t[is.na(q_t)] = 2
-q_2[is.na(q_2)] = 1
-colnames(q_t) = colnames(q_2) = sample_names
-colnames(q_t) = paste0("qt_", colnames(q_t))
-colnames(q_2) = paste0("q2_", colnames(q_2))
-vars = cbind(vars, q_t, q_2)
- 
-#====================================
-# loh
-#====================================
-for (i in 1:length(sample_names)) {
-	loh = rep(0, nrow(vars))
-	for (j in 1:nrow(vars)) {
-		if (q_t[j,i]==q_2[j,i]) {
-			loh[j] = 1
-		}
-	}
-	vars[,paste0("LOH_", sample_names[i])] = loh
-}
-
-#====================================
-# ccf
-#====================================
-cancer_cell_fraction = NULL
-ccf_95CI_low = NULL
-ccf_95CI_high = NULL
-pr_somatic_clonal = NULL
-ll = NULL
-sq = NULL
-clonal_status = NULL
-for (i in 1:length(sample_names)) {
-	file_names = dir(path="ascat/ascat", pattern=".RData", full.names=TRUE)
-	index = grep(sample_names[i], file_names, fixed=TRUE)
-	if (length(index)==1) {
-		load(file_names[index])
-		f_hat = vars[,paste0("MAF_", sample_names[i])]
-		n = vars[,paste0("DP_", sample_names[i])]
-		qt = vars[,paste0("qt_", sample_names[i])]
-		qt[qt>10] = 10
-		q2 = vars[,paste0("q2_", sample_names[i])]
-		q2[q2>10] = 10
-		alpha = seq(.1, to=.9, length=50)
-		alpha_hat = list()
-		indx = f_hat>.1
-		if (sum(indx)>5) {
-			for (j in 1:length(alpha)) {
-				alpha_hat[[j]] = cancercellFraction(f_hat[indx], n[indx], qt[indx], q2[indx], alpha[j], e=0.01)
-			}
-			LL = unlist(lapply(alpha_hat, function(x) {sum(x[,"LL"])}))
-			pdf(file=paste0("sufam/", sample_names[i], ".pdf"))
-			plot(alpha, LL, type="o", col="steelblue", axes=FALSE, frame.plot=FALSE, xlab="", ylab="")
-			axis(1, at = NULL, cex.axis = 1.5, padj = 0.25)
-    		axis(2, at = NULL, cex.axis = 1.5, las = 1)
-    		mtext(side = 1, text = expression(alpha), line = 4, cex = 1.5)
-    		mtext(side = 2, text = expression(Sigma~"LL"), line = 4, cex = 1.5)
-    		index = which.max(LL)
-    		title(main = paste0("alpha* = ", signif(alpha[index], 3)), cex.main = 1.5)
-    		box(lwd = 2)
-			dev.off()
-			index = which.max(LL)
-			alpha_hat = cancercellFraction(f_hat, n, qt, q2, ifelse((alpha[index]-.25)<=0, alpha[index], alpha[index]-.25), e=0.01)
-			cancer_cell_fraction = cbind(cancer_cell_fraction, alpha_hat[,"cancer_cell_frac"])
-			ccf_95CI_low = cbind(ccf_95CI_low, alpha_hat[,"ccf_95CI_low"])
-			ccf_95CI_high = cbind(ccf_95CI_high, alpha_hat[,"ccf_95CI_high"])
-			pr_somatic_clonal = cbind(pr_somatic_clonal, alpha_hat[,"Pr_somatic_clonal"])
-			ll = cbind(ll, alpha_hat[,"LL"])
-			sq = cbind(sq, alpha_hat[,"sq"])
-			clonal_estimate = rep("Subclonal", nrow(vars))
-			clonal_estimate[cancer_cell_fraction[,i]>.75 | pr_somatic_clonal[,i]>.5 | ccf_95CI_low[,i]>.9] = "Clonal"
-			clonal_status = cbind(clonal_status, clonal_estimate)
-		} else {
-			cancer_cell_fraction = cbind(cancer_cell_fraction, rep(NA, nrow(vars)))
-			ccf_95CI_low = cbind(ccf_95CI_low, rep(NA, nrow(vars)))
-			ccf_95CI_high = cbind(ccf_95CI_high, rep(NA, nrow(vars)))
-			pr_somatic_clonal = cbind(pr_somatic_clonal, rep(NA, nrow(vars)))
-			ll = cbind(ll, rep(NA, nrow(vars)))
-			sq = cbind(sq, rep(NA, nrow(vars)))
-			clonal_status = cbind(clonal_status, rep(NA, nrow(vars)))
-		}
-	} else {
-		cancer_cell_fraction = cbind(cancer_cell_fraction, rep(NA, nrow(vars)))
-		ccf_95CI_low = cbind(ccf_95CI_low, rep(NA, nrow(vars)))
-		ccf_95CI_high = cbind(ccf_95CI_high, rep(NA, nrow(vars)))
-		pr_somatic_clonal = cbind(pr_somatic_clonal, rep(NA, nrow(vars)))
-		ll = cbind(ll, rep(NA, nrow(vars)))
-		sq = cbind(sq, rep(NA, nrow(vars)))
-		clonal_status = cbind(clonal_status, rep(NA, nrow(vars)))
-	}
-}
-colnames(cancer_cell_fraction) = colnames(ccf_95CI_low) = colnames(ccf_95CI_high) = colnames(pr_somatic_clonal) = colnames(ll) = colnames(sq) = colnames(clonal_status) = sample_names
-colnames(cancer_cell_fraction) = paste0("CCF_", colnames(cancer_cell_fraction))
-colnames(ccf_95CI_low) = paste0("CCF_95CI_Low_", colnames(ccf_95CI_low))
-colnames(ccf_95CI_high) = paste0("CCF_95CI_High_", colnames(ccf_95CI_high))
-colnames(pr_somatic_clonal) = paste0("Pr_Somatic_Clonal_", colnames(pr_somatic_clonal))
-colnames(ll) = paste0("LL_", colnames(ll))
-colnames(sq) = paste0("sq_", colnames(sq))
-colnames(clonal_status) = paste0("Clonal_Status_", colnames(clonal_status))
-
-vars = cbind(vars, cancer_cell_fraction,
-				   ccf_95CI_low,
-				   ccf_95CI_high,
-				   pr_somatic_clonal,
-				   ll,
-				   sq,
-				   clonal_status)
-
-write.table(vars, file=paste0("sufam/", opt$sample_set, ".tsv"), col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE)
diff --git a/vcf_tools/annotateSummaryVcf.mk b/vcf_tools/annotateSummaryVcf.mk
new file mode 100644
index 00000000..58233a61
--- /dev/null
+++ b/vcf_tools/annotateSummaryVcf.mk
@@ -0,0 +1,51 @@
+# Annotate the cohort mutation summary: convert the summary table to a VCF,
+# run vcf2maf on it, then join the MAF back onto the summary table.
+include modules/Makefile.inc
+include modules/genome_inc/b37.inc
+
+LOGDIR ?= log/annotate_smry_maf.$(NOW)
+
+# Top-level goal: the three outputs of the steps below.
+annotate_smry_maf : vcf2maf/mutation_summary.vcf \
+		    vcf2maf/mutation_summary.maf \
+		    vcf2maf/mutation_summary.txt
+
+# Step 1 (annotateSummaryVcf.R --option 1): summary table -> VCF.
+vcf2maf/mutation_summary.vcf : summary/tsv/mutation_summary.tsv
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/annotateSummaryVcf.R --option 1 --input $(<) --output $(@)")
+
+# Step 2: $(VCF2MAF) on the VCF with 12 VEP forks, then remove the
+# intermediate mutation_summary.vep.vcf from $(TMPDIR).
+vcf2maf/mutation_summary.maf : vcf2maf/mutation_summary.vcf
+	$(call RUN, -c -n 12 -s 2G -m 3G -v $(VEP_ENV) -w 72:00:00,"set -o pipefail && \
+									$(VCF2MAF) \
+									--input-vcf $(<) \
+									--output-maf $(@) \
+									--tmp-dir $(TMPDIR) \
+									--tumor-id NA \
+									--normal-id NA \
+									--vep-path $(VEP_ENV)/bin \
+									--vep-data $(HOME)/share/reference/vep/v86/ \
+									--vep-forks 12 \
+									--ref-fasta $(HOME)/share/reference/vep/v86/homo_sapiens/86_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa \
+									--filter-vcf $(HOME)/share/reference/vep/v86/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz \
+									--species homo_sapiens \
+									--ncbi-build GRCh37 \
+									--maf-center MSKCC && \
+									$(RM) $(TMPDIR)/mutation_summary.vep.vcf")
+
+# Step 3 (annotateSummaryVcf.R --option 2): summary table + MAF -> text table.
+# NOTE(review): $(<<) is not a built-in automatic variable; it is presumably
+# defined by modules/Makefile.inc as the second prerequisite - confirm.
+vcf2maf/mutation_summary.txt : summary/tsv/mutation_summary.tsv vcf2maf/mutation_summary.maf
+	$(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \
+					  $(RSCRIPT) $(SCRIPTS_DIR)/annotateSummaryVcf.R --option 2 --input $(<) --maf $(<<) --output $(@)")
+
+# Record the vcf2maf manual as a version stamp at parse time. '>' replaces the
+# previous stamp; appending would grow the file on every make invocation.
+..DUMMY := $(shell mkdir -p version; \
+	     source $(VCF2MAF_ENV)/bin/activate $(VCF2MAF_ENV) && $(VCF2MAF) --man > version/annotate_smry_maf.txt)
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: annotate_smry_maf
diff --git a/vcf_tools/annotate_sv.mk b/vcf_tools/annotate_sv.mk
new file mode 100644
index 00000000..78d340d7
--- /dev/null
+++ b/vcf_tools/annotate_sv.mk
@@ -0,0 +1,52 @@
+# Annotate structural-variant VCFs: for every sample pair and SV caller, run
+# $(ANNOTATE_SV) to produce a .tsv and $(VCF2MAF) to produce a .maf.
+include modules/Makefile.inc
+
+LOGDIR ?= log/annotate_sv.$(NOW)
+
+SV_CALLERS = svaba manta gridss merged
+ANNOTATE_SV ?= $(HOME)/share/usr/env/annot_sv-3.1.3/opt/AnnotSV/bin/AnnotSV
+
+# Top-level goal: the .tsv and .maf outputs for every pair/caller combination.
+annotate_sv :  $(foreach pair,$(SAMPLE_PAIRS), \
+			$(foreach caller,$(SV_CALLERS),annotate_sv/$(pair)/$(pair).$(caller)_sv.tsv)) \
+	       $(foreach pair,$(SAMPLE_PAIRS), \
+			$(foreach caller,$(SV_CALLERS),annotate_sv/$(pair)/$(pair).$(caller)_sv.maf))
+
+# annotate-sv: rule template instantiated once per pair/caller combination.
+#   $1 - sample pair name (output directory and file prefix)
+#   $2 - SV caller name
+# Emits three rules: the annotation run into a per-caller subdirectory, a copy
+# of that table up into the pair directory, and the MAF conversion of the VCF.
+define annotate-sv
+annotate_sv/$1/$2/$1.$2_sv.tsv : vcf/$1.$2_sv.vcf
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(ANNOTATE_SV_ENV),"set -o pipefail && \
+							       mkdir -p annotate_sv/$1/$2 && \
+							       $$(ANNOTATE_SV) \
+							       -SVinputFile $$(<) \
+							       -outputFile ./annotate_sv/$1/$2/$1.$2_sv.tsv \
+							       -genomeBuild GRCh37")
+
+annotate_sv/$1/$1.$2_sv.tsv : annotate_sv/$1/$2/$1.$2_sv.tsv
+	$$(INIT) cat $$(<) > $$(@)
+
+annotate_sv/$1/$1.$2_sv.maf : vcf/$1.$2_sv.vcf
+	$$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \
+							$$(VCF2MAF) \
+							--input-vcf $$(<) \
+							--tumor-id $1 \
+							--filter-vcf $$(EXAC_NONTCGA) \
+							--ref-fasta $$(REF_FASTA) \
+							--vep-path $$(VEP_PATH) \
+							--vep-data $$(VEP_DATA) \
+							--tmp-dir `mktemp -d` \
+							--output-maf $$(@)")
+
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+	$(foreach caller,$(SV_CALLERS), \
+		$(eval $(call annotate-sv,$(pair),$(caller)))))
+
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: annotate_sv
diff --git a/vcf_tools/cravat_annotation.mk b/vcf_tools/cravat_annotation.mk
index 9181800f..d8ae4961 100644
--- a/vcf_tools/cravat_annotation.mk
+++ b/vcf_tools/cravat_annotation.mk
@@ -1,33 +1,57 @@
 include modules/Makefile.inc
 
 LOGDIR ?= log/cravat_annotate.$(NOW)
-PHONY += cravat
 
-cravat_annotate : $(foreach sample,$(SAMPLES),cravat/$(sample).vcf cravat/$(sample).maf cravat/$(sample).cravat.vcf cravat/$(sample).tsv cravat/$(sample).txt)
-
-DEFAULT_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.6
-CRAVAT_ENV = $(HOME)/share/usr/anaconda-envs/open-cravat
+cravat_annotate : $(foreach sample,$(SAMPLES),cravat/$(sample).vcf) \
+		  $(foreach sample,$(SAMPLES),cravat/$(sample).maf) \
+		  $(foreach sample,$(SAMPLES),cravat/$(sample).cravat.vcf) \
+		  $(foreach sample,$(SAMPLES),cravat/$(sample).tsv) \
+		  $(foreach sample,$(SAMPLES),cravat/$(sample).txt)
 
 define cravat-annotation
-cravat/%.vcf : vcf_ann/%.gatk_snps.vcf vcf_ann/%.gatk_indels.vcf
-	$$(call RUN,-c -s 9G -m 12G -w 7200,"$(RSCRIPT) modules/vcf_tools/combine_vcf.R --sample_name $$(*)")
+# Explicit per-sample rule: there is no pattern stem here, so the sample name comes from the template argument.
+cravat/$1.vcf : vcf_ann/$1.gatk_snps.vcf vcf_ann/$1.gatk_indels.vcf
+	$$(call RUN,-c -s 9G -m 12G -w 24:00:00,"set -o pipefail && \
+						 $(RSCRIPT) modules/vcf_tools/combine_vcf.R --sample_name $1")
 	
-cravat/%.maf : cravat/%.vcf
-	$$(call RUN,-s 9G -m 12G -v $$(VEP_ENV) -w 7200,"$$(VCF2MAF) --input-vcf $$< --tumor-id $$(*) $$(if $$(EXAC_NONTCGA),--filter-vcf $$(EXAC_NONTCGA)) --ref-fasta $$(REF_FASTA) --vep-path $$(VEP_PATH) --vep-data $$(VEP_DATA) --tmp-dir `mktemp -d` --output-maf $$@")
+cravat/$1.maf : cravat/$1.vcf
+	$$(call RUN,-c -s 9G -m 12G -v $(VEP_ENV) -w 24:00:00,"set -o pipefail && \
+							       $$(VCF2MAF) \
+							       --input-vcf $$(<) \
+							       --tumor-id $1 \
+							       $$(if $$(EXAC_NONTCGA),--filter-vcf $$(EXAC_NONTCGA)) \
+							       --ref-fasta $$(REF_FASTA) \
+							       --vep-path $$(VEP_PATH) \
+							       --vep-data $$(VEP_DATA) \
+							       --tmp-dir `mktemp -d` \
+							       --output-maf $$(@)")
 
-cravat/%.cravat.vcf : cravat/%.vcf cravat/%.maf
-	$$(call RUN,-c -s 9G -m 12G -w 7200,"$(RSCRIPT) modules/vcf_tools/filter_vcf.R --sample_name $$(*)")
+cravat/$1.cravat.vcf : cravat/$1.vcf cravat/$1.maf
+	$$(call RUN,-c -s 9G -m 12G -w 24:00:00,"set -o pipefail && \
+						 $(RSCRIPT) modules/vcf_tools/filter_vcf.R \
+						 --sample_name $1")
 
-cravat/%.tsv: cravat/%.cravat.vcf
-	$$(call RUN,-c -s 9G -m 12G -v $$(DEFAULT_ENV) -w 7200,"source activate $$(CRAVAT_ENV) && \
-												    		cravat cravat/$$(*).cravat.vcf -n $$(*) -d cravat -a clinvar cosmic dbsnp gnomad hgvs -v -l hg19 -t text")
+cravat/$1.tsv: cravat/$1.cravat.vcf
+	$$(call RUN,-c -s 9G -m 12G -v $(CRAVAT_ENV) -w 24:00:00,"set -o pipefail && \
+								  cravat $$(<) \
+								  -n $1 \
+								  -d cravat \
+								  -a clinvar cosmic dbsnp gnomad hgvs \
+								  -v \
+								  -l hg19 \
+								  -t text")
 												    
-cravat/%.txt : cravat/%.tsv
-	$$(call RUN,-c -s 9G -m 12G -w 7200,"$(RSCRIPT) modules/vcf_tools/summary_vcf.R --sample_name $$(*)")
+cravat/$1.txt : cravat/$1.tsv
+	$$(call RUN,-c -s 9G -m 12G -w 24:00:00,"set -o pipefail && \
+						 $(RSCRIPT) modules/vcf_tools/summary_vcf.R \
+						 --sample_name $1")
 	
 endef
  $(foreach sample,$(SAMPLES),\
 		$(eval $(call cravat-annotation,$(sample))))
 		
-.PHONY: $(PHONY)
-
+..DUMMY := $(shell mkdir -p version; \
+             echo "cravat" > version/cravat_annotate.txt;)
+.SECONDARY:
+.DELETE_ON_ERROR:
+.PHONY : cravat_annotate
diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk
new file mode 100644
index 00000000..1c301488
--- /dev/null
+++ b/vcf_tools/merge_sv.mk
@@ -0,0 +1,50 @@
+# Merge the per-caller SV VCFs of each tumor/normal pair with SURVIVOR, filter
+# the merged calls, and publish the result as vcf/<pair>.merged_sv.vcf.
+include modules/Makefile.inc
+
+LOGDIR ?= log/merge_sv.$(NOW)
+
+SV_CALLERS = svaba gridss manta
+# Positional arguments passed to "SURVIVOR merge" in the recipe below.
+# NOTE(review): exact meaning of each value is not shown here - confirm against the SURVIVOR merge usage.
+MAX_DIST = 500
+NUM_CALLERS = 2
+TYPE = 0
+STRAND = 0
+MIN_SIZE = 30
+
+merge_sv :  $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/samples.txt) \
+	    $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/$(pair).merged_sv.vcf) \
+	    $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/$(pair).merged_sv_ft.vcf) \
+	    $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_sv.vcf)
+
+# merge-sv: rule template instantiated once per sample pair.
+#   $1 - tumor sample name
+#   $2 - normal sample name
+# samples.txt lists one caller VCF path per line. It is written in a single
+# truncating redirect so that a rebuild never accumulates duplicate entries.
+define merge-sv
+merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf)
+	mkdir -p merge_sv/$1_$2 && \
+	printf '%s\n' $$(^) > $$(@)
+
+merge_sv/$1_$2/$1_$2.merged_sv.vcf : merge_sv/$1_$2/samples.txt
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \
+							    SURVIVOR merge $$(<) \
+							    $(MAX_DIST) $(NUM_CALLERS) $(TYPE) $(STRAND) 0 $(MIN_SIZE) $$(@)")
+
+merge_sv/$1_$2/$1_$2.merged_sv_ft.vcf : merge_sv/$1_$2/$1_$2.merged_sv.vcf
+	$$(call RUN,-c -n 1 -s 4G -m 8G -v $(INNOVATION_ENV),"set -o pipefail && \
+							      grep '##' $$(<) > $$(@) && \
+							      $$(RSCRIPT) modules/scripts/filter_sv.R --input_file $$(<) --output_file $$(@)")
+
+vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/$1_$2.merged_sv_ft.vcf
+	$$(INIT) cat $$(<) > $$(@)
+
+endef
+$(foreach pair,$(SAMPLE_PAIRS),\
+		$(eval $(call merge-sv,$(tumor.$(pair)),$(normal.$(pair)))))
+
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: merge_sv
diff --git a/vcf_tools/vcftools.mk b/vcf_tools/vcftools.mk
index 18656fc6..0bda8c32 100644
--- a/vcf_tools/vcftools.mk
+++ b/vcf_tools/vcftools.mk
@@ -1,6 +1,3 @@
-# vim: set ft=make :
-# sub module containing vcf related tools
-
 ifndef VCFTOOLS_MK
 
 include modules/Makefile.inc