renamed some files

NYU-Molecular-Pathology · Sep 2, 2016 · 325f4d3 · 325f4d3
1 parent a62a074
commit 325f4d3
Show file tree

Hide file tree

Showing 24 changed files with 423 additions and 64 deletions.
diff --git a/routes/atac.sh b/routes/atac.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+
+##
+## ATAC-seq using Bowtie 2
+## usage: atac.sh project_dir sample_name
+##
+
+# script filename and route name (the filename without its .sh suffix)
+script_name=$(basename "${BASH_SOURCE[0]}")
+route_name=${script_name/%.sh/}
+echo -e "\n ========== ROUTE: $route_name ========== \n" >&2
+
+# exactly two arguments are required; otherwise print usage and exit 1
+if [ "$#" -ne 2 ] ; then
+	echo -e "\n $script_name ERROR: WRONG NUMBER OF ARGUMENTS SUPPLIED \n" >&2
+	echo -e "\n USAGE: $script_name project_dir sample_name \n" >&2
+	exit 1
+fi
+
+# standard route arguments (project directory is resolved to an absolute path)
+proj_dir=$(readlink -f "$1")
+sample=$2
+
+# additional settings (NSLOTS comes from the environment; presumably set by the grid engine - confirm)
+threads=$NSLOTS
+code_dir=$(dirname "$(dirname "${BASH_SOURCE[0]}")")
+qsub_dir="${proj_dir}/logs-qsub"
+
+# display settings
+echo " * proj_dir: $proj_dir "
+echo " * sample: $sample "
+echo " * code_dir: $code_dir "
+echo " * qsub_dir: $qsub_dir "
+echo " * threads: $threads "
+
+
+#########################
+
+
+# remove qsub .po files from the log directory (rm -f deletes every match, empty or not)
+rm -f "${qsub_dir}"/sns.*.po*
+
+
+#########################
+
+
+# segments (each one is skipped when its samples.<segment>.csv already lists this sample)
+
+# rename and/or merge raw input FASTQs
+segment_fastq_clean="fastq-clean"
+fastq_R1=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 2)
+fastq_R2=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 3)
+if [ -z "$fastq_R1" ] ; then
+	bash_cmd="bash ${code_dir}/segments/${segment_fastq_clean}.sh $proj_dir $sample"
+	($bash_cmd)
+	fastq_R1=$(grep -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 2)
+	fastq_R2=$(grep -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 3)
+fi
+
+# run alignment
+segment_align="align-bowtie2-atac"
+bam_bt2=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_align}.csv" | cut -d ',' -f 2)
+if [ -z "$bam_bt2" ] ; then
+	bash_cmd="bash ${code_dir}/segments/${segment_align}.sh $proj_dir $sample $threads $fastq_R1 $fastq_R2"
+	($bash_cmd)
+	bam_bt2=$(grep -m 1 "^${sample}," "${proj_dir}/samples.${segment_align}.csv" | cut -d ',' -f 2)
+fi
+
+# remove duplicates
+segment_dedup="bam-dedup-sambamba"
+bam_dd=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_dedup}.csv" | cut -d ',' -f 2)
+if [ -z "$bam_dd" ] ; then
+	bash_cmd="bash ${code_dir}/segments/${segment_dedup}.sh $proj_dir $sample $threads $bam_bt2"
+	($bash_cmd)
+	bam_dd=$(grep -m 1 "^${sample}," "${proj_dir}/samples.${segment_dedup}.csv" | cut -d ',' -f 2)
+fi
+
+
+#########################
+
+
+# combine the summary tables of all segments into one CSV (separator ",", missing fields "X")
+# NOTE(review): the reason for the 30 s pause is not stated; presumably it lets summary files settle - confirm
+sleep 30
+# the combined summary is named after this route (atac)
+summary_csv="${proj_dir}/summary-combined.atac.csv"
+
+bash_cmd="
+bash ${code_dir}/scripts/join-many.sh , X \
+${proj_dir}/summary.${segment_fastq_clean}.csv \
+${proj_dir}/summary.${segment_align}.csv \
+${proj_dir}/summary.${segment_dedup}.csv \
+> $summary_csv
+"
+(eval $bash_cmd)
+
+
+#########################
+
+
+# remove qsub .po files again now that the segments have run
+rm -f "${qsub_dir}"/sns.*.po*
+
+
+#########################
+
+
+date
+
+
+
+# end
diff --git a/routes/rna-rsem.sh b/routes/rna-rsem.sh
@@ -48,7 +48,7 @@ rm -f ${qsub_dir}/sns.*.po*
 # segments
 
 # rename and/or merge raw input FASTQs
-segment_fastq_clean="fastq-fastq-clean"
+segment_fastq_clean="fastq-clean"
 fastq_R1=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 2)
 fastq_R2=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 3)
 if [ -z "$fastq_R1" ] ; then
@@ -59,37 +59,18 @@ if [ -z "$fastq_R1" ] ; then
 fi
 
 # fastq_screen
-bash_cmd="bash ${code_dir}/segments/fastq-qc-fastqscreen.sh $proj_dir $sample $fastq_R1"
+bash_cmd="bash ${code_dir}/segments/qc-fastqscreen.sh $proj_dir $sample $fastq_R1"
 ($bash_cmd)
 
 # RSEM
-segment_quant="fastq-quant-rsem"
+segment_quant="quant-rsem"
 bash_cmd="bash ${code_dir}/segments/${segment_quant}.sh $proj_dir $sample $threads unstr $fastq_R1 $fastq_R2"
 ($bash_cmd)
 
 
 #########################
 
 
-# combine summary from each step
-
-sleep 30
-
-summary_csv="${proj_dir}/summary-combined.rna-rsem.csv"
-
-bash_cmd="
-bash ${code_dir}/scripts/join-many.sh , X \
-${proj_dir}/summary.${segment_fastq_clean}.csv \
-${proj_dir}/summary.x.csv \
-${proj_dir}/summary.x.csv \
-> $summary_csv
-"
-(eval $bash_cmd)
-
-
-#########################
-
-
 # delete empty qsub .po files
 rm -f ${qsub_dir}/sns.*.po*
 

diff --git a/routes/rna-star.sh b/routes/rna-star.sh
@@ -48,7 +48,7 @@ rm -f ${qsub_dir}/sns.*.po*
 # segments
 
 # rename and/or merge raw input FASTQs
-segment_fastq_clean="fastq-fastq-clean"
+segment_fastq_clean="fastq-clean"
 fastq_R1=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 2)
 fastq_R2=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 3)
 if [ -z "$fastq_R1" ] ; then
@@ -59,11 +59,11 @@ if [ -z "$fastq_R1" ] ; then
 fi
 
 # fastq_screen
-bash_cmd="bash ${code_dir}/segments/fastq-qc-fastqscreen.sh $proj_dir $sample $fastq_R1"
+bash_cmd="bash ${code_dir}/segments/qc-fastqscreen.sh $proj_dir $sample $fastq_R1"
 ($bash_cmd)
 
 # run STAR
-segment_align="fastq-bam-star"
+segment_align="align-star"
 bam_star=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.bam-star.csv" | cut -d ',' -f 2)
 if [ -z "$bam_star" ] ; then
 	bash_cmd="bash ${code_dir}/segments/${segment_align}.sh $proj_dir $sample $threads $fastq_R1 $fastq_R2"
@@ -72,19 +72,19 @@ if [ -z "$bam_star" ] ; then
 fi
 
 # generate BigWig (deeptools)
-segment_bigwig_deeptools="bam-bigwig-deeptools"
+segment_bigwig_deeptools="bigwig-deeptools"
 bash_cmd="bash ${code_dir}/segments/${segment_bigwig_deeptools}.sh $proj_dir $sample 4 $bam_star"
 qsub_cmd="qsub -N sns.${segment_bigwig_deeptools}.${sample} -M ${USER}@nyumc.org -m a -j y -cwd -pe threaded 4 -b y ${bash_cmd}"
 $qsub_cmd
 
 # generate BigWig (bedtools)
-segment_bigwig_bedtools="bam-bigwig-bedtools"
+segment_bigwig_bedtools="bigwig-bedtools"
 bash_cmd="bash ${code_dir}/segments/${segment_bigwig_bedtools}.sh $proj_dir $sample $bam_star"
 qsub_cmd="qsub -N sns.${segment_bigwig_bedtools}.${sample} -M ${USER}@nyumc.org -m a -j y -cwd -b y ${bash_cmd}"
 $qsub_cmd
 
 # Picard CollectRnaSeqMetrics
-segment_qc_picard="bam-qc-picard-rnaseqmetrics"
+segment_qc_picard="qc-picard-rnaseqmetrics"
 bash_cmd="bash ${code_dir}/segments/${segment_qc_picard}.sh $proj_dir $sample $bam_star"
 ($bash_cmd)
 
@@ -99,7 +99,7 @@ fi
 exp_strand=$(bash ${code_dir}/scripts/get-set-setting.sh "${proj_dir}/settings.txt" EXP-STRAND);
 
 # generate counts
-segment_quant="bam-quant-featurecounts"
+segment_quant="quant-featurecounts"
 bash_cmd="bash ${code_dir}/segments/${segment_quant}.sh $proj_dir $sample $threads $bam_star $run_type $exp_strand"
 ($bash_cmd)
 
@@ -140,7 +140,7 @@ samples_groups_csv="${proj_dir}/samples.groups.csv"
 
 if [ ! -s "$samples_groups_csv" ] ; then
 	echo "#SAMPLE,group" > $samples_groups_csv
-	sed 's/\,.*/,NA/g' samples.fastq-raw.csv | LC_ALL=C sort -u >> $samples_groups_csv
+	sed 's/\,.*/,NA/g' ${proj_dir}/samples.fastq-raw.csv | LC_ALL=C sort -u >> $samples_groups_csv
 fi
 
 

diff --git a/routes/rrbs.sh b/routes/rrbs.sh
@@ -48,41 +48,45 @@ rm -f ${qsub_dir}/sns.*.po*
 # segments
 
 # rename and/or merge raw input FASTQs
-fastq_R1=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.fastq-clean.csv" | cut -d ',' -f 2)
-fastq_R2=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.fastq-clean.csv" | cut -d ',' -f 3)
+segment_fastq_clean="fastq-clean"
+fastq_R1=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 2)
+fastq_R2=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 3)
 if [ -z "$fastq_R1" ] ; then
-	bash_cmd="bash ${code_dir}/segments/fastq-fastq-clean.sh $proj_dir $sample"
+	bash_cmd="bash ${code_dir}/segments/${segment_fastq_clean}.sh $proj_dir $sample"
 	($bash_cmd)
-	fastq_R1=$(grep -m 1 "^${sample}," "${proj_dir}/samples.fastq-clean.csv" | cut -d ',' -f 2)
-	fastq_R2=$(grep -m 1 "^${sample}," "${proj_dir}/samples.fastq-clean.csv" | cut -d ',' -f 3)
+	fastq_R1=$(grep -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 2)
+	fastq_R2=$(grep -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 3)
 fi
 
 # trim FASTQs with Trim Galore
-fastq_R1_trimmed=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.fastq-trim.csv" | cut -d ',' -f 2)
-fastq_R2_trimmed=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.fastq-trim.csv" | cut -d ',' -f 3)
+segment_fastq_trim="fastq-trim-trimgalore"
+fastq_R1_trimmed=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_trim}.csv" | cut -d ',' -f 2)
+fastq_R2_trimmed=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_trim}.csv" | cut -d ',' -f 3)
 if [ -z "$fastq_R1_trimmed" ] ; then
-	bash_cmd="bash ${code_dir}/segments/fastq-fastq-trim-trimgalore.sh $proj_dir $sample rrbs $fastq_R1 $fastq_R2"
+	bash_cmd="bash ${code_dir}/segments/${segment_fastq_trim}.sh $proj_dir $sample rrbs $fastq_R1 $fastq_R2"
 	($bash_cmd)
-	fastq_R1_trimmed=$(grep -m 1 "^${sample}," "${proj_dir}/samples.fastq-trim.csv" | cut -d ',' -f 2)
-	fastq_R2_trimmed=$(grep -m 1 "^${sample}," "${proj_dir}/samples.fastq-trim.csv" | cut -d ',' -f 3)
+	fastq_R1_trimmed=$(grep -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_trim}.csv" | cut -d ',' -f 2)
+	fastq_R2_trimmed=$(grep -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_trim}.csv" | cut -d ',' -f 3)
 fi
 
 # run Bismark alignment
-bam_bismark=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.bam-bismark.csv" | cut -d ',' -f 2)
+segment_align="align-bismark"
+bam_bismark=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_align}.csv" | cut -d ',' -f 2)
 if [ -z "$bam_bismark" ] ; then
-	bash_cmd="bash ${code_dir}/segments/fastq-bam-bismark.sh $proj_dir $sample $threads $fastq_R1_trimmed $fastq_R2_trimmed"
+	bash_cmd="bash ${code_dir}/segments/${segment_align}.sh $proj_dir $sample $threads $fastq_R1_trimmed $fastq_R2_trimmed"
 	($bash_cmd)
-	bam_bismark=$(grep -m 1 "^${sample}," "${proj_dir}/samples.bam-bismark.csv" | cut -d ',' -f 2)
+	bam_bismark=$(grep -m 1 "^${sample}," "${proj_dir}/samples.${segment_align}.csv" | cut -d ',' -f 2)
 fi
 
 # run Bismark methylation extractor
+segment_meth="meth-bismark"
 if [ -n "$fastq_R2" ] ; then
 	#
 	echo "pe"
 else
-	bash_cmd="bash ${code_dir}/segments/bam-meth-bismark.sh $proj_dir $sample $threads $bam_bismark se"
+	bash_cmd="bash ${code_dir}/segments/${segment_meth}.sh $proj_dir $sample $threads $bam_bismark se"
 	($bash_cmd)
-	bash_cmd="bash ${code_dir}/segments/bam-meth-bismark.sh $proj_dir $sample $threads $bam_bismark se-ignore-r1-3"
+	bash_cmd="bash ${code_dir}/segments/${segment_meth}.sh $proj_dir $sample $threads $bam_bismark se-ignore-r1-3"
 	($bash_cmd)
 fi
 

diff --git a/routes/wes.sh b/routes/wes.sh
@@ -48,7 +48,7 @@ rm -f ${qsub_dir}/sns.*.po*
 # segments
 
 # rename and/or merge raw input FASTQs
-segment_fastq_clean="fastq-fastq-clean"
+segment_fastq_clean="fastq-clean"
 fastq_R1=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 2)
 fastq_R2=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 3)
 if [ -z "$fastq_R1" ] ; then
@@ -59,7 +59,7 @@ if [ -z "$fastq_R1" ] ; then
 fi
 
 # trim FASTQs with Trimmomatic
-segment_fastq_trim="fastq-fastq-trim-trimmomatic"
+segment_fastq_trim="fastq-trim-trimmomatic"
 fastq_R1_trimmed=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_trim}.csv" | cut -d ',' -f 2)
 fastq_R2_trimmed=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_trim}.csv" | cut -d ',' -f 3)
 if [ -z "$fastq_R1_trimmed" ] ; then
@@ -70,7 +70,7 @@ if [ -z "$fastq_R1_trimmed" ] ; then
 fi
 
 # run BWA-MEM alignment
-segment_align="fastq-bam-bwa-mem"
+segment_align="align-bwa-mem"
 bam_bwa=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_align}.csv" | cut -d ',' -f 2)
 if [ -z "$bam_bwa" ] ; then
 	bash_cmd="bash ${code_dir}/segments/${segment_align}.sh $proj_dir $sample $threads $fastq_R1_trimmed $fastq_R2_trimmed"
@@ -79,7 +79,7 @@ if [ -z "$bam_bwa" ] ; then
 fi
 
 # remove duplicates
-segment_dedup="bam-bam-dd-sambamba"
+segment_dedup="bam-dedup-sambamba"
 bam_dd=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_dedup}.csv" | cut -d ',' -f 2)
 if [ -z "$bam_dd" ] ; then
 	bash_cmd="bash ${code_dir}/segments/${segment_dedup}.sh $proj_dir $sample $threads $bam_bwa"
@@ -88,7 +88,7 @@ if [ -z "$bam_dd" ] ; then
 fi
 
 # realign and recalibrate
-segment_gatk="bam-bam-ra-rc-gatk"
+segment_gatk="bam-ra-rc-gatk"
 bam_gatk=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_gatk}.csv" | cut -d ',' -f 2)
 if [ -z "$bam_gatk" ] ; then
 	bash_cmd="bash ${code_dir}/segments/${segment_gatk}.sh $proj_dir $sample $threads $bam_dd"

diff --git a/routes/wgbs.sh b/routes/wgbs.sh
@@ -48,7 +48,7 @@ rm -f ${qsub_dir}/sns.*.po*
 # segments
 
 # rename and/or merge raw input FASTQs
-segment_fastq_clean="fastq-fastq-clean"
+segment_fastq_clean="fastq-clean"
 fastq_R1=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 2)
 fastq_R2=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_clean}.csv" | cut -d ',' -f 3)
 if [ -z "$fastq_R1" ] ; then
@@ -59,7 +59,7 @@ if [ -z "$fastq_R1" ] ; then
 fi
 
 # trim FASTQs with Trimmomatic
-segment_fastq_trim="fastq-fastq-trim-trimmomatic"
+segment_fastq_trim="fastq-trim-trimmomatic"
 fastq_R1_trimmed=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_trim}.csv" | cut -d ',' -f 2)
 fastq_R2_trimmed=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_fastq_trim}.csv" | cut -d ',' -f 3)
 if [ -z "$fastq_R1_trimmed" ] ; then
@@ -70,7 +70,7 @@ if [ -z "$fastq_R1_trimmed" ] ; then
 fi
 
 # run Bismark alignment
-segment_align="fastq-bam-bismark"
+segment_align="align-bismark"
 bam_bismark=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_align}.csv" | cut -d ',' -f 2)
 if [ -z "$bam_bismark" ] ; then
 	bash_cmd="bash ${code_dir}/segments/${segment_align}.sh $proj_dir $sample $threads $fastq_R1_trimmed $fastq_R2_trimmed"
@@ -79,7 +79,7 @@ if [ -z "$bam_bismark" ] ; then
 fi
 
 # run Bismark dedup
-segment_dedup="bam-bam-dd-bismark"
+segment_dedup="bam-dedup-bismark"
 bam_dd_bismark=$(grep -s -m 1 "^${sample}," "${proj_dir}/samples.${segment_dedup}.csv" | cut -d ',' -f 2)
 if [ -z "$bam_dd_bismark" ] ; then
 	bash_cmd="bash ${code_dir}/segments/${segment_dedup}.sh $proj_dir $sample $bam_bismark pe"
@@ -90,7 +90,7 @@ fi
 bam_bismark="$bam_dd_bismark"
 
 # run Bismark methylation extractor
-segment_meth="bam-meth-bismark"
+segment_meth="meth-bismark"
 if [ -n "$fastq_R2" ] ; then
 	bash_cmd="bash ${code_dir}/segments/${segment_meth}.sh $proj_dir $sample $threads $bam_bismark pe"
 	($bash_cmd)

diff --git a/scripts/join-many.sh b/scripts/join-many.sh
@@ -13,7 +13,7 @@ script_name=$(basename "${BASH_SOURCE[0]}")
 # check for correct number of arguments
 if [ $# -lt 3 ] ; then
 	echo -e "\n $script_name ERROR: WRONG NUMBER OF ARGUMENTS SUPPLIED \n" >&2
-	echo -e "\n USAGE: $script_name field_separator missing_field_char in1.txt in2.txt in3.txt ... > merged.txt \n" >&2
+	echo -e "\n USAGE: $script_name field_separator missing_field_char in1.txt [in2.txt in3.txt ...] > merged.txt \n" >&2
 	exit 1
 fi
 

diff --git a/segments/fastq-bam-bismark.sh → segments/align-bismark.sh b/segments/fastq-bam-bismark.sh → segments/align-bismark.sh