From 52f9117814b24e907d6cc2dbf3057aaee0ade465 Mon Sep 17 00:00:00 2001 From: igor Date: Thu, 8 Sep 2016 15:09:04 -0400 Subject: [PATCH] minor adjustments --- .gitignore | 1 + routes/atac.sh | 21 +++- routes/comp-rna-star-dge.sh | 2 +- routes/rna-star.sh | 2 +- routes/wes.sh | 2 +- scripts/fragment-sizes.R | 80 ++++++++++++ segments/align-bowtie2-atac.sh | 9 +- segments/bam-dedup-sambamba.sh | 5 +- segments/bigwig-deeptools.sh | 50 ++++---- segments/peaks-macs-atac.sh | 181 ++++++++++++++++++++++++++++ segments/qc-fragment-sizes.sh | 142 ++++++++++++++++++++++ segments/qc-picard-rnaseqmetrics.sh | 33 ++--- 12 files changed, 467 insertions(+), 61 deletions(-) create mode 100755 scripts/fragment-sizes.R create mode 100755 segments/peaks-macs-atac.sh create mode 100755 segments/qc-fragment-sizes.sh diff --git a/.gitignore b/.gitignore index cc82a48..1f6548f 100755 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Created by https://www.gitignore.io/ + ### OSX ### *.DS_Store .AppleDouble diff --git a/routes/atac.sh b/routes/atac.sh index 5af2d5c..382985a 100755 --- a/routes/atac.sh +++ b/routes/atac.sh @@ -76,6 +76,24 @@ if [ -z "$bam_dd" ] ; then bam_dd=$(grep -m 1 "^${sample}," "${proj_dir}/samples.${segment_dedup}.csv" | cut -d ',' -f 2) fi +# generate BigWig (deeptools) +segment_bigwig_deeptools="bigwig-deeptools" +bash_cmd="bash ${code_dir}/segments/${segment_bigwig_deeptools}.sh $proj_dir $sample 4 $bam_dd" +qsub_cmd="qsub -N sns.${segment_bigwig_deeptools}.${sample} -M ${USER}@nyumc.org -m a -j y -cwd -pe threaded 4 -b y ${bash_cmd}" +$qsub_cmd + +# fragment size distribution +segment_qc_frag_size="qc-fragment-sizes" +bash_cmd="bash ${code_dir}/segments/${segment_qc_frag_size}.sh $proj_dir $sample $bam_dd" +($bash_cmd) + +# call peaks +segment_peaks="peaks-macs-atac" +bash_cmd="bash ${code_dir}/segments/${segment_peaks}.sh $proj_dir $sample $bam_dd 0.05" +($bash_cmd) +bash_cmd="bash ${code_dir}/segments/${segment_peaks}.sh $proj_dir $sample $bam_dd 0.20" +($bash_cmd) + ######################### @@ -84,13 +102,14 @@ fi sleep 30 -summary_csv="${proj_dir}/summary-combined.wes.csv" +summary_csv="${proj_dir}/summary-combined.${route_name}.csv" bash_cmd=" bash ${code_dir}/scripts/join-many.sh , X \ ${proj_dir}/summary.${segment_fastq_clean}.csv \ ${proj_dir}/summary.${segment_align}.csv \ ${proj_dir}/summary.${segment_dedup}.csv \ +${proj_dir}/summary.${segment_peaks}-q-0.05.csv \ > $summary_csv " (eval $bash_cmd) diff --git a/routes/comp-rna-star-dge.sh b/routes/comp-rna-star-dge.sh index f0e83a9..00cf0fb 100755 --- a/routes/comp-rna-star-dge.sh +++ b/routes/comp-rna-star-dge.sh @@ -160,7 +160,7 @@ echo -e "\n ========== start analysis ========== \n" cd "$dge_dir" || exit 1 -# test R installation and setting +# launch the analysis R script bash_cmd="Rscript --vanilla ${code_dir}/scripts/dge-deseq2.R $input_counts_table $input_groups_table" echo "CMD: $bash_cmd" ($bash_cmd) diff --git a/routes/rna-star.sh b/routes/rna-star.sh index 85579ba..e4239b5 100755 --- a/routes/rna-star.sh +++ b/routes/rna-star.sh @@ -117,7 +117,7 @@ fi sleep 30 -summary_csv="${proj_dir}/summary-combined.rna-star.csv" +summary_csv="${proj_dir}/summary-combined.${route_name}.csv" bash_cmd=" bash ${code_dir}/scripts/join-many.sh , X \ diff --git a/routes/wes.sh b/routes/wes.sh index 48e4c3a..07f5cea 100755 --- a/routes/wes.sh +++ b/routes/wes.sh @@ -106,7 +106,7 @@ fi sleep 30 -summary_csv="${proj_dir}/summary-combined.wes.csv" +summary_csv="${proj_dir}/summary-combined.${route_name}.csv" bash_cmd=" bash ${code_dir}/scripts/join-many.sh , X \ diff --git a/scripts/fragment-sizes.R b/scripts/fragment-sizes.R new file mode 100755 index 0000000..8b786f6 --- /dev/null +++ b/scripts/fragment-sizes.R @@ -0,0 +1,80 @@ +#!/usr/bin/env Rscript + + +## +## Extract and plot fragment size distribution from a BAM file. +## +## usage: Rscript --vanilla fragment-sizes.R sample_name file.bam +## + + +# increase output width +options(width = 150) + +# relevent arguments +args = commandArgs(trailingOnly = TRUE) +sample_name = args[1] +bam_file = args[2] + +# check that input files exist +if (!file.exists(bam_file)) stop("file does not exist: ", bam_file) + +# load relevant packages +library(Rsamtools, quietly = TRUE) +library(readr, quietly = TRUE) +library(reshape2, quietly = TRUE) +library(ggplot2, quietly = TRUE) +library(cowplot, quietly = TRUE) + +# import fragment size for only one mate of paired reads (other mate will be opposite) +scan_flag = scanBamFlag(isPaired = TRUE, isFirstMateRead = TRUE, isSecondMateRead = FALSE) +scan_params = ScanBamParam(what = c("isize"), flag = scan_flag) +scanned_sizes = scanBam(file = bam_file, param = scan_params) + +# convert to vector +sizes = unlist(scanned_sizes[[1]]["isize"], use.names = FALSE) + +# make all sizes positive (half should be negative) +sizes = abs(sizes) + +# display stats +message("min: ", min(sizes)) +message("max: ", max(sizes)) +message("mean: ", round(mean(sizes), digits = 1)) +message("median: ", median(sizes)) + +# stats table +stats_table = data.frame(SAMPLE = sample_name, + MIN = min(sizes), + MAX = max(sizes), + MEAN = round(mean(sizes), digits = 1), + MEDIAN = median(sizes), + SD = round(sd(sizes), digits = 1)) +stats_table_file = paste0(sample_name, ".stats.csv") +write_csv(x = stats_table, path = stats_table_file) + +# calculate frequency table +freq_table = as.data.frame(table(sizes), stringsAsFactors = FALSE) +colnames(freq_table) = c("#SIZE", sample_name) + +# export frequency table +freq_table_file = paste0(sample_name, ".freq.csv") +write_csv(x = freq_table, path = freq_table_file) + +# filter out fragments that will not be plotted (with bin width padding) +sizes = sizes[sizes < 1210] + +# plot +plot_file = paste0(sample_name, ".png") +pdf(file = NULL) +ggplot(melt(sizes, value.name = "size"), aes(size)) + +geom_freqpoly(binwidth = 10, size = 1.5) + +scale_x_continuous(limits = c(min(sizes), 1200), breaks = 1:12*100) + +background_grid(major = "x", minor = "none") + +ggtitle(sample_name) + +ggsave(plot_file, width = 8, height = 5, units = "in") +dev.off() + + + +# end diff --git a/segments/align-bowtie2-atac.sh b/segments/align-bowtie2-atac.sh index 6b22bfc..7fa8be3 100755 --- a/segments/align-bowtie2-atac.sh +++ b/segments/align-bowtie2-atac.sh @@ -71,6 +71,7 @@ bam="${bam_dir}/${sample}.bam" logs_dir="${proj_dir}/logs-${segment_name}" mkdir -p "$logs_dir" unfiltered_sam="${logs_dir}/${sample}.unfiltered.sam" +bowtie2_txt="${logs_dir}/${sample}.bowtie2.txt" flagstat_txt="${logs_dir}/${sample}.flagstat.txt" @@ -121,8 +122,8 @@ bowtie2 \ --threads $threads \ -x $ref_bowtie2 \ -1 $fastq_R1 -2 $fastq_R2 \ -> \ -$unfiltered_sam +-S $unfiltered_sam \ +2> $bowtie2_txt \ " echo "CMD: $bash_cmd" eval "$bash_cmd" @@ -167,7 +168,7 @@ cat $unfiltered_sam \ $sambamba_bin view \ --sam-input \ --nthreads $threads \ ---filter \"mapping_quality >= 30 and ref_name == 'chrM'\" \ +--filter \"mapping_quality >= 30 and ref_name != 'chrM'\" \ --format bam \ --compression-level 0 \ /dev/stdin \ @@ -225,7 +226,7 @@ reads_filtered_pct=$(echo "(${reads_filtered}/${reads_input})*100" | bc -l | cut reads_filtered_pct="${reads_filtered_pct}%" # header for summary file -echo "#SAMPLE,TOTAL READS,MAPPED %,CHR M %,MAPPED MQ30 %" > "$summary_csv" +echo "#SAMPLE,TOTAL READS,MAPPED READS %,CHR M READS %,MAPPED READS MQ30 %" > "$summary_csv" # summarize log file echo "${sample},${reads_input},${reads_mapped_pct},${reads_chrM_pct},${reads_filtered_pct}" >> "$summary_csv" diff --git a/segments/bam-dedup-sambamba.sh b/segments/bam-dedup-sambamba.sh index 9610a33..6e13ae7 100755 --- a/segments/bam-dedup-sambamba.sh +++ b/segments/bam-dedup-sambamba.sh @@ -86,8 +86,9 @@ echo " * BAM OUT: $bam_dd " bash_cmd=" $sambamba_bin markdup \ --remove-duplicates \ ---nthreads=${threads} \ ---overflow-list-size=500000 \ +--nthreads $threads \ +--hash-table-size 1500000 \ +--overflow-list-size 1500000 \ $bam \ $bam_dd \ 2> $bam_dd_log diff --git a/segments/bigwig-deeptools.sh b/segments/bigwig-deeptools.sh index 947dea9..a17bae4 100755 --- a/segments/bigwig-deeptools.sh +++ b/segments/bigwig-deeptools.sh @@ -12,15 +12,15 @@ echo -e "\n ========== SEGMENT: $segment_name ========== \n" >&2 # check for correct number of arguments if [ ! $# == 4 ] ; then echo -e "\n $script_name ERROR: WRONG NUMBER OF ARGUMENTS SUPPLIED \n" >&2 - echo -e "\n USAGE: $script_name [project dir] [sample] [threads] [BAM] \n" >&2 + echo -e "\n USAGE: $script_name project_dir sample_name num_threads BAM \n" >&2 exit 1 fi # arguments -PROJ_DIR=$1 -SAMPLE=$2 -THREADS=$3 -BAM=$4 +proj_dir=$1 +sample=$2 +threads=$3 +bam=$4 ######################### @@ -28,13 +28,13 @@ BAM=$4 # check that inputs exist -if [ ! -d "$PROJ_DIR" ] || [ ! "$PROJ_DIR" ] ; then - echo -e "\n $script_name ERROR: DIR $PROJ_DIR DOES NOT EXIST \n" >&2 +if [ ! -d "$proj_dir" ] ; then + echo -e "\n $script_name ERROR: DIR $proj_dir DOES NOT EXIST \n" >&2 exit 1 fi -if [ ! -s "$BAM" ] || [ ! "$BAM" ] ; then - echo -e "\n $script_name ERROR: BAM $BAM DOES NOT EXIST \n" >&2 +if [ ! -s "$bam" ] ; then + echo -e "\n $script_name ERROR: BAM $bam DOES NOT EXIST \n" >&2 exit 1 fi @@ -42,11 +42,11 @@ fi ######################### -# output +# settings and files -BIGWIG_DIR="${PROJ_DIR}/BIGWIG" -mkdir -p "$BIGWIG_DIR" -BIGWIG="${BIGWIG_DIR}/${SAMPLE}.bin1.rpkm.bw" +bigwig_dir="${proj_dir}/BIGWIG" +mkdir -p "$bigwig_dir" +bigwig="${bigwig_dir}/${sample}.bin1.rpkm.bw" # BIGWIG_RPKM_BIN1=${BIGWIG_RPKM_DIR}/${ID}.bin1.rpkm.bw # BIGWIG_RPKM_BIN10=${BIGWIG_RPKM_DIR}/${ID}.bin10.smooth50.rpkm.bw @@ -57,8 +57,8 @@ BIGWIG="${BIGWIG_DIR}/${SAMPLE}.bin1.rpkm.bw" # exit if output exits already -if [ -s "$BIGWIG" ] ; then - echo -e "\n $script_name SKIP SAMPLE $SAMPLE \n" >&2 +if [ -s "$bigwig" ] ; then + echo -e "\n $script_name SKIP SAMPLE $sample \n" >&2 exit 1 fi @@ -74,21 +74,21 @@ module load deeptools/2.2.4 module load kentutils/329 echo " * bamCoverage: $(readlink -f $(which bamCoverage)) " -echo " * BAM: $BAM " -echo " * BIGWIG: $BIGWIG " +echo " * BAM: $bam " +echo " * BIGWIG: $bigwig " -CMD=" +bash_cmd=" bamCoverage \ --verbose \ ---numberOfProcessors $THREADS \ +--numberOfProcessors $threads \ --binSize 1 \ --normalizeUsingRPKM \ --outFileFormat bigwig \ ---bam $BAM \ ---outFileName $BIGWIG +--bam $bam \ +--outFileName $bigwig " -echo "CMD: $CMD" -eval "$CMD" +echo "CMD: $bash_cmd" +eval "$bash_cmd" # echo " * BIGWIG: $BIGWIG_RPKM_BIN10 " @@ -113,8 +113,8 @@ eval "$CMD" # check that output generated -if [ ! -s "$BIGWIG" ] ; then - echo -e "\n $script_name ERROR: BIGWIG $BIGWIG NOT GENERATED \n" >&2 +if [ ! -s "$bigwig" ] ; then + echo -e "\n $script_name ERROR: BIGWIG $bigwig NOT GENERATED \n" >&2 exit 1 fi diff --git a/segments/peaks-macs-atac.sh b/segments/peaks-macs-atac.sh new file mode 100755 index 0000000..956dd12 --- /dev/null +++ b/segments/peaks-macs-atac.sh @@ -0,0 +1,181 @@ +#!/bin/bash + + +# MACS peak calling for ATAC-seq + + +# script filename +script_name=$(basename "${BASH_SOURCE[0]}") +segment_name=${script_name/%.sh/} +echo -e "\n ========== SEGMENT: $segment_name ========== \n" >&2 + +# check for correct number of arguments +if [ ! $# == 4 ] ; then + echo -e "\n $script_name ERROR: WRONG NUMBER OF ARGUMENTS SUPPLIED \n" >&2 + echo -e "\n USAGE: $script_name project_dir sample_name BAM q_value \n" >&2 + exit 1 +fi + +# arguments +proj_dir=$1 +sample=$2 +bam=$3 +qvalue=$4 + + +######################### + + +# check that inputs exist + +if [ ! -d "$proj_dir" ] ; then + echo -e "\n $script_name ERROR: PROJ DIR $proj_dir DOES NOT EXIST \n" >&2 + exit 1 +fi + +if [ ! -s "$bam" ] ; then + echo -e "\n $script_name ERROR: BAM $bam DOES NOT EXIST \n" >&2 + exit 1 +fi + +code_dir=$(dirname "$(dirname "${BASH_SOURCE[0]}")") + +genome_dir=$(bash ${code_dir}/scripts/get-set-setting.sh "${proj_dir}/settings.txt" GENOME-DIR); + +if [ ! -d "$genome_dir" ] ; then + echo -e "\n $script_name ERROR: GENOME DIR $genome_dir DOES NOT EXIST \n" >&2 + exit 1 +fi + + +######################### + + +# MACS-stlye genome (mm,hs,dm,ce) + +# keep first two characters of build name +genome_build=$(basename "$genome_dir") +macs_genome="${genome_build:0:2}" + +# fix if hgXX +if [ "$macs_genome" == "hg" ] ; then + macs_genome="hs" +fi + + +######################### + + +# settings and files + +segment_name="${segment_name}-q-${qvalue}" + +summary_dir="${proj_dir}/summary" +mkdir -p "$summary_dir" +summary_csv="${summary_dir}/${sample}.${segment_name}.csv" + +macs_dir="${proj_dir}/peaks-macs-atac-q-${qvalue}" +mkdir -p "$macs_dir" +peaks_xls="${macs_dir}/${sample}_peaks.xls" +peaks_narrow="${macs_dir}/${sample}_peaks.narrowPeak" +peaks_bed="${macs_dir}/${sample}.bed" + + +######################### + + +# exit if output exits already + +if [ -s "$peaks_xls" ] ; then + echo -e "\n $script_name SKIP SAMPLE $sample \n" >&2 + exit 1 +fi + + +######################### + + +# MACS + +# MACS is part of python/2.7.3 module +module unload python +module load python/2.7.3 + +echo " * MACS: $(readlink -f $(which macs2)) " +echo " * MACS version: $(macs2 --version 2>&1) " +echo " * R: $(readlink -f $(which R)) " +echo " * R version: $(R --version | head -1) " +echo " * Rscript: $(readlink -f $(which Rscript)) " +echo " * Rscript version: $(Rscript --version 2>&1) " +echo " * BAM: $bam " +echo " * MACS genome: $macs_genome " +echo " * MACS dir: $macs_dir " + +cd "$macs_dir" || exit 1 + +bash_cmd=" +macs2 callpeak \ +--verbose 2 \ +--nomodel --shift -100 --extsize 200 \ +--keep-dup all \ +--qvalue $qvalue \ +--gsize $macs_genome \ +--name $sample \ +--treatment $bam \ +--outdir $macs_dir +" +echo "CMD: $bash_cmd" +$bash_cmd + + +######################### + + +# check that output generated + +if [ ! -s "$peaks_xls" ] ; then + echo -e "\n $script_name ERROR: XLS $peaks_xls NOT GENERATED \n" >&2 + exit 1 +fi + +if [ ! -s "$peaks_narrow" ] ; then + echo -e "\n $script_name ERROR: PEAKS $peaks_narrow NOT GENERATED \n" >&2 + exit 1 +fi + + +######################### + + +# generate bed file + +bash_cmd="cut -f 1,2,3,4,7 $peaks_narrow > $peaks_bed" +echo "CMD: $bash_cmd" +eval "$bash_cmd" + + +######################### + + +# generate summary + +peaks_num=$(cat "$peaks_narrow" | wc -l) +echo "total peaks: $peaks_num" + +# header for summary file +echo "#SAMPLE,PEAKS" > "$summary_csv" + +# summarize log file +echo "${sample},${peaks_num}" >> "$summary_csv" + +sleep 30 + +# combine all sample summaries +cat ${summary_dir}/*.${segment_name}.csv | LC_ALL=C sort -t ',' -k1,1 | uniq > "${proj_dir}/summary.${segment_name}.csv" + + +######################### + + + +# end diff --git a/segments/qc-fragment-sizes.sh b/segments/qc-fragment-sizes.sh new file mode 100755 index 0000000..25d395d --- /dev/null +++ b/segments/qc-fragment-sizes.sh @@ -0,0 +1,142 @@ +#!/bin/bash + + +# get fragment size distribution + + +# script filename +script_name=$(basename "${BASH_SOURCE[0]}") +segment_name=${script_name/%.sh/} +echo -e "\n ========== SEGMENT: $segment_name ========== \n" >&2 + +# check for correct number of arguments +if [ ! $# == 3 ] ; then + echo -e "\n $script_name ERROR: WRONG NUMBER OF ARGUMENTS SUPPLIED \n" >&2 + echo -e "\n USAGE: $script_name project_dir sample_name BAM \n" >&2 + exit 1 +fi + +# arguments +proj_dir=$1 +sample=$2 +bam=$3 + + +######################### + + +# check that inputs exist + +if [ ! -d "$proj_dir" ] ; then + echo -e "\n $script_name ERROR: PROJ DIR $proj_dir DOES NOT EXIST \n" >&2 + exit 1 +fi + +if [ ! -s "$bam" ] ; then + echo -e "\n $script_name ERROR: BAM $bam DOES NOT EXIST \n" >&2 + exit 1 +fi + +code_dir=$(dirname "$(dirname "${BASH_SOURCE[0]}")") + + +######################### + + +# settings and files + +summary_dir="${proj_dir}/summary" +mkdir -p "$summary_dir" +summary_csv="${summary_dir}/${sample}.${segment_name}.csv" + +frag_sizes_dir="${proj_dir}/QC-fragment-sizes" +mkdir -p "$frag_sizes_dir" +frag_sizes_png="${frag_sizes_dir}/${sample}.png" +frag_sizes_freq_csv="${frag_sizes_dir}/${sample}.freq.csv" +frag_sizes_stats_csv="${frag_sizes_dir}/${sample}.stats.csv" + + +######################### + + +# exit if output exits already + +if [ -s "$frag_sizes_freq_csv" ] ; then + echo -e "\n $script_name SKIP SAMPLE $sample \n" >&2 + exit 1 +fi + + +######################### + + +# plot distribution + +module load r/3.3.0 + +echo " * R: $(readlink -f $(which R)) " +echo " * R version: $(R --version | head -1) " +echo " * Rscript: $(readlink -f $(which Rscript)) " +echo " * Rscript version: $(Rscript --version 2>&1) " + +# navigate to frag sizes dir +cd "$frag_sizes_dir" || exit 1 + +# run fragment sizes script +bash_cmd="Rscript --vanilla ${code_dir}/scripts/fragment-sizes.R $sample $bam" +echo "CMD: $bash_cmd" +($bash_cmd) + + +######################### + + +# check that output generated + +if [ ! -s "$frag_sizes_png" ] ; then + echo -e "\n $script_name ERROR: FILE $frag_sizes_png NOT GENERATED \n" >&2 + exit 1 +fi + +if [ ! -s "$frag_sizes_freq_csv" ] ; then + echo -e "\n $script_name ERROR: FILE $frag_sizes_freq_csv NOT GENERATED \n" >&2 + exit 1 +fi + + +######################### + + +# summary + +# combine charts into a single png + +combined_png_3w=${proj_dir}/summary.${segment_name}.3w.png +combined_png_4w=${proj_dir}/summary.${segment_name}.4w.png + +rm -f "$combined_png_3w" +rm -f "$combined_png_4w" + +# -geometry +20+20 = 20px x and y padding +# -tile 4x = 4 images wide +montage -geometry +20+20 -tile 3x "${frag_sizes_dir}/*.png" "$combined_png_3w" +montage -geometry +20+20 -tile 4x "${frag_sizes_dir}/*.png" "$combined_png_4w" + + +# header for summary file +echo "#SAMPLE,MIN,MAX,MEAN,MEDIAN,SD" > "$summary_csv" + +# summarize log file +grep -v "^SAMPLE" "$frag_sizes_stats_csv" >> "$summary_csv" + +sleep 30 + +# combine all sample summaries +cat ${summary_dir}/*.${segment_name}.csv | LC_ALL=C sort -t ',' -k1,1 | uniq > "${proj_dir}/summary.${segment_name}.csv" + + +######################### + + + +# end diff --git a/segments/qc-picard-rnaseqmetrics.sh b/segments/qc-picard-rnaseqmetrics.sh index 6ffa855..9cfe3b5 100755 --- a/segments/qc-picard-rnaseqmetrics.sh +++ b/segments/qc-picard-rnaseqmetrics.sh @@ -21,10 +21,6 @@ proj_dir=$1 sample=$2 bam=$3 -# strand: -# fwd | transcript | cufflinks "fr-secondstrand" | htseq "yes" | picard "FIRST_READ" -# rev | rev comp of transcript | cufflinks "fr-firststrand" | htseq "reverse" | picard "SECOND_READ" - ######################### @@ -69,18 +65,6 @@ metrics_pdf="${metrics_dir}/${sample}.pdf" summary_dir="${proj_dir}/summary" summary_csv="${summary_dir}/${sample}.${segment_name}.csv" -# # strand flag -# if [ "$STRAND" == "unstr" ] ; then -# STRAND_FLAG="0" -# elif [ "$STRAND" == "fwd" ] ; then -# STRAND_FLAG="1" -# elif [ "$STRAND" == "rev" ] ; then -# STRAND_FLAG="2" -# else -# echo -e "\n $script_name ERROR: incorrect strand selected \n" >&2 -# exit 1 -# fi - ######################### @@ -96,16 +80,15 @@ fi ######################### +# run picard CollectRnaSeqMetrics + module unload java module load java/1.7 module load picard-tools/1.88 - - - -# STRAND_SPECIFICITY {NONE, FIRST_READ_TRANSCRIPTION_STRAND, SECOND_READ_TRANSCRIPTION_STRAND} -# STRAND_SPECIFICITY="NONE" - +# strand: +# fwd | transcript | cufflinks "fr-secondstrand" | htseq "yes" | picard "FIRST_READ" +# rev | rev comp of transcript | cufflinks "fr-firststrand" | htseq "reverse" | picard "SECOND_READ" echo " * CollectRnaSeqMetrics: ${PICARD_ROOT}/CollectRnaSeqMetrics.jar " echo " * BAM: $bam " @@ -114,8 +97,6 @@ echo " * RIBOSOMAL_INTERVALS: $RRNA_INTERVAL_LIST " echo " * OUT TXT: $metrics_txt " echo " * OUT PDF: $metrics_pdf " -# run picard - PICARD_CMD="java -Xms8G -Xmx8G -jar ${PICARD_ROOT}/CollectRnaSeqMetrics.jar \ VERBOSITY=WARNING QUIET=true VALIDATION_STRINGENCY=LENIENT MAX_RECORDS_IN_RAM=2500000 \ REF_FLAT=${REFFLAT} \ @@ -189,8 +170,8 @@ paste \ | tr '\t' ',' \ > $summary_csv -rm -f "${metrics_txt}.1READ" -rm -f "${metrics_txt}.2READ" +rm -fv "${metrics_txt}.1READ" +rm -fv "${metrics_txt}.2READ" # combine charts into a single pdf