Skip to content

Commit

Permalink
minor adjustments
Browse files Browse the repository at this point in the history
  • Loading branch information
igordot committed Sep 8, 2016
1 parent 325f4d3 commit 52f9117
Show file tree
Hide file tree
Showing 12 changed files with 467 additions and 61 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

# Created by https://www.gitignore.io/


### OSX ###
*.DS_Store
.AppleDouble
Expand Down
21 changes: 20 additions & 1 deletion routes/atac.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,24 @@ if [ -z "$bam_dd" ] ; then
bam_dd=$(grep -m 1 "^${sample}," "${proj_dir}/samples.${segment_dedup}.csv" | cut -d ',' -f 2)
fi

# generate BigWig (deeptools)
segment_bigwig_deeptools="bigwig-deeptools"
bash_cmd="bash ${code_dir}/segments/${segment_bigwig_deeptools}.sh $proj_dir $sample 4 $bam_dd"
qsub_cmd="qsub -N sns.${segment_bigwig_deeptools}.${sample} -M ${USER}@nyumc.org -m a -j y -cwd -pe threaded 4 -b y ${bash_cmd}"
$qsub_cmd

# fragment size distribution
segment_qc_frag_size="qc-fragment-sizes"
bash_cmd="bash ${code_dir}/segments/${segment_qc_frag_size}.sh $proj_dir $sample $bam_dd"
($bash_cmd)

# call peaks
segment_peaks="peaks-macs-atac"
bash_cmd="bash ${code_dir}/segments/${segment_peaks}.sh $proj_dir $sample $bam_dd 0.05"
($bash_cmd)
bash_cmd="bash ${code_dir}/segments/${segment_peaks}.sh $proj_dir $sample $bam_dd 0.20"
($bash_cmd)


#########################

Expand All @@ -84,13 +102,14 @@ fi

sleep 30

summary_csv="${proj_dir}/summary-combined.wes.csv"
summary_csv="${proj_dir}/summary-combined.${route_name}.csv"

bash_cmd="
bash ${code_dir}/scripts/join-many.sh , X \
${proj_dir}/summary.${segment_fastq_clean}.csv \
${proj_dir}/summary.${segment_align}.csv \
${proj_dir}/summary.${segment_dedup}.csv \
${proj_dir}/summary.${segment_peaks}-q-0.05.csv \
> $summary_csv
"
(eval $bash_cmd)
Expand Down
2 changes: 1 addition & 1 deletion routes/comp-rna-star-dge.sh
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ echo -e "\n ========== start analysis ========== \n"

cd "$dge_dir" || exit 1

# test R installation and setting
# launch the analysis R script
bash_cmd="Rscript --vanilla ${code_dir}/scripts/dge-deseq2.R $input_counts_table $input_groups_table"
echo "CMD: $bash_cmd"
($bash_cmd)
Expand Down
2 changes: 1 addition & 1 deletion routes/rna-star.sh
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ fi

sleep 30

summary_csv="${proj_dir}/summary-combined.rna-star.csv"
summary_csv="${proj_dir}/summary-combined.${route_name}.csv"

bash_cmd="
bash ${code_dir}/scripts/join-many.sh , X \
Expand Down
2 changes: 1 addition & 1 deletion routes/wes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ fi

sleep 30

summary_csv="${proj_dir}/summary-combined.wes.csv"
summary_csv="${proj_dir}/summary-combined.${route_name}.csv"

bash_cmd="
bash ${code_dir}/scripts/join-many.sh , X \
Expand Down
80 changes: 80 additions & 0 deletions scripts/fragment-sizes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env Rscript


##
## Extract and plot fragment size distribution from a BAM file.
##
## usage: Rscript --vanilla fragment-sizes.R sample_name file.bam
##


# increase output width
options(width = 150)

# relevent arguments
args = commandArgs(trailingOnly = TRUE)
sample_name = args[1]
bam_file = args[2]

# check that input files exist
if (!file.exists(bam_file)) stop("file does not exist: ", bam_file)

# load relevant packages
library(Rsamtools, quietly = TRUE)
library(readr, quietly = TRUE)
library(reshape2, quietly = TRUE)
library(ggplot2, quietly = TRUE)
library(cowplot, quietly = TRUE)

# import fragment size for only one mate of paired reads (other mate will be opposite)
scan_flag = scanBamFlag(isPaired = TRUE, isFirstMateRead = TRUE, isSecondMateRead = FALSE)
scan_params = ScanBamParam(what = c("isize"), flag = scan_flag)
scanned_sizes = scanBam(file = bam_file, param = scan_params)

# convert to vector
sizes = unlist(scanned_sizes[[1]]["isize"], use.names = FALSE)

# make all sizes positive (half should be negative)
sizes = abs(sizes)

# display stats
message("min: ", min(sizes))
message("max: ", max(sizes))
message("mean: ", round(mean(sizes), digits = 1))
message("median: ", median(sizes))

# stats table
stats_table = data.frame(SAMPLE = sample_name,
MIN = min(sizes),
MAX = max(sizes),
MEAN = round(mean(sizes), digits = 1),
MEDIAN = median(sizes),
SD = round(sd(sizes), digits = 1))
stats_table_file = paste0(sample_name, ".stats.csv")
write_csv(x = stats_table, path = stats_table_file)

# calculate frequency table
freq_table = as.data.frame(table(sizes), stringsAsFactors = FALSE)
colnames(freq_table) = c("#SIZE", sample_name)

# export frequency table
freq_table_file = paste0(sample_name, ".freq.csv")
write_csv(x = freq_table, path = freq_table_file)

# filter out fragments that will not be plotted (with bin width padding)
sizes = sizes[sizes < 1210]

# plot
plot_file = paste0(sample_name, ".png")
pdf(file = NULL)
ggplot(melt(sizes, value.name = "size"), aes(size)) +
geom_freqpoly(binwidth = 10, size = 1.5) +
scale_x_continuous(limits = c(min(sizes), 1200), breaks = 1:12*100) +
background_grid(major = "x", minor = "none") +
ggtitle(sample_name) +
ggsave(plot_file, width = 8, height = 5, units = "in")
dev.off()



# end
9 changes: 5 additions & 4 deletions segments/align-bowtie2-atac.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ bam="${bam_dir}/${sample}.bam"
logs_dir="${proj_dir}/logs-${segment_name}"
mkdir -p "$logs_dir"
unfiltered_sam="${logs_dir}/${sample}.unfiltered.sam"
bowtie2_txt="${logs_dir}/${sample}.bowtie2.txt"
flagstat_txt="${logs_dir}/${sample}.flagstat.txt"


Expand Down Expand Up @@ -121,8 +122,8 @@ bowtie2 \
--threads $threads \
-x $ref_bowtie2 \
-1 $fastq_R1 -2 $fastq_R2 \
> \
$unfiltered_sam
-S $unfiltered_sam \
2> $bowtie2_txt \
"
echo "CMD: $bash_cmd"
eval "$bash_cmd"
Expand Down Expand Up @@ -167,7 +168,7 @@ cat $unfiltered_sam \
$sambamba_bin view \
--sam-input \
--nthreads $threads \
--filter \"mapping_quality >= 30 and ref_name == 'chrM'\" \
--filter \"mapping_quality >= 30 and ref_name != 'chrM'\" \
--format bam \
--compression-level 0 \
/dev/stdin \
Expand Down Expand Up @@ -225,7 +226,7 @@ reads_filtered_pct=$(echo "(${reads_filtered}/${reads_input})*100" | bc -l | cut
reads_filtered_pct="${reads_filtered_pct}%"

# header for summary file
echo "#SAMPLE,TOTAL READS,MAPPED %,CHR M %,MAPPED MQ30 %" > "$summary_csv"
echo "#SAMPLE,TOTAL READS,MAPPED READS %,CHR M READS %,MAPPED READS MQ30 %" > "$summary_csv"

# summarize log file
echo "${sample},${reads_input},${reads_mapped_pct},${reads_chrM_pct},${reads_filtered_pct}" >> "$summary_csv"
Expand Down
5 changes: 3 additions & 2 deletions segments/bam-dedup-sambamba.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,9 @@ echo " * BAM OUT: $bam_dd "
bash_cmd="
$sambamba_bin markdup \
--remove-duplicates \
--nthreads=${threads} \
--overflow-list-size=500000 \
--nthreads $threads \
--hash-table-size 1500000 \
--overflow-list-size 1500000 \
$bam \
$bam_dd \
2> $bam_dd_log
Expand Down
50 changes: 25 additions & 25 deletions segments/bigwig-deeptools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,41 +12,41 @@ echo -e "\n ========== SEGMENT: $segment_name ========== \n" >&2
# check for correct number of arguments
if [ ! $# == 4 ] ; then
echo -e "\n $script_name ERROR: WRONG NUMBER OF ARGUMENTS SUPPLIED \n" >&2
echo -e "\n USAGE: $script_name [project dir] [sample] [threads] [BAM] \n" >&2
echo -e "\n USAGE: $script_name project_dir sample_name num_threads BAM \n" >&2
exit 1
fi

# arguments
PROJ_DIR=$1
SAMPLE=$2
THREADS=$3
BAM=$4
proj_dir=$1
sample=$2
threads=$3
bam=$4


#########################


# check that inputs exist

if [ ! -d "$PROJ_DIR" ] || [ ! "$PROJ_DIR" ] ; then
echo -e "\n $script_name ERROR: DIR $PROJ_DIR DOES NOT EXIST \n" >&2
if [ ! -d "$proj_dir" ] ; then
echo -e "\n $script_name ERROR: DIR $proj_dir DOES NOT EXIST \n" >&2
exit 1
fi

if [ ! -s "$BAM" ] || [ ! "$BAM" ] ; then
echo -e "\n $script_name ERROR: BAM $BAM DOES NOT EXIST \n" >&2
if [ ! -s "$bam" ] ; then
echo -e "\n $script_name ERROR: BAM $bam DOES NOT EXIST \n" >&2
exit 1
fi


#########################


# output
# settings and files

BIGWIG_DIR="${PROJ_DIR}/BIGWIG"
mkdir -p "$BIGWIG_DIR"
BIGWIG="${BIGWIG_DIR}/${SAMPLE}.bin1.rpkm.bw"
bigwig_dir="${proj_dir}/BIGWIG"
mkdir -p "$bigwig_dir"
bigwig="${bigwig_dir}/${sample}.bin1.rpkm.bw"

# BIGWIG_RPKM_BIN1=${BIGWIG_RPKM_DIR}/${ID}.bin1.rpkm.bw
# BIGWIG_RPKM_BIN10=${BIGWIG_RPKM_DIR}/${ID}.bin10.smooth50.rpkm.bw
Expand All @@ -57,8 +57,8 @@ BIGWIG="${BIGWIG_DIR}/${SAMPLE}.bin1.rpkm.bw"

# exit if output exits already

if [ -s "$BIGWIG" ] ; then
echo -e "\n $script_name SKIP SAMPLE $SAMPLE \n" >&2
if [ -s "$bigwig" ] ; then
echo -e "\n $script_name SKIP SAMPLE $sample \n" >&2
exit 1
fi

Expand All @@ -74,21 +74,21 @@ module load deeptools/2.2.4
module load kentutils/329

echo " * bamCoverage: $(readlink -f $(which bamCoverage)) "
echo " * BAM: $BAM "
echo " * BIGWIG: $BIGWIG "
echo " * BAM: $bam "
echo " * BIGWIG: $bigwig "

CMD="
bash_cmd="
bamCoverage \
--verbose \
--numberOfProcessors $THREADS \
--numberOfProcessors $threads \
--binSize 1 \
--normalizeUsingRPKM \
--outFileFormat bigwig \
--bam $BAM \
--outFileName $BIGWIG
--bam $bam \
--outFileName $bigwig
"
echo "CMD: $CMD"
eval "$CMD"
echo "CMD: $bash_cmd"
eval "$bash_cmd"

# echo " * BIGWIG: $BIGWIG_RPKM_BIN10 "

Expand All @@ -113,8 +113,8 @@ eval "$CMD"

# check that output generated

if [ ! -s "$BIGWIG" ] ; then
echo -e "\n $script_name ERROR: BIGWIG $BIGWIG NOT GENERATED \n" >&2
if [ ! -s "$bigwig" ] ; then
echo -e "\n $script_name ERROR: BIGWIG $bigwig NOT GENERATED \n" >&2
exit 1
fi

Expand Down
Loading

0 comments on commit 52f9117

Please sign in to comment.