Skip to content

Commit

Permalink
routine sync
Browse files Browse the repository at this point in the history
  • Loading branch information
igordot committed Mar 14, 2017
1 parent c24d998 commit bf7a22d
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 34 deletions.
4 changes: 3 additions & 1 deletion run
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,9 @@ sub run_samples {
$email = `echo $email`;
chomp($email);
my $qsub_name = "sns.${route}.${sample}";
my $qsub_cmd = "qsub -N $qsub_name -M $email -m a -j y -cwd -pe threaded $threads -b y $cmd";
my $qsub_thread = "-pe threaded ${threads}";
my $qsub_mem = "-hard -l mem_free=64G";
my $qsub_cmd = "qsub -N $qsub_name -M $email -m a -j y -cwd $qsub_thread $qsub_mem -b y $cmd";

# paired command is single-threaded (at least for now)
if ($route =~ "-pairs-") {
Expand Down
26 changes: 14 additions & 12 deletions scripts/fragment-sizes.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
# increase output width
options(width = 150)

# get scripts directory (directory of this file) and load relevant functions
args_all = commandArgs(trailingOnly = FALSE)
scripts_dir = normalizePath(dirname(sub("^--file=", "", args_all[grep("^--file=", args_all)])))
source(paste0(scripts_dir, "/load-install-packages.R"))

# relevent arguments
args = commandArgs(trailingOnly = TRUE)
sample_name = args[1]
Expand All @@ -20,11 +25,11 @@ bam_file = args[2]
if (!file.exists(bam_file)) stop("file does not exist: ", bam_file)

# load relevant packages
library(Rsamtools, quietly = TRUE)
library(readr, quietly = TRUE)
library(reshape2, quietly = TRUE)
library(ggplot2, quietly = TRUE)
library(cowplot, quietly = TRUE)
load_install_packages("Rsamtools")
load_install_packages("readr")
load_install_packages("reshape2")
load_install_packages("ggplot2")
load_install_packages("cowplot")

# import fragment size for only one mate of paired reads (other mate will be opposite)
scan_flag = scanBamFlag(isPaired = TRUE, isFirstMateRead = TRUE, isSecondMateRead = FALSE)
Expand All @@ -43,10 +48,9 @@ sizes = sizes[sizes > 0]
sizes = sizes[sizes < 5000]

# display stats
message("min: ", min(sizes))
message("max: ", max(sizes))
message("mean: ", round(mean(sizes), digits = 1))
message("median: ", median(sizes))
message("sd: ", round(sd(sizes), digits = 1))

# stats table
stats_table = data.frame(SAMPLE = sample_name,
Expand All @@ -69,14 +73,12 @@ sizes = sizes[sizes < 1010]

# plot
plot_file = paste0(sample_name, ".png")
pdf(file = NULL)
ggplot(melt(sizes, value.name = "size"), aes(size)) +
freq_plot = ggplot(melt(sizes, value.name = "size"), aes(size)) +
geom_freqpoly(binwidth = 10, size = 1.5) +
scale_x_continuous(limits = c(min(sizes), 1000), breaks = 1:10*100) +
background_grid(major = "x", minor = "none") +
ggtitle(sample_name) +
ggsave(plot_file, width = 8, height = 5, units = "in")
dev.off()
ggtitle(sample_name)
ggsave(filename = plot_file, plot = freq_plot, width = 8, height = 5, units = "in")



Expand Down
25 changes: 15 additions & 10 deletions scripts/load-install-packages.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,31 @@ load_install_packages = function(package_list) {

# try to install from CRAN if package is not already installed
# use require() instead of installed.packages() to check that package is both installed and usable
if (!require(p, character.only = TRUE, quietly = TRUE)) {
message("try installing package from CRAN: ", p)
# install to local user library path (destination needs to be specified the first time)
install.packages(p, lib = Sys.getenv("R_LIBS_USER"), repos = "https://cran.rstudio.com/", quiet = TRUE)
}

# biocLite can install CRAN as well
# if (!require(p, character.only = TRUE, quietly = TRUE)) {
# message("try installing package from CRAN: ", p)
# # install to local user library path (destination needs to be specified the first time)
# install.packages(p, lib = Sys.getenv("R_LIBS_USER"), repos = "https://cran.rstudio.com/", quiet = TRUE)
# }

# try to install from Bioconductor if package is not already installed
# biocLite() installs Bioconductor and CRAN packages
if (!require(p, character.only = TRUE, quietly = TRUE)) {
message("package not available on CRAN: ", p)
message("try installing package from Bioconductor: ", p)
# message("package not available on CRAN: ", p)
# message("try installing package from Bioconductor: ", p)
message("try installing package: ", p)
source("https://bioconductor.org/biocLite.R")
biocLite(p, suppressUpdates = TRUE)
biocLite(p, suppressUpdates = TRUE, lib = Sys.getenv("R_LIBS_USER"))
}

# package still not available (after CRAN and Bioconductor)
if (!require(p, character.only = TRUE, quietly = TRUE)) {
message("package not available on Bioconductor: ", p)
# message("package not available on Bioconductor: ", p)
message("package not available: ", p)
}

# force load package
# load package
library(p, character.only = TRUE, quietly = TRUE)

}
Expand Down
7 changes: 4 additions & 3 deletions scripts/test-packages.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


##
## Try loading a few packages to test R settings.
## Try loading (installing if not available) a few R packages.
##
## usage: Rscript --vanilla test-packages.R
##
Expand All @@ -13,8 +13,9 @@ args_all = commandArgs(trailingOnly = FALSE)
scripts_dir = normalizePath(dirname(sub("^--file=", "", args_all[grep("^--file=", args_all)])))
source(paste0(scripts_dir, "/load-install-packages.R"))

# load some packages
test_packages = c("bitops", "mnormt", "gplots", "xtable", "ggplot2", "dplyr", "gsalib", "limma")
# load some packages (some will probably need to be installed)
# used by GATK: ggplot2, gplots, reshape, gsalib
test_packages = c("bitops", "mnormt", "reshape", "gplots", "ggplot2", "gsalib", "dplyr", "limma")
load_install_packages(test_packages)


Expand Down
26 changes: 20 additions & 6 deletions segments/align-bwa-mem.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ samples_csv="${proj_dir}/samples.${segment_name}.csv"
bwa_bam_dir="${proj_dir}/BAM-BWA"
mkdir -p "$bwa_bam_dir"
bam="${bwa_bam_dir}/${sample}.bam"
bai="${bam}.bai"

bwa_logs_dir="${proj_dir}/logs-${segment_name}"
mkdir -p "$bwa_logs_dir"
Expand Down Expand Up @@ -132,13 +133,10 @@ sleep 30
#########################


# check that output generated
# check that bwa/sambamba output generated

# check that bam file size above 10 kb
du -s $bam
bam_size=$(du -s --apparent-size $bam | cut -f 1)
if [ $bam_size -lt 10 ] ; then
echo -e "\n $script_name ERROR: BAM $bam TOO SMALL \n" >&2
if [ ! -s "$bam" ] ; then
echo -e "\n $script_name ERROR: BAM $bam NOT GENERATED \n" >&2
exit 1
fi

Expand All @@ -152,6 +150,22 @@ bash_cmd="$sambamba_bin flagstat $bam > $bwa_flagstat"
echo "CMD: $bash_cmd"
eval "$bash_cmd"

sleep 30


#########################


# check that flagstat output generated

if [ ! -s "$bwa_flagstat" ] ; then
echo -e "\n $script_name ERROR: FLAGSTAT $bwa_flagstat NOT GENERATED \n" >&2
# delete BAM and BAI since something went wrong and they might be corrupted
rm -fv "$bam"
rm -fv "$bai"
exit 1
fi


#########################

Expand Down
2 changes: 1 addition & 1 deletion segments/annot-annovar.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ genome_build=$(basename "$genome_dir")

if [[ "$genome_build" == "hg19" ]] ; then
annovar_buildver="hg19"
annovar_protocol="refGene,snp138,snp138NonFlagged,exac03,esp6500siv2_all,1000g2015aug_all,cosmic70,cadd13gt10,fathmm"
annovar_protocol="refGene,snp138,snp138NonFlagged,exac03,esp6500siv2_all,1000g2015aug_all,cosmic80,cadd13gt10,fathmm"
annovar_operation="g,f,f,f,f,f,f,f,f"
annovar_argument="'--splicing_threshold 10',,,,,,,,"
annovar_multianno="${annovar_out_prefix}.hg19_multianno.txt"
Expand Down
26 changes: 25 additions & 1 deletion segments/bam-dedup-sambamba.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ samples_csv="${proj_dir}/samples.${segment_name}.csv"
bam_dd_dir="${proj_dir}/BAM-DD"
mkdir -p "$bam_dd_dir"
bam_dd="${bam_dd_dir}/${sample}.dd.bam"
bai_dd="${bam_dd}.bai"

dedup_logs_dir="${proj_dir}/logs-${segment_name}"
mkdir -p "$dedup_logs_dir"
Expand Down Expand Up @@ -102,13 +103,20 @@ sleep 30
#########################


# check that output generated
# check that sambamba markdup output generated

if [ ! -s "$bam_dd" ] ; then
echo -e "\n $script_name ERROR: BAM $bam_dd NOT GENERATED \n" >&2
exit 1
fi

if [ ! -s "$bai_dd" ] ; then
echo -e "\n $script_name ERROR: BAI $bai_dd NOT GENERATED \n" >&2
# delete BAM since something went wrong and it might be corrupted
rm -fv "$bam_dd"
exit 1
fi


#########################

Expand All @@ -119,6 +127,22 @@ bash_cmd="$sambamba_bin flagstat $bam_dd > $bam_dd_flagstat"
echo "CMD: $bash_cmd"
eval "$bash_cmd"

sleep 30


#########################


# check that flagstat output generated

if [ ! -s "$bam_dd_flagstat" ] ; then
echo -e "\n $script_name ERROR: FLAGSTAT $bam_dd_flagstat NOT GENERATED \n" >&2
# delete BAM and BAI since something went wrong and they might be corrupted
rm -fv "$bam_dd"
rm -fv "$bai_dd"
exit 1
fi


#########################

Expand Down
18 changes: 18 additions & 0 deletions segments/bam-ra-rc-gatk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,10 @@ fi
# GATK settings

module unload java
module unload r
module load java/1.8
module load r/3.3.0
module unload zlib

# command
gatk_jar="/ifs/home/id460/software/GenomeAnalysisTK-3.6/GenomeAnalysisTK.jar"
Expand Down Expand Up @@ -161,6 +163,22 @@ fi
#########################


# test R (GATK uses R for plotting)

echo " * R: $(readlink -f $(which R)) "
echo " * R version: $(R --version | head -1) "
echo " * Rscript: $(readlink -f $(which Rscript)) "
echo " * Rscript version: $(Rscript --version 2>&1) "

# test R settings and install missing packages if needed
test_r_cmd="Rscript --vanilla ${code_dir}/scripts/test-packages.R"
echo "CMD: $test_r_cmd"
($test_r_cmd)


#########################


# realignment

echo " * GATK: $(readlink -f $gatk_jar) "
Expand Down

0 comments on commit bf7a22d

Please sign in to comment.