diff --git a/gather-fastqs b/gather-fastqs index 25bc186..803dbfa 100755 --- a/gather-fastqs +++ b/gather-fastqs @@ -36,7 +36,8 @@ sub main { } # find fastqs in given directory - my $find_fastq_cmd = 'find -L ' . $search_dir . ' -maxdepth 2 -type f -name "*_R1*.fastq.gz" -or -name "*_1.fastq.gz" | LC_ALL=C sort'; + my $find_fastq_cmd_names = "-name '*_R1_0*.fastq.gz' -or -name '*_R1.fastq.gz' -or -name '*_1.fastq.gz'"; + my $find_fastq_cmd = "find -L $search_dir -maxdepth 2 -type f $find_fastq_cmd_names | LC_ALL=C sort"; my @fastqs = `$find_fastq_cmd`; # counter single and paired reads diff --git a/scripts/dge-deseq2.R b/scripts/dge-deseq2.R index 97585e0..d42f529 100755 --- a/scripts/dge-deseq2.R +++ b/scripts/dge-deseq2.R @@ -9,7 +9,7 @@ # increase output width -options(width = 150) +options(width = 120) # java heap size options(java.parameters = "-Xmx8G") diff --git a/segments/align-bwa-mem.sh b/segments/align-bwa-mem.sh index 674c219..7cc1c5a 100755 --- a/segments/align-bwa-mem.sh +++ b/segments/align-bwa-mem.sh @@ -87,7 +87,7 @@ fi module load bwa/0.7.13 -sambamba_bin="/ifs/home/id460/bin/sambamba" +sambamba_bin="/ifs/home/id460/software/sambamba/sambamba_v0.6.6" echo " * bwa: $(readlink -f $(which bwa)) " echo " * bwa version: $(bwa 2>&1 | grep -m 1 'Version') " diff --git a/segments/bam-dedup-sambamba.sh b/segments/bam-dedup-sambamba.sh index 29fe71a..58518f4 100755 --- a/segments/bam-dedup-sambamba.sh +++ b/segments/bam-dedup-sambamba.sh @@ -77,7 +77,7 @@ fi # sambamba markdup -sambamba_bin="/ifs/home/id460/bin/sambamba" +sambamba_bin="/ifs/home/id460/software/sambamba/sambamba_v0.6.6" echo " * sambamba: $(readlink -f $(which $sambamba_bin)) " echo " * sambamba version: $($sambamba_bin 2>&1 | head -1) " @@ -88,8 +88,8 @@ bash_cmd=" $sambamba_bin markdup \ --remove-duplicates \ --nthreads $threads \ ---hash-table-size 1500000 \ ---overflow-list-size 1500000 \ +--hash-table-size 525000 \ +--overflow-list-size 525000 \ $bam \ $bam_dd \ 2> $bam_dd_log diff --git a/segments/fastq-clean.sh b/segments/fastq-clean.sh index 7ab7dc3..b3f61ff 100755 --- a/segments/fastq-clean.sh +++ b/segments/fastq-clean.sh @@ -164,6 +164,22 @@ cat ${summary_dir}/*.${segment_name}.csv | LC_ALL=C sort -t ',' -k1,1 | uniq > " ######################### +# exit if FASTQ has very few reads + +if [ $reads_R1 -lt 10000 ] ; then + echo -e "\n $script_name ERROR: FASTQ $fastq_R1_clean IS TOO SHORT \n" >&2 + # delete FASTQs since they are not useable + rm -fv "$fastq_R1_clean" + if [ -s "$fastq_R2_clean" ] ; then + rm -fv "$fastq_R2_clean" + fi + exit 1 +fi + + +######################### + + # add sample and FASTQ to sample sheet echo "${sample},${fastq_R1_clean},${fastq_R2_clean}" >> "$samples_csv_clean"