forked from rtfcoimbra/Coimbra-et-al-2021_CurrBiol
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclean_bams.sh
26 lines (24 loc) · 774 Bytes
/
clean_bams.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/bin/bash
#
# Usage:
# clean_bams.sh <regions.bed> <indir> <outdir> <njobs>
#
# Description:
# Remove unmapped (4), secondary (256), QC failed (512), duplicate (1024), and
# supplementary (2048) reads from indel-realigned BAMs, and keep only reads
# mapped in a proper pair (2) to regions in a BED file (non-repetitive regions
# in scaffolds >= 1 Mb) using SAMtools.
#
# Requirements:
# samtools
# parallel
# 'no_repeats_1mb_scaffolds.bed' file generated with 'process_assembly.sh'
#
# Important:
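# Each filtering job runs 'samtools view -@ 4', i.e. 4 extra threads on top of
# the main thread; choose <njobs> accordingly. Indexing jobs are single-threaded.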
# Fail fast: stop on the first failing command, on unset variables, and on
# pipeline errors, so the indexing step never runs after a failed filtering step.
set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf 'clean_bams.sh: %s\n' "$*" >&2
  exit 1
}

if (( $# < 4 )); then
  printf 'Usage: %s <regions.bed> <indir> <outdir> <njobs>\n' "${0##*/}" >&2
  exit 2
fi

regions_bed=$1  # BED file with the regions to keep
indir=$2        # directory holding *.realigned.bam inputs
outdir=$3       # directory receiving *.clean.bam outputs and their indexes
njobs=$4        # value handed to 'parallel -j'

for tool in samtools parallel; do
  command -v "${tool}" >/dev/null || die "required tool not found in PATH: ${tool}"
done

[[ -r "${regions_bed}" ]] || die "cannot read BED file: ${regions_bed}"
[[ -d "${indir}" ]] || die "input directory does not exist: ${indir}"
mkdir -p -- "${outdir}" || die "cannot create output directory: ${outdir}"

# Collect inputs into an array; nullglob makes an unmatched pattern expand to
# nothing instead of being passed on as a literal '*.realigned.bam' path.
shopt -s nullglob
realigned_bams=("${indir}"/*.realigned.bam)
shopt -u nullglob
(( ${#realigned_bams[@]} > 0 )) || die "no *.realigned.bam files found in ${indir}"

# GNU parallel hands the command template to a shell, so the fixed paths are
# shell-quoted up front; parallel quotes the {} and {/..} replacements itself.
printf -v regions_bed_q '%q' "${regions_bed}"
printf -v outdir_q '%q' "${outdir}"

# clean indel-realigned BAMs
# -F 3844 drops unmapped/secondary/QC-failed/duplicate/supplementary reads,
# -f 2 keeps properly paired reads, -L restricts output to the BED regions.
# {/..} is the input basename with its last two extensions removed (--plus).
parallel -j "${njobs}" --plus \
  "samtools view -@ 4 -b -F 3844 -f 2 -L ${regions_bed_q} -o ${outdir_q}/{/..}.clean.bam {}" \
  ::: "${realigned_bams[@]}"

# index clean BAMs (every *.clean.bam present in the output directory)
shopt -s nullglob
clean_bams=("${outdir}"/*.clean.bam)
shopt -u nullglob
(( ${#clean_bams[@]} > 0 )) || die "no *.clean.bam files found in ${outdir}"

parallel -j "${njobs}" samtools index -b {} ::: "${clean_bams[@]}"