-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsingleton_remove.py
executable file
·45 lines (37 loc) · 1.26 KB
/
singleton_remove.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#this script removes singletons from a paired-end sam file
#
#this is necessary because bowtie2 does not generate sam files
#containing only uniquely mapped reads
#
#to remove those that are not uniquely mapped in sam files, use:
#sed '/XS:/d' your_alignment_file.sam > your_alignment_file_1alignmentonly.sam
#(from MGineste - http://seqanswers.com/forums/showthread.php?t=23389
#
#this removes all those that are not uniquely mapped, but doesn't remove
#paired-end mates that ARE uniquely mapped, which causes error messages
#from htseq-count
#
#like htseq-count, this script requires paired-end reads to have the
#same name (but due to the use of a dictionary, does not require it to be sorted)
#
#USAGE
#python singleton_remove.py [messy_file.sam]
import sys
def singleton_remove(sam_input):
input_file = open(sam_input)
output_file_name = sam_input + ".cleaned.sam"
output_file = open(output_file_name, 'w')
read_dict = {}
for line in input_file:
if line[0] is "@":
output_file.write(line)
else:
if line.split("\t")[0] in read_dict:
output_file.write(read_dict[line.split("\t")[0]])
output_file.write(line)
else:
read_dict[line.split("\t")[0]] = line
input_file.close()
output_file.close()
sam_input = sys.argv[1]
singleton_remove(sam_input)