-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_gb_controlregion.py
64 lines (49 loc) · 2.09 KB
/
extract_gb_controlregion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/python
#-*- coding: utf-8 -*-
'''
date: 2018-11-25
version: 0.1
history:
0.1 create script
'''
import argparse
from Bio import SeqIO
parser = argparse.ArgumentParser(description = 'used for myself extract genbank features',
usage = 'python extract_features.py -g <sequence.gb> -m <cds>')
parser.add_argument('-g', '--genbank',
default = 'sequence.gb',
help = '<sequence.gb> file in genbank format as input')
parser.add_argument('-f', '--feature',
default = 'misc_feature',
required = True,
help = '<feature> specify the feature that will be extracted')
parser.add_argument('-o', '--outfile',
default = 'extract_results.fa',
help = '<extract_results.fa> file in fasta format as output')
parser.add_argument('-k', '--keywords',
action = 'store_true',
help = 'decide whether the keyword (RefSeq) is displayed')
args = parser.parse_args()
def organism_name(seq_record):
'get organism name from seq_record and remove space in scientific name'
name_string = seq_record.annotations["organism"]
new_name = "_".join(name_string.split())
return new_name
def keywords(seq_record):
'get keyword frome seq_record class and transfer list into str'
if args.keywords:
keyword = "".join(seq_record.annotations["keywords"])
else:
keyword = ""
return " " + keyword
def subseq(seq_record, seq_feature):
'extract sub sequence from seq_record and features'
sequence = seq_feature.location.extract(seq_record.seq)
return sequence
for seq_record in SeqIO.parse(args.genbank, 'genbank'):
#print(f'Dealing with GenBank record \n>{seq_record.id}')
for seq_feature in seq_record.features:
if seq_feature.type == args.feature:
#with open(args.outfile, 'w') as fo:
print(f'>{seq_record.name}|{organism_name(seq_record)}{keywords(seq_record)} {seq_feature.qualifiers["note"]}\
\n{subseq(seq_record, seq_feature)}')