-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathObtainFeatures.py
More file actions
114 lines (108 loc) · 2.89 KB
/
ObtainFeatures.py
File metadata and controls
114 lines (108 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Getting the reverse complement of a given sequence (characters other than A/T/C/G are dropped)
def ComSeq(seq):
    """Return the reverse complement of a DNA sequence.

    The input is walked from its last character to its first and every base
    is replaced by its complementary base (A<->T, C<->G).  Any character
    that is not one of the upper-case letters A, T, C or G (for example
    ``N``, a lower-case base or a trailing newline) is dropped, so the
    result can be shorter than the input.

    Args:
        seq: DNA sequence, e.g. ``"ATCG"``.

    Returns:
        A new string holding the reverse-complemented sequence.
    """
    complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    # join() builds the result in one pass instead of growing a string with
    # repeated "+"; the membership test keeps the "skip unknown characters"
    # behaviour of the original if-chain.
    return ''.join(
        complement[base] for base in reversed(seq) if base in complement
    )
# Calling the longest common substring function to calculate the similarity score between two sequences
def SimilarityScore(F, REF1, REF2):
    """Score REF1 against two transformed copies of REF2 using ``LCS``.

    Args:
        F: Ignored.  The parameter is kept only so existing callers that
            pass a list keep working; the list passed in is never read or
            modified.
        REF1: First sequence, passed unchanged as the first argument of
            both ``LCS`` calls.
        REF2: Second sequence.  It is compared once reversed
            (``REF2[::-1]``) and once after ``ComSeq(REF2)``.

    Returns:
        A new two-element list
        ``[LCS(REF1, REF2[::-1]), LCS(REF1, ComSeq(REF2))]``.  The element
        type is whatever ``LCS`` (imported from the ``LCS`` module) returns.
    """
    reversed_score = LCS(REF1, REF2[::-1])
    revcomp_score = LCS(REF1, ComSeq(REF2))
    return [reversed_score, revcomp_score]
# Calling two functions to extract composition- and frequency-based features (footprint DNA)
def CallFunc(PTR, REF, IDN, PO, dis):
    """Run ``FeatureExtract`` and ``PositionFeatures`` for one pattern.

    Args:
        PTR: Pattern forwarded to both extractors (the script passes di- and
            trinucleotide strings such as ``'AT'`` or ``'ATC'``).
        REF: Sequence window forwarded to both extractors.
        IDN: Forwarded unchanged to ``PositionFeatures``.
        PO: Forwarded to ``PositionFeatures``; the script passes a
            zero-filled list.
        dis: Forwarded unchanged to ``FeatureExtract``.

    Returns:
        A tuple ``(F, PO)`` where ``F`` is the result of
        ``FeatureExtract(PTR, REF, dis)`` and ``PO`` is the result of
        ``PositionFeatures(PTR, REF, IDN, PO)``.
    """
    features = FeatureExtract(PTR, REF, dis)
    positions = PositionFeatures(PTR, REF, IDN, PO)
    return features, positions
# Calling two functions to extract composition- and frequency-based features (flanking linker DNA)
def CallFuncL(PTR, REF, IDN, PO, dis):
    """Run ``FeatureExtractL`` and ``PositionFeaturesL`` for one pattern.

    This is the counterpart of ``CallFunc`` that uses the ``...L`` variants
    of the extractors (used by the script for the wider, linker-including
    window).

    Args:
        PTR: Pattern forwarded to both extractors (the script passes di- and
            trinucleotide strings such as ``'AT'`` or ``'ATC'``).
        REF: Sequence window forwarded to both extractors.
        IDN: Forwarded unchanged to ``PositionFeaturesL``.
        PO: Forwarded to ``PositionFeaturesL``; the script passes a
            zero-filled list.
        dis: Forwarded unchanged to ``FeatureExtractL``.

    Returns:
        A tuple ``(F, PO)`` where ``F`` is the result of
        ``FeatureExtractL(PTR, REF, dis)`` and ``PO`` is the result of
        ``PositionFeaturesL(PTR, REF, IDN, PO)``.
    """
    features = FeatureExtractL(PTR, REF, dis)
    positions = PositionFeaturesL(PTR, REF, IDN, PO)
    return features, positions
# Writing features to a text file
def WriteFeatures(F):
    """Write the values of F to the feature file as comma-terminated fields.

    Each element is converted with ``str()`` and followed by a comma, so a
    non-empty F always produces output ending in ``','``.  No newline is
    written; an empty F writes an empty string.

    The output goes to the module-level file object ``Fe``, which must
    already be open for writing when this function is called.

    Args:
        F: Sequence of values to write.
    """
    # Fe is only read here (never rebound), so no ``global`` statement is
    # needed.  join() replaces the repeated string "+" of the original.
    Fe.write(''.join(str(value) + ',' for value in F))
import sys
from LCS import LCS
from FeatureExtract import FeatureExtract
from FeatureExtract import PositionFeatures
from FeatureExtract import FeatureExtractL
from FeatureExtract import PositionFeaturesL
from FeatureExtract import GetF
# ---------------------------------------------------------------------------
# Script body.
#
# Usage: the single command-line argument names the data set.  Sequences are
# read from '<Arg1>.txt' (one per line) and one comma-separated feature row
# per input line is written to 'Features<Arg1>'.  Each row ends with the
# class label: '1' when Arg1 is 'TP', otherwise '0'.
#
# NOTE(review): the loop nesting below was reconstructed from a copy of this
# file whose indentation had been lost.  It assumes (a) the trinucleotide
# loop is nested inside the dinucleotide loop, and (b) PV is incremented once
# per pattern rather than once per zero-filled list element.  Confirm against
# the original file.
# ---------------------------------------------------------------------------
Arg1 = sys.argv[1]

# "with" guarantees the input file is closed even if reading fails.
with open(Arg1 + '.txt', 'r') as D:
    DT = D.readlines()

P = ['A', 'T', 'C', 'G']
dyad = 100  # index within each input line around which both windows are cut

# Fe is the module-level handle that WriteFeatures() writes to; the "with"
# block closes it on success and on error alike.
with open('Features' + Arg1, 'w') as Fe:
    for i in range(0, len(DT)):
        print(i)

        # --- Window 1: DT[i][dyad-70 : dyad+73], processed with CallFunc ---
        PV = 0
        REF = DT[i][dyad - 70:dyad + 73]
        for a in P:
            for b in P:
                DI = [0] * 71
                PV = PV + 1
                F, DI = CallFunc(a + b, REF, 70, DI, 70)
                WriteFeatures(F)
                WriteFeatures(DI)
                for c in P:
                    TI = [0] * 71
                    PV = PV + 1
                    F, TI = CallFunc(a + b + c, REF, 70, TI, 70)
                    WriteFeatures(F)
                    WriteFeatures(TI)
        # GetF-based features are disabled in the original script:
        #F=GetF(REF)
        #WriteFeatures(F)
        # Compare the two halves of the window (index 70 itself is skipped).
        F = SimilarityScore([], REF[0:70], REF[71:141])
        WriteFeatures(F)

        # --- Window 2: DT[i][dyad-100 : dyad+103], processed with CallFuncL ---
        REF = DT[i][dyad - 100:dyad + 103]
        PV = 0
        for a in P:
            for b in P:
                DI = [0] * 22
                PV = PV + 1
                F, DI = CallFuncL(a + b, REF, 100, DI, 100)
                WriteFeatures(F)
                WriteFeatures(DI)
                for c in P:
                    TI = [0] * 22
                    PV = PV + 1
                    # NOTE(review): this is the only call that passes the
                    # running counter PV as IDN; the other three calls pass
                    # the fixed value 70 or 100.  Kept exactly as in the
                    # original - check PositionFeaturesL to see whether 100
                    # was intended here.
                    F, TI = CallFuncL(a + b + c, REF, PV, TI, 100)
                    WriteFeatures(F)
                    WriteFeatures(TI)
        # Compare the first and last 22 characters of the wide window.
        F = SimilarityScore([], REF[0:22], REF[178:200])
        WriteFeatures(F)

        # Terminate the row with the class label.
        if Arg1 == 'TP':
            Fe.write('1\n')
        else:
            Fe.write('0\n')