forked from hiruna72/squigualiser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathf5c_resquiggle.py
171 lines (136 loc) · 27.3 KB
/
f5c_resquiggle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
from bokeh.plotting import figure, show, output_file, save
from bokeh.models import Span, BoxAnnotation, HoverTool, ColumnDataSource, Label, LabelSet, CustomJS
from bokeh.colors import RGB
import pyslow5
import copy
import argparse
import re
import numpy as np
KMER_LENGTH = 6
BASE_LIMIT = 1000
base_color_map = {'A': 'limegreen', 'C': 'blue', 'T': 'red', 'G': 'orange'}
parser = argparse.ArgumentParser()
parser.add_argument('-s', '--slow5', required='True', help="shows output")
parser.add_argument('-p', '--paf_read', required='True', help="shows output")
parser.add_argument('-o', '--output', required='True', help="shows output")
args = parser.parse_args()
paf_fields = re.split(r'\t+', args.paf_read)
read_id = paf_fields[0]
print(f'signal file: {args.slow5}')
print(f'read: {read_id}')
print(f'output file: {args.output}')
basecalled_read = "TACTTCGTTCAGTTACGTATTGCTCCTATAATTGAAGACACTTGTTCTTATACTGCTTTAAGGTATAAAGGAAGAAAAAAAACAGATAATGGCAAATGTTGGTGAAGCCGGGCATGGTGGCAGCCTGTAATTCCAGAACTTAGGGAGGCTGAGGTGGGCAGATCACTTGAGGCCAGGAGTATGAGACCAGCCTGGGCAACATGGTAAAATCCCACCACTACAGAAAAATATAAAAATTAGCCAGGCATGGTGGCGTACACCTGTAATTTTCAGCTACCCAGGAGGCTGAGATGAGAGAATCACTTGTGCCTGGGAGGTCACGGCTGCAGTGAACTGTGATGGCATCATTGCACTGCAGCCTGAGAGACAGAGCAAGCCCCTATCTAGAAAAAAAAAATGTCAGTGAAGATGTGGAGGAATTGGAACCCACATACATTACTGGTGGGAACATAAAATTGTGTAACCATTTTGTTTGGGTATTTTCTTTTCTTGTCATTTAGTGGATTTTTAAAAAATCAAGACGGGGTTTCACTATCTTGCCAGGCTGGTCTTGAATTCCTGAGCTCAAGCCATCCTCCTAGCTGAGCCTCCTGAGTAGCTGGGATTACAGGTGTGAGCCATTGCACCCAACTGAGTATAGCCACGTTAGAAAACATTCTGGCAGTTTCTCAAAAGGCTAAATGTACAGTCATCCTATAATGCAACAATTTCACTCCTAGGCATATGTCCAGAAAAATAAAAATATTATGTCCACACAAAAACTTGTACAACAATCTTCATAGCAGCATTATTCATAATGACCAATACATGGAATACATGGAAACAACCCAAATATCCACCAACTGATGAACAGATAAACAAAATGCAGTGTGTCTCTACCATGGAATACTGCCATAGAAGGAATGAAATGATACACACTATGACATAAAGGAACTTTGAAAACACTGTGTAAGAGGGAAAAAAATACAAAAGATCACATATTGTACTGTTCTATTTGTCCAGATTAGGCAAATCTATAGTGACAAAAAAATTAATCAATGGTTGCCTAAGGCTGGGGGCAAAGGTAGGTGGGGAGAGTAGGAGGTAGTGGCTAAGGGGTATGGATTTCTCTATAGGGTAATGAAAGGTTCTAAAAGTGACTGTGGTGATCGATGCACAGCTCTGTGAATATTCTAAAACCTACTGAATTGCAGATTTCAATAAATAAAGTGAATGGTATGTGAATATTTTAATAAAGCTATTATTTAAAATAATAATAATAGGGGGCTGGGCACAGGTGGTCATGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGCAGGAGGATCACTTGAGGTCAGGAGTTTGAGCCCAGTCGGAGCAACATGGCAAGATCCCGTCTCTATGATAAAAAATTACCTGGACATGGTGGCACATGTCTGTAGTCCCAGCTACTTGGGAGACTGAAGTGAGAGAACCACTTGAGCCCAGGAGTTTGAGGCTACAGTGAACCATGATCATGTCATACTGTAGCCTAAGCAACAGAGCAAGACGCTGTCTCTGAAAGGAAAGAAAACAAATGCAAGTTTTTATCACTTTGTGAGTGGCAGCCAAGTTGGAGGAGAAATAGACAATAATAAAAGAGCACTGAATAATGACAGTGAGTGGCTGGTTAGGCTCAGTTGCTAGCTAAATGGCTTCTAGAAATTCAATAAAGTTACAGCTCTGGGGACAGTCATGTAGTCAAAGAATGAGAGCGAAATTCATTACAATTGCCCATGGTCTTTATTTACATGCCTTCTAGTGAAAAATTCTAAGTGCCTAAACAGCAAGTCTGCAATGATAGCAGCTGTTTATTAAAGACTAAAAAAGAAATGGAGGCCGGGCGTGGTTGTTCACATCTGTACTCCCCTTGAATTTTGGGAGGCTGAGGCAGGCAGATTGCCTGAGGTCAGGAGCTCGGGAGCCTGGCCAACATGGTGAAATCCCATCTCTACTAAAAATACAAAAATTAGCTGGGTATGGTGGCGGGCACCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATTGCTTGAACCCAGAAGGTGAAGGTTGCAGTGAGCCAAAATCGCACCATTGCACTCCAGCCTGGGTGACAAGAGAAAGACTCTTATCTTAAAAAAAAAAGAAAAAAAGAAATGGCATCTTCTTCAAGAATTACATCGTGTTTCATGATAAAGCTCTAATTTGCATTTGTTCAAGTATTGATGAATTGCAATATGACACCCATCTTGGATAAAATGCAAACAACACAATTTCATTTTCTCATTAACAAAACCGATTAAGTAGTCTAATATAAATTGCGATCTTATTAAAAACTGATCCGATTTAAAAAATTATGGAATTATGGAGCCAATAAGATGTTACAACCTGTTCAAGGGGAATTCAAAATCCACACATATCTGAGACCATCAAGTATGATGAAATATATTTGATTACTATATTGAAAAATAAACTGATTACATAGCCAACAATTGGACAGGGGTCTCCTCATCCACAGCCACACAAACCCGATCATGCAGCTGTATGGTTACAAAACTCACATAGCCTAGAAGGGACTGGTCTGACTTGAGATTTCATTTGTATTTGTATTTTGAGACAGGGTCCCACTCTGTCACCCAGGATGGAGTGCAGTGGTATAATCATAGCTCACTGCAACCTTGACCAACTGGGCTCAAGAGATGCTCCTGCCTCAGCTGCCCCCATACCTGGGAATACAGGCAAGTATCACCATGTCCAGGCATTTTTTTTTCATTTTGTAGAGAGAGAAGACTTGCTATGTTGCCCAAGCTGGCCTCAAACTCCTAGAATCAAGAGATCTGCCCATCTCAGCCACATGAGTAACTGGGGCCATAGGTACATACCATCATGCCTGGCTATATTTATTTTCTTTTTATTAAATTTATTTTTGTTTGTAGAGGAGGTCTTGCTGTGTTGCCCAGGCTGCTCTCAAACTCATGGCCTTAAAACATACTCCCATCTCCTCTGCCTCTCAAACTGTTGGAACTATAGGTGTGAGCCACTGTACCTGGCCTGACTTGGGATTTCTTTTATCTAGCATCCTTTACTTGGTAGGATTGGGAAAAGCAGTAGTGTTTTTAAAATTACTTAATAATTCAATCAGAATCAAACTCCAACCTTGACCACTGCCTTCTCTCACAGCTCATCCAGTCTGTCAGGAAATCCTACTGACTGACTTCAACATGTATCCAGGCTCTAACCATCTCTCACCACCACCATGAACCCCGTCAGGATCACTATCATCTCCCACCGGGATGTTGCCACAACAGCTCCCATGCTTCTACCCAAATCTTCCATAGTCTTTCTCAACTCGGCAGCCAGGTCGTGCTTTTAAATCAGGAGACGGATCATGTCGCCTCTCTGCTCAAAAACACTCGGTGGTTCCCATTTTAGTCAGAGCAAAAGCCAAAGCCCCAGCAGCGTCCAGGGCTTACGATCTGTACCGATCCCAGCCCAGCAACTCCCTGGCCTCCTCGCTGACTTCGCTCCATCTCTTTGCTCCACTGGCCTCCTTCCAGAGCCTCAGACACACCAAGTTTCCTCTAATGCCTTTATCCTGTTGACTCAGCCTACAATGCTCTTCCCTCAGCACCTTGGCCAGCTCCATCACCTGCTTCAAACTTGCTCAATATTCACTTATGAGGCCAACCCTGACCCCTCTACTTAACACTGCCATCTGTCCCCATTCCCACCATGCTCATTTCTTTCTTTCTTTTTGAAACAAGATCTTGCTTTGTTGCCCAGGCTGGAGTACACTGGTGCAATCACAGCTCACAACAACTTCAACCTCCCAGGCTTAAACAATCCTCCCGCCTCAGCCACCTAGGAACTGAGACTACAGCTGCATGCCACAACATGGCTTTTATTTTTTTTTTTGAGACGGAGTCTCGGTCGCCCAGGCTGAAGTGGGGTGCGATCTTGGCTCACTGCAATGTCTGCCTTTTGGGTTCAAGTGATTCTCTGCCTCCCAAGTAGCTGGGATTACAGGCACCCACCACCACACCTGGCTAATGTTTGTATTTTTAGTAGAGATGGGGTTTCACCATCTTGGCTAGGCTGGTCTTGAACTTCTGACCTCGTGATCCACCCTCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACTGCGCCTGGCCTTTAAAAAAATATTTTTTAGACATGAGGTCTCATTATGTTGCCCAGGCTGGTCTTAAGCTCCTGGGCTTAAGCGATCCTCCCACCTCAGCCTCCTAAAGTTCTGGGATTACAGGCGTGAGCAACAGCAGCATGAGGTCCCAGCTTCATGTTTTTGTTGTTGCTACAACAAAGTACCCTACATTTAGTGGCATCAAACACCACAAATCTACCATCTTACAGTTCTGGGAGCCGAAGCCCAACTAGGTCTATTAAGGCTAAAGTCAAGGTGTCAGAGAGGCTGCATTCCTTCTGGGGGAGGCTCTGGGTGGGATGTGCTCCTTTGCCTTTCCAACTACAAGCCACCCCATTCCTTGACTTACCTCGTGACTCCATATTCAAGGCCAGAGTGCAGCATCTTCAAATCTCCCTCTCTGACCTCTTCTTCCATTACCACATCACTTTCTCTAATTCTGACTCTCCTACCTCATTCTCTTATAAAGATCCTTGTGATTGGTGGGTATGGGCTCACATCTGTAATCCCAACATTTTGGGAGGCCAAAGAGGAAGGATTGCTTGAGGCCAAGAGTTAGAGATCAGCCTGGGGAAAATAGGAAGATCCTGCCTTTACAAAAATTAAAAATCAGCTGGACATGGTGATGCATGCCTGTAGTTCCAGCTACTGGAGAGGCTAAGGTGGGAGGATTGCTTTAGCCTAGGAGGTCAAGGCTGCAGTGAGCTATGATCACATCACTGCACTCCAGCCTCAGTGGCAGAGTGAGACTCTGTCTCCGATATAAGAAAGAAATATACATTTGGTCTCTGCCCCTGGTTCCTGGCATAGAGCTTCCAAAGCTCTTATAAAGCCCTTCGTGACAGAGGTAATAGGAGCATTTTCTGTTTGATATTTAGTCTTAGTCCCAGGTTCCTGACACAAGGGCCTCTAAGGTCTTTCAGATCTGCAGCATGGTAAGAATGCATGTGGGATGCTGTTGAGCTAACGGGGTGGCTGCAAGCTCCGAGACTGCTTCAGGAGGAGGGCTAGCTGCCAGAAAGCAGCACATTTTTTTTTTAAAACAGAGGTTTGGCTCTTGTAGCCCAGGCTGGAGTGCAATGGCACAATCTCAGCTCGCTACAACCTCCACCTCCCGGGTTCAAGCAATTCTCCTGCCTCGGCCTCCCGAGTAGCTGGAATTATAGGGGTGTGCCACAATGCCTAGCTAACTGTTGTTATTTTTAGTAGAAACAGGGTTTCACCATGTTGGTCAGGCTGGTCTCAAACTCTTGACCTCAAGTGGTCCATGTGCCTCAGCCTTCCAAACTGCTAGGATTACAGGAGTGAGCCACCGCACCTGGCCCCAACCACATTTTTGGGGCTTGGAACTTTCAGCCTCACCTGCTGAACTCCAGGAGGCAAAGGGAACTGGAGATTGACTTAACTACCAATGGCCAGTGATTTTATCAATCATGCCTCCATAAACACCCAAACAGCAGGGTTTGGAAACTTCTGTGTTGCTAAGCCTAGGAGGTCCTGGGAGGGTAGTGTGCCCAACAGAGGGCATGGAAGCTCTGTGCCCCTCCCCACTTACCTTGTCCTGTGCATCTCTCTTTCATTGGCTGTTCCTGAGATGGAGCCATTACATTGAGCCAGTAATAGAAAATAAGGTGGCCAGATGCACTGGCTCATGCCCGTAATCCCAGCACTTTGGGAGGCAGAGGTGGGCGGAATCACTTGAGCCTAGGAATTTGAGACCAACCTGGGCAACATAAGAAGACCCCATCTATACAAAAAATAAAATTAGCCAAATGTGGTGGTGGGAACCCTGTAATTCCAGCTACTTGAGAGGCTGAAGCAGGAGAATCACTTGAGCCCTGGATGTTGAGGCTTCAATAAGCTATGATTGCACCACTGCACACCAGCTTGGACAACAGAGCGAGGCCCTGTCTCTTAAAAAGAAAAGAAAAAAAACTTGTTTTTCTAAGTTCTGTGAGTTGTTCTAGTAAATAATTAAACTCAACAAGAGGGTCATGGGAAACCCTGATTTCTAACTGGTTGGTCAAAATACAGGTGACAGCCTAGGACTTGCAACTGGCATCTGAAGTGAGGTGGTCTTGTGGGACTGAGCCCCTAACCTGTGGGTTCTGTGCTAACTCTAGGTAGTGTGAATGGAATTGTGGGATACGGTTGGCATCCAGAGAGTTGGAGAACTGGTGACAGGAAACTCTGCACACACATTTGGTCAGAAGTCTGTGAGTAGAGAGAAACGTGTTGCAGAAGTCAGGACCCCAAACGGAGGGACTGGCTGAAGCCACGAAGAATATAAATTGTGAAGATTTCATGGACATTTTATTAGTTCCCAAATTAATACTTCTATAATTTCTTAGGCCTGTCATTGCTGCAATCTCTGAACATAAATTGTAATTTCATGAACCTTATCCTTCCCAATCAATACCACTAATTTTCTATACCTGTCTTTAATCTCTTAATCCGCTCATCTTCGTAAAACAGGGATGAATGTCGCAGGACCCTGTGATAATTAGATTAGCTGCAAGTTGTTTAAACAATATAGAAACTGGCACCTTAGAAAGAACAGGATAACAACAATTTCAGGAACAAGGCAACACTAAACTCTAGCTACCTGTAGGCAGGTTGAACAAGGCCATATTCTCTTCTTTCAAAAGCAAATAGGAGAAGTATTGCTGAATTCTTTTTCTCAACTTAAGGCATCCCTGAAAAGGAGAATACATCTAAAGGCACAAATAGCCGCTTTGAGGCAACTGTCTTTTACAGTCATAGATAAGGGATGAAATAAGCCCTAGAGATTGGCGTGTAGCTCCCGGGCTTATCAGAACAAGGGAAATTTAAAACACAATAAAATGTTGGTCAGATGGGTTGTCTGCTCTCAAACCCTTTCTCCTGATAGATGTTATCAATGACAATGCGCGCCCGAAACTTCATTAGCAATTTGTTCCGCTTCCCCAGTCCTGTGGTCCTGTGATCTTGCCCTGCCTCCATTTGCCTTGTGATATTTTATTACCTTGAAGCATGTGATCTCTGTGACCCACACCCTATTCGTACACTCCCTCACCTTTTTGAAAATCACTAATAAAAACTTGTTGGTTTTGCGGCTTGGGGGGCATCACGGAACCTGCCGACGTGTGATGTCTCCCCTGGACATCCAGCTTTAAAATTTCTCTCTTTGTACTCTTTCCCTTTATTTCTCAGACTGGCTGACACTCAGGGAAAATGAAAAGAACCACCATGAAATATCAGGGGTGAATTTCCCCCGATATCACACTGGCTCTTCTCACCTGTCTACCTGCTTAACTTAATAGGAGAGGCAATGCATGGTGCTCATGAACAAGGCAAGCATTAAGAGTCAGACCAGACTAACATTTGACTCAGTCCTAATATTCAGGTGAGCTTGGGCAAATCGCTCATTAACCCCAAGTCTTCATCATTTTGTGCATATAATGGGGATAACTGTGGCACCCACCTGTTTTTGTGAGAATCAATGAAATATTATGCTTGATGTTATTGTGATCACTATCTGACAAGGGCAGTGATGCATGATAACATCAAAATTAGAAACTGTAATGAGGTCTCTTGGGCAAAATTCCATACAGGCAAATTACTGTCTCTACAAAGCATTTCTGCCACACTTAATTCACCATACCCTGAACAAAATGTGCCATCTTCATTGTTCAGGTCTGTATAGTGCTGGTTTCCCTGCCTGGGCAGCTCCTCCATCCATCCCAGCCCAATCCCATCCCTCCACCTCCCCTTCCCTCCCCACTCTCATACAACTCTTCCTTATCTTACGGGACTTGGCTTCAATGTCACCTTAACTTTGGAAAGCTTCTCTCCCTCTCCAGAAAATTCCGATTGCACTTGATGCATGCACTATTATTTGATCATTTTTGAGTTACAGTCCAAGTCTTTTTGTGCCTGAATAACATGTTGCCCAGTCAGTTTCTCTTCCTGGATTCAGAAGTCTTTCATGGTAGGTCCAGCTAGAAGTGACAAAAAGACATTTAAAAAAAAAAAAAAAGAGGGATGACACAGACAGACATCAGCACTTAAAAGTTTTAAACGATATGTGAAAAACAAAATTTAAGGGCTTCTAGGAGAAATGTAGGAGGGAAGGTGTTACTGGGAAATATGATAGAAGGTTAATTTTATTTTATTTTATTTTTAGAGAAAGGGTCTTGCTCTATCACCTAGGCTGGACTGCAGTGGTGCAATCACAGTTAACTGCAGCCTCAACCTCCAGGGCTTGAGGCAATATTCCCATCTAATTTTTATTTTGTTTAAGAAATGCAGTCTTGCTCTTAGCAAAGCTAAAGTGCAATGGTGTGATCATAGCTTACTGCAGCCTCAACCTTCTAGACTCAAGTGATCCTCCAGTCTTAGCCTCCCAAGTAGCTCGGACTACAGGTGTGCACTGCAACGTGTAGCTCATTTTTTTTTTAATTTTTAGTAGAGACAAAGTGTCACTATGTTGACCAGGTTGGTGGTGATCTCCTACCTCAGGCAGTTCTCTCACCTCAGCCTTCCAAAATGCTGGGATTACAGGTGTCAGCTGCCACACCTGGCTGAGGGGGTTAATTTTTAATTATATAAAGAGCTCAAAGCAAATATTAGAAGGAGCCTAAATGCCTCCAGCAGTTGACTGGTACTGGTAAATTGTGATACATCCATATAATAAAATATTATGCAACCATGAAAAGGATTAAGATAGATCAATAGGTATTGGCACAAATGTCCACGAAATATGAAAATATGAAGTGATGTTCAATCACCATGTACGTATCTTGAAGGATATAGCCCATTCTCAACTGCAATTATTTCCTGAGATAAGATTATGGGTCTAAAGTGAAGGACATTTTTCACTTATTTAAAAGTATTTATCATTTTTATAATTTGACAAGATTAAACAGATCATTGAATTAGTAAAAGACAAAGTAACTCTATAAATAAATGGAAAAGACACAGATACCCAGGCATGGTGGCTCATGCTTATAATACCAGTACTTTGGGAGGGGGTGGTGGGGGGATTGCTTGAGGCCAGGAGTTCCAGACCAGCCTAAGCTCAATAAGACCTCCTCTCTAGTAAAAATAAAAAATAAAAAATAATTGGCCAGGCATAGTGGCATGTGCCTATAGTCCCAACTACTGAGGTGGAAGGATCACCTGAGCCTAGGAGGTCAAGGCTGCAGTGGGTTGGTGCCACTACACTGAAGCCTAGGAGACAGAGCGAGACTTCATCTCAAAAAAAAAAAAAGGACAATAAAGAAATAAAAGCTAATAAACTAACATAAGGAAAGATAAAATATGTGACAAATAGGCTGGGCGGCACATGGCTGTAATCCAAGCACTTTGGGAGGCCAAGGCGGGTAGATCACGAGATCAGGAGTTCGAGACCAGCCTGATCAACATGGTGAAACCACGTTTCTACTAAAAATACAAAAATTAACCAGGCATGGTGGCATATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCGCTTGAACCTGGGAGGCACAGGTTGCAGTAATTCGAGATCACACCACTGCACTCCAGCCTGGTCGACAGAGCGAGACTGTGTCTCAAAAAGAAAAAAGAATGGGTGACAAAGTAATAATAGGAGGTCTTTCATTTATCACACAGAAAATAACTTGTTAAATTGCAATACCTGTGTGGGCGAAGGTGCAGTGAAATGGCCATTTTCTTGTAGTATTAGTGGTGTTTAAAATGTATATAAACCTTCCAGCATAAAGCTTGAAATTTTTTTTAAATCATACAGACAGTGACTCATTATACTGCCTCCTCCAACTCCTGGCCTCAAGCAATCCTCCCACCTCAGCCTCCCAAAGTGCTGGAATTACAGGCGGCAGCCACCATGCCTGGCTTTGCAATTTACATCAGGGGTAATAAGAATGCTCATGCCCTGTGACTCACAGTAATCTGCTTCTGTTCACGCCTTTGGATATAATTCAACCTAAACAAAAGGTCGTATGCACAAACACAGTGAAAATCTGGGAATTATTTTTTCTCTTTTTTTAAAAAAATATGGAATGCTTCACAAATTTGCATGTCATTCTTTCACAGAGGCCGTGCCAATCTCTCTATTGTTCCAACTTAAGTATGTGTGTGCTACTGAGGCAAGCATAGGTAATTTAAGATAGAGTGGTTAAGTGAAATAAGGAAGAATTATGGAGAATTTAAAAATCTATGCTATTTATAGGCACCTAATGCTCAGTAAATATTAGCTGCTACTATTATTATTTTTATGGTAATTTCACTCAATTAAAAACTGTCGTTAAAAATTACCATTGTCATGGAACATAATGTCTCCTACTGTATAATTGTAGAAACAGATACAATTTGTCCCTTGGTATATGGGATTAGTTCCAGCTCTCATTTGTGTATACCAAATCTTTCTCAATTTGAAGTCAGTCCTGTGGAATCCACATACACAAATGGGAAAATTAGTGAGGTGTGGTGACAAGCACCTGTAGTCCCAGCTACTTGTGAGGCTGAGGCAGGAGGATTGCTTGAGCCCAGGAGGTTGAGGCTGCAGTGAGCCATAATTGCACCACTGCACTCCAGTCTGGGCAACAGAGTGAGACAGAAGGTTGACTTTTTAATAGAATTTTTCTGTTCACTTGAAGATATGGTCAGGATTGTGGCATGAAAATTCTTCATAAAATAACTATCTAATCCAATTAATGCTGGAATTGGGAACAGCAGAAGTGTCATCTCAGAGCTACTCACAATGAAAGGTGATGTCTGGGGCTCAGGTGTGTTGAGGTCCCCATGCCTGGACTATGGGTGCTGAGTGGGATTATGTCCATCCATTTCTATATTCCAGCACTGGGAAACCAGGGACAGTACTTGTTCTCAAGGGAATCTTCAGCTTAGATTGGCTCTGTAAAGAGGGAGTACATCATTTGAAAAATCGTCGCAGGTCAGGTGAGGTGGCTCATACCTATAATCCCAGCCCACTGGAGACTAAGGCAGGAGGATCAAGAGTTCAAGACCAGCCTGAGCAACACAGTGAAACCTCATCTCTACAAAAAATTAGAAAATGAACTGGGTGCGGTAAAACATTCGTATAGTCCCAGCTACTCTGGAGGCTGAAATAGGAGGATCGCTTGAGCCCAGGAAGTGGAAGCTGCAGTGAGCTCTGATCTCACCACTGCACTCTAGCCTTGGTGACAGAGTGAGACCCTGTCTCAAGACACACACACACACACACACACACACACCCAATCTCACTCTGTCCAGCCTTGACTAATCAAAAGGGCCTTCTGGTTACAGAAGAGGTATGCTCTTTTGTAGGACGGGGGAGAACCAGCAAGCTTGTTCACAGACTTTCCTCATCCTCTGCTTAGTTTTCCAAGAACCCTCACAGTGGAAATGGGTCTCTGGGAAAATGACCTAAATCTTTGGGTTACCAGGGGAGAAATATGCCTCCTTTGTCAATTAATAAATGGAACATCTGCCTTAAAATCCAGGGAGTTTCTGCTAGAATGAATCACTCCCTAAGACCCTGACCAATGCATGGAACATGAAAAACTGAAGTTTAACTGGGCGCGGTGGATCACGCCTCTGTAATCCCAGCACTTTGGGAGGCTGAGGCGGGCGGATCACCTGAGGTCAAAAGTTCTAGATCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGTTGGGCATGGTGGTGGACACCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAAAATCGCTTGAACCCGGAAGGCGGAGGTTGCAGTTACTTCTAGAAGAATTTCCATTAGCCCTTTGAAATCCTTCAACATTCATGAAGGCAAAAGGTTTTCACCTAATTTAATCTGATGGGTGTGTGACCCAGAGTCTTTCTAGGGAATGAGACTCCCAAACAGTTCGACTGGGAAGTGAGGAGAGAATTTATTACTCAAAACCAAAAGGGAAATGAAAAGAGGCCTTATAGAATGTCATTATTCTTTCTTGGCCGGGAATGGATTTCAGAGTCGTTGCGACCTTTACATGACCTCCTTATTAGCATCTAAAAGCTTCCAGTGTAGGATGCAGCCAGCTAGGTTCTCTTCTAATGTAATAAAATTTGCTTGACCAAATCTTATGCTGAAGCCATCTCCAGGCTCCAGAAACAATAGGCTATAAATTACTGGATCTCCCATTTGATACAATGAAGTATGAGCATGGTCCTGAATGACTCCTCTACATACTACTCTGGGTGGCTTGAAGTGAATTTGATACAAGAACTGGAGCGAGGGCAAAGCAGAGCTAGATCTAGGATTAATGTGCTTGTTGAACTCCTCACTACTCACCTATGAGTCTAGTTCCAGAACCCAAGTAGGATGGGAAACAAGGCTCCTGACTTTTTCCCTAATGTCTGCATCTCTTTCCCATTTCTTATCTCCTTGCAAAGAAACTAAACAGAGCTCAACTGAAATAACTAAATGATTAAACCCTATACAGAGACTCTCCAAAGACTGACAAAATATCATTCAAGACTGTTACACAGACAACCTTGAGGATGACTTGATGTACCAGTGATCTACAATATTTGGGATCATTCCAAATTCCCATCAAGGATCTGCCTATATCAACAAAGGAGCCAAGGACCAACCATTCAAATGGGCCATGCTGCCAAACCTTTTTTTTTTTTTAACAATGCCATCTCTTCATATTGTTCATTTAACAAAACTGCAGCCCTTCATCTATCCTTAAGTCCTTGGCCGTATTGGGGACGGTAATACTACTCCCTAGCAGGAAATCAACAGGATGACCTACTAAACACCATTCAGAAGATGCTAAGACCCATAGTGCAACAGGAAAGAAAAGACAGAGAATTAGTCAGACAGGTACATGCTGTGCCAAAAATGCACTACAGCCCCCAATTCTGCCTAATCCTAGCTGGGCTGACACCAACCTGATGAGACCGAAGACCTATAAGATCTCAAACTAAAACAGAAACTCTGAACTGGGTTCTTTCGACAGGAAGCAGCCAATAAATCATTAAAGAACAGATAAGTTCTTAAGGTGAGGAGAGTTTCAGATAAATGGAATGCTGGTAGAACACAGGGCCCAAAGGAGCAAAAGTTAACCTAAACCCAGGTAGAACCTTGTTTACTAGAGTATTAGGCATGGGTTTGGGCAACTATTCTAACCAGAGAAGCAGCTTCAGTGAGGGCAAGTTAGCAATCAAGGTATAGCATGCATGAGGCTGGCAAAATTCAGGGTGACTGAAGCAAAAGCTTCATAACCAGAAAGACCACATCTGGGGGTAGAGCACAAAACTCTCAAGATGAATCTTTGTAGAGTGAAGGCAGAACTATATAGCAGTTTAGGAGATCTGTTGGTGCCCAGCAAGAGCTCCAGCTGGGTATATGCAGGGATGCAGGCTGTGGTCTCAGGAGAGAGGTTCTAAAAGTCATTCAGTCCAAGACCTCAAACTGTGTTCTCTACTAAAAGGAATCAAGGTTCCCTAGAGAAATGGCTGACTCCATGTATGGTGCAGTATATTGATCCTGGAACATCTGTTTTGCCAAGCAAGGAAGCCATCAAAGTCCAACAGGATCACGTCAAAAAGACATGAAAGTCAACTTGAAGAGATAATTATTAACCTAGATGGAGACAATGTAAGCATCCAAAACAATAAAGACTGCAATGACACAAATACATCAAATGCAAACAATAATCTATGAGTTCATAATGGTATTCAGAAAAAAAAACTACTGGTCCATTAGAGGAAGGTTACTAGGTCGCAACACTACTCTGAAAAGTGACTTAAGATGAGAGGTAGGGTGGAGAATTAGCTATTTATTCAGTCTTTCCTGTACAAACATAAATTTTTAGGGAGATTGAAGCAGATGAAACAAATCTGGAAAAATGGAGGTAACTGCTTAATCTGCGGAGTTGGGTGCATGGAGGTTTAACATATTTCTTTTGTGTATATTTGAACCCCCTACAAAAAAAACACAAAGACAGAATGTGAGCCAAGCAGCTTAGGGTTTGACGACTGCCTACAAGAGACATAGGATGGGGTAGTTTTAGCCTAATGGGCTGAGCCAACTGGAGGTATATGAAGTACTAAATTGCAGAGGTATCATGTTGTACTTGATCAAATCCTAGATCCTAGGTCTGCTTGGTGGCATGCTTCCTAGGTAGTGGATACAAGGCTACCTATAGAACTGTGATGTATTCGCTCAGAAACTACAAAAGTGCTTGCTCTTGAAAATGGAGTCTTTGTCCATTCATGCTTCTATAAAAGAATACCACAGACTGCATAATTTATAAAAGGAAAAAGGAAGGAAAAGAAAAAAAGAGGAAGGGAGGAGGGAAGGGAGGAAAAAAGGAAGGAGGGAAGGAAAGGAAGGAAGGAAAGAAGGAAAGGAAGGAAGGAAAGAGAGAAAGAGGGAAGGAGGAAGGGAGGGAAGGAGGGAGGGAGGGAGAGAGAGAGAGGGAGGGAGGGAAGGGAAGAAAAGGAAGAGAAGGAAAGGAGGAAGAAAAGGAAAGGAAAGGAATAAATTTTGTTCTTAACAGTTCTGGATGTTAGAAGTCCAAGGTTGGGGAGCCTGCATCCTGGTGGGTCTTCTTGCTGCATCCATCCCACTACAGAAGGCAGAAGGAAAAGAGGTGCAAAAGCAAGGGCAAAGGGCTGAACTCTGTTTTATAATAGCCTCTGTGATTATAATCTGTTACCACAATAACAACATTAACTCATTCATGAAGGCTATTTTATTAGGCCCCACATCCCAACTGTTGCATTGAGGATTGGGTTTTCCAGCACATAAACTTTGGGGGACACATTTAAACCATAGCAGAGCACTTGGGTTAATTCAACTAAGAGGAGCTGGGAAAATCAAAGGCATGAGAAAGACAGCAAAAGCTAGCAGAGAGAAATGCATGAGTTGAAAAAAGTCACAGTGAATCCTGTAGTGCAGGCTACTTTATCAAAAGCACTAAAAAGATCTCATTAACTCCCCCAGCTCACACTCACGCACATCTAAAGAGCCACACACAGCACCACCAAAGGCAGCACAATGAGAACAGCATTCTCCTCAACAGACAAGCTGGGAGTATCTAGACACCTGACCTCAATAGCTCCAGTGGCTATTGTTCCTCCCTAACCACCACTCAAGTCACCAGCTTGGAAAGTAGTAGAAAACCCAAATTGACACACCACTATGAAACAACAAAACAGCAGAACAACCCATTTAAGCAATGCCAGCTGTTGGGAAAAAAGGAACAATGAGTAGAGAAGCAGACCTCTCAGGGGTCCACTAAGACCCAGTCTCGCAACCAGCACTTTTTAAATGCAGAATCCATACCCCTCAGGGCCTGTGGAGCTCCACAAGGCATGTCGTCCTCAAAGATAAATGAACGAGCAGCTGGCTAGAAAACCACTAAGGGTGTTATTCTTTAAAGAATCTTTATAGGGTCAAAGAAGAATGGGTCCTAACTGGCTATGTGAACTCCCCACAGATTCTGAGGATGATGTCAGTATCCCTTTCCAGATGTGTTTAACACTTTGCAGTCACTTGTATTCCTGCCACTGAGTGCCAGTGCTTTGCTAATTTGAACTGGTTCAACTCCATGACACAGCTCCTGGATGTTACCATTAGCCAGACTGACACCCATGCTGTACCCTTTCAAAAAGAGTCACAAACGTATTTCACCTACTCTTCCAAGACAAGTAAAAATGACTGCCAAAGAAATGGGGAAAAAAGGTCAGAGAGTGAAAACAGTGTATATAACAGACATGGGGAGAAACTAGGAAAATCATATATGGGCTCTCTGCAAAACCCACAGGCTATGTCCTTTTGCTTGTGTCTGAATGGAGCACCAATTTTAGCTGTGGCGAAAAAAAAGAAAAAGCAGGCCAGAGCTGGGCGCAGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGGATCACCCAAGGTCAGGAGTTCTAGACCAGCCTGGTCAACATGGTGAAACACCATCTCTACTAAAAATACAAAAAATTAGCCAAGCATGGTGGCGCATGCCTGTAAATCCAGCTACTAAGGAAGCTGAGGCAGGAGAATTGCTTGAACCTGGAAGGCAGAGAATGTGGTGACCTGAGATACGTCATTGCCCTCAAGCCACAGCAATGAGAACAAAATTCGGTAAAAACAAAACAAAACAAAACAAAACAAAAACACATAAAAAATAACTCAGACTTAATTAAATACAACCTAGTGGTGAATGACTAAAGATGGATTACTCATAACAGAGATTGAACAGTCCAATAAGAATCCAGGAATCTTACCTTTTAATAACAAAAAAATCCTTTCCTTCTAAAGTAACATCCTCTCAAGGCCAGGAATTCCATTAGTAGAAAGCCTTCCTAAAAAACAAAATTCCTGGCCAGGCATGGGTTCACGTCTGTAATCTCAGCACTCTGGGAGGCCGAGGCGGGAAGATCCACTTGATATCAGGAGTCGAGGCGGGAAGATCACTTGACGTCAGGAGTTCGAGACTGGCCCAGCCAACATGGTGAAACCGCATCTCCACTAAAAATACAAAAATTAGCCTGGTGTGGTGGTGGGCACCTGTAATCCCAGTGACTTGGGAGGCTAAGGCAGGAGAATTTCTTGAACCCAGGAGGCAGAGGTTGCAGTGACAGCAAGGTTGCGCCATTGCACCCCAGCCTGGGCGATAAGAGTGAAACTCCATCTCAAAAAAAAAAAAAAAAAAAAAATTCCTTTGGGAAGGCCTTCTACATAAAAATCTTCAACATGAGACTGGAAAAGAGGTATGGGATCATCACCGGACCTTTGGCTTTTGCAGCTCGAGCTATAGAACAAAAAGAAAAGGGATATCATTTAAACACGGTATGTGAAAAGAATAATTATTGAATCTGTACTGGTCTTTAACTTTTACACTTTGATCTTCATTCTGTTATTGTGATTGAGTCCAAAGAAAAACAGTATGAGTAAAATAAAAAGAACACCAAAAATGCTAATATTCTGTTTGCAGAAGTCTGTAGTGAAATATCCCATTAAATCCAAGTGCAGTGGCACACCATAATCCCAAGCACTTTGGGAGGCTGAGGCAGGTGAATCTCCTGAAGTCAGGAGTTCAAGGCCAGCCTGGCCAACATGGTGAAACCCCAACTCTACTACAAATACAAAAATTAGGCAGGCGTGGTGGCAGAGGCCTTAATCCCAGCTACTTAGGAGGCTGAGGCAGGGAGAATTGCTTGAACCCAGGAGGTGAGCTTGCCATGAGCTGAGATCATACCACTGCACTCCAGCGTGGGTGACAGAACAAAACTTCAACCTCAAAAAAAAAAAAAAAAAAAAAACAGCTAGCAGGTGACATTTGCTATAGGGAGTGAGGAGACTAGGGATATGATCTTGCTGCAATCTTTCCATTTAGTAAATCTAAACAAGTGTGAATCCATTCTGTTTCGTCCCCATCGCCAGAGCCAAAACAAGAAAATCAATTATATTTCTAGTTCTTTAAAAACATATCTAACTAAATCATCTAATTAAAAGATAATATGCATGGTTCCATACTCTAAAAGAAAACTTATGTCCTGCATATCATGGACATTTGATGAATGCTTATTCAGTTGACTGGTGTAGACTTCAATAATAACCTGTTCAATGCATTATACCAGATGAATCTTGCATCTCAAAGTAGAACAAATATTGTTCTTTCAGTTTGTCTACCCATAAATGCAATATTTACTAATAAAAAAGAAAAATGAGTTTATTGTTCTAGAGAGTATGAGAATTTTGACAGCATGAATTCTCCTGTCCTAGGACATAATTAATACTAGAGGCATACTATTTCATGTGGAAGCTACCATTAAATCAATGTTAAGTGTTAATTACCTCACATAATCTTCTAATCGAGCTTGACTGAAGACGGTACCTGACAAAGTTGATTTATCAAGTTGTAAATCTTCACCTGTTGAATTCATAAGTTCATGTCTGAAAGGTGAATAAATACTTAATATTCATTAGGCAATATTCCTTAGTAATATCCACTAGTACATATTTAATATTTCATCATGAACTGCGGGTGTGAAGAAGAAAAGACAGGCTGGGCACAGTGGCTCACACCTGTAATCCCAGCAGTTTGGGAGGCCGAGGCAGGCAGATCATGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTAAAAACCCCGTCTGTACTAAAAGTACAATAATTAGCTGGGCATGGTGGCAGGCACCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATTGCCTGAACCCAGGAGGTGGAGGTTGCAGGAAACCATTGTCCGCCACTGCATTCCAGCCTGGGCAAGAGAGCAAGATTCTGTCTCCATCAATCAATCAATAAAAATATAAGGAGGGAAGCATTTACTGTGTATTTATATGTCTGGTATTATGTGAAGCACTTTTATCTTATCAAATCTTCGGGACAGATCTTCAGTTCTCATGACCACAAAAGAGGATACTAGTGAACAGGAGAAGGAACGTGGCCAACCTGTGTCCCCAGGACTGTGGTCTTACCACTAGGTTACAGTGTTTTCAGATATCACATGTTGTGGGGTGCCTTTAAAATGAACCAAAAAACCAAAGGTAGAAAAAGAGCTAAGCTATTAAAAAGTGGGAGAAACACTAAGAGAACCTTAAGCATGTAACTAAAAATATTATGGAAATGTTATTGAATTCATTATGGAAATTTTGGTACTAGGTTTTCATTGAGGAGTAGGTTATATTACTCATGATGAAGAAAAATGTTACATTTAAGTATATTAACATAAATACCATCAATATTGTTTATCATGCTTAAATGTTCACTTAAAGCAATTCAGTTAAAATTCTGCATATCATACAATTTTATAGTTTGCTAGTAGGTTACAAAGTAAATAGTCACCAAATAAAAACATCATGTTTTTCACGGTTGTTGCTCTTTTTTAGGTGGTATTTGATGTATACCAACAGAGAGGATAATAACAAATCGCTAATTTCTTTCATCACTATATAAAGGTGGCTTCAGGATAGAATAGTATCAGGGCAATGATGAATTTGAAATCTAACATCAATTCAGTGATGCATCAAGATAAAAGTAGAGACAGCAGGGGCTGTGGTGAGTACTGAACATTTTATTTATTTATTTATTTTGAGATGGAGTTTTGCTCTTTTGCCCAGGCTACAGTGCAATGGTGCCAACCTCGCCTCACTGCAACCTCTGCCTCCTGGGTTCAAGCGATTCTCCTGCCTTGGCCTCCCGAATAGCTGGGATTACAGACATGCGCCACCACACCGTCTAATTTTGTATTTTTAGTAGAGACGGGGTTTCTCCATGTTGGTCAGGCTGGTCTCGAACTCCCGACCTAGATATCTGCCTGCCTTGGCCTCCCAAAGTGCTGGGATTACAGGTGTGCCCCTTATGGTCGTTAAATTTAACAAGCAGACTAAGAGAAACAATTCATTTAAAAAAATAATATTTGGCCAGGCATGGTGGCTCACACCTATAATCCCAGCACTTTGGGAGGCTGAGGTGAGTGGATCAGGAGGTCAGCAGTTCAAGACCAGCCTAGCCAAGATCATGAAACCCCGTCTCTACTAAAAATACAAAAATCAGCCAGGCGTGGTGGCGGGTGCCTGTAATCCTAGCTGCTCGGGAGGCTGAGGCAGAGAACTGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGTGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGGCTCCGTCTCAAAAAAATAAATAAATAATTCAATGAAATTCCTAAGATCCAGGGCTTTGCAATAAATATGTAAATAAATTTCAATCTCCATACTGAAAGTTTAAAAGAAATGCTAACTAATAACTAAAGAAATACAACTTTTCTCCAGCTTTGCAGCAATCTAGAAACAAAGTGTGTAGACACTACAAAGCACCTTACAAGGAGAAACATGTAAGGATGGCATGACTCACCGGCGGCCTGAGCATCCACGGTACCCCCATGATGAACAGTAACTCCACTGTGTAAGCATGAACATAAGATTACAAGACTTTTCAGTTTAGACATACCATATTTTCTTTCAGACAATTCTTCAGTTTGTTTACGTAGATCAGCGATACGATGATTCCATTTCTCTGAAAACCAAGCAAAAGTTGCTTCTCAATAACACGTCCCTATGTCAGAGCAGCACTAACGTATAATGACTGATTTCATATATTTTACATTCTAACAGTCCATATCCACACTGCTTTCAAGAAAAAGACTTGCTCTTGGTGGTTCTTAGAATTGGTTTAATGGGAGACTATTAGAGAAGCTGAAAAGCAGGAGGGCAGAAAAGTTCAATCAAATTAAACACAATAACAGGGAGGTCACAATGAGGCGGTCTCCAGGGGTCTTTTAGCAAACTTCTAAAACATGTCTCAGCTGTGTGAAATAAGACTTTACAGCAGCCGGGTGCAGTGGTGCAGGCCTGTAATCCCAGCACTTTGGCAGCAGAGGCAGGCGGATCACTTTGAGCTCAGGGCAACATGGCCAAAACCCCCCCTCCCTAGCCCCACCCACCCCGTCCCTACCAAAAATACAAAACAGCAGGGCATGGTGGCGGGCGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCACCTGAACCCAGGAGGCAGACATTGCAGTGAGCCAAGATCACGCCACTGCCAGCCTGGATGACAGAGCAAGACTCCACCTCAAAAAAAAACAAAAACAAAAACACAAGGTTAGAGGGACCCCCGACCTTACAGATACAAGTTTAAGAGGGACCCCAAGCAAAAAATGCCAACCTTTTTCTCCCAATCATTGAAACACCAGGAGGGTGTAACAGTTTTGCAGCCTAGCTGTAGCAGGCTGATGCCCCCAAGATGCCCCATATCCTAATCCCGGGAACTGGTGAACATGACCTTATATGGCAAAAGGAGCTTTGCAGATATAATGAAGTTAAGGGTCTTTGGCTTTTTGGGGTTGATGTACTCACTCGGATCCTTGTAAGAGCAAGAGCAGGTGATGGAGAGGGTGGGAGGTGTAGTGACAGAAGCAGGAAACTCAGTCATTCGAGACGGGCAGCACAAGCTGAGGGAGTGCAGGCCACCTCTCTGACCAGAAGTGGATTCTCCCGCAAGCCTCGGAAGCTACCGACCCTGCTCCCACCTTGACTCAGTAGGACTTACTGTAGAATTCTGGCCTTCAGACCTGTAAGGGAATACATTTGGTTGTTTTAAGTCATAGTGTGTGGTAATTTGTTGCAGCAGCCACAGGAAACTAGTATTGTAGTGAAGCCTCAAAACCCCCCTGAAGGGGCTGGGCTCAGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCGACGTGGGTGGATCACTTGAGGTCAGGAGTTCGAGACCAGCCCAGCCAACATGGTGAAATGCCATCTATACAAAAAATACAAAAACTAGCCGGGCATGGTGGCACATGCCTGTAATCTCAGCTACTCAGGAGGCTGAGACAGGAGAATTGTTTGAACCCAGGGGGCAGAGGTTGCAGTGAACTGAGATTCCACCACTGCACTCCAGCCTGGGTGACAGAGCGACGCTCCATCTCGAAAACAAAACAAAACAAAAAACCCCACCTGAAGGTTTCCAGTTCTGCCAGCACTCTCACCCAACCCCCAGAAACAGACATTCCATTGCTGTGGGCCATGGACAGGCAGAAGGAAGCACCTCCTCATGGCAGAGGCCTGGGAAAGCAAGGGAAGGCACTACTGGGCTGGCCCCTCTCTGCAGTATATTCTTTTTTTTGAGGCCGAGTTTCACTCTCTGTCTCCCAGACTGGAGTGCAGGGGCACAATCTCGGCTCACTTCGACCTCTGCCTCCCCAGTTCAAGTGATTCTCCTGCCTCAGTCTCCTGAGTAGCTGGGATGACAGGAGTGTAGCATGCCTAGCTAATTTTTGTATTTCTAGTAGAGATGCAGTTTTGCCATGTTGCCCAGGCTGGACTCGAACTCCTTGCCTCAAGTGATCCACCTGTCTCAGCCCCGCAAAGTGCTGGTATTACAGGAGTGAGCCACTGCACCCAGCATTTGCCAGAAGACCTTTGATGGCAGGCTTTTTTCAGGTGATCAGTCCTTGTCTGGTCTGGCTCTGCCCCACTCTCCCTTCTCACCTAGTTGGAATCCCTAGCTACTTTTCAGTAGAGGGAAGTGTGTACCCCAATCCCAGCTTGGTTCAGATCTGCATTTAACTCATGGAACCTGGCTGCTCCCCAGGTTCTGAAGAAAAAAACGGTCTCTCTGTGGGTATGATAAAGGATGGGCCTGTCCCCAGGACCCTGTGAGAGGAAGCCCAATGTCCCACCAGGTTGGCAGGGCTGGGGAAGGGAAAGTGTTATGGCAGCCCCAAGAAAAAAGAGGCAGCAGAGGGAGCAGGACAGCGCTCACATGGAACTCATGCCACTGCCTGAGGGGGAGGAGTGCACGCCAGTGACGTCAGGGGGCAGAGAGGCGCAGTTCCA"
trim_offset = int(paf_fields[2])
start_index = 0
end_index = int(paf_fields[3])
# prepare some data
x = [0, 1, 2, 3, 4, 5]
Xreal = [0, 1, 2, 3, 4, 5]
y = [6, 7, 2, 4, 5]
# set output to static HTML file
output_file(filename=args.output, title=read_id)
plot_title = f'{read_id}:{start_index}-{end_index}-{trim_offset}'
tools_to_show = 'hover,box_zoom,pan,save,wheel_zoom'
p = figure(title=plot_title,
x_axis_label='signal index',
y_axis_label='signal value',
sizing_mode="stretch_width",
height=300,
output_backend="webgl",
x_range=(0, 750),
tools=tools_to_show)
# tooltips=tool_tips)
# open file
s5 = pyslow5.Open(args.slow5, 'r')
read = s5.get_read(read_id, pA=True, aux=["read_number", "start_mux"])
if read is not None:
# print("read_id:", read['read_id'])
# print("len_raw_signal:", read['len_raw_signal'])
# x = list(range(1,read['len_raw_signal']+1))
# y = read['signal']
x = list(range(0, end_index - start_index))
Xreal = list(range(start_index, end_index))
y = read['signal'][start_index:end_index]
base_x = []
base_y = []
base_label = []
moves_string = paf_fields[14]
moves_string = re.sub('ss:Z:', '', moves_string)
moves_string = re.sub('D', 'D,', moves_string)
moves_string = re.sub('I', 'I,', moves_string)
# print(moves_string)
moves = re.split(r',+', moves_string)
moves = moves[:-1]
vlines = []
base_count = KMER_LENGTH - 2
location = trim_offset
previous_location = start_index
# draw moves
signal_x = []
signal_y = []
for i in moves:
previous_location = location
n_samples = 0
if 'D' in i:
i = re.sub('D', '', i)
n_samples = int(i)
prev_loc = previous_location
for j in range(0, n_samples):
base_count = base_count + 1
base = basecalled_read[base_count]
base_box = BoxAnnotation(left=prev_loc, right=prev_loc+5, fill_alpha=0.2, fill_color='white')
p.add_layout(base_box)
base_x.append(prev_loc)
base_y.append(115)
label = str(base) + "\t" + str(base_count + 1)
base_label.append(label)
prev_loc = prev_loc + 5
x_end = x[-1]
x = x + list(range(x_end, x_end+5))
location = prev_loc
z = np.concatenate((y[:previous_location], [0] * n_samples * 5), axis=0)
y = np.concatenate((z, y[previous_location:]), axis=0)
# y = y[:previous_location] + y[previous_location:]
elif 'I' in i:
i = re.sub('I', '', i)
n_samples = int(i)
location = location + n_samples
vline = Span(location=location, dimension='height', line_color='red', line_width=1)
vlines.append(vline)
else:
base_count = base_count + 1
n_samples = int(i)
location = location + n_samples
base = basecalled_read[base_count]
base_box = BoxAnnotation(left=previous_location, right=location, fill_alpha=0.2, fill_color=base_color_map[base])
p.add_layout(base_box)
vline = Span(location=location, dimension='height', line_color='red', line_width=1)
vlines.append(vline)
base_x.append(previous_location)
base_y.append(115)
label = str(base) + "\t" + str(base_count + 1)
base_label.append(label)
if base_count == BASE_LIMIT: break
p.renderers.extend(vlines)
base_annotation = ColumnDataSource(data=dict(base_x=base_x,
base_y=base_y,
base_label=base_label))
base_annotation_labels = LabelSet(x='base_x', y='base_y', text='base_label',
x_offset=5, y_offset=5, source=base_annotation, render_mode='canvas',
text_font_size="7pt")
p.add_layout(base_annotation_labels)
plot_signal_limit = location + 10
source = ColumnDataSource(data=dict(
x=x[:plot_signal_limit],
y=y[:plot_signal_limit],
xreal=Xreal[:plot_signal_limit],
))
p.line('x', 'y', line_width=2, source=source)
# add a circle renderer with a size, color, and alpha
p.circle(x[:plot_signal_limit], y[:plot_signal_limit], size=2, color="red", alpha=0.5)
# show the tooltip
hover = p.select(dict(type=HoverTool))
hover.tooltips = [("x", "@xreal"), ("y", "$y")]
hover.mode = 'mouse'
save(p)
# show(p)