-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheast.py
193 lines (147 loc) · 6.57 KB
/
east.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import sys
# sys.path.append("..")
import os
import time
# from concurrent import futures
from multiprocessing import Pool, cpu_count
import cv2
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt
import math
import numpy as np
from skimage import feature
from scipy import spatial
import tensorflow as tf
from east_utils import locality_aware_nms as nms_locality
from east_utils import model
from pre_processing import *
from post_processing import *
from crnn import CRNN
# Split this file's resolved location into (directory, file name).
# os.path.split understands the running platform's separator, whereas the
# previous rsplit("\\", 1) only worked on backslash-separated (Windows) paths
# and raised ValueError during unpacking everywhere else.
dir_path, file_path = os.path.split(os.path.realpath(__file__))
print(dir_path, '-->', file_path)
# Put this file's directory, then its parent directory, near the front of the
# module search path (index 1 keeps sys.path[0] untouched).
# NOTE(review): these statements execute after the project imports at the top
# of the file, so they can only influence imports performed later - confirm
# that this ordering is intended.
sys.path.insert(1, dir_path)
mother_dir, _ = os.path.split(os.path.realpath(dir_path))
sys.path.insert(1, mother_dir)
class EAST(object):
    """Text locator that chains an EAST detection graph with a CRNN reader.

    Constructing an instance loads the word and phrase lists for the
    requested process, builds the detection graph, restores its weights
    from the checkpoint directory and creates a ``CRNN`` instance.
    ``locate`` then detects text boxes in one image, reads every box with
    the CRNN and passes the raw results to ``postprocess_special_cases``.

    Keyword arguments given to the constructor override the entries of
    ``_defaults``.  A ``process`` keyword argument is required (it has no
    default); only the part before its first underscore is used to fill in
    the ``{}`` placeholder of the path templates.
    """

    # Default settings, copied onto every instance by ``__init__``.
    # The ``*_path`` values are relative to this file's directory
    # (``dir_path``); the ``{}`` placeholders receive the process name.
    _defaults = {
        "gpu_num": 1,
        "gpu_memory": 0.3,
        "model_path": 'east_utils\\models\\east_icdar2015_resnet_v1_50_rbox\\',
        "labels_path": 'east_utils\\config\\labels_{}.json',
        "words_path": 'east_utils\\config\\words_{}.txt',
        "phrases_path": 'east_utils\\config\\phrases_{}.txt',
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value registered under the name ``n``.

        Unknown names do not raise: a human-readable message string is
        returned instead, so callers must not treat every return value as
        a valid setting.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name `{}`".format(n)

    def __init__(self, **kwargs):
        """Load the vocabulary, the detection model and the CRNN reader.

        Args:
            **kwargs: overrides for ``_defaults`` plus the required
                ``process`` string.
        """
        print("\n\n\n[EAST] Initializing\n\n\n")
        self.__dict__.update(self._defaults)  # set up default values
        self.__dict__.update(kwargs)  # and update with user overrides

        # Keep only the leading token of the process name and use it to
        # resolve the per-process configuration file names.
        self.process = self.process.split('_')[0]
        self.labels_path = self.labels_path.format(self.process)
        self.words_path = self.words_path.format(self.process)
        self.phrases_path = self.phrases_path.format(self.process)

        self.words = load_list(os.path.join(dir_path, self.words_path))
        self.phrases = load_list(os.path.join(dir_path, self.phrases_path))
        # presumably both are Python lists, so ``+`` concatenates them
        # - TODO confirm against load_list.
        self.total_classes = self.words + self.phrases
        self.class_names = ['leftClick_' + c for c in self.total_classes]

        self.f_score, self.f_geometry = self.generate()
        self.crnn = CRNN()
        print("\n\n\n[EAST] Initialized successfully\n\n\n")

    def generate(self, restrict_gpu=False):
        """Build the detection graph, open a session and restore weights.

        Sets ``self.input_images`` (the image placeholder) and
        ``self.sess`` (the TensorFlow session) as side effects.

        Args:
            restrict_gpu: when true, cap the session's GPU memory fraction
                at ``self.gpu_memory``.

        Returns:
            The ``(f_score, f_geometry)`` pair produced by ``model.model``.

        Raises:
            FileNotFoundError: if no checkpoint can be found in the model
                directory.
        """
        # Tell program which GPU we use
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        # Reset the graph in case we have to load a model many times
        tf.reset_default_graph()
        with tf.get_default_graph().as_default():
            self.input_images = tf.placeholder(
                tf.float32, shape=[None, None, None, 3], name='input_images')
            global_step = tf.get_variable(
                'global_step', [],
                initializer=tf.constant_initializer(0), trainable=False)
            f_score, f_geometry = model.model(self.input_images, is_training=False)

            # Restore through the moving-average variable mapping.
            variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
            saver = tf.train.Saver(variable_averages.variables_to_restore())

            config = tf.ConfigProto(allow_soft_placement=True)
            config.gpu_options.allow_growth = True
            if restrict_gpu:
                config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory
            self.sess = tf.Session(config=config)
            print("\n\n\n[EAST] Session is created\n\n\n")

            # Load model
            checkpoint_dir = os.path.join(dir_path, self.model_path)
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt_state is None or not ckpt_state.model_checkpoint_path:
                # get_checkpoint_state yields None when the directory holds
                # no checkpoint state; fail with a clear message instead of
                # an AttributeError, and do not leave the session open.
                self.sess.close()
                raise FileNotFoundError(
                    "No EAST checkpoint found in `{}`".format(checkpoint_dir))
            model_path = os.path.join(
                checkpoint_dir,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(self.sess, model_path)
            print("\n\n\n[EAST] Model is loaded\n\n\n")
            return f_score, f_geometry

    def locate(self, image, use_focus=False, focus_zone=None, visual_check=False):
        """Detect and read the text regions of ``image``.

        Args:
            image: object offering ``.copy()`` and convertible by
                ``np.asarray`` to an array with at least three channels
                (presumably a PIL image - TODO confirm with callers).
            use_focus: when true, drop every region that does not lie
                inside ``focus_zone`` (with a small tolerance).
            focus_zone: ``(top, left, bottom, right)`` sequence; required
                when ``use_focus`` is true.
            visual_check: when true, draw the kept regions on a copy of the
                image and display it with matplotlib (blocks until the
                window is closed).

        Returns:
            The ``(bboxes, confidences)`` pair computed by
            ``postprocess_special_cases``, or ``({}, {})`` when nothing was
            detected.

        Raises:
            ValueError: if ``use_focus`` is true but ``focus_zone`` is None.
        """
        # Fail fast: otherwise the mistake only surfaces as a TypeError
        # inside the loop below, after the whole network has been run.
        if use_focus and focus_zone is None:
            raise ValueError("`focus_zone` is required when `use_focus` is True")

        canvas = image.copy()
        image = np.asarray(image)[:, :, :3]
        # Reverse the channel order for the detector (presumably RGB -> BGR).
        image = image[:, :, ::-1]
        H, W = image.shape[:2]
        im_resized, (ratio_h, ratio_w) = resize_image(image)
        score, geometry = self.sess.run(
            [self.f_score, self.f_geometry],
            feed_dict={self.input_images: [im_resized]},
        )
        boxes = merge_boxes(score_map=score, geo_map=geometry)
        # Restore the original channel order before cropping for the CRNN.
        image = image[:, :, ::-1]
        if boxes is None:
            return {}, {}

        # Keep the four corner points of each box and map them back from
        # the resized image to the original resolution.
        boxes = boxes[:, :8].reshape((-1, 4, 2))
        boxes[:, :, 0] /= ratio_w
        boxes[:, :, 1] /= ratio_h

        temp_labels = []
        temp_positions = []
        temp_confidences = []
        for box in boxes:
            # top left, top right, bottom right, bottom left
            box = sort_poly(box.astype(np.int32))
            # get bigger RoI for better recognition
            top = max(0, box[0, 1] - 5)
            left = max(0, box[0, 0] - 5)
            right = min(W, box[2, 0] + 5)
            bottom = min(H, box[2, 1] + 5)
            if use_focus:
                outside_vertically = (top < focus_zone[0] - 3
                                      or bottom > focus_zone[2] + 3)
                outside_horizontally = (left < focus_zone[1] - 7
                                        or right > focus_zone[3] + 7)
                if outside_vertically or outside_horizontally:
                    continue
            # NOTE(review): a degenerate box could produce an empty crop
            # here - confirm how self.crnn.eval copes with that.
            mini_box = np.copy(image[top:bottom, left:right])
            mini_box = Image.fromarray(mini_box).convert('L')
            # Run CRNN
            name, confidence = self.crnn.eval(mini_box)
            # push into list and wait for processing later.
            temp_labels.append(name)
            temp_positions.append([left, top, right, bottom])
            temp_confidences.append(float(confidence))

        if visual_check:
            drawer = ImageDraw.Draw(canvas)
            for bb in temp_positions:
                drawer.rectangle(bb, outline='red')
            plt.imshow(canvas)
            plt.show()

        # Post-processing
        bboxes, confidences = postprocess_special_cases(
            temp_labels, temp_positions, temp_confidences,
            self.total_classes
        )
        return bboxes, confidences

    def close_session(self):
        """Close the TensorFlow session opened by ``generate``."""
        self.sess.close()