forked from ibaiGorordo/ONNX-YOLOv7-Object-Detection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathyolov7.py
249 lines (181 loc) · 10.1 KB
/
yolov7.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
import time, cv2, onnxruntime, requests, base64, numpy as np
from imread_from_url import imread_from_url
class yolov7:
def __init__(self, path, conf_thres=0.7, iou_thres=0.5):
self.conf_threshold = conf_thres
self.iou_threshold = iou_thres
self.class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush']
# Create a list of colors for each class where each color is a tuple of 3 integer values
rng = np.random.default_rng(3)
self.colors = rng.uniform(0, 255, size=(len(self.class_names), 3))
# Initialize model
self.initialize_model(path)
def __call__(self, image):
if type(image) == str and (image.startswith("http://") or image.startswith("https://")): # assumes that it's an image url
image = imread_from_url(image)
elif type(image) == str and image.startswith("/9j/"): # assumes that it's a base64 encoded image
im_bytes = base64.b64decode(image)
im_arr = np.frombuffer(im_bytes, dtype=np.uint8)
image = cv2.imdecode(im_arr, flags=cv2.IMREAD_COLOR)
elif type(image) == str and image.startswith("data:image/") and "/9j/" in image: # assumes that it's a base64 encoded image (with format attached)
image = image.split(",")[1]
# after this line, we're assuming it's a base64 encoded image
im_bytes = base64.b64decode(image)
im_arr = np.frombuffer(im_bytes, dtype=np.uint8)
image = cv2.imdecode(im_arr, flags=cv2.IMREAD_COLOR)
elif type(image) == str: # assumes that it's a local path
image = cv2.imread(image)
self.current_image = image
self.current_image = cv2.cvtColor(self.current_image, cv2.COLOR_BGR2RGB)
return self.detect_objects(image)
def initialize_model(self, path):
self.session = onnxruntime.InferenceSession(path,
providers=['CUDAExecutionProvider',
'CPUExecutionProvider'])
# Get model info
self.get_input_details()
self.get_output_details()
def detect_objects(self, image):
input_tensor = self.prepare_input(image)
# Perform inference on the image
outputs = self.inference(input_tensor)
# Process output data
self.boxes, self.scores, self.class_ids = self.process_output(outputs)
return self.boxes, self.scores, self.class_ids
def prepare_input(self, image):
self.img_height, self.img_width = image.shape[:2]
input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Resize input image
input_img = cv2.resize(input_img, (self.input_width, self.input_height))
# Scale input pixel values to 0 to 1
input_img = input_img / 255.0
input_img = input_img.transpose(2, 0, 1)
input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)
return input_tensor
def inference(self, input_tensor):
outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})
return outputs
def process_output(self, output):
predictions = np.squeeze(output[0])
# Filter out object confidence scores below threshold
obj_conf = predictions[:, 4]
predictions = predictions[obj_conf > self.conf_threshold]
obj_conf = obj_conf[obj_conf > self.conf_threshold]
# Multiply class confidence with bounding box confidence
predictions[:, 5:] *= obj_conf[:, np.newaxis]
# Get the scores
scores = np.max(predictions[:, 5:], axis=1)
# Filter out the objects with a low score
predictions = predictions[scores > self.conf_threshold]
scores = scores[scores > self.conf_threshold]
if len(scores) == 0:
return [], [], []
# Get the class with the highest confidence
class_ids = np.argmax(predictions[:, 5:], axis=1)
# Get bounding boxes for each object
boxes = self.extract_boxes(predictions)
# Apply non-maxima suppression to suppress weak, overlapping bounding boxes
indices = self.nms(boxes, scores, self.iou_threshold)
return boxes[indices], scores[indices], class_ids[indices]
def extract_boxes(self, predictions):
# Extract boxes from predictions
boxes = predictions[:, :4]
# Scale boxes to original image dimensions
boxes = self.rescale_boxes(boxes)
# Convert boxes to xyxy format
boxes = self.xywh2xyxy(boxes)
return boxes
def rescale_boxes(self, boxes):
# Rescale boxes to original image dimensions
input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height])
boxes = np.divide(boxes, input_shape, dtype=np.float32)
boxes *= np.array([self.img_width, self.img_height, self.img_width, self.img_height])
return boxes
def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
if type(image) == str:
image = cv2.imread(image)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
def _draw_detections(image, boxes, scores, class_ids, mask_alpha=0.1):
mask_img = image.copy()
det_img = image.copy()
img_height, img_width = image.shape[:2]
size = min([img_height, img_width]) * 0.0015
text_thickness = int(min([img_height, img_width]) * 0.001)
# Draw bounding boxes and labels of detections
for box, score, class_id in zip(boxes, scores, class_ids):
color = self.colors[class_id]
x1, y1, x2, y2 = box.astype(int)
# Draw rectangle
cv2.rectangle(det_img, (x1, y1), (x2, y2), color, 2)
# Draw fill rectangle in mask image
cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)
label = self.class_names[class_id]
caption = f'{label} {int(score * 100)}%'
(tw, th), _ = cv2.getTextSize(text=caption, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=size, thickness=text_thickness)
th = int(th * 1.2)
cv2.rectangle(det_img, (x1, y1),
(x1 + tw, y1 - th), color, -1)
cv2.rectangle(mask_img, (x1, y1),
(x1 + tw, y1 - th), color, -1)
cv2.putText(det_img, caption, (x1, y1),
cv2.FONT_HERSHEY_SIMPLEX, size, (255, 255, 255), text_thickness, cv2.LINE_AA)
cv2.putText(mask_img, caption, (x1, y1),
cv2.FONT_HERSHEY_SIMPLEX, size, (255, 255, 255), text_thickness, cv2.LINE_AA)
return cv2.addWeighted(mask_img, mask_alpha, det_img, 1 - mask_alpha, 0)
return _draw_detections(image, self.boxes, self.scores, self.class_ids, mask_alpha)
def get_input_details(self):
model_inputs = self.session.get_inputs()
self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
self.input_shape = model_inputs[0].shape
self.input_height = self.input_shape[2]
self.input_width = self.input_shape[3]
def get_output_details(self):
model_outputs = self.session.get_outputs()
self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]
def nms(self, boxes, scores, iou_threshold):
# Sort by score
sorted_indices = np.argsort(scores)[::-1]
keep_boxes = []
while sorted_indices.size > 0:
# Pick the last box
box_id = sorted_indices[0]
keep_boxes.append(box_id)
# Compute IoU of the picked box with the rest
ious = self.compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])
# Remove boxes with IoU over the threshold
keep_indices = np.where(ious < iou_threshold)[0]
# print(keep_indices.shape, sorted_indices.shape)
sorted_indices = sorted_indices[keep_indices + 1]
return keep_boxes
def compute_iou(self, box, boxes):
# Compute xmin, ymin, xmax, ymax for both boxes
xmin = np.maximum(box[0], boxes[:, 0])
ymin = np.maximum(box[1], boxes[:, 1])
xmax = np.minimum(box[2], boxes[:, 2])
ymax = np.minimum(box[3], boxes[:, 3])
# Compute intersection area
intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)
# Compute union area
box_area = (box[2] - box[0]) * (box[3] - box[1])
boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
union_area = box_area + boxes_area - intersection_area
# Compute IoU
iou = intersection_area / union_area
return iou
def xywh2xyxy(self, x):
# Convert bounding box (x, y, w, h) to bounding box (x1, y1, x2, y2)
y = np.copy(x)
y[..., 0] = x[..., 0] - x[..., 2] / 2
y[..., 1] = x[..., 1] - x[..., 3] / 2
y[..., 2] = x[..., 0] + x[..., 2] / 2
y[..., 3] = x[..., 1] + x[..., 3] / 2
return y