#!/usr/bin/python3
from PyQt5.QtGui import QImage, QPixmap
from PyQt5.QtWidgets import (QApplication, QLabel, QLineEdit, QGridLayout,
QHBoxLayout, QVBoxLayout, QWidget)
from typing import List
import cv2
import numpy as np
import tensorflow as tf
from functools import partial
from picamera2 import CompletedRequest, MappedArray, Picamera2
from picamera2.job import Job
from picamera2.previews.qt import QGlPicamera2
from picamera2.devices.hailo import Hailo
from face_utils import SCRFDPostProc as PostProc
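# Compiled Hailo network files: SCRFD for face detection, ArcFace (MobileFaceNet) for recognition.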
DETECTOR_NETWORK = "scrfd_2.5g_8l.hef"
RECOGNISER_NETWORK = "arcface_mobilefacenet_8l.hef"
REGISTER_FRAMES = 30 # Number of frames to capture when registering a face
MIN_SIZE = 64 # Minimum usable face size, in pixels.
SIMILARITY_THRESHOLD = 0.75 # Cosine similarity threshold for face comparisons
NUM_FACES = 6 # Number of face registration slots shown in the GUI
INSTRUCTIONS = """
Enter a name, look straight at the camera and
press Enter to register. Wiggle your head
slightly for 2 seconds!
Delete the name and press Enter to remove."""
class FaceApp(QWidget):
"""A PyQt-based face recognition application.
This application provides a GUI for face detection and recognition using:
- A face detection model (SCRFD) to locate faces in camera frames
- A face recognition model (ArcFace) to generate face embeddings
- A simple cosine similarity-based matching system
The application allows users to:
- Register new faces by providing a name and capturing multiple frames
    - Recognise registered faces in real time
- Remove registered faces
"""
RED = (0, 0, 255)
GREEN = (0, 255, 0)
MAGENTA = (255, 0, 255)
YELLOW = (0, 255, 255)
CYAN = (255, 255, 0)
    # Canonical ArcFace reference positions (eyes and mouth corners) in the aligned crop:
ARCFACE_POINTS = np.float32([[38.3, 51.7], [73.5, 51.5], [41.5, 92.4], [70.7, 92.2]])
def __init__(self) -> None:
"""Initialize the face recognition application.
This method:
1. Sets up the face detection and recognition models
2. Configures the camera for video capture
3. Creates the GUI layout with registration controls
4. Initializes state variables for face tracking
"""
super().__init__()
# Load the recogniser network, which generates the feature vector for a face.
self.hailo_recog = Hailo(RECOGNISER_NETWORK)
self.recog_model_h, self.recog_model_w, _ = self.hailo_recog.get_input_shape()
print("Recognition model size", self.recog_model_w, "x", self.recog_model_h)
# Load the detector network, which detects faces in the camera image.
self.hailo_detect = Hailo(DETECTOR_NETWORK)
self.detect_model_h, self.detect_model_w, _ = self.hailo_detect.get_input_shape()
print("Detect model size", self.detect_model_w, "x", self.detect_model_h)
anchors = {'steps': (8, 16, 32), 'min_sizes': ((16, 32), (64, 128), (256, 512))}
self.detect_post_processor = PostProc(image_dims=(self.detect_model_h, self.detect_model_w),
anchors=anchors)
self.video_w, self.video_h = self.detect_model_w, self.detect_model_h
        # Configure the camera. The "main" stream feeds the detector network; the "lores" stream is displayed.
self.picam2 = Picamera2()
main = {'size': (self.detect_model_w, self.detect_model_h), 'format': 'RGB888'}
lores = {'size': (self.video_w, self.video_h), 'format': 'XRGB8888'}
half_res = [d // 2 for d in self.picam2.sensor_resolution]
sensor = {'output_size': half_res, 'bit_depth': 10} # use 2x2 binned mode
controls = {'FrameRate': 15}
config = self.picam2.create_preview_configuration(main, lores=lores, sensor=sensor,
controls=controls, display='lores')
self.picam2.configure(config)
        # Set up the GUI: camera preview on the left, registration controls on the right.
self.qpicamera2 = QGlPicamera2(self.picam2, width=self.video_w, height=self.video_h, keep_ar=False)
self.qpicamera2.done_signal.connect(self.capture_done)
layout_h = QHBoxLayout()
layout_h.addWidget(self.qpicamera2, 70)
layout_v = QVBoxLayout()
self.registered_faces = []
grid_layout = QGridLayout()
for i in range(NUM_FACES):
label = QLabel(f"Person {i}:")
grid_layout.addWidget(label, i, 0)
textbox = QLineEdit()
textbox.returnPressed.connect(partial(self.enter_pressed, i))
grid_layout.addWidget(textbox, i, 1)
image = QLabel()
image.setFixedSize(64, 64)
grid_layout.addWidget(image, i, 2)
self.registered_faces.append({"textbox": textbox, "name": "", "image": image, "vectors": []})
layout_v.addLayout(grid_layout, 5)
instructions = QLabel(INSTRUCTIONS)
layout_v.addWidget(instructions)
layout_h.addLayout(layout_v)
self.setWindowTitle("Face Recogniser")
self.resize(self.video_w + 320, self.video_h)
self.setLayout(layout_h)
self.registering = None
self.register_frames = 0
self.draw_rects = []
self.draw_points = []
self.draw_colour = self.RED
self.draw_face = None
self.draw_name = None
# Finally start the camera.
self.picam2.start()
self.picam2.pre_callback = self.draw_callback
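        # Kick off the first capture; capture_done() will queue each subsequent one.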
self.picam2.capture_request(signal_function=self.qpicamera2.signal_done)
def enter_pressed(self, i: int) -> None:
"""Handle the Enter key press in a registration textbox.
This method is called when the user presses Enter in one of the registration textboxes.
It either:
- Starts the registration process for a new face if a name is provided
- Removes a registered face if the name is empty
Args:
i (int): Index of the registration textbox that was activated
"""
name = self.registered_faces[i]["textbox"].text()
self.registered_faces[i]["name"] = name
if name == "":
self.registered_faces[i]["vectors"] = []
self.registered_faces[i]["image"].clear()
else:
print(f"Registering {name}...")
self.registered_faces[i]["textbox"].setEnabled(False)
self.registering = i
self.register_frames = REGISTER_FRAMES
def draw_callback(self, request: CompletedRequest) -> None:
"""Draw annotations on the camera preview. Called automatically by the camera.
This method is called before each frame is displayed and draws:
- Bounding boxes around detected faces
- Facial landmarks
- The current face being analysed (if any)
        - The name of the recognised person (if any)
Args:
request (CompletedRequest): The camera request containing the frame to draw on
"""
with MappedArray(request, 'lores') as m:
for rect in self.draw_rects:
cv2.rectangle(m.array, rect[:2], rect[2:], self.draw_colour, 2)
for point in self.draw_points:
cv2.circle(m.array, point, 3, self.draw_colour, -1)
if self.draw_face is not None:
h, w, _ = self.draw_face.shape
m.array[:h, :w, :3] = self.draw_face
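                # Separate the inset face crop from the preview with one-pixel black border lines.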
m.array[h:h+1, :w, :] = 0
m.array[:h, w:w+1, :] = 0
if self.draw_name:
(text_width, text_height), baseline = cv2.getTextSize(
self.draw_name, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
text_x = m.array.shape[1] - text_width - 5
text_y = text_height + 5
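                # Darken the region behind the text so it remains readable.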
rectangle = m.array[text_y - text_height:text_y + baseline, text_x: text_x + text_width]
rectangle[...] = rectangle // 2
cv2.putText(m.array, self.draw_name, (text_x, text_y),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
def capture_done(self, job: Job) -> None:
"""Process a captured camera frame. Called in response to a signal from capture_request().
This method is called when a new frame is captured and:
1. Runs face detection on the frame
2. If a face is found:
- Aligns and crops the face
        - Either registers it or tries to recognise it
- Updates the display annotations
3. Queues the next frame capture
Args:
job (Job): The completed camera capture job
"""
# First get the image to pass to the detection network, and queue a request for the next frame.
request = job.get_result()
frame = request.make_array('main')
request.release()
self.picam2.capture_request(signal_function=self.qpicamera2.signal_done)
# Run the detection network and post-processing, and unpack the results.
results = self.hailo_detect.run(frame)
post_proc_results = self.detect_post_processor.tf_postproc(results)
detection_boxes = post_proc_results['detection_boxes'][0]
num_detections = post_proc_results['num_detections'][0]
face_landmarks = post_proc_results['face_landmarks'][0]
n = int(num_detections.numpy())
self.draw_rects = []
self.draw_points = []
self.draw_face = None
self.draw_name = None
if n:
# Find the single largest face of any that were found, and the associated landmarks.
def rect_area(r: List[float]) -> float:
"""Calculate the area of a bounding box."""
x0, y0, x1, y1 = r
return (x1 - x0) * (y1 - y0)
areas = [rect_area(r) for r in detection_boxes[:n]]
index = np.array(areas).argmax()
rect = detection_boxes.numpy()[index].tolist()
landmarks = face_landmarks.numpy()[index].reshape((-1, 2)).tolist()
# Convert to image coordinates.
rect = [max(0, int(v * s)) for v, s in zip(rect, (self.detect_model_w, self.detect_model_h) * 2)]
landmarks = [[int(v * s) for v, s in zip(landmark, (self.detect_model_w, self.detect_model_h))]
for landmark in landmarks]
# If the face was sufficiently large, crop and resize it for passing to the recogniser network.
colour = self.RED
face = None
name = None
if all([p1 > p0 + MIN_SIZE for p0, p1 in zip(rect[:2], rect[2:])]):
# Get an aligned and cropped version of the face.
face = self.create_crop(frame, rect, landmarks)
raw_embeddings = self.hailo_recog.run(face)
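                # L2-normalise the embedding so that a plain dot product gives the cosine similarity.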
embeddings = tf.nn.l2_normalize(raw_embeddings).numpy()
if self.registering is not None:
# Add this embedding vector to the set that we have registered for this face.
self.registered_faces[self.registering]["vectors"].append(embeddings)
self.register_frames -= 1
if self.register_frames == 0:
print("Registering done!")
self.registered_faces[self.registering]["textbox"].setEnabled(True)
small = cv2.resize(face, dsize=(64, 64))
img = QImage(small.data, small.shape[1], small.shape[0], QImage.Format_BGR888)
pixmap = QPixmap.fromImage(img)
self.registered_faces[self.registering]["image"].setPixmap(pixmap)
self.registering = None
# Wink the bounding box magenta/yellow/cyan while registering.
colour = (self.MAGENTA, self.YELLOW, self.CYAN)[(self.register_frames // 2) % 3]
else:
# If any of the cosine similarities exceed the threshold, that will do.
embeddings = embeddings.T
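                    # For each registered person, count how many of their stored vectors match this face, and pick the person with the most matches.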
scores = [sum([reg @ embeddings > SIMILARITY_THRESHOLD
for reg in entry["vectors"]]) for entry in self.registered_faces]
best = np.argmax(scores)
score = scores[best]
name = self.registered_faces[best]["name"] if score > 0 else None
if name:
colour = self.GREEN
# Update what we draw.
self.draw_colour = colour
self.draw_rects = [rect]
self.draw_points = landmarks
self.draw_face = face
self.draw_name = name
def create_crop(self, frame: np.ndarray, bbox: List[int], landmarks: List[List[int]]) -> np.ndarray:
"""Create an aligned and cropped face image.
This method uses an affine transform to align the face based on eye and mouth positions,
then crops it to the size expected by the recognition model.
Args:
frame (np.ndarray): The full camera frame
bbox (List[int]): Bounding box coordinates [x1, y1, x2, y2]
landmarks (List[List[int]]): Facial landmark coordinates
Returns:
np.ndarray: Aligned and cropped face image
"""
l_eye, r_eye, nose, l_mouth, r_mouth = np.float32(landmarks)
landmarks = [l_eye, r_eye, (l_mouth + r_mouth) / 2]
l_eye, r_eye, l_mouth, r_mouth = self.ARCFACE_POINTS
ref_points = [l_eye, r_eye, (l_mouth + r_mouth) / 2]
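        # Three point correspondences are exactly what is needed to determine an affine transform.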
M = cv2.getAffineTransform(np.float32(landmarks), np.float32(ref_points))
return cv2.warpAffine(frame, M, (self.recog_model_w, self.recog_model_h))
if __name__ == "__main__":
app = QApplication([])
window = FaceApp()
window.show()
app.exec()