Refactor code to handle text detection target result states and updat… #9

Merged: 2 commits, May 23, 2024
README.md: 16 changes (11 additions & 5 deletions)
@@ -94,13 +94,19 @@ There are some extra steps for installation on Windows:

### Running from source

-1. Once everything is installed launch the application:
+1. Compile the UI files into Python:

-   ```shell
-   python main.py
-   ```
+   ```powershell
+   ./scripts/compile_ui.ps1
+   ```
+
+1. Launch the application:
+
+   ```shell
+   python main.py
+   ```

-2. Follow the on-screen instructions to load an image of the scoreboard and extract the text.
+1. Follow the on-screen instructions to load an image of the scoreboard and extract the text.

### Build an executable

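A note on the new compile step: `scripts/compile_ui.ps1` itself is not part of this diff, but since the project builds its UI with PySide6, the script presumably invokes `pyside6-uic` to turn Qt Designer `.ui` files into importable Python modules. A hypothetical Python equivalent of that step (the glob pattern and output naming are assumptions, not taken from the repo):

```python
# Hypothetical stand-in for scripts/compile_ui.ps1: compile every Qt
# Designer .ui file into an importable ui_*.py module via pyside6-uic.
import pathlib
import subprocess

for ui_file in pathlib.Path(".").glob("*.ui"):
    out = ui_file.with_name(f"ui_{ui_file.stem}.py")
    subprocess.run(["pyside6-uic", str(ui_file), "-o", str(out)], check=True)
    print(f"compiled {ui_file} -> {out}")
```

Compiling ahead of time means the app imports plain Python classes at startup instead of parsing `.ui` files at runtime.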
camera_view.py: 75 changes (7 additions & 68 deletions)
@@ -1,5 +1,3 @@
-import platform
-import time
from PySide6.QtWidgets import (
    QGraphicsView,
    QGraphicsScene,
@@ -8,19 +6,22 @@
from PySide6.QtCore import Qt
from PySide6.QtGui import QImage, QPixmap, QPainter
from PySide6.QtCore import QThread, Signal

+import platform
+import time
import cv2
import numpy as np
+import datetime
+from datetime import datetime

from camera_info import CameraInfo
from ndi import NDICapture
from screen_capture_source import ScreenCapture

from storage import TextDetectionTargetMemoryStorage
from tesseract import TextDetector
-import datetime
-from datetime import datetime

from text_detection_target import TextDetectionTargetWithResult
from sc_logging import logger
+from frame_stabilizer import FrameStabilizer


# Function to set the resolution
@@ -80,68 +81,6 @@ def set_camera_highest_resolution(cap):
    set_resolution(cap, *highest_res)


-class FrameStabilizer:
-    def __init__(self):
-        self.stabilizationFrame = None
-        self.stabilizationFrameCount = 0
-        self.stabilizationBurnInCompleted = False
-        self.stabilizationKPs = None
-        self.stabilizationDesc = None
-        self.orb = None
-        self.matcher = None
-
-    def reset(self):
-        self.stabilizationFrame = None
-        self.stabilizationFrameCount = 0
-        self.stabilizationBurnInCompleted = False
-        self.stabilizationKPs = None
-        self.stabilizationDesc = None
-
-    def stabilize_frame(self, frame_rgb):
-        if self.stabilizationFrame is None:
-            self.stabilizationFrame = frame_rgb
-            self.stabilizationFrameCount = 0
-        elif not self.stabilizationBurnInCompleted:
-            self.stabilizationFrameCount += 1
-            # add the new frame to the stabilization frame
-            frame_rgb = cv2.addWeighted(frame_rgb, 0.5, self.stabilizationFrame, 0.5, 0)
-            if self.stabilizationFrameCount == 10:
-                self.stabilizationBurnInCompleted = True
-                # extract ORB features from the stabilization frame
-                self.orb = cv2.ORB_create()
-                self.stabilizationKPs, self.stabilizationDesc = (
-                    self.orb.detectAndCompute(self.stabilizationFrame, None)
-                )
-                self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
-
-        if self.stabilizationBurnInCompleted:
-            # stabilization burn-in period is over, start stabilization
-            # extract features from the current frame
-            kps, desc = self.orb.detectAndCompute(frame_rgb, None)
-            # match the features
-            matches = self.matcher.match(self.stabilizationDesc, desc)
-            # sort the matches by distance
-            matches = sorted(matches, key=lambda x: x.distance)
-            # calculate an affine transform from the matched keypoints
-            src_pts = np.float32(
-                [self.stabilizationKPs[m.queryIdx].pt for m in matches]
-            ).reshape(-1, 1, 2)
-            dst_pts = np.float32([kps[m.trainIdx].pt for m in matches]).reshape(
-                -1, 1, 2
-            )
-            h, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts)
-            # warp the frame
-            if h is not None:
-                frame_rgb = cv2.warpAffine(
-                    frame_rgb,
-                    h,
-                    (frame_rgb.shape[1], frame_rgb.shape[0]),
-                    flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR,
-                )
-
-        return frame_rgb


class TimerThread(QThread):
    update_signal = Signal(object)
    update_error = Signal(object)
frame_stabilizer.py: 74 changes (74 additions & 0 deletions, new file)
@@ -0,0 +1,74 @@
import cv2
import numpy as np


# This class is used to stabilize the frames of the video.
# It uses ORB features to match keypoints between frames and calculate an affine transform to
# warp the frame.
class FrameStabilizer:
    def __init__(self):
        self.stabilizationFrame = None
        self.stabilizationFrameCount = 0
        self.stabilizationBurnInCompleted = False
        self.stabilizationKPs = None
        self.stabilizationDesc = None
        self.orb = None
        self.matcher = None

    def reset(self):
        self.stabilizationFrame = None
        self.stabilizationFrameCount = 0
        self.stabilizationBurnInCompleted = False
        self.stabilizationKPs = None
        self.stabilizationDesc = None

    def stabilize_frame(self, frame_rgb):
        if self.stabilizationFrame is None:
            self.stabilizationFrame = frame_rgb
            self.stabilizationFrameCount = 0
        elif not self.stabilizationBurnInCompleted:
            self.stabilizationFrameCount += 1
            # add the new frame to the stabilization frame
            frame_rgb = cv2.addWeighted(frame_rgb, 0.5, self.stabilizationFrame, 0.5, 0)
            if self.stabilizationFrameCount == 10:
                self.stabilizationBurnInCompleted = True
                # extract ORB features from the stabilization frame
                self.orb = cv2.ORB_create()
                self.stabilizationKPs, self.stabilizationDesc = (
                    self.orb.detectAndCompute(self.stabilizationFrame, None)
                )
                self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

        if (
            self.stabilizationBurnInCompleted
            and self.stabilizationFrame is not None
            and self.orb is not None
            and self.matcher is not None
            and self.stabilizationKPs is not None
            and self.stabilizationDesc is not None
        ):
            # stabilization burn-in period is over, start stabilization
            # extract features from the current frame
            kps, desc = self.orb.detectAndCompute(frame_rgb, None)
            # match the features
            matches = self.matcher.match(self.stabilizationDesc, desc)
            # sort the matches by distance
            matches = sorted(matches, key=lambda x: x.distance)
            # calculate an affine transform from the matched keypoints
            src_pts = np.float32(
                [self.stabilizationKPs[m.queryIdx].pt for m in matches]
            ).reshape(-1, 1, 2)
            dst_pts = np.float32([kps[m.trainIdx].pt for m in matches]).reshape(
                -1, 1, 2
            )
            h, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts)
            # warp the frame
            if h is not None:
                frame_rgb = cv2.warpAffine(
                    frame_rgb,
                    h,
                    (frame_rgb.shape[1], frame_rgb.shape[0]),
                    flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR,
                )

        return frame_rgb
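Extracting the class into its own module also makes it easy to exercise outside the camera thread. A minimal usage sketch, assuming a local video file (the filename is hypothetical):

```python
# Drive FrameStabilizer standalone: the first 10 frames are blended into
# the reference during burn-in; after that, each frame is aligned to the
# reference via ORB feature matching and a partial affine warp.
import cv2
from frame_stabilizer import FrameStabilizer

stabilizer = FrameStabilizer()
cap = cv2.VideoCapture("scoreboard.mp4")  # hypothetical input file
while True:
    ok, frame = cap.read()
    if not ok:
        break
    stable = stabilizer.stabilize_frame(frame)
    cv2.imshow("stabilized", stable)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()
```

One design note: `cv2.estimateAffinePartial2D` restricts the fit to rotation, translation, and uniform scale, which suits camera shake; a full homography would be freer to latch onto content changes on the scoreboard itself.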
main.py: 37 changes (27 additions & 10 deletions)
@@ -408,7 +408,7 @@ def selectOutputFolder(self):
        folder = QFileDialog.getExistingDirectory(
            self,
            "Select Output Folder",
-            fetch_data("scoresight.json", "output_folder"),
+            fetch_data("scoresight.json", "output_folder", ""),
            options=QFileDialog.Option.ShowDirsOnly,
        )
        if folder and len(folder) > 0:
@@ -522,7 +522,7 @@ def vmixUiSetup(self):
        if mapping:
            self.vmixUpdater.set_field_mapping(mapping)

-        self.ui.tableView_vmixMapping.model().itemChanged.connect(
+        self.ui.tableView_vmixMapping.model().dataChanged.connect(
            self.vmixMappingChanged
        )

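The `itemChanged` to `dataChanged` swap is more than a rename: `itemChanged` exists only on `QStandardItemModel`, while `dataChanged` is declared on `QAbstractItemModel`, so the connection no longer depends on which concrete model class backs the table view. A small sketch of the signal, using `QStringListModel` purely as a stand-in model:

```python
# dataChanged fires on any QAbstractItemModel subclass whenever setData
# succeeds, i.e. whenever a cell edit lands, regardless of model class.
from PySide6.QtCore import QCoreApplication, QStringListModel

app = QCoreApplication([])
model = QStringListModel(["home score", "away score"])
model.dataChanged.connect(lambda tl, br, roles=None: print("mapping edited"))
model.setData(model.index(0), "HOME SCORE")  # prints "mapping edited"
```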
@@ -759,9 +759,9 @@ def connectObs(self):
        if self.obs_connect_modal is not None:
            self.obs_websocket_client = open_obs_websocket(
                {
-                    "ip": self.obs_modal_ui.obs_connect_modal.lineEdit_ip.text(),
-                    "port": self.obs_modal_ui.obs_connect_modal.lineEdit_port.text(),
-                    "password": self.obs_modal_ui.obs_connect_modal.lineEdit_password.text(),
+                    "ip": self.obs_modal_ui.lineEdit_ip.text(),
+                    "port": self.obs_modal_ui.lineEdit_port.text(),
+                    "password": self.obs_modal_ui.lineEdit_password.text(),
                }
            )
        else:
@@ -865,23 +865,26 @@ def sourceChanged(self, index):
self, "Open Video File", "", "Video Files (*.mp4 *.avi *.mov)"
)
if not file:
# no file selected - change source to "Select a source"
self.ui.comboBox_camera_source.setCurrentText("Select a source")
return
self.source_name = file
if self.source_name == "URL Source (HTTP, RTSP)":
# open a dialog to enter the url
url_dialog = QDialog()
ui_urlsource = Ui_UrlSource()
ui_urlsource.setupUi(url_dialog)

url_dialog.setWindowTitle("URL Source")
# focus on url input
ui_urlsource.lineEdit_url.setFocus()
url_dialog.exec() # wait for the dialog to close
# check if the dialog was accepted
if url_dialog.result() != QDialog.DialogCode.Accepted:
self.ui.comboBox_camera_source.setCurrentText("Select a source")
return
self.source_name = ui_urlsource.lineEdit_url.text()
if self.source_name == "":
self.ui.comboBox_camera_source.setCurrentText("Select a source")
return
if self.source_name == "Screen Capture":
# open a dialog to select the screen
@@ -898,6 +901,7 @@ def sourceChanged(self, index):
            screen_dialog.exec()
            # check if the dialog was accepted
            if screen_dialog.result() != QDialog.DialogCode.Accepted:
+                self.ui.comboBox_camera_source.setCurrentText("Select a source")
                return
            # get the window ID from the comboBox_window
            window_id = ui_screencapture.comboBox_window.currentData()
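All three dialog paths above now share one rule: any early return that leaves `sourceChanged` without a usable source first snaps the combo box back to its placeholder entry, so a cancelled dialog cannot leave a stale selection in the UI. A stripped-down sketch of the pattern (the combo items match the diff; the rest is scaffolding):

```python
# Reset-on-cancel pattern: reject/empty dialog outcomes restore the
# placeholder entry before bailing out, leaving no half-selected source.
from PySide6.QtWidgets import QApplication, QComboBox, QDialog

app = QApplication([])
combo = QComboBox()
combo.addItems(["Select a source", "URL Source (HTTP, RTSP)"])

def run_source_dialog(dialog: QDialog) -> bool:
    dialog.exec()  # modal; returns when the user closes the dialog
    if dialog.result() != QDialog.DialogCode.Accepted:
        combo.setCurrentText("Select a source")  # reset on cancel
        return False
    return True
```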
@@ -932,20 +936,29 @@ def sourceSelectionSucessful(self):
        self.ui.frame_source_view.setEnabled(False)

        if self.ui.comboBox_camera_source.currentData() == "file":
+            if self.source_name is None:
+                logger.error("No file selected")
+                return
            camera_info = CameraInfo(
                self.source_name,
                self.source_name,
                self.source_name,
                CameraInfo.CameraType.FILE,
            )
        elif self.ui.comboBox_camera_source.currentData() == "url":
+            if self.source_name is None:
+                logger.error("No url entered")
+                return
            camera_info = CameraInfo(
                self.source_name,
                self.source_name,
                self.source_name,
                CameraInfo.CameraType.URL,
            )
        elif self.ui.comboBox_camera_source.currentData() == "screen_capture":
+            if self.source_name is None:
+                logger.error("No screen capture selected")
+                return
            camera_info = CameraInfo(
                self.source_name,
                self.source_name,
@@ -1056,7 +1069,8 @@ def ocrResult(self, results: list[TextDetectionTargetWithResult]):
            if targetWithResult.result is None:
                continue
            if (
-                "skip_empty" in targetWithResult.settings
+                targetWithResult.settings is not None
+                and "skip_empty" in targetWithResult.settings
                and targetWithResult.settings["skip_empty"]
                and len(targetWithResult.result) == 0
            ):
@@ -1067,7 +1081,10 @@
            ):
                continue

-            if self.obs_websocket_client is not None:
+            if (
+                self.obs_websocket_client is not None
+                and targetWithResult.settings is not None
+            ):
                # find the source name for the target from the default boxes
                update_text_source(
                    self.obs_websocket_client,
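These guards are the core of the "result states" handling in this PR: `targetWithResult.settings` can legitimately be `None`, and the old `"skip_empty" in targetWithResult.settings` raised a `TypeError` in that case. A compact sketch of the corrected skip logic (`dict.get` stands in for the diff's explicit `in` check plus subscript; the behavior is the same for this key):

```python
# Membership tests against None raise TypeError, so the None check must
# come first; `and` short-circuits before the unsafe lookup is reached.
def should_skip(result: str | None, settings: dict | None) -> bool:
    if result is None:
        return True
    return (
        settings is not None
        and settings.get("skip_empty", False)
        and len(result) == 0
    )

assert should_skip("", {"skip_empty": True})        # empty result skipped
assert not should_skip("", None)                    # no settings: no crash
assert not should_skip("42", {"skip_empty": True})  # non-empty passes
```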
@@ -1202,12 +1219,12 @@ def removeBox(self):
        self.detectionTargetsStorage.remove_item(item.text())

    def createOBSScene(self):
-        self.ui.statusbar().showMessage("Creating OBS scene")
+        self.ui.statusbar.showMessage("Creating OBS scene")
        # get the scene name from the lineEdit_sceneName
        scene_name = self.ui.lineEdit_sceneName.text()
        # clear or create a new scene
        create_obs_scene_from_export(self.obs_websocket_client, scene_name)
-        self.ui.statusbar().showMessage("Finished creating scene")
+        self.ui.statusbar.showMessage("Finished creating scene")

    # on destroy, close the OBS connection
    def closeEvent(self, event):
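The `statusbar()` fix above is easy to trip over: `QMainWindow.statusBar()` is a method, but on a `pyside6-uic`-generated `ui` object the status bar is a plain child-widget attribute, so calling it raises a `TypeError`. A tiny illustration (the window setup is scaffolding, not from the repo):

```python
# uic-generated UI classes expose child widgets as attributes, so
# ui.statusbar is a QStatusBar instance, not a getter method.
from PySide6.QtWidgets import QApplication, QMainWindow, QStatusBar

app = QApplication([])
window = QMainWindow()
bar = QStatusBar(window)  # what uic assigns to something like ui.statusbar
window.setStatusBar(bar)
bar.showMessage("Creating OBS scene")  # attribute access: correct
# bar() would raise TypeError: a QStatusBar is a widget, not a callable.
```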