From 93db5c612f735757c3f58ed6dcc318862c8683f6 Mon Sep 17 00:00:00 2001 From: Mehmet Emin Aruk Date: Fri, 19 Sep 2025 23:26:42 +0300 Subject: [PATCH 1/2] feat(time-in-zone): add responsive windows size on draw_zones.py & video output saving on ultralytics_file_example.py --- examples/time_in_zone/scripts/draw_zones.py | 88 ++++++++++++++++++- .../time_in_zone/ultralytics_file_example.py | 45 ++++++++++ 2 files changed, 129 insertions(+), 4 deletions(-) diff --git a/examples/time_in_zone/scripts/draw_zones.py b/examples/time_in_zone/scripts/draw_zones.py index 74508ccd1..cf6e3c6d5 100644 --- a/examples/time_in_zone/scripts/draw_zones.py +++ b/examples/time_in_zone/scripts/draw_zones.py @@ -30,11 +30,40 @@ def resolve_source(source_path: str) -> np.ndarray | None: image = cv2.imread(source_path) if image is not None: - return image + return resize_to_fit_screen(image) frame_generator = sv.get_video_frames_generator(source_path=source_path) frame = next(frame_generator) - return frame + return resize_to_fit_screen(frame) + + +def resize_to_fit_screen(image: np.ndarray, max_width: int = 1200, max_height: int = 800) -> np.ndarray: + """ + Resize image to fit screen while maintaining aspect ratio. + + Args: + image: Input image + max_width: Maximum width for display + max_height: Maximum height for display + + Returns: + Resized image + """ + height, width = image.shape[:2] + + # Calculate scaling factor + scale_w = max_width / width + scale_h = max_height / height + scale = min(scale_w, scale_h, 1.0) # Don't upscale if image is smaller + + if scale < 1.0: + new_width = int(width * scale) + new_height = int(height * scale) + resized = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA) + print(f"Video resolution resized from {width}x{height} -> {new_width}x{new_height}") + return resized + + return image def mouse_event(event: int, x: int, y: int, flags: int, param: Any) -> None: @@ -119,8 +148,44 @@ def redraw_polygons(image: np.ndarray) -> None: ) -def save_polygons_to_json(polygons, target_path): +def convert_coordinates_to_original(polygons, original_size, display_size): + """ + Convert coordinates from display size back to original video size. + + Args: + polygons: List of polygons with display coordinates + original_size: (width, height) of original video + display_size: (width, height) of display window + + Returns: + List of polygons with original coordinates + """ + orig_w, orig_h = original_size + disp_w, disp_h = display_size + + scale_x = orig_w / disp_w + scale_y = orig_h / disp_h + + converted_polygons = [] + for polygon in polygons: + if polygon: # Skip empty polygons + converted_polygon = [] + for x, y in polygon: + orig_x = int(x * scale_x) + orig_y = int(y * scale_y) + converted_polygon.append([orig_x, orig_y]) + converted_polygons.append(converted_polygon) + + return converted_polygons + + +def save_polygons_to_json(polygons, target_path, original_size=None, display_size=None): data_to_save = polygons if polygons[-1] else polygons[:-1] + + # Convert coordinates back to original size if needed + if original_size and display_size: + data_to_save = convert_coordinates_to_original(data_to_save, original_size, display_size) + with open(target_path, "w") as f: json.dump(data_to_save, f) @@ -132,6 +197,18 @@ def main(source_path: str, zone_configuration_path: str) -> None: print("Failed to load source image.") return + # Get original video dimensions + cap = cv2.VideoCapture(source_path) + original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap.release() + + # Get display dimensions + display_height, display_width = original_image.shape[:2] + + print(f"Original video size: {original_width}x{original_height}") + print(f"Display size: {display_width}x{display_height}") + image = original_image.copy() cv2.imshow(WINDOW_NAME, image) cv2.setMouseCallback(WINDOW_NAME, mouse_event, image) @@ -144,8 +221,11 @@ def main(source_path: str, zone_configuration_path: str) -> None: POLYGONS[-1] = [] current_mouse_position = None elif key == KEY_SAVE: - save_polygons_to_json(POLYGONS, zone_configuration_path) + save_polygons_to_json(POLYGONS, zone_configuration_path, + (original_width, original_height), + (display_width, display_height)) print(f"Polygons saved to {zone_configuration_path}") + print("Coordinates converted to original video size.") break redraw(image, original_image) if key == KEY_QUIT: diff --git a/examples/time_in_zone/ultralytics_file_example.py b/examples/time_in_zone/ultralytics_file_example.py index 1b83108df..c0ecf41a4 100644 --- a/examples/time_in_zone/ultralytics_file_example.py +++ b/examples/time_in_zone/ultralytics_file_example.py @@ -23,6 +23,7 @@ def main( confidence: float, iou: float, classes: list[int], + output_video_path: str = None, ) -> None: model = YOLO(weights) tracker = sv.ByteTrack(minimum_matching_threshold=0.5) @@ -38,6 +39,32 @@ def main( for polygon in polygons ] timers = [FPSBasedTimer(video_info.fps) for _ in zones] + + # Video writer setup + video_writer = None + if output_video_path: + # Use Twitter-compatible codec - try H264 first, then XVID + try: + fourcc = cv2.VideoWriter_fourcc(*'H264') + video_writer = cv2.VideoWriter( + output_video_path, + fourcc, + video_info.fps, + (video_info.width, video_info.height) + ) + print(f"Video output being saved: {output_video_path}") + print("Using Twitter-compatible H.264 codec") + except: + # H264 desteklenmiyorsa XVID kullan + fourcc = cv2.VideoWriter_fourcc(*'XVID') + video_writer = cv2.VideoWriter( + output_video_path, + fourcc, + video_info.fps, + (video_info.width, video_info.height) + ) + print(f"Video output being saved: {output_video_path}") + print("Using XVID codec (convert with FFmpeg for Twitter)") for frame in frames_generator: results = model(frame, verbose=False, device=device, conf=confidence)[0] @@ -74,8 +101,19 @@ def main( ) cv2.imshow("Processed Video", annotated_frame) + + # Save frame to output video if writer is available + if video_writer is not None: + video_writer.write(annotated_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): break + + # Cleanup + if video_writer is not None: + video_writer.release() + print(f"Video successfully saved: {output_video_path}") + cv2.destroyAllWindows() @@ -126,6 +164,12 @@ def main( default=[], help="List of class IDs to track. If empty, all classes are tracked.", ) + parser.add_argument( + "--output_video_path", + type=str, + default=None, + help="Path to save the output video. If not provided, video will only be displayed.", + ) args = parser.parse_args() main( @@ -136,4 +180,5 @@ def main( confidence=args.confidence_threshold, iou=args.iou_threshold, classes=args.classes, + output_video_path=args.output_video_path, ) From 3b0bcab2ad12fd45cb9e78c4ee71cb07fc2562db Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Sep 2025 20:36:10 +0000 Subject: [PATCH 2/2] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto=20?= =?UTF-8?q?format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/time_in_zone/scripts/draw_zones.py | 53 +++++++++++-------- .../time_in_zone/ultralytics_file_example.py | 30 +++++------ 2 files changed, 47 insertions(+), 36 deletions(-) diff --git a/examples/time_in_zone/scripts/draw_zones.py b/examples/time_in_zone/scripts/draw_zones.py index cf6e3c6d5..37d5149b8 100644 --- a/examples/time_in_zone/scripts/draw_zones.py +++ b/examples/time_in_zone/scripts/draw_zones.py @@ -37,32 +37,38 @@ def resolve_source(source_path: str) -> np.ndarray | None: return resize_to_fit_screen(frame) -def resize_to_fit_screen(image: np.ndarray, max_width: int = 1200, max_height: int = 800) -> np.ndarray: +def resize_to_fit_screen( + image: np.ndarray, max_width: int = 1200, max_height: int = 800 +) -> np.ndarray: """ Resize image to fit screen while maintaining aspect ratio. - + Args: image: Input image max_width: Maximum width for display max_height: Maximum height for display - + Returns: Resized image """ height, width = image.shape[:2] - + # Calculate scaling factor scale_w = max_width / width scale_h = max_height / height scale = min(scale_w, scale_h, 1.0) # Don't upscale if image is smaller - + if scale < 1.0: new_width = int(width * scale) new_height = int(height * scale) - resized = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA) - print(f"Video resolution resized from {width}x{height} -> {new_width}x{new_height}") + resized = cv2.resize( + image, (new_width, new_height), interpolation=cv2.INTER_AREA + ) + print( + f"Video resolution resized from {width}x{height} -> {new_width}x{new_height}" + ) return resized - + return image @@ -151,21 +157,21 @@ def redraw_polygons(image: np.ndarray) -> None: def convert_coordinates_to_original(polygons, original_size, display_size): """ Convert coordinates from display size back to original video size. - + Args: polygons: List of polygons with display coordinates original_size: (width, height) of original video display_size: (width, height) of display window - + Returns: List of polygons with original coordinates """ orig_w, orig_h = original_size disp_w, disp_h = display_size - + scale_x = orig_w / disp_w scale_y = orig_h / disp_h - + converted_polygons = [] for polygon in polygons: if polygon: # Skip empty polygons @@ -175,17 +181,19 @@ def convert_coordinates_to_original(polygons, original_size, display_size): orig_y = int(y * scale_y) converted_polygon.append([orig_x, orig_y]) converted_polygons.append(converted_polygon) - + return converted_polygons def save_polygons_to_json(polygons, target_path, original_size=None, display_size=None): data_to_save = polygons if polygons[-1] else polygons[:-1] - + # Convert coordinates back to original size if needed if original_size and display_size: - data_to_save = convert_coordinates_to_original(data_to_save, original_size, display_size) - + data_to_save = convert_coordinates_to_original( + data_to_save, original_size, display_size + ) + with open(target_path, "w") as f: json.dump(data_to_save, f) @@ -202,10 +210,10 @@ def main(source_path: str, zone_configuration_path: str) -> None: original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) cap.release() - + # Get display dimensions display_height, display_width = original_image.shape[:2] - + print(f"Original video size: {original_width}x{original_height}") print(f"Display size: {display_width}x{display_height}") @@ -221,9 +229,12 @@ def main(source_path: str, zone_configuration_path: str) -> None: POLYGONS[-1] = [] current_mouse_position = None elif key == KEY_SAVE: - save_polygons_to_json(POLYGONS, zone_configuration_path, - (original_width, original_height), - (display_width, display_height)) + save_polygons_to_json( + POLYGONS, + zone_configuration_path, + (original_width, original_height), + (display_width, display_height), + ) print(f"Polygons saved to {zone_configuration_path}") print("Coordinates converted to original video size.") break diff --git a/examples/time_in_zone/ultralytics_file_example.py b/examples/time_in_zone/ultralytics_file_example.py index c0ecf41a4..7ee450928 100644 --- a/examples/time_in_zone/ultralytics_file_example.py +++ b/examples/time_in_zone/ultralytics_file_example.py @@ -39,29 +39,29 @@ def main( for polygon in polygons ] timers = [FPSBasedTimer(video_info.fps) for _ in zones] - + # Video writer setup video_writer = None if output_video_path: # Use Twitter-compatible codec - try H264 first, then XVID try: - fourcc = cv2.VideoWriter_fourcc(*'H264') + fourcc = cv2.VideoWriter_fourcc(*"H264") video_writer = cv2.VideoWriter( - output_video_path, - fourcc, - video_info.fps, - (video_info.width, video_info.height) + output_video_path, + fourcc, + video_info.fps, + (video_info.width, video_info.height), ) print(f"Video output being saved: {output_video_path}") print("Using Twitter-compatible H.264 codec") except: # H264 desteklenmiyorsa XVID kullan - fourcc = cv2.VideoWriter_fourcc(*'XVID') + fourcc = cv2.VideoWriter_fourcc(*"XVID") video_writer = cv2.VideoWriter( - output_video_path, - fourcc, - video_info.fps, - (video_info.width, video_info.height) + output_video_path, + fourcc, + video_info.fps, + (video_info.width, video_info.height), ) print(f"Video output being saved: {output_video_path}") print("Using XVID codec (convert with FFmpeg for Twitter)") @@ -101,19 +101,19 @@ def main( ) cv2.imshow("Processed Video", annotated_frame) - + # Save frame to output video if writer is available if video_writer is not None: video_writer.write(annotated_frame) - + if cv2.waitKey(1) & 0xFF == ord("q"): break - + # Cleanup if video_writer is not None: video_writer.release() print(f"Video successfully saved: {output_video_path}") - + cv2.destroyAllWindows()