Add example scripts for image, video, and webcam detection using YOLO-NAS

Gabriellgpc · Gabriellgpc · commit b1a85a45eb47 · 2026-02-05T19:43:55.000-04:00
diff --git a/examples/detect_image.py b/examples/detect_image.py
@@ -0,0 +1,44 @@
+"""Example: Run YOLO-NAS inference on a single image.
+
+Usage:
+    python examples/detect_image.py path/to/image.jpg
+    python examples/detect_image.py path/to/image.jpg --model yolo_nas_l --device cpu
+"""
+
+import argparse
+
+from modern_yolonas.inference.detect import Detector
+from modern_yolonas.inference.visualize import COCO_NAMES
+
+
+def main():
+    """Parse the command line, run detection on one image, print the detections, and save the result."""
+    parser = argparse.ArgumentParser(description="YOLO-NAS image detection")
+    parser.add_argument("image", help="Path to input image")
+    parser.add_argument("--model", default="yolo_nas_s", choices=["yolo_nas_s", "yolo_nas_m", "yolo_nas_l"])
+    parser.add_argument("--device", default="cuda")
+    parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
+    parser.add_argument("--iou", type=float, default=0.45, help="NMS IoU threshold")
+    parser.add_argument("--output", default="output.jpg", help="Output image path")
+    args = parser.parse_args()
+
+    # Create detector (downloads pretrained weights on first run)
+    det = Detector(args.model, device=args.device, conf_threshold=args.conf, iou_threshold=args.iou)
+
+    # Run detection
+    result = det(args.image)
+
+    # Print one line per detection: class name, score, and the four box coordinates
+    print(f"Found {len(result.boxes)} objects:")
+    for box, score, cls_id in zip(result.boxes, result.scores, result.class_ids):
+        name = COCO_NAMES[int(cls_id)]
+        x1, y1, x2, y2 = box
+        print(f"  {name}: {score:.2f} [{x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f}]")
+
+    # Save annotated image
+    result.save(args.output)
+    print(f"Saved to {args.output}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/detect_video.py b/examples/detect_video.py
@@ -0,0 +1,72 @@
+"""Example: Run YOLO-NAS inference on a video file.
+
+Usage:
+    python examples/detect_video.py path/to/video.mp4
+    python examples/detect_video.py path/to/video.mp4 --output output.mp4 --model yolo_nas_l
+"""
+
+import argparse
+from pathlib import Path
+
+from modern_yolonas.inference.detect import Detector
+
+
+def _parse_args():
+    """Define this example's command-line options and parse them."""
+    cli = argparse.ArgumentParser(description="YOLO-NAS video detection")
+    cli.add_argument("video", help="Path to input video")
+    cli.add_argument("--model", default="yolo_nas_s", choices=["yolo_nas_s", "yolo_nas_m", "yolo_nas_l"])
+    cli.add_argument("--device", default="cuda")
+    cli.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
+    cli.add_argument("--iou", type=float, default=0.45, help="NMS IoU threshold")
+    cli.add_argument("--output", default=None, help="Output video path (default: <input>_detect.<ext>)")
+    cli.add_argument("--skip-frames", type=int, default=0, help="Process every N-th frame (0 = all)")
+    cli.add_argument("--codec", default="mp4v", help="Output video codec")
+    return cli.parse_args()
+
+
+def main():
+    """Run detection over a video file, write the output video, and print summary statistics."""
+    opts = _parse_args()
+
+    # No --output given: write next to the input as <stem>_detect<suffix>
+    destination = opts.output
+    if destination is None:
+        video_path = Path(opts.video)
+        destination = str(video_path.parent / f"{video_path.stem}_detect{video_path.suffix}")
+
+    # Build the detector from the command-line options
+    detector = Detector(
+        opts.model,
+        device=opts.device,
+        conf_threshold=opts.conf,
+        iou_threshold=opts.iou,
+    )
+
+    # --- Option 1: let the detector write the output video itself ---
+    print(f"Processing {opts.video} ...")
+    stats = detector.detect_video_to_file(
+        source=opts.video,
+        output=destination,
+        codec=opts.codec,
+        skip_frames=opts.skip_frames,
+    )
+    summary = (
+        f"Done! {stats['total_frames']} frames, "
+        f"{stats['processed_frames']} processed, "
+        f"{stats['total_detections']} total detections"
+    )
+    print(summary)
+    print(f"Saved to {destination}")
+
+    # --- Option 2: iterate over the frames yourself (left commented out) ---
+    # Use this form when each frame needs custom handling:
+    #
+    # for frame_idx, result in detector.detect_video(opts.video):
+    #     print(f"Frame {frame_idx}: {len(result.boxes)} detections")
+    #     # result.boxes, result.scores and result.class_ids are available here
+    #     # annotated = result.visualize() gives the annotated frame
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/detect_webcam.py b/examples/detect_webcam.py
@@ -0,0 +1,40 @@
+"""Example: Run YOLO-NAS live on webcam feed.
+
+Usage:
+    python examples/detect_webcam.py
+    python examples/detect_webcam.py --model yolo_nas_m --device cuda
+"""
+
+import argparse
+
+import cv2
+
+from modern_yolonas.inference.detect import Detector
+
+
+def main():
+    """Display live detections from a camera in an OpenCV window until 'q' is pressed or the stream ends."""
+    parser = argparse.ArgumentParser(description="YOLO-NAS webcam detection")
+    parser.add_argument("--model", default="yolo_nas_s", choices=["yolo_nas_s", "yolo_nas_m", "yolo_nas_l"])
+    parser.add_argument("--device", default="cuda")
+    parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
+    parser.add_argument("--camera", type=int, default=0, help="Camera index")
+    args = parser.parse_args()
+
+    det = Detector(args.model, device=args.device, conf_threshold=args.conf)
+
+    print("Press 'q' to quit")
+    try:
+        for _frame_idx, result in det.detect_video(source=args.camera):
+            annotated = result.visualize()
+            cv2.imshow("YOLO-NAS", annotated)
+            # waitKey also lets the window process GUI events; the mask keeps only the key code byte
+            if cv2.waitKey(1) & 0xFF == ord("q"):
+                break
+    finally:
+        # Close the preview window on every exit path: 'q', end of stream, Ctrl+C, or an error
+        cv2.destroyAllWindows()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/modern_yolonas/__init__.py b/src/modern_yolonas/__init__.py
@@ -3,6 +3,7 @@
 from modern_yolonas._version import __version__
 from modern_yolonas.model import YoloNAS
 from modern_yolonas.weights import load_pretrained
+from modern_yolonas.inference.detect import Detector, Detection
 
 
 def yolo_nas_s(pretrained: bool = False, num_classes: int = 80) -> YoloNAS:
@@ -36,4 +37,6 @@ def yolo_nas_l(pretrained: bool = False, num_classes: int = 80) -> YoloNAS:
     "yolo_nas_m",
     "yolo_nas_l",
     "load_pretrained",
+    "Detector",
+    "Detection",
 ]
diff --git a/src/modern_yolonas/cli/detect_cmd.py b/src/modern_yolonas/cli/detect_cmd.py
@@ -6,6 +6,9 @@
 
 import click
 
+IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}  # suffixes treated as images
+VIDEO_EXTENSIONS = {".mp4", ".avi", ".mov", ".mkv", ".webm", ".flv", ".wmv", ".m4v"}  # suffixes treated as video
+
 
 @click.command()
 @click.option("--model", default="yolo_nas_s", type=click.Choice(["yolo_nas_s", "yolo_nas_m", "yolo_nas_l"]))
@@ -15,7 +18,19 @@
 @click.option("--device", default="cuda", help="Device (cuda or cpu).")
 @click.option("--output", default="results", help="Output directory.")
 @click.option("--input-size", default=640, help="Model input size.")
-def detect(model: str, source: str, conf: float, iou: float, device: str, output: str, input_size: int):
+@click.option("--skip-frames", default=0, help="Process every N-th frame for video (0 = every frame).")
+@click.option("--codec", default="mp4v", help="Video output codec (e.g. mp4v, XVID, avc1).")
+def detect(
+    model: str,
+    source: str,
+    conf: float,
+    iou: float,
+    device: str,
+    output: str,
+    input_size: int,
+    skip_frames: int,
+    codec: str,
+):
     """Run object detection on images or video."""
     from rich.console import Console
 
@@ -31,16 +46,51 @@ def detect(model: str, source: str, conf: float, iou: float, device: str, output
     source_path = Path(source)
 
     if source_path.is_dir():
+        # Directory of images
         files = sorted(source_path.glob("*.*"))
-        files = [f for f in files if f.suffix.lower() in (".jpg", ".jpeg", ".png", ".bmp")]
+        files = [f for f in files if f.suffix.lower() in IMAGE_EXTENSIONS]
+        _detect_images(det, files, out_dir, console)
+
+    elif source_path.suffix.lower() in VIDEO_EXTENSIONS:
+        # Video file
+        _detect_video(det, source_path, out_dir, console, skip_frames, codec)
+
+    elif source_path.suffix.lower() in IMAGE_EXTENSIONS:
+        # Single image
+        _detect_images(det, [source_path], out_dir, console)
+
     else:
-        files = [source_path]
+        console.print(f"[red]Unknown source type: {source_path.suffix}[/red]")
+        raise click.Abort()
+
 
+def _detect_images(det, files: list[Path], out_dir: Path, console) -> None:
+    """Run detection on each image in ``files`` and save the result to ``out_dir`` under the same file name."""
     for f in files:
         console.print(f"Processing {f.name}...")
         result = det(str(f))
         out_path = out_dir / f.name
         result.save(out_path)
-        console.print(f"  {len(result.boxes)} detections → {out_path}")
+        console.print(f"  {len(result.boxes)} detections -> {out_path}")
+
+    console.print(f"[green]Done! {len(files)} images saved to {out_dir}[/green]")
+
+
+def _detect_video(det, source_path: Path, out_dir: Path, console, skip_frames: int, codec: str) -> None:
+    """Run detection on one video, write the output to ``out_dir`` under the source file name, and print stats."""
+    out_path = out_dir / source_path.name  # keeps the source extension; NOTE(review): confirm codec suits it
+    console.print(f"Processing video {source_path.name}...")
+
+    stats = det.detect_video_to_file(
+        source=str(source_path),
+        output=str(out_path),
+        codec=codec,
+        skip_frames=skip_frames,
+    )
 
-    console.print(f"[green]Done! Results saved to {out_dir}[/green]")
+    console.print(
+        f"  {stats['total_frames']} frames, "
+        f"{stats['processed_frames']} processed, "
+        f"{stats['total_detections']} total detections"
+    )
+    console.print(f"[green]Done! Video saved to {out_path}[/green]")
diff --git a/src/modern_yolonas/inference/detect.py b/src/modern_yolonas/inference/detect.py