A clean, minimal Python reimplementation of YOLO-NAS object detection. No factory patterns, no registries, no OmegaConf — just PyTorch.
## Installation

```bash
uv add modern-yolonas
# or
pip install modern-yolonas
```

## Quick start

```python
from modern_yolonas import Detector
det = Detector("yolo_nas_s", device="cuda")
result = det("image.jpg")
# Print detections
from modern_yolonas.inference.visualize import COCO_NAMES
for box, score, cls_id in zip(result.boxes, result.scores, result.class_ids):
    name = COCO_NAMES[int(cls_id)]
    x1, y1, x2, y2 = box
    print(f"{name}: {score:.2f} [{x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f}]")
# Save annotated image
result.save("output.jpg")
```

## Video

```python
from modern_yolonas import Detector
det = Detector("yolo_nas_s", device="cuda")
# Option 1: Write annotated video directly
stats = det.detect_video_to_file("input.mp4", "output.mp4")
print(f"{stats['total_detections']} detections across {stats['total_frames']} frames")
# Option 2: Iterate frames for custom logic
for frame_idx, result in det.detect_video("input.mp4"):
print(f"Frame {frame_idx}: {len(result.boxes)} objects")
# result.boxes, result.scores, result.class_ids are numpy arrays
# result.visualize() returns the annotated frame as BGR numpy arrayimport cv2
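For example, here is a short sketch of custom per-frame logic on top of `detect_video` (COCO class id 0 is "person"; the counting logic itself is purely illustrative):

```python
import numpy as np
from modern_yolonas import Detector

det = Detector("yolo_nas_s", device="cuda")

# Count "person" detections per frame (COCO class id 0); illustrative only.
for frame_idx, result in det.detect_video("input.mp4"):
    persons = int(np.sum(result.class_ids == 0))
    if persons:
        print(f"Frame {frame_idx}: {persons} person(s)")
```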
## Webcam

```python
import cv2
from modern_yolonas import Detector
det = Detector("yolo_nas_s", device="cuda")
for frame_idx, result in det.detect_video(source=0): # 0 = default camera
    cv2.imshow("YOLO-NAS", result.visualize())
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cv2.destroyAllWindows()
```

## Raw model usage

```python
import torch
from modern_yolonas import yolo_nas_s
model = yolo_nas_s(pretrained=True).eval().cuda()
x = torch.randn(1, 3, 640, 640).cuda()
pred_bboxes, pred_scores = model(x)
# pred_bboxes: [1, 8400, 4] — x1y1x2y2 pixel coordinates
# pred_scores: [1, 8400, 80] — class probabilities
```
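The raw forward pass returns dense predictions with no confidence filtering or NMS. Here is a minimal post-processing sketch using torchvision; the thresholds and the use of `batched_nms` are illustrative choices, not the library's built-in pipeline (the `Detector` class applies its own post-processing):

```python
from torchvision.ops import batched_nms

# Post-process the raw outputs above; thresholds are illustrative defaults.
def postprocess(pred_bboxes, pred_scores, conf_thres=0.25, iou_thres=0.45):
    boxes = pred_bboxes[0]                         # [8400, 4] x1y1x2y2
    scores, class_ids = pred_scores[0].max(dim=1)  # best class per prediction
    mask = scores > conf_thres                     # drop low-confidence boxes
    boxes, scores, class_ids = boxes[mask], scores[mask], class_ids[mask]
    keep = batched_nms(boxes, scores, class_ids, iou_thres)  # per-class NMS
    return boxes[keep], scores[keep], class_ids[keep]

boxes, scores, class_ids = postprocess(pred_bboxes, pred_scores)
```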
## CLI

```bash
# Detect in images
yolonas detect --model yolo_nas_s --source image.jpg --conf 0.25
yolonas detect --model yolo_nas_l --source images/ --output results/
# Detect in video
yolonas detect --model yolo_nas_s --source video.mp4 --output results/
yolonas detect --model yolo_nas_m --source video.mp4 --skip-frames 2 --conf 0.3
# Training
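# --format yolo implies the conventional YOLO dataset layout; whether this
# exact split naming is required by modern-yolonas is an assumption:
#   dataset/
#     images/train/, images/val/
#     labels/train/, labels/val/  (one .txt per image, lines: "class_id cx cy w h", normalized)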
yolonas train --model yolo_nas_s --data /path/to/dataset --format yolo --epochs 100
# Evaluation
yolonas eval --model yolo_nas_s --data /path/to/coco --split val2017
# Export
yolonas export --model yolo_nas_s --format onnx --output model.onnx
yolonas export --model yolo_nas_s --format openvino --output model.xml
# Export for Frigate (embeds preprocessing + NMS in the graph)
yolonas export --model yolo_nas_s --format onnx --target frigate
yolonas export --model yolo_nas_s --format openvino --target frigate --input-size 320
```

The `--target frigate` export produces a self-contained model that accepts raw uint8 BGR input and outputs a flat [D, 7] tensor with [batch, x1, y1, x2, y2, confidence, class_id] rows.
Example Frigate configuration:

```yaml
detectors:
  ov:
    type: openvino
    device: GPU

model:
  model_type: yolonas
  width: 320
  height: 320
  input_tensor: nchw
  input_pixel_format: bgr
  path: /config/model_frigate.xml
```
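Outside Frigate, the exported graph can be exercised directly as a sanity check. A sketch with onnxruntime, assuming an ONNX export saved as `model_frigate.onnx` (hypothetical filename) with `--input-size 320`; the single-output unpacking follows the [D, 7] contract above:

```python
import cv2
import numpy as np
import onnxruntime as ort

# "model_frigate.onnx" is a hypothetical name for the --target frigate export.
sess = ort.InferenceSession("model_frigate.onnx", providers=["CPUExecutionProvider"])
inp_name = sess.get_inputs()[0].name

# The graph takes raw uint8 BGR NCHW input; preprocessing is embedded.
frame = cv2.imread("image.jpg")                       # OpenCV loads BGR
frame = cv2.resize(frame, (320, 320))                 # match --input-size 320
x = frame.transpose(2, 0, 1)[None].astype(np.uint8)   # [1, 3, 320, 320]

(dets,) = sess.run(None, {inp_name: x})               # flat [D, 7]
for b, x1, y1, x2, y2, conf, cls_id in dets:
    print(f"class {int(cls_id)}: {conf:.2f} [{x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f}]")
```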
## Examples

See the examples/ directory:

- detect_image.py — run detection on a single image
- detect_video.py — run detection on a video file
- detect_webcam.py — live webcam detection
## Models

| Model | Params | Input size | mAP (COCO val) |
|---|---|---|---|
| YOLO-NAS S | ~12M | 640 | 47.5 |
| YOLO-NAS M | ~31M | 640 | 51.5 |
| YOLO-NAS L | ~44M | 640 | 52.2 |
## Development

```bash
uv sync --dev
uv run pytest tests/ -v
uv run ruff check src/
```

## License

MIT