From 5b06a3c46764d4be0dc495d3860f8f690bf88523 Mon Sep 17 00:00:00 2001
From: Justin Chu
Date: Mon, 22 Sep 2025 15:32:53 -0700
Subject: [PATCH 1/5] Implement ONNX support for the pt2 exporter

Signed-off-by: Justin Chu
---
 torchvision/ops/_onnx_ops.py | 77 ++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 torchvision/ops/_onnx_ops.py

diff --git a/torchvision/ops/_onnx_ops.py b/torchvision/ops/_onnx_ops.py
new file mode 100644
index 00000000000..fa950cab25f
--- /dev/null
+++ b/torchvision/ops/_onnx_ops.py
@@ -0,0 +1,77 @@
+import sys
+import warnings
+
+import torchvision
+import torch
+
+
+_INT64_MAX = sys.maxsize
+
+
+def nms(boxes, scores, iou_threshold: float):
+    import onnxscript
+
+    op = onnxscript.opset18
+    # boxes: [num_batches, spatial_dimension, 4]
+    boxes = op.Unsqueeze(boxes, [0])
+    # scores: [num_batches, num_classes, spatial_dimension]
+    scores = op.Unsqueeze(scores, [0, 1])
+    # nms_out: [num_selected_indices, 3] where each column is [batch_index, class_index, box_index]
+    nms_out = op.NonMaxSuppression(boxes, scores, _INT64_MAX, iou_threshold)
+    return op.Reshape(op.Slice(nms_out, axes=[1], starts=[2], ends=[3]), [-1])
+
+
+def _process_batch_indices_for_roi_align(g, rois):
+    indices = opset11.squeeze(
+        g, opset11.select(g, rois, 1, g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))), 1
+    )
+    return g.op("Cast", indices, to_i=torch.onnx.TensorProtoDataType.INT64)
+
+
+def _process_rois_for_roi_align(g, rois):
+    return opset11.select(g, rois, 1, g.op("Constant", value_t=torch.tensor([1, 2, 3, 4], dtype=torch.long)))
+
+
+def _process_sampling_ratio_for_roi_align(g, sampling_ratio: int):
+    if sampling_ratio < 0:
+        warnings.warn(
+            "ONNX export for RoIAlign with a negative sampling_ratio is not supported. "
+            "The model will be exported with a sampling_ratio of 0."
+        )
+        sampling_ratio = 0
+    return sampling_ratio
+
+
+@parse_args("v", "v", "f", "i", "i", "i", "i")
+def roi_align_opset16(g, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned):
+    batch_indices = _process_batch_indices_for_roi_align(g, rois)
+    rois = _process_rois_for_roi_align(g, rois)
+    coordinate_transformation_mode = "half_pixel" if aligned else "output_half_pixel"
+    sampling_ratio = _process_sampling_ratio_for_roi_align(g, sampling_ratio)
+    return g.op(
+        "RoiAlign",
+        input,
+        rois,
+        batch_indices,
+        coordinate_transformation_mode_s=coordinate_transformation_mode,
+        spatial_scale_f=spatial_scale,
+        output_height_i=pooled_height,
+        output_width_i=pooled_width,
+        sampling_ratio_i=sampling_ratio,
+    )
+
+
+@parse_args("v", "v", "f", "i", "i")
+def roi_pool(g, input, rois, spatial_scale, pooled_height, pooled_width):
+    roi_pool = g.op(
+        "MaxRoiPool", input, rois, pooled_shape_i=(pooled_height, pooled_width), spatial_scale_f=spatial_scale
+    )
+    return roi_pool, None
+
+
+def onnx_translation_table():
+    return {
+        torchvision.ops.nms: nms,
+        torchvision.ops.roi_align: roi_align,
+        torchvision.ops.roi_pool: roi_pool,
+    }

From 930b99fc7ddc70627a3ad3104a162f7bb2a14ca2 Mon Sep 17 00:00:00 2001
From: Justin Chu
Date: Mon, 22 Sep 2025 15:48:57 -0700
Subject: [PATCH 2/5] Implement the ops

Signed-off-by: Justin Chu
---
 torchvision/ops/_onnx_ops.py | 85 +++++++++++++++++++++++-------------
 1 file changed, 54 insertions(+), 31 deletions(-)

diff --git a/torchvision/ops/_onnx_ops.py b/torchvision/ops/_onnx_ops.py
index fa950cab25f..2eb493a3d63 100644
--- a/torchvision/ops/_onnx_ops.py
+++ b/torchvision/ops/_onnx_ops.py
@@ -1,7 +1,7 @@
+from collections.abc import Callable
 import sys
 import warnings
 
-import torchvision
 import torch
 
 
@@ -21,18 +21,26 @@ def nms(boxes, scores, iou_threshold: float):
     return op.Reshape(op.Slice(nms_out, axes=[1], starts=[2], ends=[3]), [-1])
 
 
-def _process_batch_indices_for_roi_align(g, rois):
-    indices = opset11.squeeze(
-        g, opset11.select(g, rois, 1, g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))), 1
-    )
-    return g.op("Cast", indices, to_i=torch.onnx.TensorProtoDataType.INT64)
+def _process_batch_indices_for_roi_align(rois):
+    import onnxscript
+    from onnxscript import INT64
+
+    op = onnxscript.opset18
+    # Extract batch indices from the first column (index 0) of rois
+    indices = op.Slice(rois, axes=[1], starts=[0], ends=[1])
+    indices = op.Squeeze(indices, axes=[1])
+    return op.Cast(indices, to=INT64.dtype)
 
 
-def _process_rois_for_roi_align(g, rois):
-    return opset11.select(g, rois, 1, g.op("Constant", value_t=torch.tensor([1, 2, 3, 4], dtype=torch.long)))
+def _process_rois_for_roi_align(rois):
+    import onnxscript
 
-def _process_sampling_ratio_for_roi_align(g, sampling_ratio: int):
+    op = onnxscript.opset18
+    # Extract roi coordinates from columns 1, 2, 3, 4 (x1, y1, x2, y2)
+    return op.Slice(rois, axes=[1], starts=[1], ends=[5])
+
+
+def _process_sampling_ratio_for_roi_align(sampling_ratio: int):
     if sampling_ratio < 0:
         warnings.warn(
             "ONNX export for RoIAlign with a negative sampling_ratio is not supported. "
" @@ -42,36 +50,51 @@ def _process_sampling_ratio_for_roi_align(g, sampling_ratio: int): return sampling_ratio -@parse_args("v", "v", "f", "i", "i", "i", "i") -def roi_align_opset16(g, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned): - batch_indices = _process_batch_indices_for_roi_align(g, rois) - rois = _process_rois_for_roi_align(g, rois) +def roi_align( + input, + rois, + spatial_scale: float, + pooled_height: int, + pooled_width: int, + sampling_ratio: int = -1, + aligned: bool = False, +): + import onnxscript + + op = onnxscript.opset18 + batch_indices = _process_batch_indices_for_roi_align(rois) + rois_coords = _process_rois_for_roi_align(rois) coordinate_transformation_mode = "half_pixel" if aligned else "output_half_pixel" - sampling_ratio = _process_sampling_ratio_for_roi_align(g, sampling_ratio) - return g.op( - "RoiAlign", + sampling_ratio = _process_sampling_ratio_for_roi_align(sampling_ratio) + + return op.RoiAlign( input, - rois, + rois_coords, batch_indices, - coordinate_transformation_mode_s=coordinate_transformation_mode, - spatial_scale_f=spatial_scale, - output_height_i=pooled_height, - output_width_i=pooled_width, - sampling_ratio_i=sampling_ratio, + coordinate_transformation_mode=coordinate_transformation_mode, + spatial_scale=spatial_scale, + output_height=pooled_height, + output_width=pooled_width, + sampling_ratio=sampling_ratio, ) -@parse_args("v", "v", "f", "i", "i") -def roi_pool(g, input, rois, spatial_scale, pooled_height, pooled_width): - roi_pool = g.op( - "MaxRoiPool", input, rois, pooled_shape_i=(pooled_height, pooled_width), spatial_scale_f=spatial_scale +def roi_pool(input, rois, spatial_scale: float, pooled_height: int, pooled_width: int): + import onnxscript + + op = onnxscript.opset18 + # MaxRoiPool expects rois in format [batch_index, x1, y1, x2, y2] + return op.MaxRoiPool( + input, + rois, + pooled_shape=(pooled_height, pooled_width), + spatial_scale=spatial_scale, ) - return roi_pool, None -def onnx_translation_table(): +def onnx_translation_table() -> dict[torch._ops.OpOverload, Callable]: return { - torchvision.ops.nms: nms, - torchvision.ops.roi_align: roi_align, - torchvision.ops.roi_pool: roi_pool, + torch.ops.torchvision.nms.default: nms, + torch.ops.torchvision.roi_align.default: roi_align, + torch.ops.torchvision.roi_pool.default: roi_pool, } From e92a5f2229402c182cccd06b5328d798ddb269fd Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 22 Sep 2025 15:50:18 -0700 Subject: [PATCH 3/5] Rename Signed-off-by: Justin Chu --- torchvision/ops/{_onnx_ops.py => onnx_ops.py} | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) rename torchvision/ops/{_onnx_ops.py => onnx_ops.py} (87%) diff --git a/torchvision/ops/_onnx_ops.py b/torchvision/ops/onnx_ops.py similarity index 87% rename from torchvision/ops/_onnx_ops.py rename to torchvision/ops/onnx_ops.py index 2eb493a3d63..3b9a8a5d33f 100644 --- a/torchvision/ops/_onnx_ops.py +++ b/torchvision/ops/onnx_ops.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +__all__ = ["onnx_translation_table"] + from collections.abc import Callable import sys import warnings @@ -8,7 +12,7 @@ _INT64_MAX = sys.maxsize -def nms(boxes, scores, iou_threshold: float): +def _nms(boxes, scores, iou_threshold: float): import onnxscript op = onnxscript.opset18 @@ -50,7 +54,7 @@ def _process_sampling_ratio_for_roi_align(sampling_ratio: int): return sampling_ratio -def roi_align( +def _roi_align( input, rois, spatial_scale: float, @@ -79,7 +83,7 @@ def roi_align( ) -def 
+def _roi_pool(input, rois, spatial_scale: float, pooled_height: int, pooled_width: int):
     import onnxscript
 
     op = onnxscript.opset18
@@ -94,7 +98,7 @@ def roi_pool(input, rois, spatial_scale: float, pooled_height: int, pooled_width
 
 def onnx_translation_table() -> dict[torch._ops.OpOverload, Callable]:
     return {
-        torch.ops.torchvision.nms.default: nms,
-        torch.ops.torchvision.roi_align.default: roi_align,
-        torch.ops.torchvision.roi_pool.default: roi_pool,
+        torch.ops.torchvision.nms.default: _nms,
+        torch.ops.torchvision.roi_align.default: _roi_align,
+        torch.ops.torchvision.roi_pool.default: _roi_pool,
     }

From 00dcb9e99c7b49f64d3c4946d1e7815ef0f629de Mon Sep 17 00:00:00 2001
From: Justin Chu
Date: Mon, 22 Sep 2025 16:01:47 -0700
Subject: [PATCH 4/5] Create tests

Signed-off-by: Justin Chu
---
 test/test_onnx_pt2.py | 508 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 508 insertions(+)
 create mode 100644 test/test_onnx_pt2.py

diff --git a/test/test_onnx_pt2.py b/test/test_onnx_pt2.py
new file mode 100644
index 00000000000..d9432fbed9c
--- /dev/null
+++ b/test/test_onnx_pt2.py
@@ -0,0 +1,508 @@
+from collections import OrderedDict
+
+import pytest
+import torch
+from torch.onnx._internal.exporter import _testing as onnx_testing
+from common_utils import set_rng_seed
+from torchvision import models, ops
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, TwoMLPHead
+from torchvision.models.detection.image_list import ImageList
+from torchvision.models.detection.roi_heads import RoIHeads
+from torchvision.models.detection.rpn import (
+    AnchorGenerator,
+    RegionProposalNetwork,
+    RPNHead,
+)
+from torchvision.models.detection.transform import GeneralizedRCNNTransform
+
+# In environments without onnxruntime we prefer to
+# invoke all tests in the repo and have this one skipped rather than fail.
+onnxruntime = pytest.importorskip("onnxruntime")
+
+
+class TestPT2ONNXExporter:
+    @classmethod
+    def setup_class(cls):
+        torch.manual_seed(42)
+        onnxruntime.set_seed(42)
+
+    def run_model(
+        self,
+        model: torch.nn.Module,
+        inputs,
+        input_names=None,
+        output_names=None,
+        dynamic_axes=None,
+    ):
+        onnx_program = torch.onnx.export(
+            model,
+            inputs,
+            verbose=False,
+            dynamo=True,
+            input_names=input_names,
+            output_names=output_names,
+            dynamic_axes=dynamic_axes,
+        )
+        assert onnx_program is not None
+        onnx_testing.assert_onnx_program(onnx_program)
+
+    def test_nms(self):
+        num_boxes = 100
+        boxes = torch.rand(num_boxes, 4)
+        boxes[:, 2:] += boxes[:, :2]
+        scores = torch.randn(num_boxes)
+
+        class Module(torch.nn.Module):
+            def forward(self, boxes, scores):
+                return ops.nms(boxes, scores, 0.5)
+
+        self.run_model(Module(), [(boxes, scores)])
+
+    def test_batched_nms(self):
+        num_boxes = 100
+        boxes = torch.rand(num_boxes, 4)
+        boxes[:, 2:] += boxes[:, :2]
+        scores = torch.randn(num_boxes)
+        idxs = torch.randint(0, 5, size=(num_boxes,))
+
+        class Module(torch.nn.Module):
+            def forward(self, boxes, scores, idxs):
+                return ops.batched_nms(boxes, scores, idxs, 0.5)
+
+        self.run_model(Module(), [(boxes, scores, idxs)])
+
+    def test_clip_boxes_to_image(self):
+        boxes = torch.randn(5, 4) * 500
+        boxes[:, 2:] += boxes[:, :2]
+        size = torch.randn(200, 300)
+
+        size_2 = torch.randn(300, 400)
+
+        class Module(torch.nn.Module):
+            def forward(self, boxes, size):
+                return ops.boxes.clip_boxes_to_image(boxes, size.shape)
+
+        self.run_model(
+            Module(),
+            [(boxes, size), (boxes, size_2)],
+            input_names=["boxes", "size"],
+            dynamic_axes={"size": [0, 1]},
+        )
+
+    def test_roi_align(self):
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 1, 2)
+        self.run_model(model, [(x, single_roi)])
+
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 1, -1)
+        self.run_model(model, [(x, single_roi)])
+
+    def test_roi_align_aligned(self):
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 1.5, 1.5, 3, 3]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 1, 2, aligned=True)
+        self.run_model(model, [(x, single_roi)])
+
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 0.5, 3, aligned=True)
+        self.run_model(model, [(x, single_roi)])
+
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 1.8, 2, aligned=True)
+        self.run_model(model, [(x, single_roi)])
+
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32)
+        model = ops.RoIAlign((2, 2), 2.5, 0, aligned=True)
+        self.run_model(model, [(x, single_roi)])
+
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32)
+        model = ops.RoIAlign((2, 2), 2.5, -1, aligned=True)
+        self.run_model(model, [(x, single_roi)])
+
+    def test_roi_align_malformed_boxes(self):
+        x = torch.randn(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 2, 0.3, 1.5, 1.5]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 1, 1, aligned=True)
+        self.run_model(model, [(x, single_roi)])
+
+    def test_roi_pool(self):
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
+        pool_h = 5
+        pool_w = 5
+        model = ops.RoIPool((pool_h, pool_w), 2)
+        self.run_model(model, [(x, rois)])
+
+    def test_resize_images(self):
+        class TransformModule(torch.nn.Module):
+            def __init__(self_module):
+                super().__init__()
+                self_module.transform = self._init_test_generalized_rcnn_transform()
+
+            def forward(self_module, images):
+                return self_module.transform.resize(images, None)[0]
+
+        input = torch.rand(3, 10, 20)
+        input_test = torch.rand(3, 100, 150)
+        self.run_model(
+            TransformModule(),
+            [(input,), (input_test,)],
+            input_names=["input1"],
+            dynamic_axes={"input1": [0, 1, 2]},
+        )
+
+    def test_transform_images(self):
+        class TransformModule(torch.nn.Module):
+            def __init__(self_module):
+                super().__init__()
+                self_module.transform = self._init_test_generalized_rcnn_transform()
+
+            def forward(self_module, images):
+                return self_module.transform(images)[0].tensors
+
+        input = torch.rand(3, 100, 200), torch.rand(3, 200, 200)
+        input_test = torch.rand(3, 100, 200), torch.rand(3, 200, 200)
+        self.run_model(TransformModule(), [(input,), (input_test,)])
+
+    def _init_test_generalized_rcnn_transform(self):
+        min_size = 100
+        max_size = 200
+        image_mean = [0.485, 0.456, 0.406]
+        image_std = [0.229, 0.224, 0.225]
+        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
+        return transform
+
+    def _init_test_rpn(self):
+        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
+        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
+        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
+        out_channels = 256
+        rpn_head = RPNHead(
+            out_channels, rpn_anchor_generator.num_anchors_per_location()[0]
+        )
+        rpn_fg_iou_thresh = 0.7
+        rpn_bg_iou_thresh = 0.3
+        rpn_batch_size_per_image = 256
+        rpn_positive_fraction = 0.5
+        rpn_pre_nms_top_n = dict(training=2000, testing=1000)
+        rpn_post_nms_top_n = dict(training=2000, testing=1000)
+        rpn_nms_thresh = 0.7
+        rpn_score_thresh = 0.0
+
+        rpn = RegionProposalNetwork(
+            rpn_anchor_generator,
+            rpn_head,
+            rpn_fg_iou_thresh,
+            rpn_bg_iou_thresh,
+            rpn_batch_size_per_image,
+            rpn_positive_fraction,
+            rpn_pre_nms_top_n,
+            rpn_post_nms_top_n,
+            rpn_nms_thresh,
+            score_thresh=rpn_score_thresh,
+        )
+        return rpn
+
+    def _init_test_roi_heads_faster_rcnn(self):
+        out_channels = 256
+        num_classes = 91
+
+        box_fg_iou_thresh = 0.5
+        box_bg_iou_thresh = 0.5
+        box_batch_size_per_image = 512
+        box_positive_fraction = 0.25
+        bbox_reg_weights = None
+        box_score_thresh = 0.05
+        box_nms_thresh = 0.5
+        box_detections_per_img = 100
+
+        box_roi_pool = ops.MultiScaleRoIAlign(
+            featmap_names=["0", "1", "2", "3"], output_size=7, sampling_ratio=2
+        )
+
+        resolution = box_roi_pool.output_size[0]
+        representation_size = 1024
+        box_head = TwoMLPHead(out_channels * resolution**2, representation_size)
+
+        representation_size = 1024
+        box_predictor = FastRCNNPredictor(representation_size, num_classes)
+
+        roi_heads = RoIHeads(
+            box_roi_pool,
+            box_head,
+            box_predictor,
+            box_fg_iou_thresh,
+            box_bg_iou_thresh,
+            box_batch_size_per_image,
+            box_positive_fraction,
+            bbox_reg_weights,
+            box_score_thresh,
+            box_nms_thresh,
+            box_detections_per_img,
+        )
+        return roi_heads
+
+    def get_features(self, images):
+        s0, s1 = images.shape[-2:]
+        features = [
+            ("0", torch.rand(2, 256, s0 // 4, s1 // 4)),
+            ("1", torch.rand(2, 256, s0 // 8, s1 // 8)),
+            ("2", torch.rand(2, 256, s0 // 16, s1 // 16)),
+            ("3", torch.rand(2, 256, s0 // 32, s1 // 32)),
+            ("4", torch.rand(2, 256, s0 // 64, s1 // 64)),
+        ]
+        features = OrderedDict(features)
+        return features
+
+    def test_rpn(self):
+        set_rng_seed(0)
+
+        class RPNModule(torch.nn.Module):
+            def __init__(self_module):
+                super().__init__()
+                self_module.rpn = self._init_test_rpn()
+
+            def forward(self_module, images, features):
+                images = ImageList(images, [i.shape[-2:] for i in images])
+                return self_module.rpn(images, features)
+
+        images = torch.rand(2, 3, 150, 150)
+        features = self.get_features(images)
+        images2 = torch.rand(2, 3, 80, 80)
+        test_features = self.get_features(images2)
+
+        model = RPNModule()
+        model.eval()
+        model(images, features)
+
+        self.run_model(
+            model,
+            [(images, features), (images2, test_features)],
+            input_names=["input1", "input2", "input3", "input4", "input5", "input6"],
+            dynamic_axes={
+                "input1": [0, 1, 2, 3],
+                "input2": [0, 1, 2, 3],
+                "input3": [0, 1, 2, 3],
+                "input4": [0, 1, 2, 3],
+                "input5": [0, 1, 2, 3],
+                "input6": [0, 1, 2, 3],
+            },
+        )
+
+    def test_multi_scale_roi_align(self):
+        class TransformModule(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.model = ops.MultiScaleRoIAlign(["feat1", "feat2"], 3, 2)
+                self.image_sizes = [(512, 512)]
+
+            def forward(self, input, boxes):
+                return self.model(input, boxes, self.image_sizes)
+
+        i = OrderedDict()
+        i["feat1"] = torch.rand(1, 5, 64, 64)
+        i["feat2"] = torch.rand(1, 5, 16, 16)
+        boxes = torch.rand(6, 4) * 256
+        boxes[:, 2:] += boxes[:, :2]
+
+        i1 = OrderedDict()
+        i1["feat1"] = torch.rand(1, 5, 64, 64)
+        i1["feat2"] = torch.rand(1, 5, 16, 16)
+        boxes1 = torch.rand(6, 4) * 256
+        boxes1[:, 2:] += boxes1[:, :2]
+
+        self.run_model(
+            TransformModule(),
+            [
+                (
+                    i,
+                    [boxes],
+                ),
+                (
+                    i1,
+                    [boxes1],
+                ),
+            ],
+        )
+
+    def test_roi_heads(self):
+        class RoiHeadsModule(torch.nn.Module):
+            def __init__(self_module):
+                super().__init__()
+                self_module.transform = self._init_test_generalized_rcnn_transform()
+                self_module.rpn = self._init_test_rpn()
+                self_module.roi_heads = self._init_test_roi_heads_faster_rcnn()
+
+            def forward(self_module, images, features):
+                original_image_sizes = [img.shape[-2:] for img in images]
+                images = ImageList(images, [i.shape[-2:] for i in images])
+                proposals, _ = self_module.rpn(images, features)
+                detections, _ = self_module.roi_heads(
+                    features, proposals, images.image_sizes
+                )
+                detections = self_module.transform.postprocess(
+                    detections, images.image_sizes, original_image_sizes
+                )
+                return detections
+
+        images = torch.rand(2, 3, 100, 100)
+        features = self.get_features(images)
+        images2 = torch.rand(2, 3, 150, 150)
+        test_features = self.get_features(images2)
+
+        model = RoiHeadsModule()
+        model.eval()
+        model(images, features)
+
+        self.run_model(
+            model,
+            [(images, features), (images2, test_features)],
+            input_names=["input1", "input2", "input3", "input4", "input5", "input6"],
+            dynamic_axes={
+                "input1": [0, 1, 2, 3],
+                "input2": [0, 1, 2, 3],
+                "input3": [0, 1, 2, 3],
+                "input4": [0, 1, 2, 3],
+                "input5": [0, 1, 2, 3],
+                "input6": [0, 1, 2, 3],
+            },
+        )
+
+    def get_image(self, rel_path: str, size: tuple[int, int]) -> torch.Tensor:
+        import os
+
+        from PIL import Image
+        from torchvision.transforms import functional as F
+
+        data_dir = os.path.join(os.path.dirname(__file__), "assets")
+        path = os.path.join(data_dir, *rel_path.split("/"))
+        image = Image.open(path).convert("RGB").resize(size, Image.BILINEAR)
+
+        return F.convert_image_dtype(F.pil_to_tensor(image))
+
+    def get_test_images(self) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
+        return (
+            [self.get_image("encode_jpeg/grace_hopper_517x606.jpg", (100, 320))],
+            [self.get_image("fakedata/logos/rgb_pytorch.png", (250, 380))],
+        )
+
+    def test_faster_rcnn(self):
+        images, test_images = self.get_test_images()
+        dummy_image = [torch.ones(3, 100, 100) * 0.3]
+        model = models.detection.faster_rcnn.fasterrcnn_resnet50_fpn(
+            weights=models.detection.faster_rcnn.FasterRCNN_ResNet50_FPN_Weights.DEFAULT,
+            min_size=200,
+            max_size=300,
+        )
+        model.eval()
+        model(images)
+        # Test exported model on images of different size, or dummy input
+        self.run_model(
+            model,
+            [(images,), (test_images,), (dummy_image,)],
+            input_names=["images_tensors"],
+            output_names=["outputs"],
+            dynamic_axes={"images_tensors": [0, 1, 2], "outputs": [0, 1, 2]},
+        )
+        # Test exported model for an image with no detections on other images
+        self.run_model(
+            model,
+            [(dummy_image,), (images,)],
+            input_names=["images_tensors"],
+            output_names=["outputs"],
+            dynamic_axes={"images_tensors": [0, 1, 2], "outputs": [0, 1, 2]},
+        )
+
+    def test_mask_rcnn(self):
+        images, test_images = self.get_test_images()
+        dummy_image = [torch.ones(3, 100, 100) * 0.3]
+        model = models.detection.mask_rcnn.maskrcnn_resnet50_fpn(
+            weights=models.detection.mask_rcnn.MaskRCNN_ResNet50_FPN_Weights.DEFAULT,
+            min_size=200,
+            max_size=300,
+        )
+        model.eval()
+        model(images)
+        # Test exported model on images of different size, or dummy input
+        self.run_model(
+            model,
+            [(images,), (test_images,), (dummy_image,)],
+            input_names=["images_tensors"],
+            output_names=["boxes", "labels", "scores", "masks"],
+            dynamic_axes={
+                "images_tensors": [0, 1, 2],
+                "boxes": [0, 1],
+                "labels": [0],
+                "scores": [0],
+                "masks": [0, 1, 2],
+            },
+        )
+        # Test exported model for an image with no detections on other images
+        self.run_model(
+            model,
+            [(dummy_image,), (images,)],
+            input_names=["images_tensors"],
+            output_names=["boxes", "labels", "scores", "masks"],
+            dynamic_axes={
+                "images_tensors": [0, 1, 2],
+                "boxes": [0, 1],
+                "labels": [0],
+                "scores": [0],
+                "masks": [0, 1, 2],
+            },
+        )
+
+    def test_keypoint_rcnn(self):
+        images, test_images = self.get_test_images()
+        dummy_images = [torch.ones(3, 100, 100) * 0.3]
+        model = models.detection.keypoint_rcnn.keypointrcnn_resnet50_fpn(
+            weights=models.detection.keypoint_rcnn.KeypointRCNN_ResNet50_FPN_Weights.DEFAULT,
+            min_size=200,
+            max_size=300,
+        )
+        model.eval()
+        model(images)
+        self.run_model(
+            model,
+            [(images,), (test_images,), (dummy_images,)],
+            input_names=["images_tensors"],
+            output_names=["outputs1", "outputs2", "outputs3", "outputs4"],
+            dynamic_axes={"images_tensors": [0, 1, 2]},
+        )
+
+        self.run_model(
+            model,
+            [(dummy_images,), (test_images,)],
+            input_names=["images_tensors"],
+            output_names=["outputs1", "outputs2", "outputs3", "outputs4"],
+            dynamic_axes={"images_tensors": [0, 1, 2]},
+        )
+
+    def test_shufflenet_v2_dynamic_axes(self):
+        model = models.shufflenet_v2_x0_5(
+            weights=models.ShuffleNet_V2_X0_5_Weights.DEFAULT
+        )
+        dummy_input = torch.randn(1, 3, 224, 224, requires_grad=True)
+        test_inputs = torch.cat([dummy_input, dummy_input, dummy_input], 0)
+
+        self.run_model(
+            model,
+            [(dummy_input,), (test_inputs,)],
+            input_names=["input_images"],
+            output_names=["output"],
+            dynamic_axes={
+                "input_images": {0: "batch_size"},
+                "output": {0: "batch_size"},
+            },
+        )
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])

From 4004a191c17f0dd437aa0948d8b3a46f9ee1a0c8 Mon Sep 17 00:00:00 2001
From: Justin Chu
Date: Mon, 22 Sep 2025 16:19:48 -0700
Subject: [PATCH 5/5] Update tests

Signed-off-by: Justin Chu
---
 test/test_onnx_pt2.py | 89 ++++---------------------------------------
 1 file changed, 8 insertions(+), 81 deletions(-)

diff --git a/test/test_onnx_pt2.py b/test/test_onnx_pt2.py
index d9432fbed9c..2a4a3131dd4 100644
--- a/test/test_onnx_pt2.py
+++ b/test/test_onnx_pt2.py
@@ -13,6 +13,7 @@
     RegionProposalNetwork,
     RPNHead,
 )
+import torchvision.ops.onnx_ops
 from torchvision.models.detection.transform import GeneralizedRCNNTransform
 
 # In environments without onnxruntime we prefer to
@@ -36,15 +37,19 @@ def run_model(
     ):
         onnx_program = torch.onnx.export(
             model,
-            inputs,
+            inputs[0],
             verbose=False,
             dynamo=True,
            input_names=input_names,
             output_names=output_names,
             dynamic_axes=dynamic_axes,
+            custom_translation_table=torchvision.ops.onnx_ops.onnx_translation_table(),
         )
         assert onnx_program is not None
         onnx_testing.assert_onnx_program(onnx_program)
+        if len(inputs) > 1:
+            for input in inputs[1:]:
+                onnx_testing.assert_onnx_program(onnx_program, args=input)
 
     def test_nms(self):
         num_boxes = 100
@@ -261,41 +266,6 @@ def get_features(self, images):
         features = OrderedDict(features)
         return features
 
-    def test_rpn(self):
-        set_rng_seed(0)
-
-        class RPNModule(torch.nn.Module):
-            def __init__(self_module):
-                super().__init__()
-                self_module.rpn = self._init_test_rpn()
-
-            def forward(self_module, images, features):
-                images = ImageList(images, [i.shape[-2:] for i in images])
-                return self_module.rpn(images, features)
-
-        images = torch.rand(2, 3, 150, 150)
-        features = self.get_features(images)
-        images2 = torch.rand(2, 3, 80, 80)
-        test_features = self.get_features(images2)
-
-        model = RPNModule()
-        model.eval()
-        model(images, features)
-
-        self.run_model(
-            model,
-            [(images, features), (images2, test_features)],
-            input_names=["input1", "input2", "input3", "input4", "input5", "input6"],
-            dynamic_axes={
-                "input1": [0, 1, 2, 3],
-                "input2": [0, 1, 2, 3],
-                "input3": [0, 1, 2, 3],
-                "input4": [0, 1, 2, 3],
-                "input5": [0, 1, 2, 3],
-                "input6": [0, 1, 2, 3],
-            },
-        )
-
     def test_multi_scale_roi_align(self):
         class TransformModule(torch.nn.Module):
             def __init__(self):
@@ -332,49 +302,6 @@ def forward(self, input, boxes):
             ],
         )
 
-    def test_roi_heads(self):
-        class RoiHeadsModule(torch.nn.Module):
-            def __init__(self_module):
-                super().__init__()
-                self_module.transform = self._init_test_generalized_rcnn_transform()
-                self_module.rpn = self._init_test_rpn()
-                self_module.roi_heads = self._init_test_roi_heads_faster_rcnn()
-
-            def forward(self_module, images, features):
-                original_image_sizes = [img.shape[-2:] for img in images]
-                images = ImageList(images, [i.shape[-2:] for i in images])
-                proposals, _ = self_module.rpn(images, features)
-                detections, _ = self_module.roi_heads(
-                    features, proposals, images.image_sizes
-                )
-                detections = self_module.transform.postprocess(
-                    detections, images.image_sizes, original_image_sizes
-                )
-                return detections
-
-        images = torch.rand(2, 3, 100, 100)
-        features = self.get_features(images)
-        images2 = torch.rand(2, 3, 150, 150)
-        test_features = self.get_features(images2)
-
-        model = RoiHeadsModule()
-        model.eval()
-        model(images, features)
-
-        self.run_model(
-            model,
-            [(images, features), (images2, test_features)],
-            input_names=["input1", "input2", "input3", "input4", "input5", "input6"],
-            dynamic_axes={
-                "input1": [0, 1, 2, 3],
-                "input2": [0, 1, 2, 3],
-                "input3": [0, 1, 2, 3],
-                "input4": [0, 1, 2, 3],
-                "input5": [0, 1, 2, 3],
-                "input6": [0, 1, 2, 3],
-            },
-        )
-
     def get_image(self, rel_path: str, size: tuple[int, int]) -> torch.Tensor:
         import os
 
@@ -409,7 +336,7 @@ def test_faster_rcnn(self):
             [(images,), (test_images,), (dummy_image,)],
             input_names=["images_tensors"],
             output_names=["outputs"],
-            dynamic_axes={"images_tensors": [0, 1, 2], "outputs": [0, 1, 2]},
+            dynamic_axes={"images_tensors": [0, 1, 2]},
         )
         # Test exported model for an image with no detections on other images
         self.run_model(
@@ -417,7 +344,7 @@ def test_faster_rcnn(self):
             [(dummy_image,), (images,)],
             input_names=["images_tensors"],
             output_names=["outputs"],
-            dynamic_axes={"images_tensors": [0, 1, 2], "outputs": [0, 1, 2]},
+            dynamic_axes={"images_tensors": [0, 1, 2]},
         )
 
     def test_mask_rcnn(self):
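
A minimal usage sketch of the API these patches add, mirroring the wiring in
test_onnx_pt2.py's run_model (the model class and input shapes below are
illustrative, not part of the patches):

    import torch
    import torchvision
    import torchvision.ops.onnx_ops


    class SmallModel(torch.nn.Module):
        def forward(self, boxes, scores):
            # Dispatches to torch.ops.torchvision.nms.default, which the
            # translation table maps to the onnxscript-based _nms.
            return torchvision.ops.nms(boxes, scores, iou_threshold=0.5)


    boxes = torch.rand(100, 4)
    boxes[:, 2:] += boxes[:, :2]  # make boxes well-formed: x2 >= x1, y2 >= y1
    scores = torch.randn(100)

    onnx_program = torch.onnx.export(
        SmallModel(),
        (boxes, scores),
        dynamo=True,
        custom_translation_table=torchvision.ops.onnx_ops.onnx_translation_table(),
    )
    onnx_program.save("nms.onnx")

Returning the table from a function, rather than exposing a module-level dict,
keeps the onnxscript dependency lazy, matching the deferred imports inside each
op implementation.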