Update tests

ajrasane · ajrasane · commit a4c3e31e1719 · 2025-11-24T23:24:22.000Z
Signed-off-by: ajrasane <131806219+ajrasane@users.noreply.github.com>
diff --git a/modelopt/onnx/quantization/qdq_utils.py b/modelopt/onnx/quantization/qdq_utils.py
@@ -1051,7 +1051,7 @@ def replace_zero_scale_with_smallest_nonzero(onnx_model: onnx.ModelProto) -> onn
     return onnx_model
 
 
-def _cast_initializer_to_dtype(
+def cast_initializer_to_dtype(
     node: onnx.NodeProto, dtype: str, initializer_map: dict[str, onnx.TensorProto]
 ):
     """Casts the initializer to the given dtype."""
diff --git a/modelopt/torch/_deploy/utils/torch_onnx.py b/modelopt/torch/_deploy/utils/torch_onnx.py
@@ -32,20 +32,18 @@
 from torch.nn.parallel import DataParallel, DistributedDataParallel
 
 from modelopt.onnx.autocast.convert import convert_to_f16
-from modelopt.onnx.quantization.qdq_utils import (
-    fp4qdq_to_2dq,
-    qdq_to_dq,
-    quantize_weights_to_int4,
-    quantize_weights_to_mxfp8,
-    replace_zero_scale_with_smallest_nonzero,
-)
 from modelopt.onnx.export.quant_exporter import (
     INT4QuantExporter,
     MXFP8QuantExporter,
     NVFP4QuantExporter,
     ONNXQuantExporter,
 )
-from modelopt.onnx.quantization.qdq_utils import fp4qdq_to_2dq, qdq_to_dq, quantize_weights_to_mxfp8
+from modelopt.onnx.quantization.qdq_utils import (
+    fp4qdq_to_2dq,
+    qdq_to_dq,
+    quantize_weights_to_mxfp8,
+    replace_zero_scale_with_smallest_nonzero,
+)
 from modelopt.onnx.utils import (
     get_input_names,
     get_input_shapes,
@@ -368,6 +366,8 @@ def is_fp8_quantized(model: nn.Module) -> bool:
         ):
             return True
     return False
+
+
 def quantize_weights(model: nn.Module, onnx_model: onnx.ModelProto) -> onnx.ModelProto:
     """Real quantizes the weights in the onnx model.
 
diff --git a/tests/unit/onnx/test_qdq_utils.py b/tests/unit/onnx/test_qdq_utils.py
@@ -17,11 +17,11 @@
 import pytest
 from onnx import TensorProto, helper, numpy_helper
 
+from modelopt.onnx.export.quant_exporter import INT4QuantExporter
 from modelopt.onnx.quantization.qdq_utils import (
     _cast_fp4,
     _cast_fp8,
     fp4qdq_to_2dq,
-    quantize_weights_to_int4,
     quantize_weights_to_mxfp8,
 )
 
@@ -337,7 +337,9 @@ def test_basic_quantization_with_reshape_transpose(self):
         model = create_test_model_with_int4_dq_reshape_transpose_matmul()
 
         # Run quantization
-        quantized_model = quantize_weights_to_int4(model)
+        quantized_model = INT4QuantExporter.compute_scales(model)
+        quantized_model = INT4QuantExporter.compress_weights(quantized_model)
+        quantized_model = INT4QuantExporter.post_process(quantized_model)
 
         # Verify weight is converted to INT4
         weight_tensor = next(
@@ -362,7 +364,9 @@ def test_quantization_with_constant_scale(self):
         model = create_test_model_with_int4_dq_reshape_transpose_matmul(constant_scale=True)
 
         # Run quantization
-        quantized_model = quantize_weights_to_int4(model)
+        quantized_model = INT4QuantExporter.compute_scales(model)
+        quantized_model = INT4QuantExporter.compress_weights(quantized_model)
+        quantized_model = INT4QuantExporter.post_process(quantized_model)
 
         # Verify Constant node is removed
         constant_nodes = [node for node in quantized_model.graph.node if node.op_type == "Constant"]
@@ -385,7 +389,9 @@ def test_projection_bias_and_scale_casting(self):
         model = create_test_model_with_proj_nodes()
 
         # Run quantization
-        quantized_model = quantize_weights_to_int4(model)
+        quantized_model = INT4QuantExporter.compute_scales(model)
+        quantized_model = INT4QuantExporter.compress_weights(quantized_model)
+        quantized_model = INT4QuantExporter.post_process(quantized_model)
 
         # Verify bias tensor is cast to float16
         bias_tensor = next(