Skip to content

Commit ccfedd3

Browse files
committed
Qualcomm AI Engine Direct - raise ValueError for 2-bit per-tensor encoding
Summary: raise ValueError for 2-bit per-tensor encoding in node_visitor and validators.
Test plan:
python backends/qualcomm/tests/test_qnn_delegate.py TestQNNQuantizedOperator.test_qnn_backend_16a2w_conv2d -b build-android -H ${HOST} -s ${SN} -m SM8850
python backends/qualcomm/tests/test_qnn_delegate.py TestQNNQuantizedOperator.test_qnn_backend_16a2w_linear -b build-android -H ${HOST} -s ${SN} -m SM8850
1 parent 46c784f commit ccfedd3

2 files changed

Lines changed: 24 additions & 20 deletions

File tree

backends/qualcomm/builders/node_visitor.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -293,16 +293,15 @@ def make_qnn_per_tensor_config(self, quant_attrs: Dict):
293293
}
294294
# check Qnn_ScaleOffset_t in QNN/include/QnnTypes.h
295295
quant_config[QCOM_OFFSET] = -quant_attrs[QCOM_ZERO_POINT]
296-
range_ = quant_config[QCOM_QUANT_MAX] - quant_config[QCOM_QUANT_MIN]
297-
assert range_ > 3, (
298-
f"2-bit quantization (range={range_}) does not support per-tensor encoding. "
299-
"Use per-channel quantization instead."
300-
)
301-
# special case for 4 bits
302-
if (
303-
quant_config[QCOM_DTYPE] == torch.int8
304-
and quant_config[QCOM_QUANT_MAX] - quant_config[QCOM_QUANT_MIN] <= 15
305-
):
296+
# special case for 4-bit / 2-bit integer weights.
297+
quant_range = quant_config[QCOM_QUANT_MAX] - quant_config[QCOM_QUANT_MIN]
298+
if quant_config[QCOM_DTYPE] == torch.int8 and quant_range <= 15:
299+
if quant_range <= 3:
300+
raise ValueError(
301+
f"2-bit quantization (range={quant_range}) "
302+
"does not support per-tensor encoding. Use per-channel quantization instead."
303+
)
304+
# special case for 4 bits
306305
quant_config[QCOM_BITWIDTH] = 4
307306
return (
308307
PyQnnManager.Qnn_QuantizationEncoding_t.QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET,

backends/qualcomm/quantizer/validators.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -283,12 +283,21 @@ def _qspec_port_encoding_type(node: Node, qspec: QuantizationSpecBase):
283283
qscheme = qspec.qscheme
284284

285285
if qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine]:
286-
range_ = qspec.quant_max - qspec.quant_min
287-
assert range_ > 3, (
288-
f"2-bit quantization (range={range_}) does not support per-tensor encoding. "
289-
"Use per-channel quantization instead."
290-
)
291-
if qspec.dtype == torch.int8 and range_ <= 15:
286+
# quant_max/quant_min may be None when a spec does not set explicit integer
287+
# bounds (e.g. the uint16 activations in 16a2w); skip the
288+
# 4-bit BW_SCALE_OFFSET special-casing for those tensors.
289+
if (
290+
qspec.dtype == torch.int8
291+
and qspec.quant_max is not None
292+
and qspec.quant_min is not None
293+
and (quant_range := qspec.quant_max - qspec.quant_min) <= 15
294+
):
295+
if quant_range <= 3:
296+
raise ValueError(
297+
f"2-bit quantization (range={quant_range}) "
298+
"does not support per-tensor encoding. "
299+
"Use per-channel quantization instead."
300+
)
292301
encoding_type = (
293302
PyQnnManager.Qnn_QuantizationEncoding_t.QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET
294303
)
@@ -303,10 +312,6 @@ def _qspec_port_encoding_type(node: Node, qspec: QuantizationSpecBase):
303312
encoding_type = (
304313
PyQnnManager.Qnn_QuantizationEncoding_t.QNN_QUANTIZATION_ENCODING_BLOCKWISE_EXPANSION
305314
)
306-
elif qspec.dtype == torch.int8 and qspec.quant_max - qspec.quant_min <= 3:
307-
encoding_type = (
308-
PyQnnManager.Qnn_QuantizationEncoding_t.QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET
309-
)
310315
elif qspec.dtype == torch.int8 and qspec.quant_max - qspec.quant_min <= 15:
311316
encoding_type = (
312317
PyQnnManager.Qnn_QuantizationEncoding_t.QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET

0 commit comments

Comments
 (0)