From 24d47a03f497f068b940ed2237c1784e43ad945b Mon Sep 17 00:00:00 2001 From: Hamidreza Khazaei Date: Thu, 7 May 2026 09:18:05 -0700 Subject: [PATCH] Add optional offset arg to quantized_conv1d_nlc and precompute it AOT (#19344) Summary: Extends `cadence::quantized_conv1d_nlc` (per_tensor / per_tensor_out) with an optional `offset` tensor that carries the precomputed zero-point correction term `-(sum(W) * X_z)` per output channel. - Updates the op schema in functions.yaml / functions_hifi.yaml / ops_registrations.py to add `Tensor? offset=None`. - Threads the new `offset` argument through the generic and HiFi conv1d_nlc kernels (currently unused by these kernels). - Updates the depthwise conv1d_nlc callers to pass an empty optional. - Extends `PrecomputeForQuantizedConvPass` to also precompute the offset for `quantized_conv1d_nlc.per_tensor` (sum over weight dims [1, 2]) and adds a unit test for the new path. Reviewed By: abeakkas Differential Revision: D103893688 --- backends/cadence/aot/functions.yaml | 2 +- backends/cadence/aot/functions_hifi.yaml | 2 +- backends/cadence/aot/ops_registrations.py | 5 +++-- .../cadence/generic/operators/op_quantized_conv1d_nlc.cpp | 1 + backends/cadence/generic/operators/op_quantized_conv1d_nlc.h | 1 + .../generic/operators/op_quantized_depthwise_conv1d_nlc.cpp | 1 + backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp | 4 ++++ .../hifi/operators/op_quantized_depthwise_conv1d_nlc.cpp | 1 + 8 files changed, 13 insertions(+), 4 deletions(-) diff --git a/backends/cadence/aot/functions.yaml b/backends/cadence/aot/functions.yaml index 60fda2853a3..754b781cb7b 100644 --- a/backends/cadence/aot/functions.yaml +++ b/backends/cadence/aot/functions.yaml @@ -399,7 +399,7 @@ - arg_meta: null kernel_name: impl::generic::quantized_conv1d_ncl_per_tensor_out -- func: cadence::quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int 
weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, Tensor? offset=None, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null kernel_name: impl::generic::quantized_conv1d_nlc_per_tensor_out diff --git a/backends/cadence/aot/functions_hifi.yaml b/backends/cadence/aot/functions_hifi.yaml index 3b1932d01ec..bf9ef2976a9 100644 --- a/backends/cadence/aot/functions_hifi.yaml +++ b/backends/cadence/aot/functions_hifi.yaml @@ -574,7 +574,7 @@ - arg_meta: null kernel_name: impl::HiFi::quantized_conv1d_ncl_per_tensor_out -- func: cadence::quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, Tensor? offset=None, *, Tensor(a!) out) -> Tensor(a!) 
kernels: - arg_meta: null kernel_name: impl::HiFi::quantized_conv1d_nlc_per_tensor_out diff --git a/backends/cadence/aot/ops_registrations.py b/backends/cadence/aot/ops_registrations.py index a1d3ab871e1..f3e73028169 100644 --- a/backends/cadence/aot/ops_registrations.py +++ b/backends/cadence/aot/ops_registrations.py @@ -263,10 +263,10 @@ def register_fake( "quantized_conv1d_nlc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( - "quantized_conv1d_nlc.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv1d_nlc.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, Tensor? offset=None) -> (Tensor Z)" ) lib.define( - "quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, Tensor? offset=None, *, Tensor(a!) 
out) -> Tensor(a!)" ) lib.define( "quantized_depthwise_conv1d_ncl.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" @@ -1305,6 +1305,7 @@ def quantized_conv1d_nlc_per_tensor_meta( output_zero_point: int, out_multiplier: int, out_shift: int, + offset: Optional[torch.Tensor] = None, ) -> torch.Tensor: torch._check(bias.dtype == torch.int32, lambda: "expected int32") # NLC format: input is [N, L, C], weight is [OC, K, IC/groups] diff --git a/backends/cadence/generic/operators/op_quantized_conv1d_nlc.cpp b/backends/cadence/generic/operators/op_quantized_conv1d_nlc.cpp index b4e253ef366..6f42543cfc1 100644 --- a/backends/cadence/generic/operators/op_quantized_conv1d_nlc.cpp +++ b/backends/cadence/generic/operators/op_quantized_conv1d_nlc.cpp @@ -256,6 +256,7 @@ ::executorch::aten::Tensor& quantized_conv1d_nlc_per_tensor_out( int64_t output_zero_point, __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, + __ET_UNUSED const ::executorch::aten::optional<Tensor>& offset, Tensor& out) { (void)ctx; quantized_conv1d_nlc( diff --git a/backends/cadence/generic/operators/op_quantized_conv1d_nlc.h b/backends/cadence/generic/operators/op_quantized_conv1d_nlc.h index 7713121cf97..4f4d2877b27 100644 --- a/backends/cadence/generic/operators/op_quantized_conv1d_nlc.h +++ b/backends/cadence/generic/operators/op_quantized_conv1d_nlc.h @@ -54,6 +54,7 @@ ::executorch::aten::Tensor& quantized_conv1d_nlc_per_tensor_out( int64_t output_zero_point, int64_t out_multiplier, int64_t out_shift, + const ::executorch::aten::optional<Tensor>& offset, Tensor& out); } // namespace native diff --git a/backends/cadence/generic/operators/op_quantized_depthwise_conv1d_nlc.cpp b/backends/cadence/generic/operators/op_quantized_depthwise_conv1d_nlc.cpp index 2ae06a651d2..a8f98a76ffc 100644 ---
a/backends/cadence/generic/operators/op_quantized_depthwise_conv1d_nlc.cpp +++ b/backends/cadence/generic/operators/op_quantized_depthwise_conv1d_nlc.cpp @@ -57,6 +57,7 @@ ::executorch::aten::Tensor& quantized_depthwise_conv1d_nlc_per_tensor_out( output_zero_point, out_multiplier, out_shift, + ::executorch::aten::optional<Tensor>(), out); } diff --git a/backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp b/backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp index d4631752495..5171c2908bc 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp @@ -238,6 +238,7 @@ void quantized_conv1d_nlc_per_tensor_out( int64_t output_zero_point, int64_t out_multiplier, int64_t out_shift, + __ET_UNUSED const ::executorch::aten::optional<Tensor>& offset, Tensor& out) { // HiFi nnlib kernels only support dilation=1. // Fall back to generic implementation for dilation > 1. @@ -258,6 +259,7 @@ void quantized_conv1d_nlc_per_tensor_out( output_zero_point, out_multiplier, out_shift, + offset, out); return; } @@ -284,6 +286,7 @@ void quantized_conv1d_nlc_per_tensor_out( output_zero_point, out_multiplier, out_shift, + offset, out); } else { xa_opt_quantized_conv1d_nlc_asym8sxsym8s_asym8s( @@ -320,6 +323,7 @@ void quantized_conv1d_nlc_per_tensor_out( output_zero_point, out_multiplier, out_shift, + offset, out); } else { xa_opt_quantized_conv1d_nlc_asym8uxsym8u_asym8u( diff --git a/backends/cadence/hifi/operators/op_quantized_depthwise_conv1d_nlc.cpp b/backends/cadence/hifi/operators/op_quantized_depthwise_conv1d_nlc.cpp index 9e7e13477ca..4299990b52a 100644 --- a/backends/cadence/hifi/operators/op_quantized_depthwise_conv1d_nlc.cpp +++ b/backends/cadence/hifi/operators/op_quantized_depthwise_conv1d_nlc.cpp @@ -206,6 +206,7 @@ void quantized_depthwise_conv1d_nlc_per_tensor_out( output_zero_point, out_multiplier, out_shift, + ::executorch::aten::optional<Tensor>(), out); return; }