diff --git a/backends/cadence/aot/functions.yaml b/backends/cadence/aot/functions.yaml index 60fda2853a3..754b781cb7b 100644 --- a/backends/cadence/aot/functions.yaml +++ b/backends/cadence/aot/functions.yaml @@ -399,7 +399,7 @@ - arg_meta: null kernel_name: impl::generic::quantized_conv1d_ncl_per_tensor_out -- func: cadence::quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, Tensor? offset=None, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null kernel_name: impl::generic::quantized_conv1d_nlc_per_tensor_out diff --git a/backends/cadence/aot/functions_hifi.yaml b/backends/cadence/aot/functions_hifi.yaml index 3b1932d01ec..bf9ef2976a9 100644 --- a/backends/cadence/aot/functions_hifi.yaml +++ b/backends/cadence/aot/functions_hifi.yaml @@ -574,7 +574,7 @@ - arg_meta: null kernel_name: impl::HiFi::quantized_conv1d_ncl_per_tensor_out -- func: cadence::quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) 
+- func: cadence::quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, Tensor? offset=None, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null kernel_name: impl::HiFi::quantized_conv1d_nlc_per_tensor_out diff --git a/backends/cadence/aot/ops_registrations.py b/backends/cadence/aot/ops_registrations.py index a1d3ab871e1..f3e73028169 100644 --- a/backends/cadence/aot/ops_registrations.py +++ b/backends/cadence/aot/ops_registrations.py @@ -263,10 +263,10 @@ def register_fake( "quantized_conv1d_nlc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( - "quantized_conv1d_nlc.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv1d_nlc.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, Tensor? offset=None) -> (Tensor Z)" ) lib.define( - "quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) 
out) -> Tensor(a!)" + "quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, Tensor? offset=None, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( "quantized_depthwise_conv1d_ncl.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" @@ -1305,6 +1305,7 @@ def quantized_conv1d_nlc_per_tensor_meta( output_zero_point: int, out_multiplier: int, out_shift: int, + offset: Optional[torch.Tensor] = None, ) -> torch.Tensor: torch._check(bias.dtype == torch.int32, lambda: "expected int32") # NLC format: input is [N, L, C], weight is [OC, K, IC/groups] diff --git a/backends/cadence/generic/operators/op_quantized_conv1d_nlc.cpp b/backends/cadence/generic/operators/op_quantized_conv1d_nlc.cpp index b4e253ef366..6f42543cfc1 100644 --- a/backends/cadence/generic/operators/op_quantized_conv1d_nlc.cpp +++ b/backends/cadence/generic/operators/op_quantized_conv1d_nlc.cpp @@ -256,6 +256,7 @@ ::executorch::aten::Tensor& quantized_conv1d_nlc_per_tensor_out( int64_t output_zero_point, __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, + __ET_UNUSED const ::executorch::aten::optional<Tensor>& offset, Tensor& out) { (void)ctx; quantized_conv1d_nlc( diff --git a/backends/cadence/generic/operators/op_quantized_conv1d_nlc.h b/backends/cadence/generic/operators/op_quantized_conv1d_nlc.h index 7713121cf97..4f4d2877b27 100644 --- a/backends/cadence/generic/operators/op_quantized_conv1d_nlc.h +++ b/backends/cadence/generic/operators/op_quantized_conv1d_nlc.h @@ -54,6 +54,7 @@ ::executorch::aten::Tensor& quantized_conv1d_nlc_per_tensor_out( int64_t output_zero_point, 
int64_t out_multiplier, int64_t out_shift, + const ::executorch::aten::optional<Tensor>& offset, Tensor& out); } // namespace native diff --git a/backends/cadence/generic/operators/op_quantized_depthwise_conv1d_nlc.cpp b/backends/cadence/generic/operators/op_quantized_depthwise_conv1d_nlc.cpp index 2ae06a651d2..a8f98a76ffc 100644 --- a/backends/cadence/generic/operators/op_quantized_depthwise_conv1d_nlc.cpp +++ b/backends/cadence/generic/operators/op_quantized_depthwise_conv1d_nlc.cpp @@ -57,6 +57,7 @@ ::executorch::aten::Tensor& quantized_depthwise_conv1d_nlc_per_tensor_out( output_zero_point, out_multiplier, out_shift, + ::executorch::aten::optional<Tensor>(), out); } diff --git a/backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp b/backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp index d4631752495..5171c2908bc 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp @@ -238,6 +238,7 @@ void quantized_conv1d_nlc_per_tensor_out( int64_t output_zero_point, int64_t out_multiplier, int64_t out_shift, + __ET_UNUSED const ::executorch::aten::optional<Tensor>& offset, Tensor& out) { // HiFi nnlib kernels only support dilation=1. // Fall back to generic implementation for dilation > 1. 
@@ -258,6 +259,7 @@ void quantized_conv1d_nlc_per_tensor_out( output_zero_point, out_multiplier, out_shift, + offset, out); return; } @@ -284,6 +286,7 @@ void quantized_conv1d_nlc_per_tensor_out( output_zero_point, out_multiplier, out_shift, + offset, out); } else { xa_opt_quantized_conv1d_nlc_asym8sxsym8s_asym8s( @@ -320,6 +323,7 @@ void quantized_conv1d_nlc_per_tensor_out( output_zero_point, out_multiplier, out_shift, + offset, out); } else { xa_opt_quantized_conv1d_nlc_asym8uxsym8u_asym8u( diff --git a/backends/cadence/hifi/operators/op_quantized_depthwise_conv1d_nlc.cpp b/backends/cadence/hifi/operators/op_quantized_depthwise_conv1d_nlc.cpp index 9e7e13477ca..4299990b52a 100644 --- a/backends/cadence/hifi/operators/op_quantized_depthwise_conv1d_nlc.cpp +++ b/backends/cadence/hifi/operators/op_quantized_depthwise_conv1d_nlc.cpp @@ -206,6 +206,7 @@ void quantized_depthwise_conv1d_nlc_per_tensor_out( output_zero_point, out_multiplier, out_shift, + ::executorch::aten::optional<Tensor>(), out); return; }