openvinotoolkit · vatsalashanubhag · Mar 18, 2025 · Mar 18, 2025
@@ -12,9 +12,13 @@
 #include "openvino/op/divide.hpp"
 #include "openvino/op/maximum.hpp"
 #include "openvino/op/multiply.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/op/shape_of.hpp"
 #include "openvino/op/sigmoid.hpp"
+#include "openvino/op/softmax.hpp"
 #include "openvino/op/subtract.hpp"
 #include "utils/common.hpp"
+#include "utils/reshape.hpp"
 
 using namespace ov::op;
 
@@ -102,10 +106,32 @@ ov::OutputVector qlinear_avg_pool(const ov::frontend::onnx::Node& node) {
     });
 }
 
+// Translator for com.microsoft QLinearSoftmax.
+//
+// qlinear_activation invokes the callback with the dequantized input; the callback returns the
+// softmax subgraph built on top of it. Attributes read from the node:
+//   - "axis"  (int, -1 when absent): softmax axis.
+//   - "opset" (int, read without a default): opset of the ONNX Softmax whose semantics are emulated.
+// NOTE(review): requantization of the result is presumably done inside qlinear_activation - confirm there.
+ov::OutputVector qlinear_softmax(const ov::frontend::onnx::Node& node) {
+    return qlinear_activation(node, [&](const std::shared_ptr<ov::Node>& input_dequantized) {
+        const auto axis = node.get_attribute_value<int64_t>("axis", -1);
+        const auto opset = node.get_attribute_value<int64_t>("opset");
+
+        std::shared_ptr<ov::Node> softmax_result;
+        if (opset <= 12) {
+            // ONNX Softmax up to opset 12 coerces the input to 2D at `axis`, normalizes along the
+            // second dimension and restores the original shape. The ShapeOf node is only needed on
+            // this path, so it is created here rather than unconditionally.
+            const auto original_shape = std::make_shared<v3::ShapeOf>(input_dequantized);
+            const auto coerced_data = ov::op::util::flatten(input_dequantized, static_cast<int>(axis));
+            softmax_result = std::make_shared<v8::Softmax>(coerced_data, 1);
+            softmax_result = std::make_shared<v1::Reshape>(softmax_result, original_shape, false);
+        } else {
+            // Newer opsets normalize along `axis` directly.
+            softmax_result = std::make_shared<v8::Softmax>(input_dequantized, axis);
+        }
+
+        return softmax_result;
+    });
+}
+
 bool register_multiple_operators(void) {  // registers this file's QLinear* translators under MICROSOFT_DOMAIN
     ONNX_OP_M("QLinearSigmoid", OPSET_SINCE(1), com_microsoft::opset_1::qlinear_sigmoid, MICROSOFT_DOMAIN);
     ONNX_OP_M("QLinearLeakyRelu", OPSET_SINCE(1), com_microsoft::opset_1::qlinear_leaky_relu, MICROSOFT_DOMAIN);
     ONNX_OP_M("QLinearAveragePool", OPSET_SINCE(1), com_microsoft::opset_1::qlinear_avg_pool, MICROSOFT_DOMAIN);
+    ONNX_OP_M("QLinearSoftmax", OPSET_SINCE(1), com_microsoft::opset_1::qlinear_softmax, MICROSOFT_DOMAIN);
+
     return true;  // NOTE(review): constant result, presumably consumed by a static initializer - confirm at call site
 }
 

@@ -0,0 +1,101 @@
+ir_version: 3
+producer_name: "OpenVINO ONNX Frontend"
+producer_version: ""
+model_version: 0
+graph {
+  name: "test_qlinear_softmax_opset12"
+
+  node {
+    input: "X"
+    input: "X_scale"
+    input: "X_zero_point"
+    input: "Y_scale"
+    input: "Y_zero_point"
+    output: "Y"
+    op_type: "QLinearSoftmax"  # no "axis" attribute on purpose: the translator falls back to axis = -1
+    attribute {
+      name: "opset"
+      i: 12  # selects the translator's `opset <= 12` path (flatten, softmax, reshape back)
+      type: INT
+    }
+    domain: "com.microsoft"
+  }
+
+  input {
+    name: "X"
+    type {
+      tensor_type {
+        elem_type: 3  # INT8
+        shape {
+          dim { dim_value: 4 }
+          dim { dim_value: 5 }
+        }
+      }
+    }
+  }
+
+  input {
+    name: "X_scale"
+    type {
+      tensor_type {
+        elem_type: 1  # FLOAT
+        shape {
+          dim { dim_value: 1 }
+        }
+      }
+    }
+  }
+
+  input {
+    name: "X_zero_point"
+    type {
+      tensor_type {
+        elem_type: 3  # INT8
+        shape {
+          dim { dim_value: 1 }
+        }
+      }
+    }
+  }
+
+  input {
+    name: "Y_scale"
+    type {
+      tensor_type {
+        elem_type: 1  # FLOAT
+        shape {
+          dim { dim_value: 1 }
+        }
+      }
+    }
+  }
+
+  input {
+    name: "Y_zero_point"
+    type {
+      tensor_type {
+        elem_type: 3  # INT8
+        shape {
+          dim { dim_value: 1 }
+        }
+      }
+    }
+  }
+
+  output {
+    name: "Y"
+    type {
+      tensor_type {
+        elem_type: 3  # INT8
+        shape {
+          dim { dim_value: 4 }
+          dim { dim_value: 5 }
+        }
+      }
+    }
+  }
+}
+
+opset_import {
+  version: 1
+}
@@ -0,0 +1,106 @@
+ir_version: 3
+producer_name: "OpenVINO ONNX Frontend"
+producer_version: ""
+model_version: 0
+graph {
+  name: "test_qlinear_softmax"
+
+  node {
+    input: "X"
+    input: "X_scale"
+    input: "X_zero_point"
+    input: "Y_scale"
+    input: "Y_zero_point"
+    output: "Y"
+    op_type: "QLinearSoftmax"
+    attribute {
+      name: "axis"
+      i: -1  # softmax along the last axis
+      type: INT
+    }
+    attribute {
+      name: "opset"
+      i: 13  # must be > 12 so this model reaches the translator's direct Softmax path
+      type: INT
+    }
+    domain: "com.microsoft"
+  }
+
+  input {
+    name: "X"
+    type {
+      tensor_type {
+        elem_type: 3  # INT8
+        shape {
+          dim { dim_value: 2 }
+          dim { dim_value: 3 }
+        }
+      }
+    }
+  }
+
+  input {
+    name: "X_scale"
+    type {
+      tensor_type {
+        elem_type: 1  # FLOAT
+        shape {
+          dim { dim_value: 1 }
+        }
+      }
+    }
+  }
+
+  input {
+    name: "X_zero_point"
+    type {
+      tensor_type {
+        elem_type: 3  # INT8
+        shape {
+          dim { dim_value: 1 }
+        }
+      }
+    }
+  }
+
+  input {
+    name: "Y_scale"
+    type {
+      tensor_type {
+        elem_type: 1  # FLOAT
+        shape {
+          dim { dim_value: 1 }
+        }
+      }
+    }
+  }
+
+  input {
+    name: "Y_zero_point"
+    type {
+      tensor_type {
+        elem_type: 3  # INT8
+        shape {
+          dim { dim_value: 1 }
+        }
+      }
+    }
+  }
+
+  output {
+    name: "Y"
+    type {
+      tensor_type {
+        elem_type: 3  # INT8
+        shape {
+          dim { dim_value: 2 }
+          dim { dim_value: 3 }
+        }
+      }
+    }
+  }
+}
+
+opset_import {
+  version: 1
+}
@@ -1740,3 +1740,48 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_bias_add) {
 
     test_case.run();
 }
+
+// Runs QLinearSoftmax from the qlinear_softmax_opset13 model on a 2x3 int8 tensor.
+OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_qlinearsoftmax) {
+    const Shape data_shape{2, 3};
+    const Shape param_shape{1};
+
+    // With a scale of 0.1 both rows dequantize to values spaced 2.5 apart, so softmax along the
+    // last axis yields the same result for each row.
+    const std::vector<int8_t> x_data{-50, -25, 0, 25, 50, 75};
+    const std::vector<float> input_scale{0.1f};
+    const std::vector<int8_t> input_zero_point{0};
+    const std::vector<float> output_scale{0.03f};
+    const std::vector<int8_t> output_zero_point{5};
+    const std::vector<int8_t> y_expected{5, 7, 35, 5, 7, 35};
+
+    const auto model = convert_model("com.microsoft/qlinear_softmax_opset13.onnx");
+    auto test_case = ov::test::TestCase(model, s_device);
+
+    // Inputs follow the node's input order: X, X_scale, X_zero_point, Y_scale, Y_zero_point.
+    test_case.add_input<int8_t>(data_shape, x_data);
+    test_case.add_input<float>(param_shape, input_scale);
+    test_case.add_input<int8_t>(param_shape, input_zero_point);
+    test_case.add_input<float>(param_shape, output_scale);
+    test_case.add_input<int8_t>(param_shape, output_zero_point);
+    test_case.add_expected_output<int8_t>(data_shape, y_expected);
+
+    test_case.run();
+}
+
+// Runs QLinearSoftmax from the qlinear_softmax_opset12 model (no "axis" attribute) on a 4x5 int8 tensor.
+OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_qlinearsoftmax_opset12) {
+    const Shape data_shape{4, 5};
+    const Shape param_shape{1};
+
+    // Rows 0, 1 and 3 are each spaced 20 apart before dequantization, so they share one expected row;
+    // row 2 is spaced 5 apart and produces a flatter distribution.
+    const std::vector<int8_t> x_data{-60, -40, -20, 0, 20, 40,  60,  80,  100, 120,
+                                     -10, -5,  0,   5, 10, -80, -60, -40, -20, 0};
+    const std::vector<float> input_scale{0.15f};
+    const std::vector<int8_t> input_zero_point{0};
+    const std::vector<float> output_scale{0.05f};
+    const std::vector<int8_t> output_zero_point{3};
+    const std::vector<int8_t> y_expected{3, 3, 3, 3, 22, 3, 3, 3, 3, 22, 3, 4, 5, 8, 13, 3, 3, 3, 3, 22};
+
+    const auto model = convert_model("com.microsoft/qlinear_softmax_opset12.onnx");
+    auto test_case = ov::test::TestCase(model, s_device);
+
+    // Inputs follow the node's input order: X, X_scale, X_zero_point, Y_scale, Y_zero_point.
+    test_case.add_input<int8_t>(data_shape, x_data);
+    test_case.add_input<float>(param_shape, input_scale);
+    test_case.add_input<int8_t>(param_shape, input_zero_point);
+    test_case.add_input<float>(param_shape, output_scale);
+    test_case.add_input<int8_t>(param_shape, output_zero_point);
+    test_case.add_expected_output<int8_t>(data_shape, y_expected);
+
+    test_case.run();
+}