Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_subdirectory(llama)
add_subdirectory(minicpm_o)
add_subdirectory(minicpm4)
add_subdirectory(qwen3)
add_subdirectory(qwen3_5)
add_subdirectory(qwen3_service)
add_subdirectory(qwen3_moe)
add_subdirectory(deepseek_ocr)
Expand Down
3 changes: 3 additions & 0 deletions examples/qwen3_5/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Standalone CLI runner for the Qwen3.5 example.
add_executable(mllm-qwen3-5-runner main.cpp)
# Needs the mllm runtime plus the CPU backend for on-host inference.
target_link_libraries(mllm-qwen3-5-runner PRIVATE MllmRT MllmCPUBackend)
target_include_directories(mllm-qwen3-5-runner PRIVATE ${MLLM_INCLUDE_DIR})
76 changes: 76 additions & 0 deletions examples/qwen3_5/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#include <iostream>
#include <fmt/core.h>
#include <mllm/mllm.hpp>
#include <mllm/models/qwen3_5/modeling_qwen3_5.hpp>
#include <mllm/models/qwen3_5/tokenization_qwen3_5.hpp>
#include <mllm/utils/AnyValue.hpp>

using mllm::Argparse;

MLLM_MAIN({
  // Command-line options. All data options are required; the help flag is
  // handled specially below, before required-argument validation runs.
  auto& help = Argparse::add<bool>("-h|--help").help("Show help message");
  auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model path").required(true);
  auto& model_version = Argparse::add<std::string>("-mv|--model_version").help("Model version").required(true);
  auto& tokenizer_path = Argparse::add<std::string>("-t|--tokenizer_path").help("Tokenizer directory").required(true);
  auto& config_path = Argparse::add<std::string>("-c|--config_path").help("Config path").required(true);

  // Detect the help flag BEFORE the full parse: with required(true) options,
  // running with just `-h` would otherwise fail required-argument validation
  // and never reach the help check.
  for (int i = 1; i < argc; ++i) {
    const std::string arg = argv[i];
    if (arg == "-h" || arg == "--help") {
      Argparse::printHelp();
      mllm::shutdownContext();
      return 0;
    }
  }

  Argparse::parse(argc, argv);

  // Defensive fallback in case Argparse recognizes a help spelling the scan
  // above does not.
  if (help.isSet()) {
    Argparse::printHelp();
    mllm::shutdownContext();
    return 0;
  }

#ifdef MLLM_PERFETTO_ENABLE
  mllm::perf::start();
#endif

  // Map the user-supplied version string onto the model file format.
  // Unknown values fall back to v1, but loudly instead of silently.
  mllm::ModelFileVersion file_version = mllm::ModelFileVersion::kV1;
  if (model_version.get() == "v1") {
    file_version = mllm::ModelFileVersion::kV1;
  } else if (model_version.get() == "v2") {
    file_version = mllm::ModelFileVersion::kV2;
  } else {
    fmt::print("Warning: unknown model version '{}', defaulting to v1\n", model_version.get());
  }

  {
    auto cfg = mllm::models::qwen3_5::Qwen3_5Config(config_path.get());
    auto tokenizer = mllm::models::qwen3_5::Qwen3_5Tokenizer(tokenizer_path.get());
    auto model = mllm::models::qwen3_5::Qwen3_5ForCausalLM(cfg);

    fmt::print("Qwen3.5 0.8B: {} layers ({} full attention + {} GDN)\n",
               cfg.num_hidden_layers, cfg.numFullAttentionLayers(), cfg.numGDNLayers());

    auto param = mllm::load(model_path.get(), file_version);
    model.load(param);

    fmt::print("\n{:*^60}\n", " Qwen3.5 Interactive CLI ");
    fmt::print("Enter 'exit' or 'quit' to end the session\n\n");

    // Interactive loop: the banner promises 'exit'/'quit' handling, so honor
    // it here. EOF on stdin (Ctrl-D / closed pipe) also ends the session.
    while (true) {
      std::string prompt_text;
      fmt::print("Prompt text (or 'exit/quit'): ");
      if (!std::getline(std::cin, prompt_text)) { break; }
      if (prompt_text == "exit" || prompt_text == "quit") { break; }

      try {
        fmt::print("Processing...\n");
        auto inputs = tokenizer.convertMessage({.prompt = prompt_text});

        fmt::print("\nResponse: ");

        for (auto& step : model.chat(inputs)) { std::wcout << tokenizer.detokenize(step.cur_token_id) << std::flush; }

        fmt::print("\n{}\n", std::string(60, '-'));
      } catch (const std::exception& e) { fmt::print("\nError: {}\n{}\n", e.what(), std::string(60, '-')); }
    }

    model.perfSummary();
  }

#ifdef MLLM_PERFETTO_ENABLE
  mllm::perf::stop();
  mllm::perf::saveReport("qwen3_5.perf");
#endif

  mllm::print("\n");
  mllm::memoryReport();
})
5 changes: 4 additions & 1 deletion mllm/backends/qnn/aot/passes/LLM2QnnLoweringPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
#include "mllm/backends/qnn/aot/visitor/Reduce.hpp"
#include "mllm/backends/qnn/aot/visitor/Equal.hpp"
#include "mllm/backends/qnn/aot/visitor/Sigmoid.hpp"
#include "mllm/backends/qnn/aot/visitor/SiLU.hpp"
#include "mllm/backends/qnn/aot/visitor/RoPE.hpp"
#include "mllm/backends/qnn/aot/visitor/Matmul.hpp"
#include "mllm/backends/qnn/aot/visitor/Repeat.hpp"
#include "mllm/backends/qnn/aot/visitor/Softmax.hpp"
Expand All @@ -39,7 +41,8 @@ LLM2QnnLoweringPass::LLM2QnnLoweringPass() {
QnnAOTViewPattern, QnnAOTIndexPattern, QnnAOTGatherPattern, QnnAOTRMSNormPattern, QnnAOTLinearPattern,
QnnAOTTransposePattern, QnnAOTSlicePattern, QnnAOTConcatPattern, QnnAOTRepeatPattern, QnnAOTMatMulPattern,
QnnAOTReduceMaxPattern, QnnAOTReduceMinPattern, QnnAOTReduceMeanPattern, QnnAOTReduceSumPattern,
QnnAOTEqualPattern, QnnAOTWherePattern, QnnAOTSoftmaxPattern, QnnAOTSigmoidPattern, QnnAOTConv2DPattern>();
QnnAOTEqualPattern, QnnAOTWherePattern, QnnAOTSoftmaxPattern, QnnAOTSigmoidPattern, QnnAOTSiLUPattern,
QnnAOTRoPEPattern, QnnAOTConv2DPattern>();
}

uint8_t LLM2QnnLoweringPass::run(const ir::node_ptr_t& op) {
Expand Down
73 changes: 73 additions & 0 deletions mllm/backends/qnn/aot/visitor/RoPE.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.
//
// Lowers RoPE (Rotary Position Embedding) to the custom HTP op from LLaMAPackage.
// The custom op signature: RoPE(input, sin, cos, h_cnt; pose_type) → output
// It supports partial rotation natively via the HVX kernel.

#include "mllm/utils/Common.hpp"
#include "mllm/core/aops/RoPEOp.hpp"
#include "mllm/compile/ir/linalg/Op.hpp"
#include "mllm/compile/ir/builtin/Attribute.hpp"
#include "mllm/compile/ir/tensor/Value.hpp"
#include "mllm/backends/qnn/aot/QnnWrappersAPI.hpp"
#include "mllm/backends/qnn/aot/visitor/RoPE.hpp"
#include "mllm/backends/qnn/aot/passes/AOTCompileContext.hpp"

namespace mllm::qnn::aot {

// Matches only linalg RoPE ops that have been tagged for QNN offload.
bool QnnAOTRoPEPattern::isMatch(const mllm::ir::op_ptr_t& op) {
  if (!op->isa_<mllm::ir::linalg::RoPEOp>()) { return false; }
  return op->getAttr("using_qnn") != nullptr;
}

// Lowers a linalg RoPE op to the custom HTP "RoPE" node from LLaMAPackage.
// Returns false (leaving the IR untouched) on any malformed input.
bool QnnAOTRoPEPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_t& op) {
  auto env = AOTCompileContext::getInstance().getEnv();

  auto rope_op = op->cast_<mllm::ir::linalg::RoPEOp>();
  if (!rope_op) {
    MLLM_ERROR("Failed to cast to linalg::RoPEOp");
    return false;
  }

  // Graph/context names are attached by an earlier pass; refuse to lower
  // ops that were never assigned to a QNN graph.
  MLLM_RETURN_FALSE_IF_NOT(op->getAttr("qnn_graph_name"));
  auto qnn_graph_name = op->getAttr("qnn_graph_name")->cast_<ir::StrAttr>()->data();
  MLLM_RETURN_FALSE_IF_NOT(op->getAttr("qnn_context_name"));
  auto qnn_context_name = op->getAttr("qnn_context_name")->cast_<ir::StrAttr>()->data();

  auto a = rope_op->getAOp();
  auto rope_aop = dynamic_cast<mllm::aops::RoPEOp*>(a);
  if (!rope_aop) {
    MLLM_ERROR("Failed to cast to aops::RoPEOp");
    return false;
  }

  // Validate arity before dereferencing iterators so malformed IR cannot
  // walk past the end of the input list.
  if (op->inputs().size() < 3 || op->outputs().empty()) {
    MLLM_ERROR("RoPE lowering expects at least 3 inputs (x, sin, cos) and 1 output");
    return false;
  }

  // RoPE inputs: x, sin, cos
  auto inputs_it = op->inputs().begin();
  auto i_0 = (*inputs_it)->cast_<ir::tensor::TensorValue>();                 // input tensor
  auto i_sin = (*std::next(inputs_it))->cast_<ir::tensor::TensorValue>();    // sin embeddings
  auto i_cos = (*std::next(inputs_it, 2))->cast_<ir::tensor::TensorValue>(); // cos embeddings

  // RoPE output
  auto o_0 = op->outputs().front()->cast_<ir::tensor::TensorValue>();

  // Create the custom HTP RoPE op from LLaMAPackage
  auto qnn_op_node = QnnAOTNodeOperation::create("RoPE");
  qnn_op_node->setPackageName("LLaMAPackage");

  // pose_type selects the rotation variant inside the HVX kernel. The kernel
  // accepts only 2 (LLaMA-style rotation) or 4 and raises a fatal error for
  // anything else, so the previous hardcoded 0 could never execute. Qwen
  // models use LLaMA-style RoPE, hence 2 here.
  // TODO(review): thread the variant through RoPEOpOptions instead of
  // hardcoding it, so non-LLaMA variants can be lowered too.
  qnn_op_node->emplaceParamScalar(mllm::qnn::QNNParamScalarWrapper::create("pose_type", static_cast<uint32_t>(2)));

  qnn_op_node->emplaceInput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, i_0))
      ->emplaceInput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, i_sin))
      ->emplaceInput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, i_cos))
      ->emplaceOutput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, o_0))
      ->setName(rope_op->getAOp()->getName());

  // Register this op node into one graph.
  env->captureAOTNodeOp(qnn_context_name, qnn_graph_name, qnn_op_node);

  return true;
}

} // namespace mllm::qnn::aot
25 changes: 25 additions & 0 deletions mllm/backends/qnn/aot/visitor/RoPE.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.

#pragma once

#include "mllm/core/OpTypes.hpp"
#include "mllm/compile/ir/Node.hpp"
#include "mllm/backends/qnn/aot/visitor/Base.hpp"

namespace mllm::qnn::aot {

// Lowers RoPE to the custom HTP op from LLaMAPackage.
// The custom op handles partial rotation natively (partial_dimension parameter).
class QnnAOTRoPEPattern : public QnnAOTBasePattern {
public:
// True iff `op` is a linalg RoPE op tagged with the "using_qnn" attribute.
bool isMatch(const mllm::ir::op_ptr_t& op) override;

// Emits the custom HTP RoPE node into the op's QNN graph; returns false
// (IR unchanged) if the op is malformed or missing graph/context attributes.
bool rewrite(ir::IRWriter& writer, const ir::op_ptr_t& op) override;

// Factory used at pass registration: pairs the op-type key with a fresh
// pattern instance.
static inline std::pair<OpTypes, std::shared_ptr<QnnAOTRoPEPattern>> create() {
return {OpTypes::kRoPE, std::make_shared<QnnAOTRoPEPattern>()};
}
};

} // namespace mllm::qnn::aot
67 changes: 67 additions & 0 deletions mllm/backends/qnn/aot/visitor/SiLU.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.
//
// SiLU(x) = x * sigmoid(x)
// Decomposed into standard QNN ops: Sigmoid + ElementWiseMultiply

#include "mllm/utils/Common.hpp"
#include "mllm/compile/ir/linalg/Op.hpp"
#include "mllm/compile/ir/builtin/Attribute.hpp"
#include "mllm/compile/ir/tensor/Value.hpp"
#include "mllm/backends/qnn/aot/QnnWrappersAPI.hpp"
#include "mllm/backends/qnn/aot/visitor/SiLU.hpp"
#include "mllm/backends/qnn/aot/passes/AOTCompileContext.hpp"

namespace mllm::qnn::aot {

// Matches only linalg SiLU ops that have been tagged for QNN offload.
bool QnnAOTSiLUPattern::isMatch(const mllm::ir::op_ptr_t& op) {
  if (!op->isa_<mllm::ir::linalg::SiLUOp>()) { return false; }
  return op->getAttr("using_qnn") != nullptr;
}

// Lowers SiLU(x) = x * sigmoid(x) into two standard QNN ops:
// Sigmoid followed by ElementWiseMultiply, via an intermediate tensor.
// Returns false (IR unchanged) on any malformed input.
bool QnnAOTSiLUPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_t& op) {
  auto env = AOTCompileContext::getInstance().getEnv();

  auto silu_op = op->cast_<mllm::ir::linalg::SiLUOp>();
  if (!silu_op) {
    MLLM_ERROR("Failed to cast to linalg::SiLUOp");
    return false;
  }

  // Graph/context names are attached by an earlier pass; refuse to lower
  // ops that were never assigned to a QNN graph.
  MLLM_RETURN_FALSE_IF_NOT(op->getAttr("qnn_graph_name"));
  auto qnn_graph_name = op->getAttr("qnn_graph_name")->cast_<ir::StrAttr>()->data();
  MLLM_RETURN_FALSE_IF_NOT(op->getAttr("qnn_context_name"));
  auto qnn_context_name = op->getAttr("qnn_context_name")->cast_<ir::StrAttr>()->data();

  // Validate arity before calling .front() on the value lists.
  if (op->inputs().empty() || op->outputs().empty()) {
    MLLM_ERROR("SiLU lowering expects 1 input and 1 output");
    return false;
  }

  // Input and output tensors
  auto i_0 = op->inputs().front()->cast_<ir::tensor::TensorValue>();
  auto o_0 = op->outputs().front()->cast_<ir::tensor::TensorValue>();

  // Create intermediate tensor for sigmoid output (same shape/dtype as output)
  auto sigmoid_out_tensor = Tensor::empty(o_0->tensor_.shape(), o_0->tensor_.dtype());
  sigmoid_out_tensor.setName(silu_op->getAOp()->getName() + "_sigmoid_out");
  auto sigmoid_out = writer.getContext()->create<ir::tensor::TensorValue>(sigmoid_out_tensor);

  // The intermediate tensor must carry the same quantization recipe as the
  // op output. Require the attribute, consistent with the other QNN AOT
  // patterns (RMSNorm/Add/Mul/Neg) which all treat a missing recipe as an
  // error, so quantized deployment never sees an unannotated tensor.
  MLLM_RETURN_FALSE_IF_NOT(op->getAttr("quant_recipe"));
  sigmoid_out->setAttr("quant_recipe", op->getAttr("quant_recipe"));

  // Step 1: Sigmoid(input) → sigmoid_out
  auto sigmoid_node = QnnAOTNodeOperation::create("Sigmoid");
  sigmoid_node->emplaceInput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, i_0))
      ->emplaceOutput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, sigmoid_out))
      ->setName(silu_op->getAOp()->getName() + "_sigmoid");
  env->captureAOTNodeOp(qnn_context_name, qnn_graph_name, sigmoid_node);

  // Step 2: ElementWiseMultiply(input, sigmoid_out) → output
  auto mul_node = QnnAOTNodeOperation::create("ElementWiseMultiply");
  mul_node->emplaceInput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, i_0))
      ->emplaceInput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, sigmoid_out))
      ->emplaceOutput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, o_0))
      ->setName(silu_op->getAOp()->getName());
  env->captureAOTNodeOp(qnn_context_name, qnn_graph_name, mul_node);

  return true;
}

} // namespace mllm::qnn::aot
25 changes: 25 additions & 0 deletions mllm/backends/qnn/aot/visitor/SiLU.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.

#pragma once

#include "mllm/core/OpTypes.hpp"
#include "mllm/compile/ir/Node.hpp"
#include "mllm/backends/qnn/aot/visitor/Base.hpp"

namespace mllm::qnn::aot {

// SiLU(x) = x * sigmoid(x)
// Decomposed into two standard QNN ops: Sigmoid + ElementWiseMultiply
class QnnAOTSiLUPattern : public QnnAOTBasePattern {
public:
// True iff `op` is a linalg SiLU op tagged with the "using_qnn" attribute.
bool isMatch(const mllm::ir::op_ptr_t& op) override;

// Emits the Sigmoid + ElementWiseMultiply decomposition into the op's QNN
// graph; returns false (IR unchanged) if the op is malformed.
bool rewrite(ir::IRWriter& writer, const ir::op_ptr_t& op) override;

// Factory used at pass registration: pairs the op-type key with a fresh
// pattern instance.
static inline std::pair<OpTypes, std::shared_ptr<QnnAOTSiLUPattern>> create() {
return {OpTypes::kSiLU, std::make_shared<QnnAOTSiLUPattern>()};
}
};

} // namespace mllm::qnn::aot
Loading