Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,43 @@ option(TILELANG_USE_HIP_STUBS
unset(_TILELANG_USE_HIP_STUBS_DEFAULT)
# Configs end

# --- LLVM Detection for Hexagon/CPU Backend ---
# USE_LLVM is a STRING cache entry rather than a BOOL because it carries either
# an ON/OFF switch or the location of an llvm-config executable. It defaults to
# OFF when the caller has not defined it.
if(NOT DEFINED USE_LLVM)
  set(USE_LLVM OFF CACHE STRING "Build with LLVM support (required for Hexagon)")
endif()

# A bare "ON" is resolved to a concrete llvm-config executable via
# find_program; every other value is passed through untouched.
if(USE_LLVM STREQUAL "ON")
  find_program(LLVM_CONFIG_PATH NAMES llvm-config-18 llvm-config-17 llvm-config)
  if(NOT LLVM_CONFIG_PATH)
    # Nothing was located: USE_LLVM keeps the value "ON" and configuration
    # continues with only a warning.
    message(WARNING "USE_LLVM is ON but llvm-config was not found in PATH.")
  else()
    set(USE_LLVM "${LLVM_CONFIG_PATH}" CACHE STRING "Path to llvm-config" FORCE)
  endif()
endif()

# Remember the requested LLVM setting before TVM's default configuration is
# loaded, so that it can be put back afterwards.
set(_TILELANG_USE_LLVM_SAVED "${USE_LLVM}")

# Load TVM sources and base configs
# NOTE(review): TVM_SOURCE is presumably defined by cmake/load_tvm.cmake —
# confirm against that file.
include(cmake/load_tvm.cmake)

if(EXISTS "${TVM_SOURCE}/cmake/config.cmake")
  include("${TVM_SOURCE}/cmake/config.cmake")
else()
  message(FATAL_ERROR "Nor tvm provided or submodule checkout-ed.")
endif()

# Restore USE_LLVM — TVM's config.cmake may have reset it
if(_TILELANG_USE_LLVM_SAVED AND NOT _TILELANG_USE_LLVM_SAVED STREQUAL "OFF")
  set(USE_LLVM "${_TILELANG_USE_LLVM_SAVED}" CACHE STRING "LLVM config path" FORCE)
  # Writing the cache entry alone is not sufficient when the included config
  # assigned USE_LLVM as a normal (non-cache) variable: with policy CMP0126 set
  # to NEW, set(... CACHE ... FORCE) leaves a same-named normal variable in
  # place, and that variable takes precedence when ${USE_LLVM} is evaluated.
  # Assigning the normal variable as well makes the restore effective under
  # both the OLD and the NEW policy behaviour.
  set(USE_LLVM "${_TILELANG_USE_LLVM_SAVED}")
  message(STATUS "TileLang: restored USE_LLVM=${USE_LLVM}")
endif()
unset(_TILELANG_USE_LLVM_SAVED)

if(USE_LLVM AND NOT USE_LLVM STREQUAL "OFF")
message(STATUS "TileLang Build with LLVM: ${USE_LLVM}")
add_definitions(-DTILELANG_HEXAGON_ENABLED)
Comment on lines +157 to +186
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

for bin in llvm-config-18 llvm-config-17 llvm-config; do
  if command -v "$bin" >/dev/null 2>&1; then
    "$bin" --version
    "$bin" --targets-built
    exit 0
  fi
done

echo "llvm-config not found in PATH" >&2
exit 1

Repository: tile-ai/tilelang

Length of output: 90


🏁 Script executed:

# First, locate and examine the CMakeLists.txt file
find . -name "CMakeLists.txt" -type f | head -20

Repository: tile-ai/tilelang

Length of output: 180


🏁 Script executed:

# Once found, examine the relevant lines (157-186)
sed -n '157,186p' CMakeLists.txt

Repository: tile-ai/tilelang

Length of output: 1124


🏁 Script executed:

# Search for TILELANG_HEXAGON_ENABLED usage in the codebase
rg -r "TILELANG_HEXAGON_ENABLED" --type cmake --type cpp --type h

Repository: tile-ai/tilelang

Length of output: 457


🏁 Script executed:

# Check for any other LLVM/Hexagon related configurations
rg -i "hexagon" --type cmake --type cpp --type h -B 2 -A 2

Repository: tile-ai/tilelang

Length of output: 5477


🏁 Script executed:

# Look for any cmake configuration files related to LLVM setup
find . -path "*/cmake/*.cmake" -type f | head -20

Repository: tile-ai/tilelang

Length of output: 145


Hexagon support is enabled without verifying LLVM has the Hexagon backend.

When USE_LLVM holds any truthy value (for example ON or a path to llvm-config), TILELANG_HEXAGON_ENABLED is unconditionally defined. This happens even if:

  • llvm-config was not found (only a warning is issued on line 162)
  • The discovered LLVM lacks the Hexagon backend

Since hexagon_runtime.cc and Hexagon-specific transforms depend on this definition, the build can advertise Hexagon support and fail later in less obvious places.

Query llvm-config --targets-built after locating it, and only enable Hexagon support if the output contains Hexagon. Additionally, consider making the missing llvm-config a fatal error since Hexagon requires it.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@CMakeLists.txt` around lines 157 - 186, The build currently defines
TILELANG_HEXAGON_ENABLED whenever USE_LLVM is truthy; update the CMake logic
(around the llvm-config discovery that sets LLVM_CONFIG_PATH/USE_LLVM and the
later block that sets TILELANG_HEXAGON_ENABLED) to actually verify the LLVM
toolchain has Hexagon: after finding llvm-config (LLVM_CONFIG_PATH) run
llvm-config --targets-built (or invoke it via execute_process) and check the
output contains "Hexagon"; only then set the TILELANG_HEXAGON_ENABLED definition
and the "TileLang Build with LLVM" status message. If llvm-config is not found
or it does not report Hexagon, change the earlier non-fatal warning into a fatal
error (or at minimum do not enable TILELANG_HEXAGON_ENABLED) so
hexagon_runtime.cc and related transforms are not advertised/compiled
incorrectly; refer to the LLVM_CONFIG_PATH/USE_LLVM and TILELANG_HEXAGON_ENABLED
symbols to locate where to add the execute_process and conditional enablement.

endif()

# Re-apply TileLang's preferred backend settings after TVM's config may have
# overridden the USE_* cache entries.
foreach(BACKEND IN LISTS TILELANG_BACKENDS)
Expand All @@ -170,6 +200,11 @@ set(USE_GTEST OFF)
# Include directories for TileLang
set(TILE_LANG_INCLUDES ${TVM_INCLUDES})

# Add TVM's 'src' directory to the include path so that headers living under it
# can be included relative to that directory; hexagon_runtime.cc does this with
# #include <runtime/hexagon/hexagon_htp.h>.
list(APPEND TILE_LANG_INCLUDES
${TVM_SOURCE}/src
)

# Collect source files
file(GLOB TILE_LANG_SRCS
src/*.cc
Expand All @@ -188,6 +223,7 @@ file(GLOB TILE_LANG_SRCS
# Always include CPU-safe runtime helpers.
# hexagon_runtime.cc is listed unconditionally: its Hexagon-specific contents
# are wrapped in #ifdef TILELANG_HEXAGON_ENABLED, so without that definition the
# file registers nothing.
list(APPEND TILE_LANG_SRCS
src/runtime/error_helpers.cc
src/runtime/hexagon_runtime.cc
)

set(TILELANG_OUTPUT_TARGETS tilelang tvm)
Expand Down
38 changes: 38 additions & 0 deletions src/runtime/hexagon_runtime.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include <tvm/ffi/function.h>
#include <tvm/ffi/reflection/registry.h>

#ifdef TILELANG_HEXAGON_ENABLED

#include <runtime/hexagon/hexagon_htp.h>

namespace tvm {
namespace tilelang {

TVM_FFI_STATIC_INIT_BLOCK() {
namespace refl = tvm::ffi::reflection;

refl::GlobalDef().def_packed(
"tilelang.hexagon.hmx_kernel_launch",
[](ffi::PackedArgs args, ffi::Any *rv) {
// args[0] is the kernel Function; remaining args are forwarded to it.
// AnyView supports .cast<T>() for type-safe extraction.
ffi::Function kernel = args[0].cast<ffi::Function>();

// PackedArgs(const AnyView* data, int32_t size) — slice past the first
// arg. args.data() returns const AnyView*, args.size() returns int32_t.
ffi::PackedArgs kernel_args(args.data() + 1, args.size() - 1);
Comment on lines +16 to +23
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Validate args.size() before reading args[0] and slicing the tail.

This packed function is globally callable from Python/C++. An empty call will access args[0] and build PackedArgs(..., -1) before the HMX guard runs, so the failure mode is much harsher than a normal runtime error.

Proposed fix
       "tilelang.hexagon.hmx_kernel_launch",
       [](ffi::PackedArgs args, ffi::Any *rv) {
+        ICHECK_GE(args.size(), 1)
+            << "tilelang.hexagon.hmx_kernel_launch expects a kernel function as arg0";
         // args[0] is the kernel Function; remaining args are forwarded to it.
         // AnyView supports .cast<T>() for type-safe extraction.
         ffi::Function kernel = args[0].cast<ffi::Function>();
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
[](ffi::PackedArgs args, ffi::Any *rv) {
// args[0] is the kernel Function; remaining args are forwarded to it.
// AnyView supports .cast<T>() for type-safe extraction.
ffi::Function kernel = args[0].cast<ffi::Function>();
// PackedArgs(const AnyView* data, int32_t size) — slice past the first
// arg. args.data() returns const AnyView*, args.size() returns int32_t.
ffi::PackedArgs kernel_args(args.data() + 1, args.size() - 1);
[](ffi::PackedArgs args, ffi::Any *rv) {
ICHECK_GE(args.size(), 1)
<< "tilelang.hexagon.hmx_kernel_launch expects a kernel function as arg0";
// args[0] is the kernel Function; remaining args are forwarded to it.
// AnyView supports .cast<T>() for type-safe extraction.
ffi::Function kernel = args[0].cast<ffi::Function>();
// PackedArgs(const AnyView* data, int32_t size) — slice past the first
// arg. args.data() returns const AnyView*, args.size() returns int32_t.
ffi::PackedArgs kernel_args(args.data() + 1, args.size() - 1);
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/runtime/hexagon_runtime.cc` around lines 16 - 23, Ensure you validate
args.size() before accessing args[0] or constructing ffi::PackedArgs to avoid UB
when called with zero arguments: in the lambda that reads args and creates
kernel and kernel_args, first check that args.size() >= 1, and if not set an
appropriate runtime error/return (e.g., populate rv with an error or throw a
tvm/ffi runtime error) instead of proceeding; specifically modify the block that
uses args[0], ffi::Function kernel, and ffi::PackedArgs kernel_args(args.data()
+ 1, args.size() - 1) to perform the size check and early error return.


// RAII: powers on HMX on construction, releases on scope exit.
tvm::runtime::hexagon::HexagonHtp htp;

kernel.CallPacked(kernel_args, rv);
});
}

} // namespace tilelang
} // namespace tvm

#else
// Hexagon runtime support disabled.
// Build with -DUSE_LLVM=ON to enable HMX kernel launch support.
#endif // TILELANG_HEXAGON_ENABLED
99 changes: 99 additions & 0 deletions src/transform/lower_hexagon_intrinsics.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#include <tvm/ffi/reflection/registry.h>
#include <tvm/target/target_info.h>
#include <tvm/tir/builtin.h>
#include <tvm/tir/expr.h>
#include <tvm/tir/op.h>
#include <tvm/tir/stmt_functor.h>
#include <tvm/tir/transform.h>

namespace tvm {
namespace tilelang {

using namespace tir;
using tvm::ffi::Array;

class HexagonIntrinsicLowerer : public StmtExprMutator {
public:
HexagonIntrinsicLowerer() {}

Stmt Run(Stmt stmt) { return this->VisitStmt(stmt); }

Stmt VisitStmt_(const EvaluateNode *op) override {
if (const CallNode *call = op->value.as<CallNode>()) {
if (call->op.same_as(builtin::call_extern())) {
if (const StringImmNode *func_name =
call->args[0].as<StringImmNode>()) {

// Lower HMX MMA placeholder
if (func_name->value == "hmx_mma_placeholder") {
Array<PrimExpr> new_args;
new_args.push_back(StringImm("HexKL_mma_i8acc32"));
new_args.push_back(
call->args[3]); // C_acc (accumulator — first arg to HexKL)
new_args.push_back(call->args[1]); // A_vtcm
new_args.push_back(call->args[2]); // B_vtcm
return Evaluate(
Comment on lines +28 to +35
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Guard the placeholder arity before indexing call->args[3].

This pass is globally callable on arbitrary TIR, so a malformed call_extern("hmx_mma_placeholder", ...) with fewer than four arguments will trip TVM's bounds checks and fail the compile with a hard error. Validate the expected operand count before rewriting, then either keep the node unchanged or emit a targeted diagnostic.

Proposed fix
           // Lower HMX MMA placeholder
           if (func_name->value == "hmx_mma_placeholder") {
+            ICHECK_EQ(call->args.size(), 4)
+                << "hmx_mma_placeholder expects exactly 3 operands";
             Array<PrimExpr> new_args;
             new_args.push_back(StringImm("HexKL_mma_i8acc32"));
             new_args.push_back(
                 call->args[3]); // C_acc (accumulator — first arg to HexKL)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if (func_name->value == "hmx_mma_placeholder") {
Array<PrimExpr> new_args;
new_args.push_back(StringImm("HexKL_mma_i8acc32"));
new_args.push_back(
call->args[3]); // C_acc (accumulator — first arg to HexKL)
new_args.push_back(call->args[1]); // A_vtcm
new_args.push_back(call->args[2]); // B_vtcm
return Evaluate(
if (func_name->value == "hmx_mma_placeholder") {
ICHECK_EQ(call->args.size(), 4)
<< "hmx_mma_placeholder expects exactly 3 operands";
Array<PrimExpr> new_args;
new_args.push_back(StringImm("HexKL_mma_i8acc32"));
new_args.push_back(
call->args[3]); // C_acc (accumulator — first arg to HexKL)
new_args.push_back(call->args[1]); // A_vtcm
new_args.push_back(call->args[2]); // B_vtcm
return Evaluate(
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/transform/lower_hexagon_intrinsics.cc` around lines 28 - 35, The rewrite
for the "hmx_mma_placeholder" intrinsic currently indexes call->args[3] without
checking arity; update the branch that handles func_name->value ==
"hmx_mma_placeholder" in lower_hexagon_intrinsics.cc to first validate
call->args.size() >= 4 and only perform the Array<PrimExpr> construction and
return Evaluate(...) when that check passes; if the check fails, preserve and
return the original Call node (or emit a targeted diagnostic/log) instead of
indexing out of bounds to avoid hard runtime/compile errors.

Call(DataType::Int(32), builtin::call_extern(), new_args));
}

// HexagonDmaCopy is not yet available in HexKL v73.
// to-do: LowerHexagonDMA pass.
}
}
}
return StmtExprMutator::VisitStmt_(op);
}
};

namespace transform {

// Builds the PrimFunc-level pass that runs HexagonIntrinsicLowerer over a
// function body and stores the rewritten body back into the function.
// The pass is created with opt level 0, the name
// "tilelang.transform.LowerHexagonIntrinsics" and no required passes.
tvm::transform::Pass LowerHexagonIntrinsics() {
  // By-value capture default is kept; the lambda references no outer locals.
  auto rewrite_body = [=](PrimFunc func, IRModule mod,
                          tvm::transform::PassContext pass_ctx) {
    // Obtain a writable node for the function before replacing its body.
    auto *node = func.CopyOnWrite();
    HexagonIntrinsicLowerer lowerer;
    node->body = lowerer.Run(std::move(node->body));
    return func;
  };
  return tvm::tir::transform::CreatePrimFuncPass(
      rewrite_body, 0, "tilelang.transform.LowerHexagonIntrinsics", {});
}

// Memory scope descriptors
// These are queried by TVM's storage analysis to understand capacity/alignment.
// Fields confirmed from tvm/target/target_info.h:
// unit_bits — addressable unit size in bits
// max_num_bits — total memory capacity in bits
// max_simd_bits — widest SIMD operation in bits (HVX = 1024-bit)
// head_address — base address PrimExpr (IntImm 0 = no fixed mapping)

// Produces the MemoryInfo descriptor that is registered under
// "tvm.info.mem.global.hmx.acc" in this file.
static MemoryInfo GetHmxAccMem() {
  auto info = tvm::ffi::make_object<MemoryInfoNode>();
  // Base address expression: the constant 0 as a 32-bit integer.
  info->head_address = IntImm(DataType::Int(32), 0);
  // Widest SIMD operation in bits (HVX vector width per the original note).
  info->max_simd_bits = 1024;
  // Capacity in bits: a 32 x 32 tile of 32-bit elements = 32768 bits.
  info->max_num_bits = 32LL * 32 * 32;
  // Addressable unit: one 32-bit (int32) element.
  info->unit_bits = 32;
  return MemoryInfo(info);
}

// Produces the MemoryInfo descriptor that is registered under
// "tvm.info.mem.global.vtcm" in this file.
static MemoryInfo GetVtcmMem() {
  auto info = tvm::ffi::make_object<MemoryInfoNode>();
  // Base address expression: the constant 0 as a 32-bit integer.
  info->head_address = IntImm(DataType::Int(32), 0);
  // Widest SIMD operation in bits (HVX vector width per the original note).
  info->max_simd_bits = 1024;
  // Capacity in bits: 8 MB (8 * 1024 * 1024 bytes) times 8 bits per byte;
  // the original note attributes this size to VTCM on Hexagon v73.
  info->max_num_bits = 8LL * 1024 * 1024 * 8;
  // Addressable unit: one byte.
  info->unit_bits = 8;
  return MemoryInfo(info);
}

TVM_FFI_STATIC_INIT_BLOCK() {
namespace refl = tvm::ffi::reflection;
refl::GlobalDef()
.def("tilelang.transform.LowerHexagonIntrinsics", LowerHexagonIntrinsics)
.def("tvm.info.mem.global.hmx.acc", GetHmxAccMem)
.def("tvm.info.mem.global.vtcm", GetVtcmMem);
}

} // namespace transform
} // namespace tilelang
} // namespace tvm
141 changes: 141 additions & 0 deletions testing/python/hexagon/diagnose_hmx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import pytest
from tilelang import tvm as tvm
import tilelang as tl
import tilelang.language as T
from tilelang.intrinsics.hexagon import hmx


def has_hexagon_codegen():
    """Probe whether a Hexagon v73 LLVM target can be created.

    Returns True when ``tvm.runtime.enabled("llvm")`` is truthy and building
    the Hexagon target object does not raise. Returns False when LLVM is not
    enabled or when any exception is raised during the probe.
    """
    try:
        if tvm.runtime.enabled("llvm"):
            # Constructing the target is the probe; the object is discarded.
            tvm.target.Target("llvm -mtriple=hexagon -mcpu=hexagonv73")
            return True
        return False
    except Exception:
        # Best-effort probe: every failure is reported as "not available".
        return False
Comment on lines +9 to +15
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

cat -n testing/python/hexagon/diagnose_hmx.py | head -120

Repository: tile-ai/tilelang

Length of output: 5134


🏁 Script executed:

cat -n testing/python/hexagon/diagnose_hmx.py | sed -n '110,150p'

Repository: tile-ai/tilelang

Length of output: 1775


🏁 Script executed:

# Search for LowerHMXIntrinsics and LowerHexagonIntrinsics in the codebase
rg "LowerHMXIntrinsics|LowerHexagonIntrinsics" -i

Repository: tile-ai/tilelang

Length of output: 793


🏁 Script executed:

# Check the lower.py file to see what exceptions might be expected
cat -n tilelang/engine/lower.py | head -60

Repository: tile-ai/tilelang

Length of output: 2467


🏁 Script executed:

# Check what exceptions TVM Target raises by looking for its usage and error handling
rg "tvm.target.Target" -A 3 -B 1 | head -80

Repository: tile-ai/tilelang

Length of output: 5048


🏁 Script executed:

# Look for TVM exception types and how Target errors are typically caught
rg "except.*Error|except.*Exception" testing/python/hexagon/diagnose_hmx.py -B 3

Repository: tile-ai/tilelang

Length of output: 358


🏁 Script executed:

# Check if there are TVM-specific exception types in the codebase
rg "from tvm|import.*tvm" testing/python/hexagon/diagnose_hmx.py | head -10

Repository: tile-ai/tilelang

Length of output: 92


🏁 Script executed:

# Check TVM exception handling patterns in the codebase
rg "tvm.error|TVMError|from tvm.*import|except.*as e" -A 2 | head -100

Repository: tile-ai/tilelang

Length of output: 5401


🏁 Script executed:

# Check the diagnostic test more carefully - what's the context around line 141?
cat -n testing/python/hexagon/diagnose_hmx.py | sed -n '135,145p'

Repository: tile-ai/tilelang

Length of output: 561


🏁 Script executed:

# Verify the actual pass name references
rg "LowerHMXIntrinsics|LowerHexagonIntrinsics" -B 2 -A 2

Repository: tile-ai/tilelang

Length of output: 2746


🏁 Script executed:

# Check if there are similar skip-gate functions elsewhere in the codebase with better exception handling
rg "pytest.mark.skipif" -B 2 -A 2 | head -60

Repository: tile-ai/tilelang

Length of output: 4792


Narrow the skip-gate exception handling to prevent silent skips.

Line 14 catches every exception and returns False, which can hide real regressions and silently skip both gated tests (Lines 64 and 110). Catch only expected probe failures and re-raise unexpected exceptions.

Suggested fix
 def has_hexagon_codegen():
     try:
         if not tvm.runtime.enabled("llvm"):
             return False
         tvm.target.Target("llvm -mtriple=hexagon -mcpu=hexagonv73")
         return True
-    except Exception:
-        return False
+    except Exception as err:
+        msg = str(err).lower()
+        # Expected probe failures: missing Hexagon/LLVM target support.
+        if "hexagon" in msg or "llvm" in msg or "target" in msg:
+            return False
+        raise

Please verify and, if available in your TVM version, prefer a concrete TVM exception type (e.g., TVM-specific error class) over message matching.

Also applies to: 64-65, 110-111

🧰 Tools
🪛 Ruff (0.15.12)

[warning] 14-14: Do not catch blind exception: Exception

(BLE001)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@testing/python/hexagon/diagnose_hmx.py` around lines 9 - 15, The try/except
in diagnose_hmx.py currently swallows all exceptions when probing LLVM/Hexagon
target (the block that calls tvm.runtime.enabled("llvm") and
tvm.target.Target(...)); replace the broad except Exception with a narrow catch
for the expected TVM probe error (e.g., tvm.error.TVMError or the TVM-specific
probe exception available in your TVM version, falling back to RuntimeError only
if necessary) and re-raise any other exceptions so real failures are not
silenced; apply the same change to the other probe sites referenced (the blocks
around lines with the gated tests at the other two probe locations) and ensure
error handling logs or returns False only for the known probe failure type while
allowing unexpected exceptions to propagate.



def build_hmx_matmul(M, N, K):
    """Build a TileLang prim_func that multiplies int8 matrices through hmx.mma.

    Args:
        M: Row count of A_host and C_host.
        N: Column count of B_host and C_host.
        K: Column count of A_host and row count of B_host.

    Returns:
        The ``main`` function decorated with ``T.prim_func``.
    """
    @T.prim_func
    def main(
        A_host: T.Tensor((M, K), "int8"),
        B_host: T.Tensor((K, N), "int8"),
        C_host: T.Tensor((M, N), "int32"),
    ):
        # Staging buffers in the "global.vtcm" scope and an int32 accumulator
        # in the "global.hmx.acc" scope.
        A_vtcm = T.alloc_fragment((M, K), "int8", scope="global.vtcm")
        B_vtcm = T.alloc_fragment((K, N), "int8", scope="global.vtcm")
        C_acc = T.alloc_fragment((M, N), "int32", scope="global.hmx.acc")

        # Copy both inputs element by element into their staging buffers.
        for i, k in T.grid(M, K):
            A_vtcm[i, k] = A_host[i, k]
        for k, j in T.grid(K, N):
            B_vtcm[k, j] = B_host[k, j]
        # Zero the accumulator before the multiply.
        for i, j in T.grid(M, N):
            C_acc[i, j] = T.cast(0, "int32")

        # The matrix-multiply intrinsic under test.
        hmx.mma(A_vtcm, B_vtcm, C_acc)

        # Copy the accumulator contents to the output tensor.
        for i, j in T.grid(M, N):
            C_host[i, j] = C_acc[i, j]

    return main


# Diagnostics (always run, no skipif)
def test_000_environment():
    """Print a report of the TVM installation and Hexagon target availability.

    The test makes no assertions; it only prints, and a failure to construct
    the Hexagon target is reported in the output rather than raised.
    """
    rule = "=" * 60
    print("\n")
    print(rule)
    print("ENVIRONMENT REPORT")
    print(rule)
    print(f" tvm.__file__ : {tvm.__file__}")
    print(f" tvm.__version__ : {tvm.__version__}")
    print(f" llvm enabled : {tvm.runtime.enabled('llvm')}")
    print(f" has_hexagon_codegen(): {has_hexagon_codegen()}")

    try:
        hexagon_target = tvm.target.Target("llvm -mtriple=hexagon -mcpu=hexagonv73")
        print(f" hexagon target : OK → {hexagon_target}")
    except Exception as err:
        print(f" hexagon target : FAILED → {err}")
    print(rule)


@pytest.mark.skipif(not has_hexagon_codegen(), reason="Hexagon LLVM not available")
def test_001_ir_dump():
    """Compile the 32x32x32 HMX matmul and print its generated kernel source.

    A table of substring probes is printed for information only; the single
    hard assertion is that the source contains the Hexagon target triple.
    """
    M, N, K = 32, 32, 32
    hexagon_target = tvm.target.Target("llvm -mtriple=hexagon -mcpu=hexagonv73")
    compiled = tl.compile(build_hmx_matmul(M, N, K), target=hexagon_target)
    ir = compiled.kernel_source

    rule = "=" * 60
    print("\n")
    print(rule)
    print("FULL KERNEL SOURCE")
    print(rule)
    print(ir)
    print(rule)

    # Substrings to look for, paired with a label; reported but not asserted.
    probes = [
        ('target triple = "hexagon"', "target triple"),
        ("A_vtcm", "VTCM alloc A"),
        ("B_vtcm", "VTCM alloc B"),
        ("C_acc", "HMX accumulator"),
        ("hmx_mma_placeholder", "placeholder NOT lowered (bad)"),
        ("HexKL_mma_i8acc32", "HexKL intrinsic (good)"),
        ("HexKL_mma_i8i32", "HexKL alt spelling"),
        ("call_extern", "any call_extern"),
        ("llvm.hexagon", "LLVM hexagon intrinsic"),
    ]

    print("\nASSERTION PROBE RESULTS:")
    for needle, label in probes:
        if needle in ir:
            status = "✓ FOUND "
        else:
            status = "✗ MISSING"
        print(f" {status} [{label}] '{needle}'")

    print()
    # Only hard-assert on things we're sure about
    assert 'target triple = "hexagon"' in ir, "Not even targeting Hexagon — target string is wrong or codegen didn't run"


@pytest.mark.skipif(not has_hexagon_codegen(), reason="Hexagon LLVM not available")
def test_002_hmx_lowering_status():
    """Check whether the HMX placeholder was lowered in the generated source.

    Fails when ``hmx_mma_placeholder`` is still present in the kernel source,
    or when it is gone but neither a ``HexKL_mma*`` call nor an
    ``llvm.hexagon`` intrinsic appears in its place.
    """
    M, N, K = 32, 32, 32
    func = build_hmx_matmul(M, N, K)
    target = tvm.target.Target("llvm -mtriple=hexagon -mcpu=hexagonv73")
    kernel = tl.compile(func, target=target)
    ir = kernel.kernel_source

    placeholder_present = "hmx_mma_placeholder" in ir
    hexkl_present = any(
        s in ir
        for s in [
            "HexKL_mma_i8acc32",
            "HexKL_mma_i8i32",
            "HexKL_mma",
        ]
    )
    llvm_intrin_present = "llvm.hexagon" in ir

    print(f"\n placeholder still in IR : {placeholder_present}")
    print(f" HexKL intrinsic in IR : {hexkl_present}")
    print(f" llvm.hexagon in IR : {llvm_intrin_present}")

    if placeholder_present:
        pytest.fail(
            "hmx_mma_placeholder was NOT lowered.\n"
            "_lower_hexagon_intrinsics is not wired into the compile pipeline.\n"
            "Check lower() in tilelang/engine/lower.py"
        )
    elif not hexkl_present and not llvm_intrin_present:
        # The message names the pass as it is registered:
        # tilelang.transform.LowerHexagonIntrinsics.
        pytest.fail(
            "HMX placeholder is gone but no HexKL/llvm.hexagon intrinsic was emitted.\n"
            "The lowering pass may be silently dropping the MMA op.\n"
            "Check LowerHexagonIntrinsics implementation."
        )
    else:
        print(" ✓ HMX intrinsics correctly lowered")
Loading