From 423b0bf075189d9638715b490475ead9e6b53c34 Mon Sep 17 00:00:00 2001 From: SiriusNEO Date: Wed, 29 Apr 2026 18:00:21 +0800 Subject: [PATCH] [Refactor][CodeGen] Refactor CodeGen part for multi-backend decoupling --- CMakeLists.txt | 9 ++++--- src/backend/cuda/CMakeLists.txt | 18 ++++++------- .../cuda/codegen}/codegen_cuda.cc | 8 +++--- .../cuda/codegen}/codegen_cuda.h | 0 .../cuda/codegen}/codegen_cutedsl.cc | 6 ++--- .../cuda/codegen}/codegen_cutedsl.h | 0 .../cuda/codegen}/codegen_py.cc | 2 +- .../cuda/codegen}/codegen_py.h | 0 .../cuda/codegen}/intrin_rule_cuda.cc | 2 +- src/{target => backend/cuda/codegen}/ptx.cc | 0 src/{target => backend/cuda/codegen}/ptx.h | 0 .../cuda/codegen}/rt_mod_cuda.cc | 2 +- .../cuda/codegen}/rt_mod_cutedsl.cc | 0 .../cuda/codegen}/stubs/cuda.cc | 0 .../cuda/codegen}/stubs/cuda.h | 0 .../cuda/codegen}/stubs/cudart.cc | 0 .../cuda/codegen}/stubs/nvrtc.cc | 0 .../cuda/codegen}/stubs/vendor/cuda.h | 0 src/backend/metal/CMakeLists.txt | 2 +- src/backend/metal/codegen/rt_mod_metal.cc | 27 +++++++++++++++++++ src/backend/rocm/CMakeLists.txt | 8 +++--- .../rocm/codegen}/codegen_hip.cc | 4 +-- .../rocm/codegen}/codegen_hip.h | 0 .../rocm/codegen}/intrin_rule_hip.cc | 2 +- .../rocm/codegen}/rt_mod_hip.cc | 0 .../rocm/codegen}/stubs/hip.cc | 0 .../rocm/codegen}/stubs/hip.h | 0 .../rocm/codegen}/stubs/hiprtc.cc | 0 .../rocm/codegen}/stubs/vendor/hip_runtime.h | 0 src/op/builtin.cc | 4 +-- src/op/utils.h | 2 +- src/runtime/runtime.cc | 2 +- src/target/codegen_c_host.h | 7 +++++ 33 files changed, 71 insertions(+), 34 deletions(-) rename src/{target => backend/cuda/codegen}/codegen_cuda.cc (99%) rename src/{target => backend/cuda/codegen}/codegen_cuda.h (100%) rename src/{target => backend/cuda/codegen}/codegen_cutedsl.cc (99%) rename src/{target => backend/cuda/codegen}/codegen_cutedsl.h (100%) rename src/{target => backend/cuda/codegen}/codegen_py.cc (99%) rename src/{target => backend/cuda/codegen}/codegen_py.h (100%) rename src/{target => backend/cuda/codegen}/intrin_rule_cuda.cc (99%) rename src/{target => backend/cuda/codegen}/ptx.cc (100%) rename src/{target => backend/cuda/codegen}/ptx.h (100%) rename src/{target => backend/cuda/codegen}/rt_mod_cuda.cc (99%) rename src/{target => backend/cuda/codegen}/rt_mod_cutedsl.cc (100%) rename src/{target => backend/cuda/codegen}/stubs/cuda.cc (100%) rename src/{target => backend/cuda/codegen}/stubs/cuda.h (100%) rename src/{target => backend/cuda/codegen}/stubs/cudart.cc (100%) rename src/{target => backend/cuda/codegen}/stubs/nvrtc.cc (100%) rename src/{target => backend/cuda/codegen}/stubs/vendor/cuda.h (100%) create mode 100644 src/backend/metal/codegen/rt_mod_metal.cc rename src/{target => backend/rocm/codegen}/codegen_hip.cc (99%) rename src/{target => backend/rocm/codegen}/codegen_hip.h (100%) rename src/{target => backend/rocm/codegen}/intrin_rule_hip.cc (99%) rename src/{target => backend/rocm/codegen}/rt_mod_hip.cc (100%) rename src/{target => backend/rocm/codegen}/stubs/hip.cc (100%) rename src/{target => backend/rocm/codegen}/stubs/hip.h (100%) rename src/{target => backend/rocm/codegen}/stubs/hiprtc.cc (100%) rename src/{target => backend/rocm/codegen}/stubs/vendor/hip_runtime.h (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index cc8e864e42..2ec4e329d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -169,7 +169,9 @@ set(USE_GTEST OFF) # Include directories for TileLang set(TILE_LANG_INCLUDES ${TVM_INCLUDES}) -list(APPEND TILE_LANG_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/src) +# Add TileLang's own src/ to include path so cross-directory includes +# can use paths relative to src/ (e.g. "target/utils.h", "op/builtin.h"). +list(INSERT TILE_LANG_INCLUDES 0 "${CMAKE_CURRENT_SOURCE_DIR}/src") # Collect source files file(GLOB TILE_LANG_SRCS @@ -186,8 +188,9 @@ file(GLOB TILE_LANG_SRCS src/target/codegen_c_host.cc src/target/codegen_c.cc src/target/rt_mod_c.cc - # intrin_rule doesn't have system dependency - src/target/intrin_rule*.cc + # intrin_rule doesn't have system dependency; always compiled regardless of backend + src/backend/cuda/codegen/intrin_rule_cuda.cc + src/backend/rocm/codegen/intrin_rule_hip.cc ) # Always include CPU-safe runtime helpers diff --git a/src/backend/cuda/CMakeLists.txt b/src/backend/cuda/CMakeLists.txt index 857ab4c00f..2368f36e74 100644 --- a/src/backend/cuda/CMakeLists.txt +++ b/src/backend/cuda/CMakeLists.txt @@ -29,7 +29,7 @@ if(TILELANG_USE_CUDA_STUBS) # - cuModuleLoadData, cuLaunchKernel, cuMemsetD32_v2, etc. # These can be called directly without any wrapper macros. # ============================================================================ - add_library(cuda_stub SHARED src/target/stubs/cuda.cc) + add_library(cuda_stub SHARED src/backend/cuda/codegen/stubs/cuda.cc) target_include_directories(cuda_stub PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) # Export symbols with visibility="default" when building target_compile_definitions(cuda_stub PRIVATE TILELANG_CUDA_STUB_EXPORTS) @@ -53,7 +53,7 @@ if(TILELANG_USE_CUDA_STUBS) # The stub exports a minimal set of CUDA Runtime API entrypoints used by TVM # and lazily loads libcudart at runtime on first API call. # ============================================================================ - add_library(cudart_stub SHARED src/target/stubs/cudart.cc) + add_library(cudart_stub SHARED src/backend/cuda/codegen/stubs/cudart.cc) target_include_directories(cudart_stub PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) target_compile_definitions(cudart_stub PRIVATE TILELANG_CUDART_STUB_EXPORTS) target_link_libraries(cudart_stub PRIVATE ${CMAKE_DL_LIBS}) @@ -81,7 +81,7 @@ if(TILELANG_USE_CUDA_STUBS) # The stub exports a minimal set of NVRTC C API entrypoints used by TVM and # lazily loads libnvrtc at runtime on first API call. # ============================================================================ - add_library(nvrtc_stub SHARED src/target/stubs/nvrtc.cc) + add_library(nvrtc_stub SHARED src/backend/cuda/codegen/stubs/nvrtc.cc) target_include_directories(nvrtc_stub PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) target_compile_definitions(nvrtc_stub PRIVATE TILELANG_NVRTC_STUB_EXPORTS) target_link_libraries(nvrtc_stub PRIVATE ${CMAKE_DL_LIBS}) @@ -102,13 +102,13 @@ endif() file(GLOB TILE_LANG_CUDA_SRCS src/runtime/runtime.cc - src/target/ptx.cc - src/target/codegen_cuda.cc - src/target/codegen_py.cc + src/backend/cuda/codegen/ptx.cc + src/backend/cuda/codegen/codegen_cuda.cc + src/backend/cuda/codegen/codegen_py.cc src/target/codegen_utils.cc - src/target/codegen_cutedsl.cc - src/target/rt_mod_cuda.cc - src/target/rt_mod_cutedsl.cc + src/backend/cuda/codegen/codegen_cutedsl.cc + src/backend/cuda/codegen/rt_mod_cuda.cc + src/backend/cuda/codegen/rt_mod_cutedsl.cc src/backend/cuda/op/*.cc ) list(REMOVE_ITEM TILE_LANG_CUDA_SRCS diff --git a/src/target/codegen_cuda.cc b/src/backend/cuda/codegen/codegen_cuda.cc similarity index 99% rename from src/target/codegen_cuda.cc rename to src/backend/cuda/codegen/codegen_cuda.cc index fd1c90ff05..d50b291832 100644 --- a/src/target/codegen_cuda.cc +++ b/src/backend/cuda/codegen/codegen_cuda.cc @@ -15,11 +15,11 @@ #include #include -#include "../op/builtin.h" -#include "../transform/common/attr.h" -#include "./ptx.h" -#include "./utils.h" #include "arith/pattern_match.h" +#include "backend/cuda/codegen/ptx.h" +#include "op/builtin.h" +#include "target/utils.h" +#include "transform/common/attr.h" namespace tvm { namespace codegen { diff --git a/src/target/codegen_cuda.h b/src/backend/cuda/codegen/codegen_cuda.h similarity index 100% rename from src/target/codegen_cuda.h rename to src/backend/cuda/codegen/codegen_cuda.h diff --git a/src/target/codegen_cutedsl.cc b/src/backend/cuda/codegen/codegen_cutedsl.cc similarity index 99% rename from src/target/codegen_cutedsl.cc rename to src/backend/cuda/codegen/codegen_cutedsl.cc index 0ee1ba3b09..90a6db59b8 100644 --- a/src/target/codegen_cutedsl.cc +++ b/src/backend/cuda/codegen/codegen_cutedsl.cc @@ -3,8 +3,8 @@ */ #include "codegen_cutedsl.h" -#include "codegen_utils.h" -#include "ptx.h" +#include "backend/cuda/codegen/ptx.h" +#include "target/codegen_utils.h" #include #include #include @@ -19,8 +19,8 @@ #include #include -#include "../op/builtin.h" #include "arith/pattern_match.h" +#include "op/builtin.h" namespace tvm { namespace codegen { diff --git a/src/target/codegen_cutedsl.h b/src/backend/cuda/codegen/codegen_cutedsl.h similarity index 100% rename from src/target/codegen_cutedsl.h rename to src/backend/cuda/codegen/codegen_cutedsl.h diff --git a/src/target/codegen_py.cc b/src/backend/cuda/codegen/codegen_py.cc similarity index 99% rename from src/target/codegen_py.cc rename to src/backend/cuda/codegen/codegen_py.cc index 6e0b787bb2..bcd4b6e819 100644 --- a/src/target/codegen_py.cc +++ b/src/backend/cuda/codegen/codegen_py.cc @@ -2,7 +2,7 @@ * \file codegen_py.cc */ #include "codegen_py.h" -#include "codegen_utils.h" +#include "target/codegen_utils.h" #include #include diff --git a/src/target/codegen_py.h b/src/backend/cuda/codegen/codegen_py.h similarity index 100% rename from src/target/codegen_py.h rename to src/backend/cuda/codegen/codegen_py.h diff --git a/src/target/intrin_rule_cuda.cc b/src/backend/cuda/codegen/intrin_rule_cuda.cc similarity index 99% rename from src/target/intrin_rule_cuda.cc rename to src/backend/cuda/codegen/intrin_rule_cuda.cc index 7658d79cc1..54aea8bd70 100644 --- a/src/target/intrin_rule_cuda.cc +++ b/src/backend/cuda/codegen/intrin_rule_cuda.cc @@ -5,7 +5,7 @@ #include #include -#include "../support/ffi_aliases.h" +#include "support/ffi_aliases.h" #include "target/intrin_rule.h" namespace tvm { diff --git a/src/target/ptx.cc b/src/backend/cuda/codegen/ptx.cc similarity index 100% rename from src/target/ptx.cc rename to src/backend/cuda/codegen/ptx.cc diff --git a/src/target/ptx.h b/src/backend/cuda/codegen/ptx.h similarity index 100% rename from src/target/ptx.h rename to src/backend/cuda/codegen/ptx.h diff --git a/src/target/rt_mod_cuda.cc b/src/backend/cuda/codegen/rt_mod_cuda.cc similarity index 99% rename from src/target/rt_mod_cuda.cc rename to src/backend/cuda/codegen/rt_mod_cuda.cc index 37db80d6ac..3f9b58b1bc 100644 --- a/src/target/rt_mod_cuda.cc +++ b/src/backend/cuda/codegen/rt_mod_cuda.cc @@ -1,8 +1,8 @@ -#include "../transform/common/attr.h" #include "codegen_cuda.h" #include "runtime/cuda/cuda_module.h" #include "runtime/meta_data.h" #include "runtime/pack_args.h" +#include "transform/common/attr.h" #include #include diff --git a/src/target/rt_mod_cutedsl.cc b/src/backend/cuda/codegen/rt_mod_cutedsl.cc similarity index 100% rename from src/target/rt_mod_cutedsl.cc rename to src/backend/cuda/codegen/rt_mod_cutedsl.cc diff --git a/src/target/stubs/cuda.cc b/src/backend/cuda/codegen/stubs/cuda.cc similarity index 100% rename from src/target/stubs/cuda.cc rename to src/backend/cuda/codegen/stubs/cuda.cc diff --git a/src/target/stubs/cuda.h b/src/backend/cuda/codegen/stubs/cuda.h similarity index 100% rename from src/target/stubs/cuda.h rename to src/backend/cuda/codegen/stubs/cuda.h diff --git a/src/target/stubs/cudart.cc b/src/backend/cuda/codegen/stubs/cudart.cc similarity index 100% rename from src/target/stubs/cudart.cc rename to src/backend/cuda/codegen/stubs/cudart.cc diff --git a/src/target/stubs/nvrtc.cc b/src/backend/cuda/codegen/stubs/nvrtc.cc similarity index 100% rename from src/target/stubs/nvrtc.cc rename to src/backend/cuda/codegen/stubs/nvrtc.cc diff --git a/src/target/stubs/vendor/cuda.h b/src/backend/cuda/codegen/stubs/vendor/cuda.h similarity index 100% rename from src/target/stubs/vendor/cuda.h rename to src/backend/cuda/codegen/stubs/vendor/cuda.h diff --git a/src/backend/metal/CMakeLists.txt b/src/backend/metal/CMakeLists.txt index 9dbf33204a..330e3f728b 100644 --- a/src/backend/metal/CMakeLists.txt +++ b/src/backend/metal/CMakeLists.txt @@ -11,7 +11,7 @@ if(NOT APPLE) endif() file(GLOB TILE_LANG_METAL_SRCS - src/target/rt_mod_metal.cc + src/backend/metal/codegen/rt_mod_metal.cc ) list(APPEND TILE_LANG_SRCS ${TILE_LANG_METAL_SRCS}) # FIXME: CIBW failed with backtrace, why??? diff --git a/src/backend/metal/codegen/rt_mod_metal.cc b/src/backend/metal/codegen/rt_mod_metal.cc new file mode 100644 index 0000000000..00f3098897 --- /dev/null +++ b/src/backend/metal/codegen/rt_mod_metal.cc @@ -0,0 +1,27 @@ +/*! + * \file rt_mod_metal.cc + * \brief Metal codegen entry point. + * + * Metal codegen is handled by CodeGenCHost (target/codegen_c_host.cc), which + * has built-in Metal context support via the is_in_metal_context flag. + * When IR contains AttrStmt with attr_key == "metal_context", the host + * codegen emits Metal-specific dispatch_sync / MTLCommandBuffer code. + */ +#include "target/codegen_c_host.h" + +#include + +namespace tvm { +namespace codegen { + +ffi::Module BuildTileLangMetal(IRModule mod, Target target) { + return tl::BuildTileLangCHost(mod, target); +} + +TVM_FFI_STATIC_INIT_BLOCK() { + namespace refl = tvm::ffi::reflection; + refl::GlobalDef().def("target.build.tilelang_metal", BuildTileLangMetal); +} + +} // namespace codegen +} // namespace tvm diff --git a/src/backend/rocm/CMakeLists.txt b/src/backend/rocm/CMakeLists.txt index 75ffce9faf..903bb2fab2 100644 --- a/src/backend/rocm/CMakeLists.txt +++ b/src/backend/rocm/CMakeLists.txt @@ -23,7 +23,7 @@ if(TILELANG_USE_HIP_STUBS) # It also provides minimal HSA wrappers (hsa_init / hsa_shut_down) to avoid a # hard DT_NEEDED dependency on libhsa-runtime64 in ROCm-enabled wheels. # ============================================================================ - add_library(hip_stub SHARED src/target/stubs/hip.cc) + add_library(hip_stub SHARED src/backend/rocm/codegen/stubs/hip.cc) target_include_directories(hip_stub PRIVATE ${ROCM_INCLUDE_DIRS}) target_compile_definitions(hip_stub PRIVATE TILELANG_HIP_STUB_EXPORTS) target_link_libraries(hip_stub PRIVATE ${CMAKE_DL_LIBS}) @@ -40,7 +40,7 @@ if(TILELANG_USE_HIP_STUBS) # This library provides a minimal HIPRTC API surface and lazily loads # libhiprtc.so at runtime. # ============================================================================ - add_library(hiprtc_stub SHARED src/target/stubs/hiprtc.cc) + add_library(hiprtc_stub SHARED src/backend/rocm/codegen/stubs/hiprtc.cc) target_include_directories(hiprtc_stub PRIVATE ${ROCM_INCLUDE_DIRS}) target_compile_definitions(hiprtc_stub PRIVATE TILELANG_HIPRTC_STUB_EXPORTS) target_link_libraries(hiprtc_stub PRIVATE ${CMAKE_DL_LIBS}) @@ -66,8 +66,8 @@ if(TILELANG_USE_HIP_STUBS) endif() file(GLOB TILE_LANG_HIP_SRCS - src/target/codegen_hip.cc - src/target/rt_mod_hip.cc + src/backend/rocm/codegen/codegen_hip.cc + src/backend/rocm/codegen/rt_mod_hip.cc src/backend/rocm/op/*.cc ) list(APPEND TILE_LANG_SRCS ${TILE_LANG_HIP_SRCS}) diff --git a/src/target/codegen_hip.cc b/src/backend/rocm/codegen/codegen_hip.cc similarity index 99% rename from src/target/codegen_hip.cc rename to src/backend/rocm/codegen/codegen_hip.cc index ed141e6c2d..c6ed1323d6 100644 --- a/src/target/codegen_hip.cc +++ b/src/backend/rocm/codegen/codegen_hip.cc @@ -15,9 +15,9 @@ #include #include -#include "../op/builtin.h" +#include "op/builtin.h" #include "target/source/ptx.h" -#include "utils.h" +#include "target/utils.h" namespace tvm { namespace codegen { diff --git a/src/target/codegen_hip.h b/src/backend/rocm/codegen/codegen_hip.h similarity index 100% rename from src/target/codegen_hip.h rename to src/backend/rocm/codegen/codegen_hip.h diff --git a/src/target/intrin_rule_hip.cc b/src/backend/rocm/codegen/intrin_rule_hip.cc similarity index 99% rename from src/target/intrin_rule_hip.cc rename to src/backend/rocm/codegen/intrin_rule_hip.cc index e142d84746..6140278a57 100644 --- a/src/target/intrin_rule_hip.cc +++ b/src/backend/rocm/codegen/intrin_rule_hip.cc @@ -5,7 +5,7 @@ #include #include -#include "../support/ffi_aliases.h" +#include "support/ffi_aliases.h" #include "target/intrin_rule.h" namespace tvm { diff --git a/src/target/rt_mod_hip.cc b/src/backend/rocm/codegen/rt_mod_hip.cc similarity index 100% rename from src/target/rt_mod_hip.cc rename to src/backend/rocm/codegen/rt_mod_hip.cc diff --git a/src/target/stubs/hip.cc b/src/backend/rocm/codegen/stubs/hip.cc similarity index 100% rename from src/target/stubs/hip.cc rename to src/backend/rocm/codegen/stubs/hip.cc diff --git a/src/target/stubs/hip.h b/src/backend/rocm/codegen/stubs/hip.h similarity index 100% rename from src/target/stubs/hip.h rename to src/backend/rocm/codegen/stubs/hip.h diff --git a/src/target/stubs/hiprtc.cc b/src/backend/rocm/codegen/stubs/hiprtc.cc similarity index 100% rename from src/target/stubs/hiprtc.cc rename to src/backend/rocm/codegen/stubs/hiprtc.cc diff --git a/src/target/stubs/vendor/hip_runtime.h b/src/backend/rocm/codegen/stubs/vendor/hip_runtime.h similarity index 100% rename from src/target/stubs/vendor/hip_runtime.h rename to src/backend/rocm/codegen/stubs/vendor/hip_runtime.h diff --git a/src/op/builtin.cc b/src/op/builtin.cc index 450b3dcc67..3ff0df96fa 100644 --- a/src/op/builtin.cc +++ b/src/op/builtin.cc @@ -10,8 +10,8 @@ #include #include -#include "../target/stubs/cuda.h" -#include "../target/utils.h" +#include "backend/cuda/codegen/stubs/cuda.h" +#include "target/utils.h" namespace tvm { namespace tl { diff --git a/src/op/utils.h b/src/op/utils.h index 77e21feda6..83370ed48e 100644 --- a/src/op/utils.h +++ b/src/op/utils.h @@ -6,8 +6,8 @@ #ifndef TVM_TL_OP_UTILS_H_ #define TVM_TL_OP_UTILS_H_ -#include "../target/stubs/cuda.h" #include "./operator.h" +#include "backend/cuda/codegen/stubs/cuda.h" #include "region.h" #include "tvm/runtime/base.h" #include diff --git a/src/runtime/runtime.cc b/src/runtime/runtime.cc index d921bb34db..c59dbb4299 100644 --- a/src/runtime/runtime.cc +++ b/src/runtime/runtime.cc @@ -6,7 +6,7 @@ #include "runtime.h" -#include "../target/stubs/cuda.h" +#include "backend/cuda/codegen/stubs/cuda.h" #include #include #include diff --git a/src/target/codegen_c_host.h b/src/target/codegen_c_host.h index 345caab88a..983e606038 100644 --- a/src/target/codegen_c_host.h +++ b/src/target/codegen_c_host.h @@ -127,6 +127,13 @@ class CodeGenCHost : public tvm::codegen::CodeGenC { } }; +/*! + * \brief Build a TileLang C host module for the given IRModule and target. + * Also handles Metal target through is_in_metal_context flag in IR. + */ +::tvm::ffi::Module BuildTileLangCHost(::tvm::IRModule mod, + ::tvm::Target target); + } // namespace tl } // namespace tvm