Merged

Commits (25)
0e0d0a0  [slimtensor] integration into backend (Gasoonjia, Jan 13, 2026)
58b70ce  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 13, 2026)
40cf5ea  make cmake work (Gasoonjia, Jan 13, 2026)
814ddf0  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 14, 2026)
7374bee  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 14, 2026)
029540a  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 14, 2026)
e38cc02  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 16, 2026)
3ad7636  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 22, 2026)
64bb069  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 22, 2026)
86d7e43  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 23, 2026)
8c32492  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 23, 2026)
c225e32  parakeet works (Gasoonjia, Jan 26, 2026)
030a931  parakeet works (Gasoonjia, Jan 26, 2026)
4fa4dfc  whisper works (Gasoonjia, Jan 27, 2026)
5e9f654  parakeet works - 2 (Gasoonjia, Jan 27, 2026)
512a3e4  remove nonnecessary debug info (Gasoonjia, Jan 27, 2026)
18afded  polish cuda backend.cpp comment (Gasoonjia, Jan 27, 2026)
75287c4  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 27, 2026)
ff05337  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 27, 2026)
f5af4a8  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 29, 2026)
6ccb691  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 29, 2026)
7d6a571  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 29, 2026)
cfde842  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 29, 2026)
c5eea82  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 29, 2026)
f8a812e  Update on "[slimtensor] integration into backend" (Gasoonjia, Jan 30, 2026)
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -123,6 +123,10 @@ if(EXECUTORCH_ENABLE_BUNDLE_IO)
add_definitions(-DET_BUNDLE_IO_ENABLED)
endif()

if(EXECUTORCH_BUILD_CUDA)
add_definitions(-DCUDA_AVAILABLE=1)
endif()

# -ffunction-sections -fdata-sections: breaks function and data into sections so
# they can be properly gc'd. -s: strip symbol.
if(WIN32)
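For context, a compile definition such as `CUDA_AVAILABLE=1` is normally consumed through preprocessor guards in sources that need a GPU-specific path. A minimal sketch of that pattern, assuming only the macro and the standard CUDA runtime API; the helper below is hypothetical and not a file in this PR:

```cpp
// Illustrative only -- not part of this PR. Shows the usual way a
// -DCUDA_AVAILABLE=1 compile definition is consumed from C++ sources.
#include <cstddef>
#include <cstring>

#ifdef CUDA_AVAILABLE
#include <cuda_runtime.h>
#endif

// Hypothetical helper: copy nbytes from src to dst, going through the CUDA
// runtime when the build defined CUDA_AVAILABLE, plain memcpy otherwise.
inline bool copy_buffer(void* dst, const void* src, std::size_t nbytes) {
#ifdef CUDA_AVAILABLE
  // cudaMemcpyDefault lets the driver infer the host/device direction.
  return cudaMemcpy(dst, src, nbytes, cudaMemcpyDefault) == cudaSuccess;
#else
  std::memcpy(dst, src, nbytes);
  return true;
#endif
}
```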
44 changes: 43 additions & 1 deletion backends/aoti/CMakeLists.txt
@@ -25,7 +25,10 @@ endif()
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
find_package_torch()

# Common AOTI functionality - combines all AOTI common components
# ==============================================================================
# AOTI common shims using ETensor (for Metal backend)
# TODO(gasoonjia): Remove this after metal migration
# ==============================================================================
set(_aoti_common_sources common_shims.cpp)
add_library(aoti_common STATIC ${_aoti_common_sources})
target_include_directories(
@@ -59,3 +62,42 @@ install(
EXPORT ExecuTorchTargets
DESTINATION ${CMAKE_INSTALL_LIBDIR}
)

# ==============================================================================
# AOTI common shims using SlimTensor (for CUDA backend) Uses SlimTensor for all
# tensor operations
# TODO(gasoonjia): Replace aoti_common with this one after metal migration
# ==============================================================================
add_library(aoti_common_shims_slim STATIC common_shims_slim.cpp)
target_include_directories(
aoti_common_shims_slim
PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}> $<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
)
target_compile_options(
aoti_common_shims_slim
PUBLIC $<$<CXX_COMPILER_ID:MSVC>:/EHsc /GR>
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-fexceptions -frtti -fPIC>
)
target_compile_definitions(
aoti_common_shims_slim PUBLIC $<$<PLATFORM_ID:Windows>:EXPORT_AOTI_FUNCTIONS>
)

# Add CUDA include directories and link CUDA runtime when building with CUDA
if(EXECUTORCH_BUILD_CUDA)
find_package(CUDAToolkit REQUIRED)
target_include_directories(
aoti_common_shims_slim PUBLIC ${CUDAToolkit_INCLUDE_DIRS}
)
target_link_libraries(aoti_common_shims_slim PUBLIC CUDA::cudart)
endif()

target_link_libraries(
aoti_common_shims_slim PUBLIC slimtensor extension_tensor ${CMAKE_DL_LIBS}
)

install(
TARGETS aoti_common_shims_slim
EXPORT ExecuTorchTargets
DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
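On Windows the new library is compiled with `EXPORT_AOTI_FUNCTIONS` defined, which conventionally drives a dllexport/dllimport switch in `export.h`. A hedged sketch of that conventional pattern; the macro and function names below are illustrative, and the repository's actual `export.h` may differ:

```cpp
// Conventional export-macro pattern; illustrative, not the repository's export.h.
#pragma once

#if defined(_WIN32)
  #if defined(EXPORT_AOTI_FUNCTIONS)
    // Building the shim library itself: mark symbols for export from the DLL.
    #define AOTI_SHIM_EXPORT __declspec(dllexport)
  #else
    // Consuming the shim library: import the symbols.
    #define AOTI_SHIM_EXPORT __declspec(dllimport)
  #endif
#else
  // Non-Windows builds rely on default symbol visibility.
  #define AOTI_SHIM_EXPORT __attribute__((visibility("default")))
#endif

// Example of a shim declaration that would use the macro (hypothetical name).
extern "C" AOTI_SHIM_EXPORT void aoti_torch_example_shim();
```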
21 changes: 20 additions & 1 deletion backends/aoti/targets.bzl
@@ -33,7 +33,8 @@ def define_common_targets():
],
)

# AOTI common shims functionality
# AOTI common shims functionality using ETensor
# TODO(gasoonjia): Remove this after metal migration
runtime.cxx_library(
name = "common_shims",
srcs = [
@@ -89,6 +90,7 @@

# SlimTensor-based common shims library
# Uses SlimTensor for all tensor operations
# TODO(gasoonjia): Replace common_shims with this one after metal migration
runtime.cxx_library(
name = "common_shims_slim",
srcs = [
@@ -97,10 +99,27 @@
headers = [
"common_shims_slim.h",
"export.h",
"utils.h",
],
visibility = ["@EXECUTORCH_CLIENTS"],
exported_deps = [
"//executorch/runtime/core:core",
"//executorch/runtime/core/exec_aten:lib",
"//executorch/backends/aoti/slim/core:slimtensor",
],
)

# Common AOTI functionality for SlimTensor-based backends (combining common_shims_slim and delegate_handle)
# All CUDA backend code should depend on this target
# TODO(gasoonjia): Replace aoti_common with this one after metal migration
runtime.cxx_library(
name = "aoti_common_slim",
# @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
link_whole = True,
supports_python_dlopen = True,
visibility = ["PUBLIC"],
exported_deps = [
":common_shims_slim",
":delegate_handle",
],
)
11 changes: 8 additions & 3 deletions backends/cuda/CMakeLists.txt
@@ -99,13 +99,18 @@ install(

# CUDA-specific AOTI shim symbols (dynamically linked)
set(_aoti_cuda_shim_sources
runtime/shims/memory.cpp runtime/shims/tensor_attribute.cpp
runtime/guard.cpp runtime/shims/cuda_guard.cpp runtime/shims/int4mm.cu
${EXECUTORCH_ROOT}/backends/aoti/common_shims.cpp
runtime/shims/memory.cpp
runtime/shims/cuda_guard.cpp
runtime/shims/int4mm.cu
${EXECUTORCH_ROOT}/backends/aoti/common_shims_slim.cpp
${EXECUTORCH_ROOT}/backends/aoti/slim/cuda/guard.cpp
)

add_library(aoti_cuda_shims SHARED ${_aoti_cuda_shim_sources})

# Define CUDA_AVAILABLE to use SlimTensor on GPU in common_shims_slim.h
target_compile_definitions(aoti_cuda_shims PRIVATE CUDA_AVAILABLE=1)

# Define export macros for shared library
if(MSVC)
target_compile_definitions(aoti_cuda_shims PRIVATE EXPORT_AOTI_FUNCTIONS)
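The shim source list now pulls in the SlimTensor CUDA guard (`backends/aoti/slim/cuda/guard.cpp`) in place of the old `runtime/guard.cpp`. Device guards of this kind generally follow an RAII save/set/restore pattern over the CUDA runtime. A generic sketch under that assumption; the class and member names are hypothetical and only `cudaGetDevice`/`cudaSetDevice` are taken from the real API:

```cpp
// Generic RAII device-guard sketch; not the guard implementation in this PR.
#include <cuda_runtime.h>

class ScopedDeviceGuard {
 public:
  // Remember the current device, then switch to `device` if it differs.
  explicit ScopedDeviceGuard(int device) {
    cudaGetDevice(&prev_device_);
    if (device != prev_device_) {
      cudaSetDevice(device);
      switched_ = true;
    }
  }

  // Restore the previously active device on scope exit.
  ~ScopedDeviceGuard() {
    if (switched_) {
      cudaSetDevice(prev_device_);
    }
  }

  ScopedDeviceGuard(const ScopedDeviceGuard&) = delete;
  ScopedDeviceGuard& operator=(const ScopedDeviceGuard&) = delete;

 private:
  int prev_device_ = 0;
  bool switched_ = false;
};
```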
67 changes: 12 additions & 55 deletions backends/cuda/runtime/TARGETS
@@ -3,28 +3,6 @@ load("//tools/build/buck:nvcc_flags.bzl", "get_nvcc_arch_args")

oncall("executorch")

runtime.cxx_library(
name = "guard",
srcs = [
"guard.cpp",
],
headers = [
"guard.h",
"utils.h",
],
visibility = ["PUBLIC"],
deps = [
"//executorch/runtime/platform:platform",
],
exported_deps = [
"//executorch/runtime/core:core",
"//executorch/runtime/core/exec_aten:lib",
],
external_deps = [
("cuda", None, "cuda-lazy"),
],
)

runtime.cxx_library(
name = "cuda_platform",
srcs = [
@@ -71,14 +49,12 @@ runtime.cxx_library(
runtime.cxx_library(
name = "runtime_shims",
srcs = [
"guard.cpp",
"shims/cuda_guard.cpp",
"shims/int4mm.cu",
"shims/memory.cpp",
"shims/tensor_attribute.cpp",
],
headers = [
"guard.h",
"shims/cuda_guard.h",
"shims/int4mm.cuh",
"shims/int4mm.h",
@@ -91,43 +67,18 @@ runtime.cxx_library(
supports_python_dlopen = True,
# Constructor needed for backend registration.
compiler_flags = ["-Wno-global-constructors"],
preprocessor_flags = ["-DCUDA_AVAILABLE=1"],
visibility = ["PUBLIC"],
deps = [
":tensor_maker",
"//executorch/backends/aoti:common_shims",
"//executorch/runtime/core:core",
"//executorch/runtime/core/exec_aten:lib",
"//executorch/runtime/platform:platform",
"//executorch/backends/cuda/runtime:cuda_platform",
],
nvcc_flags = get_nvcc_arch_args() + [
"-_NVCC_HOST_COMPILER_FLAG_",
"gcc",
],
external_deps = [
("cuda", None, "cuda-lazy"),
],
)

runtime.cxx_library(
name = "runtime_shims_slim",
srcs = [
"shims/memory_slim.cpp",
],
headers = [
"shims/memory_slim.h",
],
# @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
link_whole = True,
supports_python_dlopen = True,
visibility = ["@EXECUTORCH_CLIENTS"],
preprocessor_flags = ["-DCUDA_AVAILABLE=1"],
deps = [
"//executorch/backends/aoti:aoti_common_slim",
"//executorch/backends/aoti/slim/core:slimtensor",
"//executorch/backends/aoti/slim/factory:empty",
"//executorch/backends/aoti/slim/factory:from_blob",
"//executorch/backends/aoti:common_shims",
"//executorch/backends/aoti/slim/cuda:guard",
"//executorch/runtime/core:core",
"//executorch/runtime/core/exec_aten:lib",
"//executorch/runtime/core/exec_aten/util:tensor_util",
"//executorch/runtime/platform:platform",
],
nvcc_flags = get_nvcc_arch_args() + [
@@ -149,10 +100,16 @@ runtime.cxx_library(
supports_python_dlopen = True,
# Constructor needed for backend registration.
compiler_flags = ["-Wno-global-constructors"],
preprocessor_flags = ["-DCUDA_AVAILABLE=1"],
visibility = ["PUBLIC"],
deps = [
":runtime_shims",
"//executorch/backends/aoti:aoti_common",
"//executorch/backends/aoti:aoti_common_slim",
"//executorch/backends/aoti/slim/core:slimtensor",
"//executorch/backends/aoti/slim/factory:empty",
"//executorch/backends/aoti/slim/factory:from_blob",
"//executorch/backends/aoti/slim/factory:from_etensor",
"//executorch/extension/tensor:tensor",
"//executorch/runtime/backend:interface",
"//executorch/runtime/core/exec_aten/util:tensor_util",
],
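The new dependencies on the SlimTensor factory targets (`empty`, `from_blob`, `from_etensor`) suggest the usual tensor-factory split: allocate fresh storage, wrap existing memory without copying, or convert from an ETensor. A generic sketch of the `from_blob` idea under those assumptions; the types and function below are hypothetical and do not reproduce the SlimTensor API:

```cpp
// Conceptual from_blob sketch: wrap caller-owned memory in a non-owning view.
// Hypothetical types; the real SlimTensor factories are not shown here.
#include <cstdint>
#include <vector>

struct TensorView {
  void* data;                    // borrowed pointer, never freed by the view
  std::vector<int64_t> sizes;    // logical shape
  std::vector<int64_t> strides;  // element strides, row-major contiguous here
};

// Build a non-owning view over `data` with the given sizes; the caller keeps
// ownership of the buffer, which must outlive the returned view.
inline TensorView from_blob(void* data, std::vector<int64_t> sizes) {
  std::vector<int64_t> strides(sizes.size(), 1);
  for (int64_t i = static_cast<int64_t>(sizes.size()) - 2; i >= 0; --i) {
    strides[i] = strides[i + 1] * sizes[i + 1];
  }
  return TensorView{data, std::move(sizes), std::move(strides)};
}
```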