Skip to content

[fbgemm_gpu] Enable ROCm builds for GenAI, pt 1 #3910

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 75 additions & 33 deletions fbgemm_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,33 +20,67 @@ include(${CMAKEMODULES}/Utilities.cmake)
set(CMAKE_VERBOSE_MAKEFILE ON)

################################################################################
# FBGEMM_GPU Build Options
# Set Build Target
################################################################################

set(BUILD_TARGET_DEFAULT "default")
set(BUILD_TARGET_GENAI "genai")
set(BUILD_TARGET_VALUES "${BUILD_TARGET_DEFAULT};${BUILD_TARGET_GENAI}")

if(NOT DEFINED FBGEMM_BUILD_TARGET)
set(FBGEMM_BUILD_TARGET "${BUILD_TARGET_DEFAULT}")
elseif(NOT FBGEMM_BUILD_TARGET IN_LIST BUILD_TARGET_VALUES)
message(FATAL_ERROR
"Invalid FBGEMM_BUILD_TARGET value: ${FBGEMM_BUILD_TARGET}.
Allowed values: ${BUILD_TARGET_VALUES}")
endif()

################################################################################
# Set Build Variant
################################################################################

set(BUILD_VARIANT_CPU "cpu")
set(BUILD_VARIANT_CUDA "cuda")
set(BUILD_VARIANT_ROCM "rocm")
set(BUILD_VARIANT_VALUES
"${BUILD_VARIANT_CPU};${BUILD_VARIANT_CUDA};${BUILD_VARIANT_ROCM}")

option(FBGEMM_CPU_ONLY "Build FBGEMM_GPU without GPU support" OFF)
option(USE_ROCM "Build FBGEMM_GPU for ROCm" OFF)
option(FBGEMM_GENAI_ONLY "Build FBGEMM_GPU with GEN AI only support" OFF)
option(USE_FB_ONLY "Build FBGEMM_GPU FB-only operators" OFF)

if((NOT FBGEMM_CPU_ONLY) AND
((EXISTS "/opt/rocm/") OR (EXISTS $ENV{ROCM_PATH})) AND
(NOT EXISTS "/bin/nvcc"))
message(
"CMake has been set to build a non-CPU variant"
"and AMD GPU has been detected; "
"will default to ROCm build"
)
set(USE_ROCM ON)
endif()

if(FBGEMM_CPU_ONLY)
BLOCK_PRINT("Building the CPU-only variant of FBGEMM-GPU")
elseif(USE_ROCM)
BLOCK_PRINT("Building the ROCm variant of FBGEMM-GPU")
if (DEFINED FBGEMM_BUILD_VARIANT)
# If FBGEMM_BUILD_VARIANT is set, validate it
if(NOT FBGEMM_BUILD_VARIANT IN_LIST BUILD_VARIANT_VALUES)
message(FATAL_ERROR
"Invalid FBGEMM_BUILD_VARIANT value: ${FBGEMM_BUILD_VARIANT}.
Allowed values: ${BUILD_VARIANT_VALUES}")
endif()

else()
BLOCK_PRINT("Building the CUDA variant of FBGEMM-GPU")
endif()
# Else fall back to looking at FBGEMM_CPU_ONLY and filesystem paths to see if
# the build variant should be set to CPU, CUDA. or ROCM (legacy)
if(FBGEMM_CPU_ONLY)
set(FBGEMM_BUILD_VARIANT "${BUILD_VARIANT_CPU}")

elseif(((EXISTS "/opt/rocm/") OR (EXISTS $ENV{ROCM_PATH})) AND
(NOT EXISTS "/bin/nvcc"))
message(
"CMake has been set to build a non-CPU variant"
"and AMD GPU has been detected; "
"will default to ROCm build"
)
set(FBGEMM_BUILD_VARIANT "${BUILD_VARIANT_ROCM}")

# Set USE_ROCM (legacy)
# NOTE: Should be removed once other CMake scripts have migrated over to
# FBGEMM_BUILD_VARIANT
set(USE_ROCM ON)

else()
set(FBGEMM_BUILD_VARIANT "${BUILD_VARIANT_CUDA}")

endif()
endif()

################################################################################
# FBGEMM_GPU Build Kickstart
Expand All @@ -62,6 +96,9 @@ endif()
BLOCK_PRINT(
"Build Settings"
""
"FBGEMM_BUILD_TARGET : ${FBGEMM_BUILD_TARGET}"
"FBGEMM_BUILD_VARIANT : ${FBGEMM_BUILD_VARIANT}"
""
"NVCC_VERBOSE : ${NVCC_VERBOSE}"
"CUDNN_INCLUDE_DIR : ${CUDNN_INCLUDE_DIR}"
"CUDNN_LIBRARY : ${CUDNN_LIBRARY}"
Expand All @@ -73,18 +110,17 @@ BLOCK_PRINT(
"AMDGPU_TARGETS : ${AMDGPU_TARGETS}"
"PYTORCH_ROCM_ARCH : ${PYTORCH_ROCM_ARCH}")

if(FBGEMM_CPU_ONLY OR USE_ROCM)
project(
fbgemm_gpu
VERSION 0.8.0
LANGUAGES CXX C)
else()
project(
fbgemm_gpu
VERSION 0.8.0
LANGUAGES CXX C CUDA)
set(project_languages CXX C)
if(FBGEMM_BUILD_VARIANT STREQUAL BUILD_VARIANT_CUDA)
list(APPEND project_languages CUDA)
endif()

# Declare CMake project
project(
fbgemm_gpu
VERSION 1.2.0
LANGUAGES ${project_languages})

# AVX Flags Setup - must be set AFTER project declaration
include(${CMAKEMODULES}/FindAVX.cmake)

Expand Down Expand Up @@ -119,6 +155,7 @@ set(fbgemm_sources_include_directories
${THIRDPARTY}/cpuinfo/include
${THIRDPARTY}/cutlass/include
${THIRDPARTY}/cutlass/tools/util/include
${THIRDPARTY}/composable_kernel/include
${THIRDPARTY}/json/include
${NCCL_INCLUDE_DIRS})

Expand Down Expand Up @@ -240,7 +277,13 @@ endfunction()
# Build Targets
################################################################################

if(FBGEMM_GENAI_ONLY)
if(FBGEMM_BUILD_TARGET STREQUAL BUILD_TARGET_GENAI)
if(FBGEMM_BUILD_VARIANT STREQUAL BUILD_VARIANT_CPU)
message(FATAL_ERROR
"Unsupported (target, variant) combination:
(${FBGEMM_BUILD_TARGET}, ${FBGEMM_BUILD_VARIANT})")
endif()

# Build FBGEMM GenAI
add_subdirectory(experimental/gen_ai)

Expand All @@ -250,8 +293,7 @@ if(FBGEMM_GENAI_ONLY)
# Add Triton GEMM (GenAI) kernels if non-CPU build
add_subdirectory(experimental/gemm)

else()
elseif(FBGEMM_BUILD_TARGET STREQUAL BUILD_TARGET_DEFAULT)
# Build FBGEMM_GPU
include(FbgemmGpu.cmake)

endif()
58 changes: 35 additions & 23 deletions fbgemm_gpu/experimental/gen_ai/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,43 @@
# Target Sources
################################################################################

file(GLOB experimental_gen_ai_cpp_source_files_cpu
src/quantize/*.cpp)

file(GLOB experimental_gen_ai_cpp_source_files_gpu
# Attention Ops
src/attention/*.cpp
src/attention/*.cu
# Coalesce Ops
src/coalesce/*.cpp
src/coalesce/*.cu
# Comm Ops
src/comm/*.cpp
src/comm/*.cu
# Gather Scatter Ops
src/gather_scatter/*.cpp
src/gather_scatter/*.cu
# Quantize Ops
src/kv_cache/*.cpp
src/kv_cache/*.cu
# Quantize Ops
src/quantize/*.cpp
src/quantize/*.cu)

if(FBGEMM_BUILD_VARIANT STREQUAL BUILD_VARIANT_CUDA)
file(GLOB tmp_list_cpu
src/attention/*.cpp
src/coalesce/*.cpp
src/comm/*.cpp
src/gather_scatter/*.cpp
src/kv_cache/*.cpp)

file(GLOB tmp_list_gpu
src/attention/*.cu
src/coalesce/*.cu
src/comm/*.cu
src/gather_scatter/*.cu
src/kv_cache/*.cu)

list(APPEND experimental_gen_ai_cpp_source_files_cpu ${tmp_list_cpu})
list(APPEND experimental_gen_ai_cpp_source_files_gpu ${tmp_list_gpu})
endif()

# Set the source file for FB only CPP
if(USE_FB_ONLY)
file(GLOB fb_only_cxx_sources
fb/src/moe/*.cu
if(USE_FB_ONLY AND (FBGEMM_BUILD_VARIANT STREQUAL BUILD_VARIANT_CUDA))
file(GLOB fb_only_sources_cpu
fb/src/moe/*.cpp
fb/src/trt_llm/*.cu
fb/src/trt_llm/*.cpp)
list(APPEND experimental_gen_ai_cpp_source_files_gpu ${fb_only_cxx_sources})

file(GLOB fb_only_sources_gpu
fb/src/moe/*.cu
fb/src/trt_llm/*.cu)

list(APPEND experimental_gen_ai_cpp_source_files_cpu ${fb_only_sources_cpu})
list(APPEND experimental_gen_ai_cpp_source_files_gpu ${fb_only_sources_gpu})
endif()

# CUDA-specific sources
Expand All @@ -57,7 +65,9 @@ file(GLOB_RECURSE experimental_gen_ai_cpp_source_files_hip
# Python sources
file(GLOB_RECURSE experimental_gen_ai_python_source_files
bench/*.py
gen_ai/*.py)
bench/**/*.py
gen_ai/*.py
gen_ai/**/*.py)


################################################################################
Expand All @@ -72,6 +82,8 @@ gpu_cpp_library(
INCLUDE_DIRS
${fbgemm_sources_include_directories}
${CMAKE_CURRENT_SOURCE_DIR}/src/quantize
CPU_SRCS
${experimental_gen_ai_cpp_source_files_cpu}
GPU_SRCS
${experimental_gen_ai_cpp_source_files_gpu}
CUDA_SPECIFIC_SRCS
Expand Down
2 changes: 1 addition & 1 deletion fbgemm_gpu/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ def _get_cxx11_abi():

if self.args.package_variant == "genai":
print("[SETUP.PY] Building the GENAI-ONLY variant of FBGEMM_GPU ...")
cmake_args.append("-DFBGEMM_GENAI_ONLY=ON")
cmake_args.append("-DFBGEMM_BUILD_TARGET=genai")

if self.args.nvml_lib_path:
cmake_args.append(f"-DNVML_LIB_PATH={self.args.nvml_lib_path}")
Expand Down
Loading