-
Notifications
You must be signed in to change notification settings - Fork 3.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ROCm] add support for ROCm/HIP device #6086
base: master
Are you sure you want to change the base?
Changes from all commits
01ff268
ac966a5
6ae6432
5b87bcd
3ad89f8
7b8b6a0
62aa30b
bb27c55
58ace9c
0bc1cfb
a7c9653
96e3a52
cb7623a
9ba27bb
5ba59b8
c0abd17
e7129a0
eb0036f
dbd972e
4cd0dea
3ad2482
8f6600e
47fc353
2e8869c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -463,3 +463,6 @@ dask-worker-space/ | |
*.pub | ||
*.rdp | ||
*_rsa | ||
|
||
# hipify-perl -inplace leaves behind *.prehip files | ||
*.prehip |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -4,6 +4,7 @@ option(USE_GPU "Enable GPU-accelerated training" OFF) | |||||
option(USE_SWIG "Enable SWIG to generate Java API" OFF) | ||||||
option(USE_TIMETAG "Set to ON to output time costs" OFF) | ||||||
option(USE_CUDA "Enable CUDA-accelerated training " OFF) | ||||||
option(USE_ROCM "Enable ROCM-accelerated training " OFF) | ||||||
option(USE_DEBUG "Set to ON for Debug mode" OFF) | ||||||
option(USE_SANITIZER "Use sanitizer flags" OFF) | ||||||
set( | ||||||
|
@@ -160,6 +161,11 @@ if(USE_CUDA) | |||||
set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE) | ||||||
endif() | ||||||
|
||||||
if(USE_ROCM) | ||||||
enable_language(HIP) | ||||||
set(USE_OPENMP ON CACHE BOOL "ROCM requires OpenMP" FORCE) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
endif() | ||||||
|
||||||
if(USE_OPENMP) | ||||||
if(APPLE) | ||||||
find_package(OpenMP) | ||||||
|
@@ -271,35 +277,53 @@ if(USE_CUDA) | |||||
|
||||||
message(STATUS "ALLFEATS_DEFINES: ${ALLFEATS_DEFINES}") | ||||||
message(STATUS "FULLDATA_DEFINES: ${FULLDATA_DEFINES}") | ||||||
endif() | ||||||
|
||||||
function(add_histogram hsize hname hadd hconst hdir) | ||||||
add_library(histo${hsize}${hname} OBJECT src/treelearner/kernels/histogram${hsize}.cu) | ||||||
set_target_properties( | ||||||
histo${hsize}${hname} | ||||||
PROPERTIES | ||||||
CUDA_SEPARABLE_COMPILATION ON | ||||||
CUDA_ARCHITECTURES ${CUDA_ARCHS} | ||||||
) | ||||||
if(hadd) | ||||||
list(APPEND histograms histo${hsize}${hname}) | ||||||
set(histograms ${histograms} PARENT_SCOPE) | ||||||
endif() | ||||||
target_compile_definitions( | ||||||
histo${hsize}${hname} | ||||||
PRIVATE | ||||||
-DCONST_HESSIAN=${hconst} | ||||||
${hdir} | ||||||
) | ||||||
endfunction() | ||||||
if(USE_ROCM) | ||||||
find_package(HIP) | ||||||
include_directories(${HIP_INCLUDE_DIRS}) | ||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it the same as
? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we also set For NVIDIA, are they reused from |
||||||
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} ${OpenMP_CXX_FLAGS} -fPIC -Wall") | ||||||
|
||||||
# avoid warning: unused variable 'mask' due to __shfl_down_sync work-around | ||||||
set(DISABLED_WARNINGS "${DISABLED_WARNINGS} -Wno-unused-variable") | ||||||
# avoid warning: 'hipHostAlloc' is deprecated: use hipHostMalloc instead | ||||||
set(DISABLED_WARNINGS "${DISABLED_WARNINGS} -Wno-deprecated-declarations") | ||||||
# avoid many warnings about missing overrides | ||||||
set(DISABLED_WARNINGS "${DISABLED_WARNINGS} -Wno-inconsistent-missing-override") | ||||||
# avoid warning: shift count >= width of type in feature_histogram.hpp | ||||||
set(DISABLED_WARNINGS "${DISABLED_WARNINGS} -Wno-shift-count-overflow") | ||||||
|
||||||
foreach(hsize _16_64_256) | ||||||
add_histogram("${hsize}" "_sp_const" "True" "1" "${BASE_DEFINES}") | ||||||
add_histogram("${hsize}" "_sp" "True" "0" "${BASE_DEFINES}") | ||||||
add_histogram("${hsize}" "-allfeats_sp_const" "False" "1" "${ALLFEATS_DEFINES}") | ||||||
add_histogram("${hsize}" "-allfeats_sp" "False" "0" "${ALLFEATS_DEFINES}") | ||||||
add_histogram("${hsize}" "-fulldata_sp_const" "True" "1" "${FULLDATA_DEFINES}") | ||||||
add_histogram("${hsize}" "-fulldata_sp" "True" "0" "${FULLDATA_DEFINES}") | ||||||
endforeach() | ||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${DISABLED_WARNINGS}") | ||||||
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} ${DISABLED_WARNINGS}") | ||||||
|
||||||
if(USE_DEBUG) | ||||||
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -g -O0") | ||||||
else() | ||||||
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -O3") | ||||||
endif() | ||||||
message(STATUS "CMAKE_HIP_FLAGS: ${CMAKE_HIP_FLAGS}") | ||||||
|
||||||
add_definitions(-DUSE_CUDA) | ||||||
|
||||||
set( | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not used. See #6766 (review). |
||||||
BASE_DEFINES | ||||||
-DPOWER_FEATURE_WORKGROUPS=12 | ||||||
-DUSE_CONSTANT_BUF=0 | ||||||
) | ||||||
set( | ||||||
ALLFEATS_DEFINES | ||||||
${BASE_DEFINES} | ||||||
-DENABLE_ALL_FEATURES | ||||||
) | ||||||
set( | ||||||
FULLDATA_DEFINES | ||||||
${ALLFEATS_DEFINES} | ||||||
-DIGNORE_INDICES | ||||||
) | ||||||
|
||||||
message(STATUS "ALLFEATS_DEFINES: ${ALLFEATS_DEFINES}") | ||||||
message(STATUS "FULLDATA_DEFINES: ${FULLDATA_DEFINES}") | ||||||
endif() | ||||||
|
||||||
include(CheckCXXSourceCompiles) | ||||||
|
@@ -634,7 +658,9 @@ if(USE_CUDA) | |||||
CUDA_RESOLVE_DEVICE_SYMBOLS ON | ||||||
) | ||||||
endif() | ||||||
endif() | ||||||
|
||||||
if(USE_ROCM OR USE_CUDA) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not used. See #6766 (review). |
||||||
# histograms are list of object libraries. Linking object library to other | ||||||
# object libraries only gets usage requirements, the linked objects won't be | ||||||
# used. Thus we have to call target_link_libraries on final targets here. | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,20 @@ | ||||||
/*! | ||||||
* Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved. | ||||||
*/ | ||||||
#ifdef USE_CUDA | ||||||
|
||||||
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIP__) | ||||||
// ROCm doesn't have __shfl_down_sync, only __shfl_down without mask. | ||||||
// Since mask is full 0xffffffff, we can use __shfl_down instead. | ||||||
#define __shfl_down_sync(mask, val, offset) __shfl_down(val, offset) | ||||||
#define __shfl_up_sync(mask, val, offset) __shfl_up(val, offset) | ||||||
// ROCm warpSize is constexpr and is either 32 or 64 depending on gfx arch. | ||||||
#define WARPSIZE warpSize | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should
|
||||||
// ROCm doesn't have atomicAdd_block, but it should be semantically the same as atomicAdd | ||||||
#define atomicAdd_block atomicAdd | ||||||
#else | ||||||
// CUDA warpSize is not a constexpr, but always 32 | ||||||
#define WARPSIZE 32 | ||||||
#endif | ||||||
|
||||||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.