Skip to content

Commit a544665

Browse files
committed
Add workflow to compile for HIP
1 parent 65939a6 commit a544665

File tree

7 files changed

+98
-20
lines changed

7 files changed

+98
-20
lines changed

.github/workflows/cmake-run-hip.yml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
name: CMake
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
rocm-version:
7+
required: true
8+
type: string
9+
gpu-arch:
10+
required: false
11+
type: string
12+
default: "gfx942"
13+
14+
env:
15+
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
16+
BUILD_TYPE: Debug
17+
18+
jobs:
19+
build:
20+
# The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac.
21+
# You can convert this to a matrix build if you need cross-platform coverage.
22+
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
23+
runs-on: ubuntu-latest
24+
25+
steps:
26+
- uses: loostrum/[email protected]
27+
with:
28+
version: ${{ inputs.rocm-version }}
29+
30+
- uses: actions/checkout@v3
31+
with:
32+
submodules: 'true'
33+
34+
- name: Configure CMake
35+
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
36+
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
37+
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DKERNEL_FLOAT_BUILD_TEST=1 -DKERNEL_FLOAT_BUILD_EXAMPLE=1 -DKERNEL_FLOAT_LANGUAGE=HIP -DCMAKE_HIP_ARCHITECTURES=${{ inputs.gpu-arch }}
38+
39+
- name: Build
40+
# Build your program with the given configuration
41+
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
42+
43+
- name: Test
44+
working-directory: ${{github.workspace}}/build
45+
# Execute tests defined by the CMake configuration.
46+
# See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
47+
run: ./tests/kernel_float_tests --durations=yes --success --verbosity=high ~[GPU]
48+

.github/workflows/cmake.yml

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,49 @@ env:
1010
BUILD_TYPE: Debug
1111

1212
jobs:
13-
build-cuda:
14-
uses: ./.github/workflows/cmake-action.yml
13+
#build-cuda:
14+
# uses: ./.github/workflows/cmake-run-cuda.yml
15+
# with:
16+
# cuda-version: "12.8.0"
17+
#
18+
#build-cuda-13-0:
19+
# needs: build-cuda
20+
# uses: ./.github/workflows/cmake-run-cuda.yml
21+
# with:
22+
# cuda-version: "13.0.0"
23+
#
24+
#build-cuda-12-6:
25+
# needs: build-cuda
26+
# uses: ./.github/workflows/cmake-run-cuda.yml
27+
# with:
28+
# cuda-version: "12.6.0"
29+
30+
#build-cuda-12-5:
31+
# needs: build-cuda
32+
# uses: ./.github/workflows/cmake-run-cuda.yml
33+
# with:
34+
# cuda-version: "12.5.0"
35+
36+
build-hip:
37+
#needs: build-cuda # Only attempt HIP after CUDA was successful
38+
uses: ./.github/workflows/cmake-run-hip.yml
39+
with:
40+
rocm-version: "6.3.0"
41+
42+
build-hip-6-1:
43+
needs: build-hip
44+
uses: ./.github/workflows/cmake-run-hip.yml
1545
with:
16-
cuda-version: "12.8.0"
46+
rocm-version: "6.1.0"
1747

18-
build-cuda-12-6:
19-
needs: build-cuda
20-
uses: ./.github/workflows/cmake-action.yml
48+
build-hip-6-2:
49+
needs: build-hip
50+
uses: ./.github/workflows/cmake-run-hip.yml
2151
with:
22-
cuda-version: "12.6.0"
52+
rocm-version: "6.2.0"
2353

24-
build-cuda-12-5:
25-
needs: build-cuda
26-
uses: ./.github/workflows/cmake-action.yml
54+
build-hip-6-4:
55+
needs: build-hip
56+
uses: ./.github/workflows/cmake-run-hip.yml
2757
with:
28-
cuda-version: "12.5.0"
58+
rocm-version: "6.4.0"

include/kernel_float/bf16.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,12 @@ KERNEL_FLOAT_BF16_UNARY_FUN(negate, ::__hneg, ::__hneg2)
107107
#elif KERNEL_FLOAT_IS_HIP
108108
KERNEL_FLOAT_INLINE __hip_bfloat16 hip_habs(const __hip_bfloat16 a) {
109109
unsigned short int res = __bfloat16_as_ushort(a);
110-
res &= 0x7FFF;
111-
return __ushort_as_bfloat16();
110+
return __ushort_as_bfloat16(res & 0x7FFF);
112111
}
113112

114113
KERNEL_FLOAT_INLINE __hip_bfloat16 hip_hneg(const __hip_bfloat16 a) {
115114
unsigned short int res = __bfloat16_as_ushort(a);
116-
res ^= 0x8000;
117-
return __ushort_as_bfloat16(res);
115+
return __ushort_as_bfloat16(res ^ 0x8000);
118116
}
119117

120118
KERNEL_FLOAT_INLINE __hip_bfloat162 hip_habs2(const __hip_bfloat162 a) {

include/kernel_float/macros.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
#endif // __CUDA_ARCH__
2020
#elif defined(__HIPCC__)
2121
#define KERNEL_FLOAT_IS_HIP (1)
22-
#define KERNEL_FLOAT_DEVICE __attribute__((always_inline)) __device__
23-
#define KERNEL_FLOAT_INLINE __attribute__((always_inline)) __host__ __device__
22+
#define KERNEL_FLOAT_DEVICE __attribute__((always_inline)) inline __device__
23+
#define KERNEL_FLOAT_INLINE __attribute__((always_inline)) inline __host__ __device__
2424

2525
#ifdef __HIP_DEVICE_COMPILE__
2626
#define KERNEL_FLOAT_IS_DEVICE (1)

tests/binops.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,8 @@ struct minmax_tests {
100100
kf::vec<T, N> a = {x[I]...};
101101
kf::vec<T, N> b = {y[I]...};
102102

103-
kf::vec<T, N> lo = min(a, b);
104-
kf::vec<T, N> hi = max(a, b);
103+
kf::vec<T, N> lo = kernel_float::min(a, b);
104+
kf::vec<T, N> hi = kernel_float::max(a, b);
105105

106106
if constexpr (is_one_of<T, double>) {
107107
ASSERT(equals(fmin(a[I], b[I]), lo[I]) && ...);
@@ -130,7 +130,7 @@ struct cross_test {
130130
__host__ __device__ void operator()(generator<T> gen) {
131131
kf::vec<T, 3> a = {T(1.0), T(2.0), T(3.0)};
132132
kf::vec<T, 3> b = {T(4.0), T(5.0), T(6.0)};
133-
kf::vec<T, 3> c = cross(a, b);
133+
kf::vec<T, 3> c = kernel_float::cross(a, b);
134134

135135
ASSERT(c[0] == T(-3.0));
136136
ASSERT(c[1] == T(6.0));

tests/common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#pragma once
22

33
#include <cstdint>
4+
#include <cmath>
5+
#include <tgmath.h>
46

57
#include "catch2/catch_all.hpp"
68
#include "kernel_float.h"

0 commit comments

Comments
 (0)