Skip to content

Commit 2730789

Browse files
committed
vector_ptr now requires alignment in bytes instead of elements
1 parent f94bd10 commit 2730789

File tree

4 files changed

+177
-106
lines changed

4 files changed

+177
-106
lines changed

example.cu

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#include "kernel_float.h"
2+
#include <cuda_fp16.h>
3+
4+
namespace kf = kernel_float;
5+
6+
__global__ void kernel(
7+
kf::vec_ptr<half, 4, const __nv_fp8_e5m2> input,
8+
float constant,
9+
kf::vec_ptr<half, 4> output
10+
) {
11+
int i = blockIdx.x * blockDim.x + threadIdx.x;
12+
output(i) = input[i] + kf::cast<half>(constant);
13+
}

examples/vector_add/main.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ __global__ void my_kernel(
2222
int i = blockIdx.x * blockDim.x + threadIdx.x;
2323

2424
if (i * N < length) {
25-
output(i) = kf::fma(input[i], input[i], kf::cast<half>(constant));
25+
output[i] = kf::fma(input[i], input[i], kf::cast<half>(constant));
2626
}
2727
}
2828

0 commit comments

Comments
 (0)