Skip to content

Commit ef4c0f9

Browse files
committed
Update example in README
1 parent 15a92ee commit ef4c0f9

File tree

1 file changed

+12
-13
lines changed

1 file changed

+12
-13
lines changed

README.md

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -57,31 +57,30 @@ Notice how easy it would be to change the precision (for example, `double` to `h
5757
#include "kernel_float.h"
5858
namespace kf = kernel_float;
5959

60-
__global__ void kernel(const kf::vec<half, 2>* input, float constant, kf::vec<float, 2>* output) {
60+
__global__ void kernel(kf::vec_ptr<const half, 2> input, int constant, kf::vec_ptr<float, 2> output) {
6161
int i = blockIdx.x * blockDim.x + threadIdx.x;
62-
output[i] = input[i] + kf::cast<half>(constant);
62+
output[i] += input[i] * constant;
6363
}
6464

6565
```
6666
6767
Here is how the same kernel would look for CUDA without Kernel Float.
6868
6969
```cpp
70-
__global__ void kernel(const __half* input, float constant, float* output) {
70+
__global__ void kernel(const half* input, int constant, float* output) {
7171
int i = blockIdx.x * blockDim.x + threadIdx.x;
7272
__half in0 = input[2 * i + 0];
7373
__half in1 = input[2 * i + 1];
7474
__half2 a = __halves2half2(in0, in1);
75-
float b = float(constant);
76-
__half c = __float2half(b);
77-
__half2 d = __half2half2(c);
78-
__half2 e = __hadd2(a, d);
79-
__half f = __low2half(e);
80-
__half g = __high2half(e);
81-
float out0 = __half2float(f);
82-
float out1 = __half2float(g);
83-
output[2 * i + 0] = out0;
84-
output[2 * i + 1] = out1;
75+
__half b = __int2half_rn(constant);
76+
__half2 c = __half2half2(b);
77+
__half2 d = __hmul2(a, c);
78+
__half e = __low2half(d);
79+
__half f = __high2half(d);
80+
float out0 = __half2float(e);
81+
float out1 = __half2float(f);
82+
output[2 * i + 0] += out0;
83+
output[2 * i + 1] += out1;
8584
}
8685
8786
```

0 commit comments

Comments
 (0)