-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvectorAdd2.cu
More file actions
61 lines (46 loc) · 1.32 KB
/
vectorAdd2.cu
File metadata and controls
61 lines (46 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include<stdio.h>
#include<stdlib.h>
#include<cuda_runtime.h>
#include<device_launch_parameters.h>
#define SIZE 2048
/*
 * Element-wise integer vector addition: C[i] = A[i] + B[i] for 0 <= i < n.
 *
 * A, B : input vectors, at least n elements each, readable from device code.
 * C    : output vector, at least n elements, writable from device code.
 * n    : number of elements to process; values <= 0 make the kernel a no-op.
 *
 * Launch layout: 1D grid of 1D blocks, any size. Each thread starts at its
 * flat global index and advances by the total number of launched threads
 * (grid-stride loop), so every element in [0, n) is handled exactly once
 * whether the grid is smaller than, equal to, or larger than n. Elements at
 * index >= n are never touched. No shared memory is used.
 */
__global__ void vectorAdd(int *A, int* B, int *C, int n) {
    // Work in size_t so blockIdx.x * blockDim.x cannot wrap for large grids.
    const size_t count = n > 0 ? (size_t)n : 0;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         i < count;
         i += stride) {
        C[i] = A[i] + B[i];
    }
}
/* Reports a failed CUDA runtime call on stderr and terminates the process. */
static void checkCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what,
                cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Adds two SIZE-element integer vectors on the GPU, prints the kernel time
 * measured with CUDA events, and verifies the device result on the host.
 *
 * Returns EXIT_SUCCESS when every output element equals A[i] + B[i], and
 * EXIT_FAILURE when host allocation fails or any element is wrong. A failing
 * CUDA runtime call terminates the process through checkCuda().
 */
int main() {
    int *A, *B, *C;
    int *d_A, *d_B, *d_C;
    // Byte counts are size_t so they cannot overflow an int for larger SIZE.
    const size_t size = SIZE * sizeof(int);

    A = (int*) malloc(size);
    B = (int*) malloc(size);
    C = (int*) malloc(size);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "host allocation of %zu bytes failed\n", size);
        free(A);
        free(B);
        free(C);
        return EXIT_FAILURE;
    }

    checkCuda(cudaMalloc((void**)&d_A, size), "cudaMalloc d_A");
    checkCuda(cudaMalloc((void**)&d_B, size), "cudaMalloc d_B");
    checkCuda(cudaMalloc((void**)&d_C, size), "cudaMalloc d_C");

    // A[i] + B[i] == SIZE for every i, which makes the result easy to check.
    for (int i = 0; i < SIZE; ++i) {
        A[i] = i;
        B[i] = SIZE - i;
    }

    checkCuda(cudaMemcpy(d_A, A, size, cudaMemcpyHostToDevice), "copy A to device");
    checkCuda(cudaMemcpy(d_B, B, size, cudaMemcpyHostToDevice), "copy B to device");

    cudaEvent_t start, stop;
    checkCuda(cudaEventCreate(&start), "cudaEventCreate start");
    checkCuda(cudaEventCreate(&stop), "cudaEventCreate stop");

    // Size the grid from the data (ceil-div) instead of a fixed <<<2, 32>>>,
    // which launched only 64 threads for 2048 elements. With SIZE == 2048
    // this is exactly 8 blocks: one thread per element, no surplus threads.
    const int threadsPerBlock = 256;
    const int blocksPerGrid = (SIZE + threadsPerBlock - 1) / threadsPerBlock;

    checkCuda(cudaEventRecord(start), "cudaEventRecord start");
    vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, SIZE);
    // A kernel launch returns nothing; bad launch configs show up here.
    checkCuda(cudaGetLastError(), "vectorAdd launch");
    checkCuda(cudaEventRecord(stop), "cudaEventRecord stop");

    // Wait for the stop event before reading the elapsed time; this also
    // surfaces any fault raised while the kernel was executing.
    checkCuda(cudaEventSynchronize(stop), "cudaEventSynchronize stop");
    float milliseconds = 0;
    checkCuda(cudaEventElapsedTime(&milliseconds, start, stop), "cudaEventElapsedTime");
    printf("execution time: %f millis\n", milliseconds);

    checkCuda(cudaMemcpy(C, d_C, size, cudaMemcpyDeviceToHost), "copy C to host");

    // Compare against a host-side reference so a wrong result is not silent.
    int mismatches = 0;
    for (int i = 0; i < SIZE; ++i) {
        if (C[i] != A[i] + B[i]) {
            ++mismatches;
        }
    }
    if (mismatches != 0) {
        fprintf(stderr, "verification failed: %d of %d elements are wrong\n",
                mismatches, SIZE);
    }

    // for(int i = 0; i < SIZE; ++i) {
    //     printf("%d %d %d\n", A[i], B[i], C[i]);
    // }

    checkCuda(cudaEventDestroy(start), "cudaEventDestroy start");
    checkCuda(cudaEventDestroy(stop), "cudaEventDestroy stop");
    checkCuda(cudaFree(d_A), "cudaFree d_A");
    checkCuda(cudaFree(d_B), "cudaFree d_B");
    checkCuda(cudaFree(d_C), "cudaFree d_C");
    free(A);
    free(B);
    free(C);

    return mismatches == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}