-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathMakefile_cuda
81 lines (73 loc) · 2.62 KB
/
Makefile_cuda
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# Root of the CUDA toolkit. When CUDA_PATH is unset or empty it falls back to
# /usr/local/cuda; any non-empty value coming from the environment or the
# command line is kept unchanged.
ifeq ($(CUDA_PATH),)
CUDA_PATH = /usr/local/cuda
endif

# CUDA compiler driver, always taken from the selected toolkit directory.
NVCC = $(CUDA_PATH)/bin/nvcc
# NVCCOPT: full nvcc option set used to compile the .cu sources into objects.
# It is assembled in stages so each group of flags can be explained; the final
# flag order is: host/general flags, code-generation targets, device tuning.

# General flags: -O3, options forwarded to the host compiler
# (-fno-strict-aliasing, -fopenmp, -Wall), the UNIX define, and verbose ptxas
# output.
NVCCOPT := \
  -O3 \
  --compiler-options -fno-strict-aliasing \
  --compiler-options -fopenmp \
  --compiler-options -Wall \
  -DUNIX \
  --ptxas-options=-v

# One arch=compute_NN,code=sm_NN pair per supported GPU target.
NVCCOPT += \
  --generate-code arch=compute_60,code=sm_60 \
  --generate-code arch=compute_61,code=sm_61 \
  --generate-code arch=compute_62,code=sm_62 \
  --generate-code arch=compute_70,code=sm_70 \
  --generate-code arch=compute_75,code=sm_75 \
  --generate-code arch=compute_80,code=sm_80 \
  --generate-code arch=compute_86,code=sm_86 \
  --generate-code arch=compute_89,code=sm_89 \
  --generate-code arch=compute_90,code=sm_90

# The sm_100 target is added only when the toolkit reports "release 12.8".
# The probe runs $(NVCC), i.e. the very compiler the recipes invoke, instead
# of whichever "nvcc" is first on PATH, so the flag list always matches the
# toolkit selected through CUDA_PATH. If the compiler cannot be run, grep
# prints nothing and the compute_90 fallback branch is taken.
ifneq ($(shell $(NVCC) --version 2>&1 | grep "release 12\.8"),)
NVCCOPT += \
  --generate-code arch=compute_100,code=sm_100 \
  --generate-code arch=compute_100,code=compute_100
else
# Otherwise the last entry is code=compute_90 (a compute_NN code target
# rather than an sm_NN one).
NVCCOPT += \
  --generate-code arch=compute_90,code=compute_90
endif

# Device-code tuning: vectorization, restrict-qualified pointers, ptxas
# optimization switches, a 127-register cap, device optimizations on,
# uncompressed fat binary, and the division / square-root / fmad settings.
NVCCOPT += \
  --extra-device-vectorization \
  --restrict \
  --ptxas-options='--allow-expensive-optimizations true' \
  --ptxas-options='--register-usage-level 0' \
  --maxrregcount=127 \
  --dopt=on \
  --no-compress \
  --prec-div=true --prec-sqrt=true --fmad=true
# NVCCOPTmin: reduced nvcc option set with a single code-generation target
# (compute_80 / sm_80). It is the option set passed to the "nvcc --ptx"
# invocation for eigen_GPU_batch.cu.

# General flags: optimization level, host-compiler options, UNIX define and
# verbose ptxas output.
NVCCOPTmin := \
  -O3 \
  --compiler-options -fno-strict-aliasing \
  --compiler-options -fopenmp \
  --compiler-options -Wall \
  -DUNIX \
  --ptxas-options=-v

# The only GPU target of this option set.
NVCCOPTmin += --generate-code arch=compute_80,code=sm_80

# Device-code tuning flags.
NVCCOPTmin += \
  --extra-device-vectorization \
  --restrict \
  --ptxas-options='--allow-expensive-optimizations true' \
  --ptxas-options='--register-usage-level 0' \
  --maxrregcount=127 \
  --dopt=on \
  --no-compress \
  --prec-div=true --prec-sqrt=true --fmad=true
# Objects of the driver program (without the batched eigen-solver object).
OBJS   := main.o eigen_GPU_check.o
# Everything that goes into the a.out link line.
OBJSS  := $(OBJS) eigen_GPU_batch.o
# Static library produced next to the executable.
LIBS   := libeigenGbatch.a
# Linker switch for that library; not referenced by the rules in this file.
LIBOPT := -leigenGbatch
# Default goal: build the a.out executable and the static library.
# Declared phony so that a stray file named "all" in the build directory
# cannot make this target look up to date.
.PHONY: all
all: a.out $(LIBS)
# Link the executable with nvcc from the object files in $(OBJSS).
# $(LIBS) is listed as a prerequisite, so the archive is brought up to date as
# well, but the link line consumes the objects directly rather than the
# archive. After linking, a copy named a.out-cuda is made.
a.out: $(OBJSS) $(LIBS)
	$(NVCC) -o $@ $(OBJSS) \
	  -L./ -L$(CUDA_PATH)/lib64 \
	  -lcuda -lcudart -lcusolver -lcublas -lm -lgomp
	cp $@ $@-cuda
# Compile main.cpp through nvcc. NVCCOPT is not applied here: only the CUDA
# include path, the PRINT_DIAGNOSTIC=0 define and -fopenmp for the host
# compiler are passed.
main.o: main.cpp
	$(NVCC) -c -o $@ $< \
	  -I$(CUDA_PATH)/include \
	  -DPRINT_DIAGNOSTIC=0 \
	  --compiler-options -fopenmp
# Static library holding the batched eigen-solver object.
# "ar cr" creates the archive if needed and inserts/replaces the member;
# ranlib then refreshes the archive index.
libeigenGbatch.a: eigen_GPU_batch.o
	ar cr $@ $<
	ranlib $@
# PTX listing of the batched solver, generated with the reduced option set
# NVCCOPTmin. It has its own rule so make tracks the file: it is regenerated
# when the source changes or when the .ptx is missing, and a failure here is
# never hidden behind an already-written object file.
eigen_GPU_batch.ptx: eigen_GPU_batch.cu
	$(NVCC) --ptx -o $@ $(NVCCOPTmin) $<

# Object file of the batched solver, compiled with the full option set.
# The PTX listing is an order-only prerequisite (after "|"): building the
# object still produces the listing, but a newer .ptx alone does not force
# the object to be recompiled. $< remains the .cu source.
eigen_GPU_batch.o: eigen_GPU_batch.cu | eigen_GPU_batch.ptx
	$(NVCC) -c -o $@ $(NVCCOPT) $<
# Object file built from eigen_GPU_check.cu with the full NVCCOPT option set.
eigen_GPU_check.o: eigen_GPU_check.cu
	$(NVCC) $(NVCCOPT) -c -o $@ $<
# Remove build products: executables (a.out and its a.out-* copies), object
# files, PTX listings and static libraries.
# Declared phony so a file named "clean" cannot disable the target.
# "rm -f" stays quiet and succeeds when some of the files do not exist, so no
# error-ignoring "-" prefix is needed.
.PHONY: clean
clean:
	rm -f a.out a.out-* *.o *.cu_o *.ptx lib*.a