Skip to content

Commit 1787210

Browse files
authored
Merge pull request #738 from ademeure/faster_compile
Improve compile time (simple makefile changes)
2 parents 6e6a528 + fc88344 commit 1787210

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

Makefile

+6-3
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,12 @@ REMOVE_FILES = rm -f
1111
OUTPUT_FILE = -o $@
1212
CUDA_OUTPUT_FILE = -o $@
1313

14+
# CPU optimization level passed to NVCC as -O<level>; defaults to 3 (set FORCE_NVCC_O=0 for the fastest compile time)
15+
FORCE_NVCC_O ?= 3
16+
1417
# NVCC flags
1518
# -t=0 is the short form of --threads=0; 0 means use as many threads as there are CPUs on the machine
16-
NVCC_FLAGS = -O3 -t=0 --use_fast_math -std=c++17
19+
# -t and --threads are the same nvcc option, so it is passed only once here.
# The optimization level comes from FORCE_NVCC_O so it can be overridden on the command line.
NVCC_FLAGS = -t=0 --use_fast_math -std=c++17 -O$(FORCE_NVCC_O)
1720
NVCC_LDFLAGS = -lcublas -lcublasLt
1821
NVCC_INCLUDES =
1922
NVCC_LDLIBS =
@@ -45,8 +48,8 @@ endif
4548

4649
ifneq ($(CI),true) # if not in CI, then use the GPU query
4750
ifndef GPU_COMPUTE_CAPABILITY # set to defaults if: make GPU_COMPUTE_CAPABILITY=
48-
ifneq ($(call file_exists_in_path, __nvcc_device_query),)
49-
GPU_COMPUTE_CAPABILITY = $(shell __nvcc_device_query)
51+
ifneq ($(call file_exists_in_path, nvidia-smi),)
52+
# nvidia-smi prints one compute capability per GPU, one per line, and $(shell ...)
# turns those newlines into spaces. Keep only the lowest value so the variable
# is always a single token (e.g. "80"), even on machines with several GPUs.
GPU_COMPUTE_CAPABILITY = $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g' | sort -n | head -n 1)
5053
GPU_COMPUTE_CAPABILITY := $(strip $(GPU_COMPUTE_CAPABILITY))
5154
endif
5255
endif

0 commit comments

Comments
 (0)