Skip to content

Commit 48a0070

Browse files
committed
ch8 makefile
1 parent 6d77340 commit 48a0070

File tree

3 files changed

+118
-2
lines changed

3 files changed

+118
-2
lines changed

chapter_08/Makefile

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Makefile for the chapter 8 (prefix sum) examples.
#
# Builds two executables, full_prefix_sum.out and partial_prefix_sum.out.
# Each one is compiled from its ch8__<name>.cu source and linked against the
# objects built from every .cu file of the repositories listed in DEPSNAMES.

# Delete a target whose recipe failed, so a half-written file is never
# mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Compiler driver used for every compile and link step.
CC = nvcc

# Directory that receives the dependencies' object files.
OBJDIR = ./obj

# Directory that holds the dependency repositories.
REPOSDIR = ../..

# Repositories whose headers and .cu sources the examples need.
DEPSNAMES = \
nvixnu__array_utils \
nvixnu__populate_arrays_utils \
nvixnu__error_utils \
pmpp__prefix_sum \
nvixnu__cuda_devices_props

# Compilation flags.
CFLAGS = -g -G --compiler-options -Wall

# Libraries; only passed to the commands that link an executable.
LDLIBS = -lopencv_imgcodecs -lopencv_core

# One -I option per dependency repository.
INCLUDES = $(patsubst %,-I $(REPOSDIR)/%,$(DEPSNAMES))

# List with all .cu files inside $(REPOSDIR)/<repoName>
# (expanded once: the wildcard lookup does not need to be repeated).
CUFILES := $(foreach dep,$(DEPSNAMES),$(wildcard $(REPOSDIR)/$(dep)/*.cu))

# List with all .o paths (next to their sources; only used to derive names)
OBJS := $(patsubst %.cu,%.o,$(CUFILES))

# Compiled objects path: every object is actually written into $(OBJDIR)
COMPILEDOBJS := $(addprefix $(OBJDIR)/,$(notdir $(OBJS)))

# Executables produced by this Makefile.
APPS := full_prefix_sum.out partial_prefix_sum.out

# Lets the object rule below find each dependency source by its bare file
# name in whichever repository directory it lives.
vpath %.cu $(sort $(dir $(CUFILES)))

# Builds both executables (and, through them, every dependency object)
all: $(APPS)

# Compile an example and link it with the dependency objects.
# <name>.out is built from ch8__<name>.cu.
$(APPS): %.out: ch8__%.cu $(COMPILEDOBJS)
	$(CC) $< -o $@ $(COMPILEDOBJS) $(CFLAGS) $(INCLUDES) $(LDLIBS)

# Creates the ./obj dir
objdir:
	mkdir -p $(OBJDIR)

# Compile a dependency. The object is written to the rule's own target so
# make can tell when it is up to date; objdir is order-only because the
# directory only has to exist, it must never force a recompilation.
$(OBJDIR)/%.o: %.cu | objdir
	$(CC) -c $< -o $@ $(CFLAGS)

# Run the executables (building them first if needed)
run: all
	./full_prefix_sum.out
	./partial_prefix_sum.out

# Remove the generated artifacts
clean:
	$(RM) $(OBJDIR)/*.o
	$(RM) $(APPS)

.PHONY: all clean run objdir

chapter_08/ch8__full_prefix_sum.cu

+36-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include "nvixnu__populate_arrays_utils.h"
1717
#include "nvixnu__error_utils.h"
1818
#include "pmpp__prefix_sum.h"
19-
19+
#include "nvixnu__cuda_devices_props.h"
2020

2121

2222
__global__
@@ -140,3 +140,38 @@ void ch8__full_prefix_sum(env_e env, kernel_config_t config){
140140
return;
141141
}
142142

143+
144+
/**
 * Entry point of the full prefix sum example.
 *
 * Queries the properties of device 0, then runs ch8__full_prefix_sum three
 * times: the hierarchical three-phase Kogge-Stone version and the single-pass
 * Kogge-Stone version on the device, and finally the host version.
 *
 * @return 0
 */
int main(){
    // Properties of device 0 provide the launch limits used below.
    cudaDeviceProp props = nvixnu__get_cuda_device_props(0);

    // Shared memory per block: used as the section size of the 3-phase algorithm.
    const int shared_bytes = props.sharedMemPerBlock;
    // The same limit expressed as a number of double elements.
    const int shared_length = shared_bytes/sizeof(double);
    // Largest x dimension of a block: used as the number of threads per block.
    const int threads_per_block = props.maxThreadsDim[0];

    printf("Chapter 08\n");
    printf("Array with %d Elements\n", CH8__ARRAY_LENGTH);

    // Hierarchical three-phase Kogge-Stone on the device.
    printf("\n_____ full_prefix_sum [Hierarchical Three phase Kogge-Stone] _____\n\n");
    printf("\nRunning on Device with %d threads per block and section length equals to %d...", threads_per_block, shared_length);
    kernel_config_t three_phase_config = {
        .block_dim = {threads_per_block,1,1},
        .kernel_version = CH8__HIERARCHICAL_PREFIX_SUM_3_PHASE_KOGGE_STONE,
        .shared_memory_size = shared_bytes
    };
    ch8__full_prefix_sum(Device, three_phase_config);

    // Single-pass Kogge-Stone on the device: one double of shared memory per thread.
    printf("\n_____ full_prefix_sum [Single-pass Kogge-Stone] _____\n");
    printf("\nRunning on Device with %d threads per block", threads_per_block);
    kernel_config_t single_pass_config = {
        .block_dim = {threads_per_block,1,1},
        .kernel_version = CH8__SINGLE_PASS_PREFIX_SUM_KOGGE_STONE,
        .shared_memory_size = threads_per_block*sizeof(double)
    };
    ch8__full_prefix_sum(Device, single_pass_config);

    // Host version, called with an empty configuration.
    printf("\n_____ full_prefix_sum_CPU _____\n");
    kernel_config_t host_config = {};
    ch8__full_prefix_sum(Host, host_config);

    return 0;
}
177+

chapter_08/ch8__partial_prefix_sum.cu

+35-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include "nvixnu__populate_arrays_utils.h"
1717
#include "nvixnu__error_utils.h"
1818
#include "pmpp__prefix_sum.h"
19-
19+
#include "nvixnu__cuda_devices_props.h"
2020

2121
/**
2222
* This partial (or sectioned) host version is only for comparison purpose with the partial scan kernels
@@ -99,3 +99,37 @@ void ch8__partial_prefix_sum(env_e env, kernel_config_t config, const int sectio
9999

100100
return;
101101
}
102+
103+
/**
 * Entry point of the partial (sectioned) prefix sum example.
 *
 * Queries the properties of device 0, then runs ch8__partial_prefix_sum for
 * the Kogge-Stone, Brent-Kung and three-phase Kogge-Stone kernels on the
 * device. Each device group is followed by a host run with the matching
 * section length, for comparison.
 *
 * @return 0
 */
int main(){
    // Properties of device 0 provide the launch limits used below.
    cudaDeviceProp props = nvixnu__get_cuda_device_props(0);

    // Shared memory per block: used as the section size of the 3-phase algorithm.
    const int shared_bytes = props.sharedMemPerBlock;
    // The same limit expressed as a number of double elements.
    const int shared_length = shared_bytes/sizeof(double);
    // Largest x dimension of a block: used as the number of threads per block.
    const int threads_per_block = props.maxThreadsDim[0];

    printf("Chapter 08\n");
    printf("Array with %d Elements\n", CH8__ARRAY_LENGTH);

    // Kogge-Stone on the device.
    printf("\n_____ partial_prefix_sum [Kogge-Stone] _____\n\n");
    printf("\nRunning on Device with %d threads per block...", threads_per_block);
    kernel_config_t kogge_stone_config = {
        .block_dim = {threads_per_block, 1, 1},
        .kernel_version = CH8__PREFIX_SUM_KOGGE_STONE
    };
    ch8__partial_prefix_sum(Device, kogge_stone_config, 0);

    // Brent-Kung on the device.
    printf("\n_____ partial_prefix_sum [Brent-Kung] _____\n");
    printf("\nRunning on Device with %d threads per block...", threads_per_block);
    kernel_config_t brent_kung_config = {
        .block_dim = {threads_per_block, 1, 1},
        .kernel_version = CH8__PREFIX_SUM_BRENT_KUNG
    };
    ch8__partial_prefix_sum(Device, brent_kung_config, 0);

    // Host reference whose section length is the number of threads per block.
    printf("\n_____ partial_prefix_sum_CPU [For Kogge-Stone/Brent-Kung comparison] _____\n");
    kernel_config_t thread_bound_host_config = {};
    ch8__partial_prefix_sum(Host, thread_bound_host_config, threads_per_block);

    // Three-phase Kogge-Stone on the device, given the whole shared memory of a block.
    printf("\n_____ partial_prefix_sum [Three phase Kogge-Stone] _____\n");
    printf("\nRunning on Device with %d threads per block and section length equals to %d...", threads_per_block, shared_length);
    kernel_config_t three_phase_config = {
        .block_dim = {threads_per_block, 1, 1},
        .kernel_version = CH8__PREFIX_SUM_3_PHASE_KOGGE_STONE,
        .shared_memory_size = shared_bytes
    };
    ch8__partial_prefix_sum(Device, three_phase_config, 0);

    // Host reference whose section length is the shared-memory-bound length.
    printf("\n_____ partial_prefix_sum_CPU [For Three phase Kogge-Stone comparison] _____\n");
    kernel_config_t memory_bound_host_config = {};
    ch8__partial_prefix_sum(Host, memory_bound_host_config, shared_length);

    return 0;
}

0 commit comments

Comments
 (0)