|
16 | 16 | #include "nvixnu__populate_arrays_utils.h"
|
17 | 17 | #include "nvixnu__error_utils.h"
|
18 | 18 | #include "pmpp__prefix_sum.h"
|
19 |
| - |
| 19 | +#include "nvixnu__cuda_devices_props.h" |
20 | 20 |
|
21 | 21 | /**
|
22 | 22 | * This partial (or sectioned) host version is only meant for comparison with the partial scan kernels
|
@@ -99,3 +99,37 @@ void ch8__partial_prefix_sum(env_e env, kernel_config_t config, const int sectio
|
99 | 99 |
|
100 | 100 | return;
|
101 | 101 | }
|
| 102 | + |
| 103 | +int main(){ |
| 104 | + //Gets the max length of shared memory to use as SECTION_SIZE of the 3-phase algorithm |
| 105 | + cudaDeviceProp device_props = nvixnu__get_cuda_device_props(0); |
| 106 | + const int memory_bound_section_size = device_props.sharedMemPerBlock; |
| 107 | + const int memory_bound_section_length = memory_bound_section_size/sizeof(double); |
| 108 | + const int thread_bound_section_length = device_props.maxThreadsDim[0]; |
| 109 | + |
| 110 | + printf("Chapter 08\n"); |
| 111 | + printf("Array with %d Elements\n", CH8__ARRAY_LENGTH); |
| 112 | + |
| 113 | + printf("\n_____ partial_prefix_sum [Kogge-Stone] _____\n\n"); |
| 114 | + |
| 115 | + printf("\nRunning on Device with %d threads per block...", thread_bound_section_length); |
| 116 | + ch8__partial_prefix_sum(Device, {.block_dim = {thread_bound_section_length, 1, 1}, .kernel_version = CH8__PREFIX_SUM_KOGGE_STONE}, 0); |
| 117 | + |
| 118 | + printf("\n_____ partial_prefix_sum [Brent-Kung] _____\n"); |
| 119 | + |
| 120 | + printf("\nRunning on Device with %d threads per block...", thread_bound_section_length); |
| 121 | + ch8__partial_prefix_sum(Device, {.block_dim = {thread_bound_section_length, 1, 1}, .kernel_version = CH8__PREFIX_SUM_BRENT_KUNG}, 0); |
| 122 | + |
| 123 | + printf("\n_____ partial_prefix_sum_CPU [For Kogge-Stone/Brent-Kung comparison] _____\n"); |
| 124 | + ch8__partial_prefix_sum(Host, {}, thread_bound_section_length); |
| 125 | + |
| 126 | + printf("\n_____ partial_prefix_sum [Three phase Kogge-Stone] _____\n"); |
| 127 | + |
| 128 | + printf("\nRunning on Device with %d threads per block and section length equals to %d...", thread_bound_section_length, memory_bound_section_length); |
| 129 | + ch8__partial_prefix_sum(Device, {.block_dim = {thread_bound_section_length, 1, 1}, .kernel_version = CH8__PREFIX_SUM_3_PHASE_KOGGE_STONE, .shared_memory_size = memory_bound_section_size}, 0); |
| 130 | + |
| 131 | + printf("\n_____ partial_prefix_sum_CPU [For Three phase Kogge-Stone comparison] _____\n"); |
| 132 | + ch8__partial_prefix_sum(Host, {}, memory_bound_section_length); |
| 133 | + |
| 134 | + return 0; |
| 135 | +} |
0 commit comments