Skip to content

Commit 3af2b6f

Browse files
committed
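Refactor: extract the duplicated tile-mapping code in renderForward and renderBackward into a new map2DcomputelocallyTo1DGrid helper, and rename the count/dev_count counters to block_count/block_count_dev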
1 parent bfebd49 commit 3af2b6f

File tree

1 file changed

+31
-34
lines changed

1 file changed

+31
-34
lines changed

cuda_rasterizer/rasterizer_impl.cu

Lines changed: 31 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -516,17 +516,44 @@ __global__ void map2DcomputelocallyTo1D(
516516
const bool* compute_locally,
517517
int* compute_locally_1D_2D_map,
518518
dim3 grid,
519-
int* count
519+
int* block_count
520520
) {
521521
int i = blockIdx.x * blockDim.x + threadIdx.x;
522522
if (i < tile_num) {
523523
if (compute_locally[i]) {
524-
int j = atomicAdd(count, 1);
524+
int j = atomicAdd(block_count, 1);
525525
compute_locally_1D_2D_map[j] = i;
526526
}
527527
}
528528
}
529529

530+
// Compacts the set of locally-computed tiles into a 1D index list and returns
// the 1D grid that covers exactly those tiles.
//
// Launches map2DcomputelocallyTo1D with one thread per tile. Each thread whose
// compute_locally flag is set reserves a slot through atomicAdd on a device
// counter and stores its tile index there, so the entries appear in the order
// the atomics complete, not in tile order.
//
// Parameters:
//   tile_num                  - number of tiles, i.e. number of flags to scan.
//   compute_locally           - per-tile flags read by the kernel; must be
//                               readable from device code.
//   compute_locally_1D_2D_map - output list written by the kernel; needs room
//                               for up to tile_num ints.
//   tile_grid                 - forwarded to the kernel unchanged.
//   debug                     - forwarded to CHECK_CUDA.
//
// Returns dim3(n, 1, 1), where n is the number of tiles flagged in
// compute_locally. n is 0 when no tile is flagged or tile_num is not positive.
dim3 map2DcomputelocallyTo1DGrid(
	const int tile_num,
	const bool* compute_locally,
	int* compute_locally_1D_2D_map,
	const dim3 tile_grid,
	bool debug
) {
	// Nothing to map. Returning here also avoids launching the kernel with a
	// grid of zero blocks, which is an invalid launch configuration.
	if (tile_num <= 0) {
		return dim3(0, 1, 1);
	}

	int block_count = 0;
	int* block_count_dev = nullptr;
	CHECK_CUDA(cudaMalloc(&block_count_dev, sizeof(int)), debug);
	// Zero the counter directly on the device; all-zero bytes are the int 0.
	CHECK_CUDA(cudaMemset(block_count_dev, 0, sizeof(int)), debug);

	// Perform the mapping on the device side
	map2DcomputelocallyTo1D<<<cdiv(tile_num, ONE_DIM_BLOCK_SIZE), ONE_DIM_BLOCK_SIZE>>>(
		tile_num,
		compute_locally,
		compute_locally_1D_2D_map,
		tile_grid,
		block_count_dev
	);

	// Read back the final counter value; this copy is issued after the kernel
	// on the same (default) stream.
	CHECK_CUDA(cudaMemcpy(&block_count, block_count_dev, sizeof(int), cudaMemcpyDeviceToHost), debug);
	CHECK_CUDA(cudaFree(block_count_dev), debug);

	return dim3(block_count, 1, 1);
}
556+
530557
int CudaRasterizer::Rasterizer::renderForward(
531558
std::function<char* (size_t)> geometryBuffer,
532559
std::function<char* (size_t)> binningBuffer,
@@ -632,24 +659,10 @@ int CudaRasterizer::Rasterizer::renderForward(
632659
timer.stop("60 identifyTileRanges");
633660

634661
timer.start("61 map2DcomputelocallyTo1D");
635-
int count = 0;
636662
int* compute_locally_1D_2D_map;
637-
int* dev_count;
638663
CHECK_CUDA(cudaMalloc(&compute_locally_1D_2D_map, tile_num * sizeof(int)), debug);
639-
CHECK_CUDA(cudaMalloc(&dev_count, sizeof(int)), debug);
640-
CHECK_CUDA(cudaMemcpy(dev_count, &count, sizeof(int), cudaMemcpyHostToDevice), debug);
641664

642-
// Perform the mapping on the device side
643-
map2DcomputelocallyTo1D<<<cdiv(tile_num, ONE_DIM_BLOCK_SIZE), ONE_DIM_BLOCK_SIZE>>>(
644-
tile_num,
645-
compute_locally,
646-
compute_locally_1D_2D_map,
647-
tile_grid,
648-
dev_count
649-
);
650-
651-
CHECK_CUDA(cudaMemcpy(&count, dev_count, sizeof(int), cudaMemcpyDeviceToHost), debug);
652-
dim3 tile_grid_1d(count, 1, 1);
665+
dim3 tile_grid_1d = map2DcomputelocallyTo1DGrid(tile_num, compute_locally, compute_locally_1D_2D_map, tile_grid, debug);
653666

654667
timer.stop("61 map2DcomputelocallyTo1D");
655668

@@ -791,7 +804,6 @@ int CudaRasterizer::Rasterizer::renderForward(
791804

792805
delete[] log_tmp;
793806
CHECK_CUDA(cudaFree(compute_locally_1D_2D_map), debug);
794-
CHECK_CUDA(cudaFree(dev_count), debug);
795807
return num_rendered;
796808
}
797809

@@ -828,24 +840,10 @@ void CudaRasterizer::Rasterizer::renderBackward(
828840
const int tile_num = tile_grid.x * tile_grid.y;
829841

830842
timer.start("61 map2DcomputelocallyTo1D");
831-
int count = 0;
832843
int* compute_locally_1D_2D_map;
833-
int* dev_count;
834844
CHECK_CUDA(cudaMalloc(&compute_locally_1D_2D_map, tile_num * sizeof(int)), debug);
835-
CHECK_CUDA(cudaMalloc(&dev_count, sizeof(int)), debug);
836-
CHECK_CUDA(cudaMemcpy(dev_count, &count, sizeof(int), cudaMemcpyHostToDevice), debug);
837-
838-
// Perform the mapping on the device side
839-
map2DcomputelocallyTo1D<<<cdiv(tile_num, ONE_DIM_BLOCK_SIZE), ONE_DIM_BLOCK_SIZE>>>(
840-
tile_num,
841-
compute_locally,
842-
compute_locally_1D_2D_map,
843-
tile_grid,
844-
dev_count
845-
);
846845

847-
CHECK_CUDA(cudaMemcpy(&count, dev_count, sizeof(int), cudaMemcpyDeviceToHost), debug);
848-
dim3 tile_grid_1d(count, 1, 1);
846+
dim3 tile_grid_1d = map2DcomputelocallyTo1DGrid(tile_num, compute_locally, compute_locally_1D_2D_map, tile_grid, debug);
849847

850848
timer.stop("61 map2DcomputelocallyTo1D");
851849

@@ -885,5 +883,4 @@ void CudaRasterizer::Rasterizer::renderBackward(
885883

886884
// Free used memory
887885
CHECK_CUDA(cudaFree(compute_locally_1D_2D_map), debug);
888-
CHECK_CUDA(cudaFree(dev_count), debug);
889886
}

0 commit comments

Comments
 (0)