-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
22 changed files
with
838 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
cmake_minimum_required(VERSION 3.17) | ||
project(benchmark_gpu CXX) | ||
|
||
set(CMAKE_CXX_STANDARD 14) | ||
|
||
add_executable(benchmark_gpu | ||
main.cpp) | ||
|
||
target_link_libraries(benchmark_gpu | ||
lib::common | ||
lib::gpu | ||
3rdparty::cxxopts | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#include <sstream> | ||
#include <vector> | ||
#include <cstdint> | ||
|
||
#include <config.h> | ||
#include <motif_finder_gpu_internal.h> | ||
#include <motif_finder_gpu_external.h> | ||
#include <fst_reader.h> | ||
|
||
using namespace std; | ||
|
||
void run() | ||
{ | ||
GpuInternalParams params; | ||
params.sequences = read_fasta("test.fst"); | ||
params.search_complementary = false; | ||
params.gpu_count = 1; | ||
params.unified_memory = false; | ||
params.motif_range_size = MOT_PER_CHUNK; | ||
params.threads_per_block = THREADS_PER_BLOCK; | ||
|
||
std::vector<uint16_t> res; | ||
find_motifs_internal_gpu(res, params); | ||
|
||
|
||
std::vector<uint32_t> motif_hashes; | ||
GpuExternalParams external_params(motif_hashes); | ||
find_motifs_external_gpu(res, params); | ||
} | ||
|
||
int main(int argc, char **argv) | ||
{ | ||
argc = 0; | ||
argv = 0; | ||
run(); | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
> 0 | ||
CGAATAATGGCGCAACGACC | ||
> 1 | ||
TCGCACCAAATATAGGCTGG | ||
> 2 | ||
AGCATCTGTCAAAAATGGGA | ||
> 3 | ||
GCCCCTCGGTAATTAAGGCA | ||
> 4 | ||
ACACCTTAAATTTAGGACAA | ||
> 5 | ||
AAAAAATAAAATTGGGCTTA | ||
> 6 | ||
AAAAAAGGCGACTCTGGGGT | ||
> 7 | ||
TAAACGCACTTAACAAATCG | ||
> 8 | ||
TCTCACTTTCTTTACTATTA | ||
> 9 | ||
CTGCGGACATAATCGTGATG | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
cmake_minimum_required(VERSION 3.17) | ||
project(motif_finder_gpu LANGUAGES CUDA) | ||
|
||
set(CMAKE_CXX_STANDARD 14) | ||
set(CMAKE_CUDA_STANDARD 14) | ||
include_directories(.) | ||
set(COMMON_NVCC_FLAGS -Xptxas -O3) | ||
|
||
set(ARCH "30 35 37 50 52 60 61 70 80 86") | ||
|
||
set(SOURCES | ||
finder_kernel.cu | ||
finder_kernel.cuh | ||
gpu_cuda_params.h | ||
gpu_info.cpp | ||
gpu_info.h | ||
gpu_memory.cu | ||
gpu_memory.h | ||
gpu_range_motif_finder_external.cpp | ||
gpu_range_motif_finder_external.h | ||
gpu_range_motif_finder_internal.cpp | ||
gpu_range_motif_finder_internal.h | ||
motif_finder_gpu_external.h | ||
motif_finder_gpu_external.cpp | ||
motif_finder_gpu_internal.cpp | ||
motif_finder_gpu_internal.h | ||
sequence_hashes_params.h) | ||
|
||
add_library(${PROJECT_NAME} ${SOURCES}) | ||
add_library(lib::gpu ALIAS ${PROJECT_NAME}) | ||
|
||
set_target_properties( | ||
${PROJECT_NAME} | ||
PROPERTIES | ||
CUDA_SEPARABLE_COMPILATION ON | ||
CUDA_ARCHITECTURES ${ARCH}) | ||
|
||
|
||
target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${COMMON_NVCC_FLAGS}>) | ||
target_link_libraries(${PROJECT_NAME} lib::common) | ||
target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/.) | ||
target_include_directories(${PROJECT_NAME} PRIVATE .) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
#include "finder_kernel.cuh" | ||
|
||
namespace { | ||
__device__ uint16_t get_occurrence(uint32_t motif_hash, | ||
uint32_t *seq_hashes, | ||
uint32_t sequences_count, | ||
uint32_t *hash_lengths, | ||
uint32_t *hash_begins, | ||
uint32_t threads_per_block) | ||
{ | ||
// Shared memory size bytes: hash length * sizeof (uint32_t) | ||
// Max sequence length: shared memory size / 4bytes (16-25k) | ||
extern __shared__ uint32_t sequence[]; | ||
|
||
// Max number of sequences: 64k | ||
uint16_t result = 0; | ||
|
||
for (uint32_t j = 0; j < sequences_count; j++) { | ||
uint32_t seq_offset = hash_begins[j]; | ||
uint32_t seq_length = hash_lengths[j]; | ||
|
||
// Load sequence to fast shared memory | ||
for (uint32_t i = threadIdx.x; i < seq_length; i += threads_per_block) { | ||
if (i < seq_length) { | ||
sequence[i] = seq_hashes[i + seq_offset]; | ||
} | ||
} | ||
|
||
__syncthreads(); | ||
// Match motif to sequence | ||
uint8_t found = 0; | ||
for (uint32_t i = 0; (i < length) && !found; i++) { | ||
found = (sequence[i] & motif_hash) == sequence[i]; | ||
} | ||
result += found; | ||
__syncthreads(); | ||
} | ||
return result; | ||
} | ||
|
||
__device__ uint32_t motif_index_to_hash(uint32_t motif_index) { | ||
uint32_t result = 0; | ||
uint32_t mult = 1; | ||
for (uint32_t i = MOTIV_LEN - 1; i >= 0; i--) { | ||
result += (motif_index % ALPH_SIZE + 1) * mult; | ||
mult *= HASH_BASE; | ||
motif_index /= ALPH_SIZE; | ||
} | ||
return result; | ||
} | ||
|
||
__global__ void motif_finder_kernel_external( | ||
uint16_t *weights_out, | ||
uint32_t *seq_hashes, | ||
uint32_t sequences_count, | ||
uint32_t *hash_lengths, | ||
uint32_t *hash_begins, | ||
uint32_t *mot_hashes, | ||
uint32_t mots_to_copy, | ||
uint32_t threads_per_block) | ||
{ | ||
uint32_t index = blockIdx.x * blockDim.x + threadIdx.x; | ||
uint32_t motif_hash = (index <= mots_to_copy) ? mot_hashes[index] : 0; | ||
weights_out[index] = get_occurrence(motif_hash, seq_hashes, sequences_count, hash_lengths, hash_begins, threads_per_block); | ||
} | ||
|
||
__global__ void motif_finder_kernel_internal( | ||
uint16_t *weights_out, | ||
uint32_t *seq_hashes, | ||
uint32_t sequences_count, | ||
uint32_t *hash_lengths, | ||
uint32_t *hash_begins, | ||
uint32_t mots_to_copy, | ||
uint32_t motif_idx_offset, | ||
uint32_t threads_per_block) | ||
{ | ||
uint32_t index = blockIdx.x * blockDim.x + threadIdx.x; | ||
uint32_t motif_hash = (index <= mots_to_copy) ? motif_index_to_hash(motif_idx_offset + index) : 0; | ||
weights_out[index] = get_occurrence(motif_hash, seq_hashes, sequences_count, hash_lengths, hash_begins, threads_per_block); | ||
} | ||
} // namespace | ||
|
||
void motif_finder_gpu_internal( | ||
uint16_t *weights_out, | ||
uint32_t *seq_hashes, | ||
uint32_t sequences_count, | ||
uint32_t *hash_lengths, | ||
uint32_t *hash_begins, | ||
uint32_t mots_to_copy, | ||
uint32_t motif_range_size, | ||
uint32_t shared_memory_size, | ||
uint32_t motif_idx_offset, | ||
uint32_t threads_per_block) | ||
{ | ||
motif_finder_kernel_internal<<<motif_range_size/threads_per_block, threads_per_block, shared_memory_size>>>( | ||
weights_out, | ||
seq_hashes, | ||
sequences_count, | ||
hash_lengths, | ||
hash_begins, | ||
mots_to_copy, | ||
motif_idx_offset, | ||
threads_per_block); | ||
cudaDeviceSynchronize(); | ||
} | ||
|
||
void motif_finder_gpu_external( | ||
uint16_t *weights_out, | ||
uint32_t *seq_hashes, | ||
uint32_t sequences_count, | ||
uint32_t *hash_lengths, | ||
uint32_t *hash_begins, | ||
uint32_t *motif_hashes, | ||
uint32_t mots_to_copy, | ||
uint32_t motif_range_size, | ||
uint32_t shared_memory_size, | ||
uint32_t motif_idx_offset, | ||
uint32_t threads_per_block) | ||
{ | ||
motif_finder_kernel_external<<<motif_range_size/threads_per_block, threads_per_block, shared_memory_size>>>( | ||
weights_out, | ||
seq_hashes, | ||
sequences_count, | ||
hash_lengths, | ||
hash_begins, | ||
motif_hashes, | ||
mots_to_copy, | ||
motif_idx_offset, | ||
threads_per_block); | ||
cudaDeviceSynchronize(); | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#ifndef MOTIF_FINDER_FINDER_KERNEL_CUH | ||
#define MOTIF_FINDER_FINDER_KERNEL_CUH | ||
|
||
// these are wrapper functions for cuda kernels | ||
|
||
void motif_finder_gpu_external( | ||
uint16_t *weights_out, | ||
uint32_t *seq_hashes, | ||
uint32_t sequences_count, | ||
uint32_t *hash_lengths, | ||
uint32_t *hash_begins, | ||
uint32_t *motif_hashes, | ||
uint32_t mots_to_copy, | ||
uint32_t motif_range_size, | ||
uint32_t shared_memory_size, | ||
uint32_t motif_idx_offset, | ||
uint32_t threads_per_block); | ||
|
||
void motif_finder_gpu_internal( | ||
uint16_t *weights_out, | ||
uint32_t *seq_hashes, | ||
uint32_t sequences_count, | ||
uint32_t *hash_lengths, | ||
uint32_t *hash_begins, | ||
uint32_t mots_to_copy, | ||
uint32_t motif_range_size, | ||
uint32_t shared_memory_size, | ||
uint32_t motif_idx_offset, | ||
uint32_t threads_per_block); | ||
|
||
#endif //MOTIF_FINDER_FINDER_KERNEL_CUH |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#ifndef MOTIF_FINDER_GPU_CUDA_PARAMS_H | ||
#define MOTIF_FINDER_GPU_CUDA_PARAMS_H | ||
#include <config.h> | ||
|
||
struct GpuCudaParams { | ||
bool unified_memory {UNIFIED_MEMORY_SUPPORT}; | ||
uint32_t motif_range_size {MOT_PER_CHUNK}; | ||
uint32_t threads_per_block {THREADS_PER_BLOCK}; | ||
}; | ||
|
||
#endif //MOTIF_FINDER_GPU_CUDA_PARAMS_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#include "gpu_info.h" | ||
|
||
#include <stdio.h> | ||
#include <cuda_runtime_api.h> | ||
#include <cuda.h> | ||
|
||
int gpu_count() | ||
{ | ||
int nDevices; | ||
cudaGetDeviceCount(&nDevices); | ||
//printf("Device Count: %d\n", nDevices); | ||
for (int i = 0; i < nDevices; i++) { | ||
cudaDeviceProp prop; | ||
cudaGetDeviceProperties(&prop, i); | ||
if (false) { | ||
printf("Device Number: %d\n", i); | ||
printf(" Device name: %s\n", prop.name); | ||
printf(" Memory Clock Rate (KHz): %d\n", | ||
prop.memoryClockRate); | ||
printf(" Memory Bus Width (bits): %d\n", | ||
prop.memoryBusWidth); | ||
printf(" Peak Memory Bandwidth (GB/s): %f\n\n", | ||
2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6); | ||
} | ||
} | ||
return nDevices; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#ifndef MOTIF_FINDER_GPU_INFO_H | ||
#define MOTIF_FINDER_GPU_INFO_H | ||
|
||
// get number of available GPUs | ||
int gpu_count(); | ||
|
||
#endif //MOTIF_FINDER_GPU_INFO_H |
Oops, something went wrong.