Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/utilities/_hypre_utilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ typedef struct
/* Handle-level accessors: each one forwards to the matching hypre_DeviceData*
 * accessor applied to the handle's device data (hypre_HandleDeviceData). */
#define hypre_HandleCubDevAllocator(hypre_handle) hypre_DeviceDataCubDevAllocator(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleCubUvmAllocator(hypre_handle) hypre_DeviceDataCubUvmAllocator(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleDevice(hypre_handle) hypre_DeviceDataDevice(hypre_HandleDeviceData(hypre_handle))
/* UVM flag of the device data; the HYPRE_MEM_PREFETCH_* macros test it before prefetching */
#define hypre_HandleDeviceUVM(hypre_handle) hypre_DeviceDataDeviceUVM(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleDeviceMaxWorkGroupSize(hypre_handle) hypre_DeviceDataDeviceMaxWorkGroupSize(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleDeviceMaxShmemPerBlock(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlock(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleDeviceMaxShmemPerBlockInited(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlockInited(hypre_HandleDeviceData(hypre_handle))
Expand Down
30 changes: 24 additions & 6 deletions src/utilities/_hypre_utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,25 +335,41 @@ using hypre_DeviceItem = void*;
// Macro for device memory prefetching (CUDART 13.0+)
// Prefetches `size` bytes at `ptr` to the hypre handle's device on `stream`,
// using the cudaMemLocation-based cudaMemPrefetchAsync signature.
// The call is issued only when hypre_HandleDeviceUVM(hypre_handle()) is
// nonzero; otherwise the macro is a no-op. There must be no prefetch outside
// the guard: prefetching to a GPU requires concurrent managed access, and a
// second unguarded call would also duplicate the request.
#define HYPRE_MEM_PREFETCH_DEVICE(ptr, size, stream) \
do { \
   if (hypre_HandleDeviceUVM(hypre_handle())) \
   { \
      cudaMemLocation hypre_prefetch_loc_ = {cudaMemLocationTypeDevice, hypre_HandleDevice(hypre_handle())}; \
      HYPRE_CUDA_CALL(cudaMemPrefetchAsync(ptr, size, hypre_prefetch_loc_, 0, stream)); \
   } \
} while (0)

// Macro for host memory prefetching (CUDART 13.0+)
// Prefetches `size` bytes at `ptr` to host memory on `stream`, using the
// cudaMemLocation-based cudaMemPrefetchAsync signature.
// The call is issued only when hypre_HandleDeviceUVM(hypre_handle()) is
// nonzero; otherwise the macro is a no-op. No prefetch is issued outside the
// guard, so the request is made at most once per expansion.
#define HYPRE_MEM_PREFETCH_HOST(ptr, size, stream) \
do { \
   if (hypre_HandleDeviceUVM(hypre_handle())) \
   { \
      cudaMemLocation hypre_prefetch_loc_ = {cudaMemLocationTypeHost, cudaCpuDeviceId}; \
      HYPRE_CUDA_CALL(cudaMemPrefetchAsync(ptr, size, hypre_prefetch_loc_, 0, stream)); \
   } \
} while (0)

#else
// Macro for device memory prefetching (< CUDART 13.0)
// Prefetches `size` bytes at `ptr` to the hypre handle's device on `stream`,
// using the legacy cudaMemPrefetchAsync signature that takes a device id.
// The whole body is one do { ... } while (0) statement, and the call is
// issued only when hypre_HandleDeviceUVM(hypre_handle()) is nonzero;
// otherwise the macro is a no-op.
#define HYPRE_MEM_PREFETCH_DEVICE(ptr, size, stream) \
do { \
   if (hypre_HandleDeviceUVM(hypre_handle())) \
   { \
      HYPRE_CUDA_CALL(cudaMemPrefetchAsync(ptr, size, hypre_HandleDevice(hypre_handle()), stream)); \
   } \
} while (0)

// Macro for host memory prefetching (< CUDART 13.0)
// Prefetches `size` bytes at `ptr` to host memory (cudaCpuDeviceId) on
// `stream`, using the legacy cudaMemPrefetchAsync signature.
// The whole body is one do { ... } while (0) statement, and the call is
// issued only when hypre_HandleDeviceUVM(hypre_handle()) is nonzero;
// otherwise the macro is a no-op.
#define HYPRE_MEM_PREFETCH_HOST(ptr, size, stream) \
do { \
   if (hypre_HandleDeviceUVM(hypre_handle())) \
   { \
      HYPRE_CUDA_CALL(cudaMemPrefetchAsync(ptr, size, cudaCpuDeviceId, stream)); \
   } \
} while (0)
#endif

#endif /* defined(HYPRE_USING_CUDA) */
Expand Down Expand Up @@ -986,6 +1002,7 @@ struct hypre_DeviceData
HYPRE_Int device_max_work_group_size;
#else
HYPRE_Int device;
HYPRE_Int device_uvm;
#endif
hypre_int device_max_shmem_per_block[3];
/* by default, hypre puts GPU computations in this stream
Expand Down Expand Up @@ -1015,6 +1032,7 @@ struct hypre_DeviceData
};

/* Field accessors for struct hypre_DeviceData; `data` is a pointer to the struct. */
#define hypre_DeviceDataDevice(data) ((data) -> device)
/* device_uvm flag; read through hypre_HandleDeviceUVM by the HYPRE_MEM_PREFETCH_* macros */
#define hypre_DeviceDataDeviceUVM(data) ((data) -> device_uvm)
#define hypre_DeviceDataDeviceMaxWorkGroupSize(data) ((data) -> device_max_work_group_size)
#define hypre_DeviceDataDeviceMaxShmemPerBlock(data) ((data) -> device_max_shmem_per_block)
/* Entry [2] of the 3-element device_max_shmem_per_block array */
#define hypre_DeviceDataDeviceMaxShmemPerBlockInited(data) (((data) -> device_max_shmem_per_block)[2])
Expand Down
30 changes: 24 additions & 6 deletions src/utilities/device_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,25 +117,41 @@ using hypre_DeviceItem = void*;
// Macro for device memory prefetching (CUDART 13.0+)
// Prefetches `size` bytes at `ptr` to the hypre handle's device on `stream`,
// using the cudaMemLocation-based cudaMemPrefetchAsync signature.
// The call is issued only when hypre_HandleDeviceUVM(hypre_handle()) is
// nonzero; otherwise the macro is a no-op. There must be no prefetch outside
// the guard: prefetching to a GPU requires concurrent managed access, and a
// second unguarded call would also duplicate the request.
#define HYPRE_MEM_PREFETCH_DEVICE(ptr, size, stream) \
do { \
   if (hypre_HandleDeviceUVM(hypre_handle())) \
   { \
      cudaMemLocation hypre_prefetch_loc_ = {cudaMemLocationTypeDevice, hypre_HandleDevice(hypre_handle())}; \
      HYPRE_CUDA_CALL(cudaMemPrefetchAsync(ptr, size, hypre_prefetch_loc_, 0, stream)); \
   } \
} while (0)

// Macro for host memory prefetching (CUDART 13.0+)
// Prefetches `size` bytes at `ptr` to host memory on `stream`, using the
// cudaMemLocation-based cudaMemPrefetchAsync signature.
// The call is issued only when hypre_HandleDeviceUVM(hypre_handle()) is
// nonzero; otherwise the macro is a no-op. No prefetch is issued outside the
// guard, so the request is made at most once per expansion.
#define HYPRE_MEM_PREFETCH_HOST(ptr, size, stream) \
do { \
   if (hypre_HandleDeviceUVM(hypre_handle())) \
   { \
      cudaMemLocation hypre_prefetch_loc_ = {cudaMemLocationTypeHost, cudaCpuDeviceId}; \
      HYPRE_CUDA_CALL(cudaMemPrefetchAsync(ptr, size, hypre_prefetch_loc_, 0, stream)); \
   } \
} while (0)

#else
// Macro for device memory prefetching (< CUDART 13.0)
// Prefetches `size` bytes at `ptr` to the hypre handle's device on `stream`,
// using the legacy cudaMemPrefetchAsync signature that takes a device id.
// The whole body is one do { ... } while (0) statement, and the call is
// issued only when hypre_HandleDeviceUVM(hypre_handle()) is nonzero;
// otherwise the macro is a no-op.
#define HYPRE_MEM_PREFETCH_DEVICE(ptr, size, stream) \
do { \
   if (hypre_HandleDeviceUVM(hypre_handle())) \
   { \
      HYPRE_CUDA_CALL(cudaMemPrefetchAsync(ptr, size, hypre_HandleDevice(hypre_handle()), stream)); \
   } \
} while (0)

// Macro for host memory prefetching (< CUDART 13.0)
// Prefetches `size` bytes at `ptr` to host memory (cudaCpuDeviceId) on
// `stream`, using the legacy cudaMemPrefetchAsync signature.
// The whole body is one do { ... } while (0) statement, and the call is
// issued only when hypre_HandleDeviceUVM(hypre_handle()) is nonzero;
// otherwise the macro is a no-op.
#define HYPRE_MEM_PREFETCH_HOST(ptr, size, stream) \
do { \
   if (hypre_HandleDeviceUVM(hypre_handle())) \
   { \
      HYPRE_CUDA_CALL(cudaMemPrefetchAsync(ptr, size, cudaCpuDeviceId, stream)); \
   } \
} while (0)
#endif

#endif /* defined(HYPRE_USING_CUDA) */
Expand Down Expand Up @@ -768,6 +784,7 @@ struct hypre_DeviceData
HYPRE_Int device_max_work_group_size;
#else
HYPRE_Int device;
HYPRE_Int device_uvm;
#endif
hypre_int device_max_shmem_per_block[3];
/* by default, hypre puts GPU computations in this stream
Expand Down Expand Up @@ -797,6 +814,7 @@ struct hypre_DeviceData
};

/* Field accessors for struct hypre_DeviceData; `data` is a pointer to the struct. */
#define hypre_DeviceDataDevice(data) ((data) -> device)
/* device_uvm flag; read through hypre_HandleDeviceUVM by the HYPRE_MEM_PREFETCH_* macros */
#define hypre_DeviceDataDeviceUVM(data) ((data) -> device_uvm)
#define hypre_DeviceDataDeviceMaxWorkGroupSize(data) ((data) -> device_max_work_group_size)
#define hypre_DeviceDataDeviceMaxShmemPerBlock(data) ((data) -> device_max_shmem_per_block)
/* Entry [2] of the 3-element device_max_shmem_per_block array */
#define hypre_DeviceDataDeviceMaxShmemPerBlockInited(data) (((data) -> device_max_shmem_per_block)[2])
Expand Down
24 changes: 13 additions & 11 deletions src/utilities/general.c
Original file line number Diff line number Diff line change
Expand Up @@ -363,18 +363,20 @@ HYPRE_DeviceInitialize(void)
hypre_CudaCompileFlagCheck();
#endif

#if defined(HYPRE_USING_DEVICE_POOL)
/* Keep this check here at the end of HYPRE_Initialize()
* Make sure that device pool allocator has not been setup in HYPRE_Initialize,
* otherwise users are not able to set all the parameters
*/
if ( hypre_HandleCubDevAllocator(handle) ||
hypre_HandleCubUvmAllocator(handle) )
/* Check if OS supports UVM */
#if defined(HYPRE_USING_CUDA) && defined(HYPRE_USING_UNIFIED_MEMORY)
{
char msg[256];
hypre_sprintf(msg, "%s %s", "ERROR: device pool allocators have been created in", __func__);
hypre_error_w_msg(-1, msg);
}
struct cudaDeviceProp deviceProp;

HYPRE_CUDA_CALL( cudaGetDevice(&device_id) );
HYPRE_CUDA_CALL( cudaGetDeviceProperties(&deviceProp, device_id) );

if (deviceProp.managedMemory == 1 && deviceProp.concurrentManagedAccess == 1)
{
hypre_HandleDeviceUVM(handle) = 1;
}
}

#endif

#endif /* if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) */
Expand Down
1 change: 1 addition & 0 deletions src/utilities/handle.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ typedef struct
/* Handle-level accessors: each one forwards to the matching hypre_DeviceData*
 * accessor applied to the handle's device data (hypre_HandleDeviceData). */
#define hypre_HandleCubDevAllocator(hypre_handle) hypre_DeviceDataCubDevAllocator(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleCubUvmAllocator(hypre_handle) hypre_DeviceDataCubUvmAllocator(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleDevice(hypre_handle) hypre_DeviceDataDevice(hypre_HandleDeviceData(hypre_handle))
/* UVM flag of the device data; the HYPRE_MEM_PREFETCH_* macros test it before prefetching */
#define hypre_HandleDeviceUVM(hypre_handle) hypre_DeviceDataDeviceUVM(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleDeviceMaxWorkGroupSize(hypre_handle) hypre_DeviceDataDeviceMaxWorkGroupSize(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleDeviceMaxShmemPerBlock(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlock(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleDeviceMaxShmemPerBlockInited(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlockInited(hypre_HandleDeviceData(hypre_handle))
Expand Down