Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions HeterogeneousCore/CUDAServices/src/CUDAService.cc
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ void *CUDAService::allocate_device(int dev, size_t nbytes, cuda::stream_t<>& str
}

void CUDAService::free_device(int device, void *ptr) {
allocator_->deviceAllocator.DeviceFree(device, ptr);
cuda::throw_if_error(allocator_->deviceAllocator.DeviceFree(device, ptr));
}

void *CUDAService::allocate_host(size_t nbytes, cuda::stream_t<>& stream) {
Expand All @@ -456,5 +456,5 @@ void *CUDAService::allocate_host(size_t nbytes, cuda::stream_t<>& stream) {
}

void CUDAService::free_host(void *ptr) {
allocator_->hostAllocator.HostFree(ptr);
cuda::throw_if_error(allocator_->hostAllocator.HostFree(ptr));
}
29 changes: 19 additions & 10 deletions HeterogeneousCore/CUDAServices/src/CachingHostAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -395,14 +395,23 @@ struct CachingHostAllocator
found = true;
search_key = *block_itr;
search_key.associated_stream = active_stream;
if(search_key.device != device) {
// If "associated" device changes, need to re-create the event on the right device
if (CubDebug(error = cudaSetDevice(search_key.device))) return error;
if (CubDebug(error = cudaEventDestroy(search_key.ready_event))) return error;
if (CubDebug(error = cudaSetDevice(device))) return error;
if (CubDebug(error = cudaEventCreateWithFlags(&search_key.ready_event, cudaEventDisableTiming))) return error;
search_key.device = device;
}

live_blocks.insert(search_key);

// Remove from free blocks
cached_bytes.free -= search_key.bytes;
cached_bytes.live += search_key.bytes;

if (debug) _CubLog("\tHost reused cached block at %p (%lld bytes) for stream %lld (previously associated with stream %lld).\n",
search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) block_itr->associated_stream);
if (debug) _CubLog("\tHost reused cached block at %p (%lld bytes) for stream %lld on device %lld (previously associated with stream %lld).\n",
search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.device, (long long) block_itr->associated_stream);

cached_blocks.erase(block_itr);

Expand All @@ -423,8 +432,8 @@ struct CachingHostAllocator
if (CubDebug(error = cudaHostAlloc(&search_key.d_ptr, search_key.bytes, cudaHostAllocDefault)) == cudaErrorMemoryAllocation)
{
// The allocation attempt failed: free all cached blocks on device and retry
if (debug) _CubLog("\tHost failed to allocate %lld bytes for stream %lld, retrying after freeing cached allocations",
(long long) search_key.bytes, (long long) search_key.associated_stream);
if (debug) _CubLog("\tHost failed to allocate %lld bytes for stream %lld on device %lld, retrying after freeing cached allocations",
(long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.device);

error = cudaSuccess; // Reset the error we will return
cudaGetLastError(); // Reset CUDART's error
Expand Down Expand Up @@ -476,8 +485,8 @@ struct CachingHostAllocator
cached_bytes.live += search_key.bytes;
mutex.Unlock();

if (debug) _CubLog("\tHost allocated new host block at %p (%lld bytes associated with stream %lld).\n",
search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream);
if (debug) _CubLog("\tHost allocated new host block at %p (%lld bytes associated with stream %lld on device %lld).\n",
search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.device);
}

// Copy host pointer to output parameter
Expand Down Expand Up @@ -523,8 +532,8 @@ struct CachingHostAllocator
cached_blocks.insert(search_key);
cached_bytes.free += search_key.bytes;

if (debug) _CubLog("\tHost returned %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n",
(long long) search_key.bytes, (long long) search_key.associated_stream, (long long) cached_blocks.size(),
if (debug) _CubLog("\tHost returned %lld bytes from associated stream %lld on device %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n",
(long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.device, (long long) cached_blocks.size(),
(long long) cached_bytes.free, (long long) live_blocks.size(), (long long) cached_bytes.live);
}
}
Expand All @@ -547,8 +556,8 @@ struct CachingHostAllocator
if (CubDebug(error = cudaFreeHost(d_ptr))) return error;
if (CubDebug(error = cudaEventDestroy(search_key.ready_event))) return error;

if (debug) _CubLog("\tHost freed %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n",
(long long) search_key.bytes, (long long) search_key.associated_stream, (long long) cached_blocks.size(), (long long) cached_bytes.free, (long long) live_blocks.size(), (long long) cached_bytes.live);
if (debug) _CubLog("\tHost freed %lld bytes from associated stream %lld on device %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n",
(long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.device, (long long) cached_blocks.size(), (long long) cached_bytes.free, (long long) live_blocks.size(), (long long) cached_bytes.live);
}

// Reset device
Expand Down