From b504c8eb08d4080484441f0a65bf11e57df3bc2d Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov
Date: Sun, 1 Dec 2024 16:57:33 +0200
Subject: [PATCH] UCT/GDR_COPY: Optimize memory registration in fast-path

---
Reviewer notes (commentary only; text between the "---" line and the diff
is not part of the commit message):

* ucs_rcache_lookup(), added to rcache.inl, takes rcache->pgt_lock for
  reading, calls ucs_rcache_lookup_unsafe() with the same arguments,
  releases the lock and returns whatever the unsafe lookup returned.

* uct_gdr_copy_mem_rcache_reg() now calls ucs_rcache_lookup() first, with
  GPU_PAGE_SIZE alignment and PROT_READ | PROT_WRITE. A non-NULL result
  jumps to the new "out" label and skips ucs_rcache_get(); a NULL result
  falls through to the existing ucs_rcache_get() call, which still returns
  its status on failure.

* NOTE(review): ucs_assert(rregion->refcount > 0) after "out" is now reached
  from the lookup path too, so that path presumably relies on
  ucs_rcache_lookup_unsafe() taking a reference on the region. Its body is
  not part of this patch - confirm, and confirm that whatever it updates is
  safe while only the read side of pgt_lock is held.

* NOTE(review): in this copy every #include other than "rcache_int.h" has no
  header name and the From: line has no address; presumably the <...> parts
  were stripped when the patch was extracted. Restore them from the original
  commit before applying.

* NOTE(review): this copy had lost its line breaks and indentation. They are
  restored here so that each hunk matches the line counts in its header, but
  the placement of blank context lines and the leading whitespace should be
  checked against the target files.

 src/ucs/memory/rcache.inl           | 12 ++++++++++++
 src/uct/cuda/gdr_copy/gdr_copy_md.c |  8 ++++++++
 2 files changed, 20 insertions(+)

diff --git a/src/ucs/memory/rcache.inl b/src/ucs/memory/rcache.inl
index 16d30da2ca4..25aac2e9e0e 100644
--- a/src/ucs/memory/rcache.inl
+++ b/src/ucs/memory/rcache.inl
@@ -8,6 +8,7 @@
 #define UCS_RCACHE_INL_
 
 #include "rcache_int.h"
+#include
 
 static UCS_F_ALWAYS_INLINE int
 ucs_rcache_region_test(ucs_rcache_region_t *region, int prot, size_t alignment)
@@ -80,6 +81,17 @@ ucs_rcache_lookup_unsafe(ucs_rcache_t *rcache, void *address, size_t length,
 
     return region;
 }
+static UCS_F_ALWAYS_INLINE ucs_rcache_region_t *
+ucs_rcache_lookup(ucs_rcache_t *rcache, void *address, size_t length,
+                  size_t alignment, int prot)
+{
+    ucs_rcache_region_t *region;
+
+    ucs_rwlock_read_lock(&rcache->pgt_lock);
+    region = ucs_rcache_lookup_unsafe(rcache, address, length, alignment, prot);
+    ucs_rwlock_read_unlock(&rcache->pgt_lock);
+    return region;
+}
 
 static UCS_F_ALWAYS_INLINE void
 ucs_rcache_region_put_unsafe(ucs_rcache_t *rcache, ucs_rcache_region_t *region)
diff --git a/src/uct/cuda/gdr_copy/gdr_copy_md.c b/src/uct/cuda/gdr_copy/gdr_copy_md.c
index f613dc4d4df..313bcd592f1 100644
--- a/src/uct/cuda/gdr_copy/gdr_copy_md.c
+++ b/src/uct/cuda/gdr_copy/gdr_copy_md.c
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -342,12 +343,19 @@ uct_gdr_copy_mem_rcache_reg(uct_md_h uct_md, void *address, size_t length,
     ucs_status_t status;
     uct_gdr_copy_mem_t *memh;
 
+    rregion = ucs_rcache_lookup(md->rcache, address, length, GPU_PAGE_SIZE,
+                                PROT_READ | PROT_WRITE);
+    if (rregion != NULL) {
+        goto out;
+    }
+
     status = ucs_rcache_get(md->rcache, address, length, GPU_PAGE_SIZE,
                             PROT_READ | PROT_WRITE, &flags, &rregion);
     if (status != UCS_OK) {
         return status;
     }
 
+out:
     ucs_assert(rregion->refcount > 0);
     memh = &ucs_derived_of(rregion, uct_gdr_copy_rcache_region_t)->memh;
     *memh_p = memh;