Skip to content

Commit 853475e

Browse files
authored
Merge pull request #10405 from Akshay-Venkatesh/topic/cuda-ipc-buffer-id-va-recycle
UCT/CUDA_IPC: Use buffer id to detect VA recycling
2 parents c208f47 + 21b4c78 commit 853475e

File tree

3 files changed

+36
-39
lines changed

3 files changed

+36
-39
lines changed

src/uct/cuda/cuda_ipc/cuda_ipc_cache.c

+12-18
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ static ucs_status_t uct_cuda_ipc_close_memhandle(uct_cuda_ipc_cache_region_t *re
124124
(CUdeviceptr)region->mapped_addr, region->key.b_len));
125125
}
126126
} else if (region->key.ph.handle_type == UCT_CUDA_IPC_KEY_HANDLE_TYPE_MEMPOOL) {
127-
return UCT_CUDADRV_FUNC_LOG_WARN(cuMemPoolDestroy(region->key.ph.pool));
127+
return UCT_CUDADRV_FUNC_LOG_WARN(
128+
cuMemFree((CUdeviceptr)region->mapped_addr));
128129
} else
129130
#endif
130131
{
@@ -335,22 +336,22 @@ static ucs_status_t uct_cuda_ipc_open_memhandle(uct_cuda_ipc_rkey_t *key,
335336
CUdeviceptr *mapped_addr)
336337
{
337338

338-
#if HAVE_CUDA_FABRIC
339339
ucs_trace("key handle type %u", key->ph.handle_type);
340340

341-
if (key->ph.handle_type == UCT_CUDA_IPC_KEY_HANDLE_TYPE_LEGACY) {
341+
switch(key->ph.handle_type) {
342+
case UCT_CUDA_IPC_KEY_HANDLE_TYPE_LEGACY:
342343
return uct_cuda_ipc_open_memhandle_legacy(key->ph.handle.legacy,
343344
mapped_addr);
344-
} else if (key->ph.handle_type == UCT_CUDA_IPC_KEY_HANDLE_TYPE_VMM) {
345+
#if HAVE_CUDA_FABRIC
346+
case UCT_CUDA_IPC_KEY_HANDLE_TYPE_VMM:
345347
return uct_cuda_ipc_open_memhandle_vmm(key, mapped_addr);
346-
} else if (key->ph.handle_type == UCT_CUDA_IPC_KEY_HANDLE_TYPE_MEMPOOL) {
348+
case UCT_CUDA_IPC_KEY_HANDLE_TYPE_MEMPOOL:
347349
return uct_cuda_ipc_open_memhandle_mempool(key, mapped_addr);
348-
} else {
350+
#endif
351+
default:
352+
ucs_error("unsupported key handle type");
349353
return UCS_ERR_INVALID_PARAM;
350354
}
351-
#else
352-
return uct_cuda_ipc_open_memhandle_legacy(key->ph, mapped_addr);
353-
#endif
354355
}
355356

356357
static void uct_cuda_ipc_cache_invalidate_regions(uct_cuda_ipc_cache_t *cache,
@@ -480,13 +481,6 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_map_memhandle, (key, mapped_addr),
480481
ucs_pgt_region_t *pgt_region;
481482
uct_cuda_ipc_cache_region_t *region;
482483
int ret;
483-
size_t cmp_size;
484-
485-
#if HAVE_CUDA_FABRIC
486-
cmp_size = sizeof(key->ph.handle);
487-
#else
488-
cmp_size = sizeof(key->ph);
489-
#endif
490484

491485
status = uct_cuda_ipc_get_remote_cache(key->pid, &cache);
492486
if (status != UCS_OK) {
@@ -498,8 +492,8 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_map_memhandle, (key, mapped_addr),
498492
&cache->pgtable, key->d_bptr);
499493
if (ucs_likely(pgt_region != NULL)) {
500494
region = ucs_derived_of(pgt_region, uct_cuda_ipc_cache_region_t);
501-
if (memcmp((const void *)&key->ph, (const void *)&region->key.ph,
502-
cmp_size) == 0) {
495+
496+
if (key->ph.buffer_id == region->key.ph.buffer_id) {
503497
/*cache hit */
504498
ucs_trace("%s: cuda_ipc cache hit addr:%p size:%lu region:"
505499
UCS_PGT_REGION_FMT, cache->name, (void *)key->d_bptr,

src/uct/cuda/cuda_ipc/cuda_ipc_md.c

+17-16
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,7 @@ uct_cuda_ipc_get_dev_cache(uct_cuda_ipc_component_t *component,
7070
int ret;
7171

7272
key.uuid = rkey->uuid;
73-
#if HAVE_CUDA_FABRIC
7473
key.type = rkey->ph.handle_type;
75-
#else
76-
key.type = 0;
77-
#endif
7874

7975
iter = kh_put(cuda_ipc_uuid_hash, hash, key, &ret);
8076
if (ret == UCS_KH_PUT_KEY_PRESENT) {
@@ -112,11 +108,10 @@ static ucs_status_t
112108
uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
113109
uct_cuda_ipc_lkey_t **key_p)
114110
{
115-
CUipcMemHandle *legacy_handle;
116111
uct_cuda_ipc_lkey_t *key;
117112
ucs_status_t status;
118113
#if HAVE_CUDA_FABRIC
119-
#define UCT_CUDA_IPC_QUERY_NUM_ATTRS 2
114+
#define UCT_CUDA_IPC_QUERY_NUM_ATTRS 3
120115
CUmemGenericAllocationHandle handle;
121116
CUmemoryPool mempool;
122117
CUpointer_attribute attr_type[UCT_CUDA_IPC_QUERY_NUM_ATTRS];
@@ -130,10 +125,16 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
130125
return UCS_ERR_NO_MEMORY;
131126
}
132127

133-
legacy_handle = (CUipcMemHandle*)&key->ph;
134128
UCT_CUDADRV_FUNC_LOG_ERR(cuMemGetAddressRange(&key->d_bptr, &key->b_len,
135129
(CUdeviceptr)addr));
136130

131+
status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttribute(&key->ph.buffer_id,
132+
CU_POINTER_ATTRIBUTE_BUFFER_ID,
133+
(CUdeviceptr)addr));
134+
if (status != UCS_OK) {
135+
goto err;
136+
}
137+
137138
#if HAVE_CUDA_FABRIC
138139
/* cuda_ipc can handle VMM, mallocasync, and legacy pinned device so need to
139140
* pack appropriate handle */
@@ -142,6 +143,8 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
142143
attr_data[0] = &legacy_capable;
143144
attr_type[1] = CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES;
144145
attr_data[1] = &allowed_handle_types;
146+
attr_type[2] = CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE;
147+
attr_data[2] = &mempool;
145148

146149
status = UCT_CUDADRV_FUNC_LOG_ERR(
147150
cuPointerGetAttributes(ucs_static_array_size(attr_data), attr_type,
@@ -151,8 +154,6 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
151154
}
152155

153156
if (legacy_capable) {
154-
key->ph.handle_type = UCT_CUDA_IPC_KEY_HANDLE_TYPE_LEGACY;
155-
legacy_handle = &key->ph.handle.legacy;
156157
goto legacy_path;
157158
}
158159

@@ -184,9 +185,7 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
184185
goto common_path;
185186
}
186187

187-
status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttribute(&mempool,
188-
CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE, (CUdeviceptr)addr));
189-
if ((status != UCS_OK) || (mempool == 0)) {
188+
if (mempool == 0) {
190189
/* cuda_ipc can only handle UCS_MEMORY_TYPE_CUDA, which has to be either
191190
* legacy type, or VMM type, or mempool type. Return error if memory
192191
* does not belong to any of the three types */
@@ -216,16 +215,18 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
216215
goto common_path;
217216
#endif
218217
legacy_path:
219-
status = UCT_CUDADRV_FUNC(cuIpcGetMemHandle(legacy_handle, (CUdeviceptr)addr),
220-
UCS_LOG_LEVEL_ERROR);
218+
key->ph.handle_type = UCT_CUDA_IPC_KEY_HANDLE_TYPE_LEGACY;
219+
status = UCT_CUDADRV_FUNC_LOG_ERR(
220+
cuIpcGetMemHandle(&key->ph.handle.legacy, (CUdeviceptr)addr));
221221
if (status != UCS_OK) {
222222
goto err;
223223
}
224224

225225
common_path:
226226
ucs_list_add_tail(&memh->list, &key->link);
227-
ucs_trace("registered addr:%p/%p length:%zd dev_num:%d",
228-
addr, (void *)key->d_bptr, key->b_len, (int)memh->dev_num);
227+
ucs_trace("registered addr:%p/%p length:%zd dev_num:%d buffer_id:%llu",
228+
addr, (void *)key->d_bptr, key->b_len, (int)memh->dev_num,
229+
key->ph.buffer_id);
229230

230231
*key_p = key;
231232
return UCS_OK;

src/uct/cuda/cuda_ipc/cuda_ipc_md.h

+7-5
Original file line numberDiff line numberDiff line change
@@ -14,28 +14,30 @@
1414
#include <ucs/config/types.h>
1515

1616

17-
#if HAVE_CUDA_FABRIC
1817
typedef enum uct_cuda_ipc_key_handle {
1918
UCT_CUDA_IPC_KEY_HANDLE_TYPE_ERROR = 0,
2019
UCT_CUDA_IPC_KEY_HANDLE_TYPE_LEGACY, /* cudaMalloc memory */
20+
#if HAVE_CUDA_FABRIC
2121
UCT_CUDA_IPC_KEY_HANDLE_TYPE_VMM, /* cuMemCreate memory */
2222
UCT_CUDA_IPC_KEY_HANDLE_TYPE_MEMPOOL /* cudaMallocAsync memory */
23+
#endif
2324
} uct_cuda_ipc_key_handle_t;
2425

2526

2627
typedef struct uct_cuda_ipc_md_handle {
2728
uct_cuda_ipc_key_handle_t handle_type;
2829
union {
2930
CUipcMemHandle legacy; /* Legacy IPC handle */
31+
#if HAVE_CUDA_FABRIC
3032
CUmemFabricHandle fabric_handle; /* VMM/Mallocasync export handle */
33+
#endif
3134
} handle;
35+
#if HAVE_CUDA_FABRIC
3236
CUmemPoolPtrExportData ptr;
3337
CUmemoryPool pool;
34-
} uct_cuda_ipc_md_handle_t;
35-
#else
36-
typedef CUipcMemHandle uct_cuda_ipc_md_handle_t;
3738
#endif
38-
39+
unsigned long long buffer_id;
40+
} uct_cuda_ipc_md_handle_t;
3941

4042
/**
4143
* @brief cuda ipc MD descriptor

0 commit comments

Comments
 (0)