@@ -126,6 +126,8 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
126
126
return UCS_ERR_NO_MEMORY ;
127
127
}
128
128
129
+ key -> ph .handle_type = UCT_CUDA_IPC_KEY_HANDLE_TYPE_ERROR ;
130
+
129
131
UCT_CUDADRV_FUNC_LOG_ERR (cuMemGetAddressRange (& key -> d_bptr , & key -> b_len ,
130
132
(CUdeviceptr )addr ));
131
133
@@ -136,11 +138,6 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
136
138
goto err ;
137
139
}
138
140
139
- key -> ph .handle_type = UCT_CUDA_IPC_KEY_HANDLE_TYPE_ERROR ;
140
-
141
- ucs_trace ("exporting handle for %p: base %p b_len %lu buffer_id %llu" , addr ,
142
- (void * )key -> d_bptr , key -> b_len , key -> ph .buffer_id );
143
-
144
141
#if HAVE_CUDA_FABRIC
145
142
/* cuda_ipc can handle VMM, mallocasync, and legacy pinned device so need to
146
143
* pack appropriate handle */
@@ -230,8 +227,9 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
230
227
231
228
common_path :
232
229
ucs_list_add_tail (& memh -> list , & key -> link );
233
- ucs_trace ("registered addr:%p/%p length:%zd dev_num:%d" ,
234
- addr , (void * )key -> d_bptr , key -> b_len , (int )memh -> dev_num );
230
+ ucs_trace ("registered addr:%p/%p length:%zd dev_num:%d buffer_id:%llu" ,
231
+ addr , (void * )key -> d_bptr , key -> b_len , (int )memh -> dev_num ,
232
+ key -> ph .buffer_id );
235
233
236
234
* key_p = key ;
237
235
return UCS_OK ;
@@ -248,41 +246,16 @@ uct_cuda_ipc_mkey_pack(uct_md_h md, uct_mem_h tl_memh, void *address,
248
246
{
249
247
uct_cuda_ipc_rkey_t * packed = mkey_buffer ;
250
248
uct_cuda_ipc_memh_t * memh = tl_memh ;
251
- uct_cuda_ipc_lkey_t * key , * tmp ;
249
+ uct_cuda_ipc_lkey_t * key ;
252
250
ucs_status_t status ;
253
- unsigned long long buffer_id ;
254
251
255
- status = UCT_CUDADRV_FUNC_LOG_ERR (cuPointerGetAttribute (& buffer_id ,
256
- CU_POINTER_ATTRIBUTE_BUFFER_ID ,
257
- (CUdeviceptr )address ));
258
- if (status != UCS_OK ) {
259
- return status ;
260
- }
261
-
262
- ucs_list_for_each_safe (key , tmp , & memh -> list , link ) {
252
+ ucs_list_for_each (key , & memh -> list , link ) {
263
253
if (((uintptr_t )address >= key -> d_bptr ) &&
264
254
((uintptr_t )address < (key -> d_bptr + key -> b_len ))) {
265
- ucs_trace ("found range (%p ... %p) in local cache" , address ,
266
- address + length );
267
- if (buffer_id == key -> ph .buffer_id ) {
268
- ucs_trace ("buffer_id(%llu) match found" , buffer_id );
269
- goto found ;
270
- } else {
271
- /* VA recycling case. Remove entry. A given pointer should only
272
- * belong to one region so no need to look through rest of the
273
- * items in the linked list. Skip to export phase. */
274
- ucs_trace ("VA recycling detected for (%p ... %p) (%llu, %llu)" ,
275
- address , address + length , buffer_id , key -> ph .buffer_id );
276
- ucs_list_del (& key -> link );
277
- goto not_found ;
278
- }
255
+ goto found ;
279
256
}
280
257
}
281
258
282
- ucs_trace ("export handle for (%p ... %p) not found in local cache" , address ,
283
- address + length );
284
-
285
- not_found :
286
259
status = uct_cuda_ipc_mem_add_reg (address , memh , & key );
287
260
if (status != UCS_OK ) {
288
261
return status ;
0 commit comments