Skip to content

Commit bb5ee28

Browse files
author
ferrol aderholdt
committed
REVIEW: address feedback
TL/UCP: additional support for memh in onesided alltoall
1 parent c8ec986 commit bb5ee28

File tree

16 files changed

+136
-112
lines changed

16 files changed

+136
-112
lines changed

src/components/base/ucc_base_iface.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ typedef struct ucc_team ucc_team_t;
2121
typedef struct ucc_context ucc_context_t;
2222
typedef struct ucc_coll_score ucc_coll_score_t;
2323
typedef struct ucc_coll_task ucc_coll_task_t;
24+
typedef struct ucc_mem_map_memh_t ucc_mem_map_memh_t;
25+
typedef struct ucc_mem_map_tl_t ucc_mem_map_tl_t;
2426

2527
typedef struct ucc_base_lib {
2628
ucc_log_component_config_t log_component;
@@ -123,10 +125,22 @@ typedef struct ucc_base_context_iface {
123125
void (*destroy)(ucc_base_context_t *ctx);
124126
ucc_status_t (*get_attr)(const ucc_base_context_t *context,
125127
ucc_base_ctx_attr_t *attr);
126-
ucc_status_t (*mem_map)(const ucc_base_context_t *context, int type,
127-
void *memh, void *tl_h);
128-
ucc_status_t (*mem_unmap)(const ucc_base_context_t *context, int type, void *tl_h);
129-
ucc_status_t (*memh_pack)(const ucc_base_context_t *context, int type, void *memh,
128+
/* maps a memory-region specified by memory handle, memh, to a TL-specific
129+
handle, tl_h, based on the mapping mode defined by mode. For the export
130+
mode, the TL will map a local memory-region and store the
131+
necessary information in the tl_h. For the import mode, the TL will
132+
map, if necessary, memory handles provided by a peer and store the
133+
necessary information in the tl_h. */
134+
ucc_status_t (*mem_map)(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
135+
ucc_mem_map_memh_t *memh, ucc_mem_map_tl_t *tl_h);
136+
/* unmaps a memory-region previously mapped to a specific TL pointed to by tl_h
137+
using the mapping mode specified by mode. */
138+
ucc_status_t (*mem_unmap)(const ucc_base_context_t *context, ucc_mem_map_mode_t mode, ucc_mem_map_tl_t *tl_h);
139+
/* packs necessary TL specific elements for a mapped memory-region to a
140+
packed buffer. Each TL implementing this function should set the
141+
packed_size member of the tl_h, allocate memory for the pack_buffer, and
142+
pack data in the buffer. */
143+
ucc_status_t (*memh_pack)(const ucc_base_context_t *context, ucc_mem_map_mode_t mode, ucc_mem_map_tl_t *tl_h,
130144
void **pack_buffer);
131145
} ucc_base_context_iface_t;
132146

src/components/cl/basic/cl_basic.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@ ucc_status_t ucc_cl_basic_get_lib_attr(const ucc_base_lib_t *lib,
1010
ucc_base_lib_attr_t *base_attr);
1111
ucc_status_t ucc_cl_basic_get_context_attr(const ucc_base_context_t *context,
1212
ucc_base_ctx_attr_t *base_attr);
13-
ucc_status_t ucc_cl_basic_mem_map(const ucc_base_context_t *context, int type,
14-
void *memh, void *tl_h);
15-
ucc_status_t ucc_cl_basic_mem_unmap(const ucc_base_context_t *context, int type,
16-
void *tl_h);
17-
ucc_status_t ucc_cl_basic_memh_pack(const ucc_base_context_t *context, int type,
18-
void *memh, void **packed_buffer);
13+
ucc_status_t ucc_cl_basic_mem_map(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
14+
ucc_mem_map_memh_t *memh, ucc_mem_map_tl_t *tl_h);
15+
ucc_status_t ucc_cl_basic_mem_unmap(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
16+
ucc_mem_map_tl_t *tl_h);
17+
ucc_status_t ucc_cl_basic_memh_pack(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
18+
ucc_mem_map_tl_t *tl_h, void **packed_buffer);
1919

2020
ucc_status_t ucc_cl_basic_get_lib_properties(ucc_base_lib_properties_t *prop);
2121

src/components/cl/hier/cl_hier.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,14 @@ ucc_status_t ucc_cl_hier_get_lib_properties(ucc_base_lib_properties_t *prop);
1818
ucc_status_t ucc_cl_hier_get_context_attr(const ucc_base_context_t *context,
1919
ucc_base_ctx_attr_t *base_attr);
2020

21-
ucc_status_t ucc_cl_hier_mem_map(const ucc_base_context_t *context, int type,
22-
void *memh, void *tl_h);
21+
ucc_status_t ucc_cl_hier_mem_map(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
22+
ucc_mem_map_memh_t *memh, ucc_mem_map_tl_t *tl_h);
2323

24-
ucc_status_t ucc_cl_hier_mem_unmap(const ucc_base_context_t *context, int type,
25-
void *tl_h);
24+
ucc_status_t ucc_cl_hier_mem_unmap(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
25+
ucc_mem_map_tl_t *tl_h);
2626

2727
ucc_status_t ucc_cl_hier_memh_pack(const ucc_base_context_t *context,
28-
int type, void *memh, void **packed_buffer);
28+
ucc_mem_map_mode_t mode, ucc_mem_map_tl_t *tl_h, void **packed_buffer);
2929

3030
static ucc_config_field_t ucc_cl_hier_lib_config_table[] = {
3131
{"", "", NULL, ucc_offsetof(ucc_cl_hier_lib_config_t, super),

src/components/tl/cuda/tl_cuda.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,14 @@ static ucs_config_field_t ucc_tl_cuda_context_config_table[] = {
6565
ucc_status_t ucc_tl_cuda_get_context_attr(const ucc_base_context_t *context,
6666
ucc_base_ctx_attr_t *base_attr);
6767

68-
ucc_status_t ucc_tl_cuda_mem_map(const ucc_base_context_t *context, int type,
69-
void *memh, void *tl_h);
68+
ucc_status_t ucc_tl_cuda_mem_map(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
69+
ucc_mem_map_memh_t *memh, ucc_mem_map_tl_t *tl_h);
7070

71-
ucc_status_t ucc_tl_cuda_mem_unmap(const ucc_base_context_t *context, int type,
72-
void *memh);
71+
ucc_status_t ucc_tl_cuda_mem_unmap(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
72+
ucc_mem_map_tl_t *tl_h);
7373

7474
ucc_status_t ucc_tl_cuda_memh_pack(const ucc_base_context_t *context,
75-
int type, void *memh, void **pack_buffer);
75+
ucc_mem_map_mode_t mode, ucc_mem_map_tl_t *tl_h, void **pack_buffer);
7676

7777
UCC_CLASS_DEFINE_NEW_FUNC(ucc_tl_cuda_context_t, ucc_base_context_t,
7878
const ucc_base_context_params_t *,

src/components/tl/mlx5/tl_mlx5.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@ ucc_status_t ucc_tl_mlx5_get_lib_attr(const ucc_base_lib_t *lib,
1111
ucc_status_t ucc_tl_mlx5_get_context_attr(const ucc_base_context_t *context,
1212
ucc_base_ctx_attr_t * base_attr);
1313

14-
ucc_status_t ucc_tl_mlx5_mem_map(const ucc_base_context_t *context, int type,
15-
void *memh, void *tl_h);
14+
ucc_status_t ucc_tl_mlx5_mem_map(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
15+
ucc_mem_map_memh_t *memh, ucc_mem_map_tl_t *tl_h);
1616

17-
ucc_status_t ucc_tl_mlx5_mem_unmap(const ucc_base_context_t *context, int type,
18-
void *memh);
17+
ucc_status_t ucc_tl_mlx5_mem_unmap(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
18+
ucc_mem_map_tl_t *tl_h);
1919

2020
ucc_status_t ucc_tl_mlx5_memh_pack(const ucc_base_context_t *context,
21-
int type, void *memh, void **pack_buffer);
21+
ucc_mem_map_mode_t mode, ucc_mem_map_tl_t *tl_h, void **pack_buffer);
2222

2323
ucc_status_t ucc_tl_mlx5_get_lib_properties(ucc_base_lib_properties_t *prop);
2424

src/components/tl/nccl/tl_nccl.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@ ucc_status_t ucc_tl_nccl_get_lib_properties(ucc_base_lib_properties_t *prop);
1616
ucc_status_t ucc_tl_nccl_get_context_attr(const ucc_base_context_t *context,
1717
ucc_base_ctx_attr_t *base_attr);
1818

19-
ucc_status_t ucc_tl_nccl_mem_map(const ucc_base_context_t *context, int type,
20-
void *memh, void *tl_h);
19+
ucc_status_t ucc_tl_nccl_mem_map(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
20+
ucc_mem_map_memh_t *memh, ucc_mem_map_tl_t *tl_h);
2121

22-
ucc_status_t ucc_tl_nccl_mem_unmap(const ucc_base_context_t *context, int type,
23-
void *memh);
22+
ucc_status_t ucc_tl_nccl_mem_unmap(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
23+
ucc_mem_map_tl_t *tl_h);
2424

2525
ucc_status_t ucc_tl_nccl_memh_pack(const ucc_base_context_t *context,
26-
int type, void *memh, void **pack_buffer);
26+
ucc_mem_map_mode_t mode, ucc_mem_map_tl_t *tl_h, void **pack_buffer);
2727

2828
static ucc_config_field_t ucc_tl_nccl_lib_config_table[] = {
2929
{"", "", NULL, ucc_offsetof(ucc_tl_nccl_lib_config_t, super),

src/components/tl/rccl/tl_rccl.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@ ucc_status_t ucc_tl_rccl_get_lib_properties(ucc_base_lib_properties_t *prop);
1717
ucc_status_t ucc_tl_rccl_get_context_attr(const ucc_base_context_t *context,
1818
ucc_base_ctx_attr_t *base_attr);
1919

20-
ucc_status_t ucc_tl_rccl_mem_map(const ucc_base_context_t *context, int type,
21-
void *memh, void *tl_h);
20+
ucc_status_t ucc_tl_rccl_mem_map(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
21+
ucc_mem_map_memh_t *memh, ucc_mem_map_tl_t *tl_h);
2222

23-
ucc_status_t ucc_tl_rccl_mem_unmap(const ucc_base_context_t *context, int type,
24-
void *memh);
23+
ucc_status_t ucc_tl_rccl_mem_unmap(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
24+
ucc_mem_map_tl_t *tl_h);
2525

2626
ucc_status_t ucc_tl_rccl_memh_pack(const ucc_base_context_t *context,
27-
int type, void *memh, void **pack_buffer);
27+
ucc_mem_map_mode_t mode, ucc_mem_map_tl_t *tl_h, void **pack_buffer);
2828

2929
static ucc_config_field_t ucc_tl_rccl_lib_config_table[] = {
3030
{"", "", NULL, ucc_offsetof(ucc_tl_rccl_lib_config_t, super),

src/components/tl/self/tl_self.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@ ucc_status_t ucc_tl_self_get_lib_attr(const ucc_base_lib_t *lib,
1616
ucc_status_t ucc_tl_self_get_context_attr(const ucc_base_context_t *context,
1717
ucc_base_ctx_attr_t *base_attr);
1818

19-
ucc_status_t ucc_tl_self_mem_map(const ucc_base_context_t *context, int type,
20-
void *memh, void *tl_h);
19+
ucc_status_t ucc_tl_self_mem_map(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
20+
ucc_mem_map_memh_t *memh, ucc_mem_map_tl_t *tl_h);
2121

22-
ucc_status_t ucc_tl_self_mem_unmap(const ucc_base_context_t *context, int type,
23-
void *memh);
22+
ucc_status_t ucc_tl_self_mem_unmap(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
23+
ucc_mem_map_tl_t *tl_h);
2424

2525
ucc_status_t ucc_tl_self_memh_pack(const ucc_base_context_t *context,
26-
int type, void *memh, void **pack_buffer);
26+
ucc_mem_map_mode_t mode, ucc_mem_map_tl_t *tl_h, void **pack_buffer);
2727

2828
ucc_status_t ucc_tl_self_get_lib_properties(ucc_base_lib_properties_t *prop);
2929

src/components/tl/sharp/tl_sharp.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@ ucc_status_t ucc_tl_sharp_get_lib_properties(ucc_base_lib_properties_t *prop);
1515
ucc_status_t ucc_tl_sharp_get_context_attr(const ucc_base_context_t *context,
1616
ucc_base_ctx_attr_t *base_attr);
1717

18-
ucc_status_t ucc_tl_sharp_mem_map(const ucc_base_context_t *context, int type,
19-
void *memh, void *tl_h);
18+
ucc_status_t ucc_tl_sharp_mem_map(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
19+
ucc_mem_map_memh_t *memh, ucc_mem_map_tl_t *tl_h);
2020

21-
ucc_status_t ucc_tl_sharp_mem_unmap(const ucc_base_context_t *context, int type,
22-
void *memh);
21+
ucc_status_t ucc_tl_sharp_mem_unmap(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
22+
ucc_mem_map_tl_t *tl_h);
2323

2424
ucc_status_t ucc_tl_sharp_memh_pack(const ucc_base_context_t *context,
25-
int type, void *memh, void **pack_buffer);
25+
ucc_mem_map_mode_t mode, ucc_mem_map_tl_t *tl_h, void **pack_buffer);
2626

2727
static ucc_config_field_t ucc_tl_sharp_lib_config_table[] = {
2828
{"", "", NULL, ucc_offsetof(ucc_tl_sharp_lib_config_t, super),

src/components/tl/ucp/alltoall/alltoall.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,13 @@ ucc_status_t ucc_tl_ucp_alltoall_onesided_init(ucc_base_coll_args_t *coll_args,
100100
}
101101
if (!(coll_args->args.mask & UCC_COLL_ARGS_FIELD_MEM_MAP_DST_MEMH)) {
102102
coll_args->args.dst_memh.global_memh = NULL;
103+
} else {
104+
if (!(coll_args->args.flags & UCC_COLL_ARGS_FLAG_DST_GLOBAL)) {
105+
tl_error(UCC_TL_TEAM_LIB(tl_team),
106+
"onesided alltoall requires global memory handles for dst buffers");
107+
status = UCC_ERR_INVALID_PARAM;
108+
goto out;
109+
}
103110
}
104111

105112
task = ucc_tl_ucp_init_task(coll_args, team);

src/components/tl/ucp/alltoall/alltoall_onesided.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ ucc_status_t ucc_tl_ucp_alltoall_onesided_start(ucc_coll_task_t *ctask)
2828
ucc_mem_map_mem_h *dst_memh = TASK_ARGS(task).dst_memh.global_memh;
2929
ucc_rank_t peer;
3030

31+
if (TASK_ARGS(task).flags & UCC_COLL_ARGS_FLAG_SRC_GLOBAL) {
32+
src_memh = TASK_ARGS(task).src_memh.global_memh[grank];
33+
}
34+
3135
ucc_tl_ucp_task_reset(task, UCC_INPROGRESS);
3236
/* TODO: change when support for library-based work buffers is complete */
3337
nelems = (nelems / gsize) * ucc_dt_size(TASK_ARGS(task).src.info.datatype);

src/components/tl/ucp/tl_ucp.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,14 @@ ucc_status_t ucc_tl_ucp_get_lib_properties(ucc_base_lib_properties_t *prop);
3232
ucc_status_t ucc_tl_ucp_get_context_attr(const ucc_base_context_t *context,
3333
ucc_base_ctx_attr_t *base_attr);
3434

35-
ucc_status_t ucc_tl_ucp_mem_map(const ucc_base_context_t *context, int type,
36-
void *memh, void *tl_h);
35+
ucc_status_t ucc_tl_ucp_mem_map(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
36+
ucc_mem_map_memh_t *memh, ucc_mem_map_tl_t *tl_h);
3737

38-
ucc_status_t ucc_tl_ucp_memh_pack(const ucc_base_context_t *context, int type, void *memh,
38+
ucc_status_t ucc_tl_ucp_memh_pack(const ucc_base_context_t *context, ucc_mem_map_mode_t mode, ucc_mem_map_tl_t *tl_h,
3939
void **pack_buffer);
4040

41-
ucc_status_t ucc_tl_ucp_mem_unmap(const ucc_base_context_t *context, int type,
42-
void *memh);
41+
ucc_status_t ucc_tl_ucp_mem_unmap(const ucc_base_context_t *context, ucc_mem_map_mode_t mode,
42+
ucc_mem_map_tl_t *memh);
4343

4444
ucc_config_field_t ucc_tl_ucp_lib_config_table[] = {
4545
{"", "", NULL, ucc_offsetof(ucc_tl_ucp_lib_config_t, super),
@@ -292,7 +292,7 @@ static ucs_config_field_t ucc_tl_ucp_context_config_table[] = {
292292
UCC_CONFIG_TYPE_BOOL},
293293

294294
{"EXPORTED_MEMORY_HANDLE", "n",
295-
"If set to 1, initialize UCP context with the exported memory handle "
295+
"If set to yes, initialize UCP context with the exported memory handle "
296296
"feature, which is useful for offload devices such as a DPU. Otherwise "
297297
"disable the use of this feature.",
298298
ucc_offsetof(ucc_tl_ucp_context_config_t, exported_memory_handle),

0 commit comments

Comments
 (0)