Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete topo map #44

Open
wants to merge 1 commit into
base: huawei
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion api/ucg.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,13 @@ enum ucg_group_params_field {
UCG_GROUP_PARAM_FIELD_IS_SOCKET_BALANCE = UCS_BIT(10),
};

/**
 * Topology hints about how ranks are placed, one single-bit flag per property.
 *
 * NOTE(review): the per-flag meanings below are inferred from the field names;
 * confirm them against the code that fills in this structure.
 */
typedef struct {
    /* presumably set when ranks sharing a node are numbered consecutively */
    uint32_t rank_continuous_in_node    : 1;
    /* presumably set when ranks sharing a socket are numbered consecutively */
    uint32_t rank_continuous_in_sock    : 1;
    /* presumably set when every node hosts the same number of ranks */
    uint32_t rank_balance_in_node       : 1;
    /* presumably set when every socket hosts the same number of ranks */
    uint32_t rank_balance_in_sock       : 1;
} ucg_topo_args_t;

typedef struct ucg_group_params {
/**
* Mask of valid fields in this structure, using bits from @ref ucg_group_params_field.
Expand All @@ -161,7 +168,7 @@ typedef struct ucg_group_params {
/* number of group members */
int member_count;

char **topo_map; /* Global topology map, topo_map[i][j] means Distance between rank i and rank j. */
ucg_topo_args_t topo_args;

/*
* This array contains information about the process placement of different
Expand Down
55 changes: 0 additions & 55 deletions base/ucg_group.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,45 +70,6 @@ static void ucg_group_calc_cache_index(ucg_collective_params_t *params,
return;
}


/**
 * Release a topology map: its rows first, then the table of row pointers.
 *
 * Rows are freed in order starting at index 0; freeing stops at the first
 * NULL row or after @a member_count rows, whichever comes first. This lets a
 * partially populated (zero-initialised) table be released safely.
 *
 * @param topo_map      Table of row pointers to release; NULL is a no-op.
 * @param member_count  Maximum number of rows to inspect.
 */
static void ucg_group_free_topo_map(char** topo_map, int member_count)
{
    int i;

    /* Cleanup paths may run before any map was allocated; do not
     * dereference a table that does not exist. */
    if (topo_map == NULL) {
        return;
    }

    for (i = 0; i < member_count; ++i) {
        if (topo_map[i] == NULL) {
            break;
        }
        ucs_free(topo_map[i]);
    }
    ucs_free(topo_map);
}

/**
 * Deep-copy a square topology map of @a member_count rows, each holding
 * @a member_count one-byte distance entries.
 *
 * @param topo_map      Source map; row i is read as member_count chars.
 * @param member_count  Number of rows (and of entries per row).
 *
 * @return A newly allocated copy that the caller releases with
 *         ucg_group_free_topo_map(), or NULL if the input is NULL, the count
 *         is not positive, or an allocation fails.
 */
static char** ucg_group_dup_topo_map(char** topo_map, int member_count)
{
    if (topo_map == NULL || member_count <= 0) {
        return NULL;
    }

    /* The pointer table and a single row have different element sizes:
     * the table stores char* entries, a row stores plain chars. Sizing the
     * row copy by sizeof(char*) would read past the end of each source row. */
    size_t table_size = sizeof(char*) * (size_t)member_count;
    size_t row_size = sizeof(char) * (size_t)member_count;

    /* Zero-initialised so the error path can stop at the first NULL row. */
    char** dup_topo_map = ucs_calloc(1, table_size, "topo map");
    if (dup_topo_map == NULL) {
        return NULL;
    }

    int i;
    for (i = 0; i < member_count; ++i) {
        char* one_row = ucs_malloc(row_size, "topo map one row");
        if (one_row == NULL) {
            goto err_free_topo_map;
        }
        memcpy(one_row, topo_map[i], row_size);
        dup_topo_map[i] = one_row;
    }

    return dup_topo_map;

err_free_topo_map:
    /* Rows [0, i) were allocated; release them together with the table. */
    ucg_group_free_topo_map(dup_topo_map, i);
    return NULL;
}

static void* ucg_dup_one_dim_array(const void* array,
int member_size,
int member_count,
Expand Down Expand Up @@ -152,15 +113,6 @@ static ucs_status_t ucg_group_apply_params(ucg_group_h group_p,
UCG_CHECK_REQUIRED_FIELD(field_mask, UCG_GROUP_PARAM_FIELD_MEMBER_COUNT, err);
group_p->params.member_count = params->member_count;

if (field_mask & UCG_GROUP_PARAM_FIELD_TOPO_MAP) {
group_p->params.topo_map = ucg_group_dup_topo_map(params->topo_map,
params->member_count);
if (group_p->params.topo_map == NULL) {
status = UCS_ERR_NO_MEMORY;
goto err;
}
}

UCG_CHECK_REQUIRED_FIELD(field_mask, UCG_GROUP_PARAM_FIELD_DISTANCE, err_free_topo_map);
group_p->params.distance = ucg_group_dup_distance(params->distance, params->member_count);
if (group_p->params.distance == NULL) {
Expand Down Expand Up @@ -192,8 +144,6 @@ static ucs_status_t ucg_group_apply_params(ucg_group_h group_p,
ucs_free(group_p->params.distance);
group_p->params.distance = NULL;
err_free_topo_map:
ucg_group_free_topo_map(group_p->params.topo_map, group_p->params.member_count);
group_p->params.topo_map = NULL;
err:
return status;
}
Expand All @@ -220,11 +170,6 @@ static void ucg_group_release_params(ucg_group_h group_p)
{
ucg_group_params_t *params = &group_p->params;

if (params->topo_map != NULL) {
ucg_group_free_topo_map(params->topo_map, params->member_count);
params->topo_map = NULL;
}

if (params->distance != NULL) {
ucs_free(params->distance);
params->distance = NULL;
Expand Down
82 changes: 5 additions & 77 deletions builtin/builtin.c
Original file line number Diff line number Diff line change
Expand Up @@ -708,88 +708,16 @@ enum choose_ops_mask ucg_builtin_plan_choose_ops(ucg_builtin_config_t *config,
return result;
}

/**
 * Sort @a array in ascending order (in place) and flag whether its values
 * fail to form a run of consecutive numbers.
 *
 * @param array         Values to examine; reordered by this call.
 * @param array_len     Number of entries in @a array.
 * @param discont_flag  Set to 1 when two neighbouring sorted values differ by
 *                      something other than 1; left untouched otherwise.
 */
void ucg_builtin_check_continuous_number_by_sort(ucg_group_member_index_t *array,
                                                 unsigned array_len,
                                                 unsigned *discont_flag)
{
    ucg_group_member_index_t member_idx;
    unsigned idx, idx2;

    /* Zero or one element is trivially continuous. The guard also keeps the
     * unsigned expression (array_len - 1) below from wrapping around when
     * array_len is 0, which would walk far outside the array. */
    if (array_len < 2) {
        return;
    }

    /* bubble sort */
    for (idx = 0; idx < array_len - 1; idx++) {
        for (idx2 = 0; idx2 < array_len - 1 - idx; idx2++) {
            if (array[idx2] > array[idx2 + 1]) {
                member_idx = array[idx2 + 1];
                array[idx2 + 1] = array[idx2];
                array[idx2] = member_idx;
            }
        }
    }

    /* discontinuous or not: neighbours in sorted order must differ by exactly 1 */
    for (idx = 0; idx < array_len - 1; idx++) {
        if (array[idx + 1] - array[idx] != 1) {
            *discont_flag = 1;
            break;
        }
    }
}

/**
 * Collect, in ascending order, the indices of all group members whose
 * distance entry does not exceed @a domain_distance.
 *
 * @param group_params     Group description; member_count and distance[] are read.
 * @param domain_distance  Largest distance that still counts as "same unit".
 * @param rank_same_unit   Output array receiving the matching member indices.
 *                         The function does not bound-check it, so the caller
 *                         must size it for every match.
 */
static void ucg_builtin_prepare_rank_same_unit(const ucg_group_params_t *group_params,
                                               enum ucg_group_member_distance domain_distance,
                                               ucg_group_member_index_t *rank_same_unit)
{
    unsigned rank;
    unsigned filled = 0;

    for (rank = 0; rank < group_params->member_count; rank++) {
        const enum ucg_group_member_distance rank_distance = group_params->distance[rank];

        if (ucs_likely(rank_distance <= domain_distance)) {
            rank_same_unit[filled] = rank;
            filled++;
        }
    }
}

ucs_status_t ucg_builtin_check_continuous_number_no_topo_map(const ucg_group_params_t *group_params,
enum ucg_group_member_distance domain_distance,
unsigned *discont_flag)
{
unsigned ppx = ucg_builtin_calculate_ppx(group_params, domain_distance);

/* store rank number in same unit */
size_t alloc_size = ppx * sizeof(ucg_group_member_index_t);
ucg_group_member_index_t *rank_same_unit = (ucg_group_member_index_t*)UCG_ALLOC_CHECK(alloc_size, "rank number");
memset(rank_same_unit, 0, alloc_size);
ucg_builtin_prepare_rank_same_unit(group_params, domain_distance, rank_same_unit);

ucg_builtin_check_continuous_number_by_sort(rank_same_unit, ppx, discont_flag);
ucg_builtin_free((void **)&rank_same_unit);
return UCS_OK;
}

/*
 * Report through *discont_flag whether the ranks that fall within
 * domain_distance of one another are numbered discontinuously
 * (1 = discontinuous, 0 = continuous). Every visible return yields UCS_OK.
 *
 * NOTE(review): this span appears to interleave the removed and the added
 * lines of a diff. The braces below do not balance, and the
 * "*discont_flag = 0" store placed after the topo_args branch would overwrite
 * the value that branch just computed. Confirm against the real post-change
 * file which of these statements actually remain before relying on this text.
 */
ucs_status_t ucg_builtin_check_continuous_number(const ucg_group_params_t *group_params,
enum ucg_group_member_distance domain_distance,
unsigned *discont_flag)
{
/* Without a topology map, fall back to the distance-array based check. */
if (group_params->topo_map == NULL) {
return ucg_builtin_check_continuous_number_no_topo_map(group_params, domain_distance, discont_flag);
}

/* topo_map entries are chars, so compare against the threshold as a char. */
char domain_distance_ch = (char)domain_distance;
/* Scan each row of the topology map for the ranks whose distance is within
   the threshold, and make sure those ranks are numbered consecutively. */
for (unsigned i = 0; i < group_params->member_count; i++) {
/* -1 means no in-range rank has been seen yet in this row. */
int last_same_unit_rank = -1;
for (unsigned j = 0; j < group_params->member_count; j++) {
/* Rank j is farther away than the threshold: not part of this unit. */
if (group_params->topo_map[i][j] > domain_distance_ch) {
continue;
}

/* A gap between two successive in-range ranks means discontinuity. */
if (last_same_unit_rank != -1 && j - last_same_unit_rank != 1) {
*discont_flag = 1;
return UCS_OK;
}
last_same_unit_rank = j;
}
/* Derive the flag from the precomputed topo_args bits: the socket-level bit
   for socket distance, the node-level bit for any other distance. */
if (domain_distance == UCG_GROUP_MEMBER_DISTANCE_SOCKET) {
*discont_flag = !group_params->topo_args.rank_continuous_in_sock;
} else {
*discont_flag = !group_params->topo_args.rank_continuous_in_node;
}
/* NOTE(review): unconditionally clears the flag set just above — see header note. */
*discont_flag = 0;

return UCS_OK;
}

Expand Down