Skip to content

Commit 45b618a

Browse files
authored
Merge pull request #1264 from nileshnegi/rocm-6.2-revert/consistent-channel-shuffling
[ROCm 6.2.0] Reverts to ensure consistent channel shuffling for MI300X multi-node
2 parents 43b9844 + 816b391 commit 45b618a

File tree

2 files changed

+5
-166
lines changed

2 files changed

+5
-166
lines changed

src/graph/connect.cc

Lines changed: 4 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -624,29 +624,11 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePa
624624
NCCLCHECK(connectRings(comm, ringRecv, ringSend, ringPrev, ringNext));
625625
NCCLCHECK(connectTrees(comm, treeToParent, treeToChild0, treeToChild1, treePatterns));
626626

627-
// Define channels for non-gfx94 GPU architectures
628-
int maxChannels = 2*CHANNEL_LIMIT;
629-
int multiNodeNchannels = maxChannels;
630-
631-
// Define channels for gfx94 GPU architectures
632-
if (IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx94")) {
633-
// Only use full MAXCHANNELS for gfx94x
634-
maxChannels = MAXCHANNELS;
635-
636-
// Define channels=64 for gfx94 multi-node systems
637-
multiNodeNchannels = 64;
638-
639-
// Check if NCCL_IB_GID_INDEX=3 -- needed for RoCE systems
640-
const char* ncclIbGidIndex = ncclGetEnv("NCCL_IB_GID_INDEX");
641-
int gid_index = 0;
642-
if (ncclIbGidIndex) gid_index = atoi(ncclIbGidIndex);
643-
644-
// Limit channels=48 for RoCE gfx94 multi-node systems
645-
multiNodeNchannels = gid_index == 3 ? 48 : multiNodeNchannels;
646-
}
627+
// Only use full MAXCHANNELS for gfx94x
628+
int maxChannels = IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx94") ? MAXCHANNELS : 2*CHANNEL_LIMIT;
647629

648630
if (graphs[NCCL_ALGO_RING]->nIntraChannels > 0 || comm->nNodes > 1) {
649-
maxChannels = std::min(multiNodeNchannels, maxChannels);
631+
maxChannels = std::min(64, maxChannels);
650632
}
651633

652634
// Duplicate ringPrev/ringNext for ncclBuildRing
@@ -692,7 +674,7 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePa
692674

693675
int minNchannels = ncclMinNchannels();
694676
if (comm->nNodes > 1) {
695-
minNchannels = std::min(multiNodeNchannels, minNchannels);
677+
minNchannels = std::min(64, minNchannels);
696678
}
697679

698680
if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled())) {

src/graph/rome_models.cc

Lines changed: 1 addition & 144 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ THE SOFTWARE.
2828
#include <algorithm>
2929
#include <string.h>
3030
#include "rome_models.h"
31-
#include "param.h"
3231

3332
struct rcclRomeModel {
3433
int nGpus;
@@ -813,7 +812,7 @@ static struct rcclRomeModel rome_model_81 = {
813812
"N7 7 3 2 6 0 4 1 5 N5|"
814813
"N1 1 0 2 4 3 5 7 6 N6|",
815814

816-
.options = "noCpuCheck=1,tuning=5,disableNumaMatching=1,isRoCE=0",
815+
.options = "noCpuCheck=1,tuning=5,disableNumaMatching=1",
817816
};
818817

819818
static struct rcclRomeModel rome_model_84 = {
@@ -842,114 +841,6 @@ static struct rcclRomeModel rome_model_85 = {
842841
.options = "tuning=2",
843842
};
844843

845-
static struct rcclRomeModel rome_model_86 = {
846-
.nGpus = 8, .nCpus = 2, .nNics = 8, .nLinks = 7,
847-
.gpuIds = { 0xc000, 0x22000, 0x38000, 0x5c000, 0x9f000, 0xaf000, 0xbf000, 0xdf000, },
848-
.nicIds = { 0x7000, 0x1d000, 0x33000, 0x57000, 0x9a000, 0xaa000, 0xba000, 0xda000, },
849-
.gpuNuma = { 0, 0, 0, 0, 1, 1, 1, 1, },
850-
.nicNuma = { 0, 0, 0, 0, 1, 1, 1, 1, },
851-
.connMatrix = { 0, 1, 1, 1, 1, 1, 1, 1,
852-
1, 0, 1, 1, 1, 1, 1, 1,
853-
1, 1, 0, 1, 1, 1, 1, 1,
854-
1, 1, 1, 0, 1, 1, 1, 1,
855-
1, 1, 1, 1, 0, 1, 1, 1,
856-
1, 1, 1, 1, 1, 0, 1, 1,
857-
1, 1, 1, 1, 1, 1, 0, 1,
858-
1, 1, 1, 1, 1, 1, 1, 0, },
859-
.gdrLevel = {PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
860-
PATH_PHB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
861-
PATH_PHB, PATH_PHB, PATH_PXB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
862-
PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
863-
PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB,
864-
PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PXB, PATH_PHB, PATH_PHB,
865-
PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PHB,
866-
PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, },
867-
.pattern = "4444",
868-
.ringBase = "N0 0 1 2 3 4 5 6 7 N7|"
869-
"N1 1 0 2 4 3 5 7 6 N6|"
870-
"N2 2 5 0 3 6 1 7 4 N4|"
871-
"N3 3 7 0 4 2 1 6 5 N5|"
872-
"N4 4 6 2 7 3 0 5 1 N1|"
873-
"N5 5 4 7 1 3 2 6 0 N0|"
874-
"N6 6 3 1 4 0 7 5 2 N2|"
875-
"N7 7 2 0 6 4 1 5 3 N3|"
876-
877-
"N0 0 1 2 3 4 5 6 7 N7|"
878-
"N1 1 0 2 4 3 5 7 6 N6|"
879-
"N2 2 5 0 3 6 1 7 4 N4|"
880-
"N3 3 7 0 4 2 1 6 5 N5|"
881-
"N4 4 6 2 7 3 0 5 1 N1|"
882-
"N5 5 4 7 1 3 2 6 0 N0|"
883-
"N6 6 3 1 4 0 7 5 2 N2|"
884-
"N7 7 2 0 6 4 1 5 3 N3|"
885-
886-
"N0 0 1 2 3 4 5 6 7 N7|"
887-
"N1 1 0 2 4 3 5 7 6 N6|"
888-
"N2 2 5 0 3 6 1 7 4 N4|"
889-
"N3 3 7 0 4 2 1 6 5 N5|"
890-
"N4 4 6 2 7 3 0 5 1 N1|"
891-
"N5 5 4 7 1 3 2 6 0 N0|"
892-
"N6 6 3 1 4 0 7 5 2 N2|"
893-
"N7 7 2 0 6 4 1 5 3 N3",
894-
895-
.ringTail2 = "N7 7 4 1 3 2 0 6 5 N5|"
896-
"N6 6 3 0 7 5 1 4 2 N2|"
897-
"N4 4 6 2 1 7 0 5 3 N3|"
898-
"N5 5 2 7 3 1 6 0 4 N4|"
899-
"N1 1 0 2 4 3 5 7 6 N6|"
900-
"N0 0 1 2 3 4 5 6 7 N7|"
901-
"N2 2 5 0 3 6 4 7 1 N1|"
902-
"N3 3 7 2 6 1 5 4 0 N0|"
903-
904-
"N7 7 4 1 3 2 0 6 5 N5|"
905-
"N6 6 3 0 7 5 1 4 2 N2|"
906-
"N4 4 6 2 1 7 0 5 3 N3|"
907-
"N5 5 2 7 3 1 6 0 4 N4|"
908-
"N1 1 0 2 4 3 5 7 6 N6|"
909-
"N0 0 1 2 3 4 5 6 7 N7|"
910-
"N2 2 5 0 3 6 4 7 1 N1|"
911-
"N3 3 7 2 6 1 5 4 0 N0|"
912-
913-
"N7 7 4 1 3 2 0 6 5 N5|"
914-
"N6 6 3 0 7 5 1 4 2 N2|"
915-
"N4 4 6 2 1 7 0 5 3 N3|"
916-
"N5 5 2 7 3 1 6 0 4 N4|"
917-
"N1 1 0 2 4 3 5 7 6 N6|"
918-
"N0 0 1 2 3 4 5 6 7 N7|"
919-
"N2 2 5 0 3 6 4 7 1 N1|"
920-
"N3 3 7 2 6 1 5 4 0 N0",
921-
922-
923-
.ringTail1 = "N5 5 4 2 7 1 6 3 0 N0|"
924-
"N2 2 5 0 3 7 4 6 1 N1|"
925-
"N3 3 6 4 0 5 1 7 2 N2|"
926-
"N4 4 7 0 6 5 2 1 3 N3|"
927-
"N6 6 2 0 7 5 3 1 4 N4|"
928-
"N7 7 3 2 6 0 4 1 5 N5|"
929-
"N1 1 0 2 4 3 5 7 6 N6|"
930-
"N0 0 1 2 3 4 5 6 7 N7|"
931-
932-
"N5 5 4 2 7 1 6 3 0 N0|"
933-
"N2 2 5 0 3 7 4 6 1 N1|"
934-
"N3 3 6 4 0 5 1 7 2 N2|"
935-
"N4 4 7 0 6 5 2 1 3 N3|"
936-
"N6 6 2 0 7 5 3 1 4 N4|"
937-
"N7 7 3 2 6 0 4 1 5 N5|"
938-
"N1 1 0 2 4 3 5 7 6 N6|"
939-
"N0 0 1 2 3 4 5 6 7 N7|"
940-
941-
"N5 5 4 2 7 1 6 3 0 N0|"
942-
"N2 2 5 0 3 7 4 6 1 N1|"
943-
"N3 3 6 4 0 5 1 7 2 N2|"
944-
"N4 4 7 0 6 5 2 1 3 N3|"
945-
"N6 6 2 0 7 5 3 1 4 N4|"
946-
"N7 7 3 2 6 0 4 1 5 N5|"
947-
"N1 1 0 2 4 3 5 7 6 N6|"
948-
"N0 0 1 2 3 4 5 6 7 N7",
949-
950-
.options = "noCpuCheck=1,tuning=5,disableNumaMatching=1,isRoCE=1",
951-
};
952-
953844

954845
static struct rcclRomeModel romeTopoModels[] = {
955846
rome_model_22, /* 0 */
@@ -995,7 +886,6 @@ static struct rcclRomeModel romeTopoModels[] = {
995886
rome_model_81, /* 40 */
996887
rome_model_84, /* 41 */
997888
rome_model_85, /* 42 */
998-
rome_model_86, /* 43 */
999889
};
1000890

1001891
/* Parse user defined rings. Format is like :
@@ -1283,27 +1173,6 @@ static bool checkOption(const char *options, const char *name) {
12831173
return false;
12841174
}
12851175

1286-
static int checkOptionValue(const char *options, const char *name) {
1287-
if (strcmp(options, "")) {
1288-
char *str_temp = (char *)malloc(strlen(options) + 1);
1289-
strcpy(str_temp, options);
1290-
char* tokens[MAX_OPT_TOKENS];
1291-
int numTokens = 0;
1292-
char* state;
1293-
tokens[numTokens] = strtok_r(str_temp, "=, ", &state);
1294-
numTokens++;
1295-
while (tokens[numTokens-1] != NULL && numTokens < MAX_OPT_TOKENS)
1296-
tokens[numTokens++] = strtok_r(NULL, "=, ", &state);
1297-
for (int i = 0; i < numTokens/2; i++) {
1298-
if (strcmp(tokens[i*2], name) == 0) {
1299-
return atol(tokens[i*2+1]);
1300-
}
1301-
}
1302-
free(str_temp);
1303-
}
1304-
return -2;
1305-
}
1306-
13071176
ncclResult_t parseChordalRing(struct ncclTopoSystem* system, struct ncclTopoGraph* graph) {
13081177
static const char *ringBase = "0 1 2 3 5 4 7 6|0 2 4 1 7 3 6 5|0 3 1 5 7 2 6 4|0 6 7 4 5 3 2 1|0 5 6 3 7 1 4 2|0 4 6 2 7 5 1 3";
13091178
int id[8], dist[8];
@@ -1667,24 +1536,12 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
16671536
}
16681537
if (i < romeTopo.nGpus) match_nbio = false;
16691538

1670-
// check if NCCL_IB_GID_INDEX=3 -- needed for RoCE systems
1671-
const char* ncclIbGidIndex = ncclGetEnv("NCCL_IB_GID_INDEX");
1672-
int gid_index = 0;
1673-
if (ncclIbGidIndex) gid_index = atoi(ncclIbGidIndex);
1674-
int isRoCE = gid_index == 3 ? 1 : 0;
1675-
16761539
for (i = 0; i < sizeof(romeTopoModels)/sizeof(romeTopoModels[0]); i++) {
16771540
bool ignore_cpu = checkOption(romeTopoModels[i].options, "noCpuCheck");
16781541
if (!ignore_cpu && (arch != NCCL_TOPO_CPU_ARCH_X86 || vendor != NCCL_TOPO_CPU_VENDOR_AMD || model != NCCL_TOPO_CPU_TYPE_ROME))
16791542
continue;
1680-
16811543
bool ignore_numa = checkOption(romeTopoModels[i].options, "disableNumaMatching");
16821544
if (!ignore_numa && romeTopo.nCpus != romeTopoModels[i].nCpus) continue;
1683-
1684-
// check if "isRoCE=1" is defined in model struct options
1685-
int optionsIsRoCE = checkOptionValue(romeTopoModels[i].options, "isRoCE");
1686-
if (optionsIsRoCE != -2 && optionsIsRoCE != isRoCE) continue;
1687-
16881545
if (romeTopo.nGpus != romeTopoModels[i].nGpus ||
16891546
romeTopo.nNics != romeTopoModels[i].nNics || romeTopo.nLinks != romeTopoModels[i].nLinks) continue;
16901547
if (!ignore_numa && strcmp(romeTopoModels[i].pattern, pattern)) continue;

0 commit comments

Comments
 (0)