@@ -28,7 +28,6 @@ THE SOFTWARE.
28
28
#include < algorithm>
29
29
#include < string.h>
30
30
#include " rome_models.h"
31
- #include " param.h"
32
31
33
32
struct rcclRomeModel {
34
33
int nGpus;
@@ -813,7 +812,7 @@ static struct rcclRomeModel rome_model_81 = {
813
812
" N7 7 3 2 6 0 4 1 5 N5|"
814
813
" N1 1 0 2 4 3 5 7 6 N6|" ,
815
814
816
- .options = " noCpuCheck=1,tuning=5,disableNumaMatching=1,isRoCE=0 " ,
815
+ .options = " noCpuCheck=1,tuning=5,disableNumaMatching=1" ,
817
816
};
818
817
819
818
static struct rcclRomeModel rome_model_84 = {
@@ -842,114 +841,6 @@ static struct rcclRomeModel rome_model_85 = {
842
841
.options = " tuning=2" ,
843
842
};
844
843
845
/*
 * rome_model_86: one static topology model entry (struct rcclRomeModel).
 *
 * What the initializer below shows:
 *  - 8 GPUs, 2 CPUs, 8 NICs and 7 links (.nGpus/.nCpus/.nNics/.nLinks).
 *  - gpuNuma and nicNuma both map the first four entries to 0 and the last
 *    four to 1 (presumably NUMA node indices -- confirm against the struct).
 *  - connMatrix is laid out as 8 rows of 8: 0 on the diagonal, 1 elsewhere.
 *  - gdrLevel is laid out as 8 rows of 8: PATH_PXB on the diagonal, PATH_PHB
 *    for the other entries within the same group of four, PATH_SYS across the
 *    two groups. NOTE(review): row/column meaning (GPU vs NIC) is not visible
 *    here -- confirm against the struct definition.
 *  - ringBase, ringTail2 and ringTail1 each contain one set of eight
 *    '|'-separated rings of the form "N<a> <8 GPU indices> N<b>", written out
 *    three times in a row; the final ring has no trailing '|'.
 *  - options ends with isRoCE=1; the model-matching loop in parseRome4P2H
 *    reads that key through checkOptionValue() and skips the model when it
 *    disagrees with the value derived from NCCL_IB_GID_INDEX.
 */
- static struct rcclRomeModel rome_model_86 = {
846
- .nGpus = 8 , .nCpus = 2 , .nNics = 8 , .nLinks = 7 ,
847
- .gpuIds = { 0xc000 , 0x22000 , 0x38000 , 0x5c000 , 0x9f000 , 0xaf000 , 0xbf000 , 0xdf000 , },
848
- .nicIds = { 0x7000 , 0x1d000 , 0x33000 , 0x57000 , 0x9a000 , 0xaa000 , 0xba000 , 0xda000 , },
849
- .gpuNuma = { 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , },
850
- .nicNuma = { 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , },
851
- .connMatrix = { 0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
852
- 1 , 0 , 1 , 1 , 1 , 1 , 1 , 1 ,
853
- 1 , 1 , 0 , 1 , 1 , 1 , 1 , 1 ,
854
- 1 , 1 , 1 , 0 , 1 , 1 , 1 , 1 ,
855
- 1 , 1 , 1 , 1 , 0 , 1 , 1 , 1 ,
856
- 1 , 1 , 1 , 1 , 1 , 0 , 1 , 1 ,
857
- 1 , 1 , 1 , 1 , 1 , 1 , 0 , 1 ,
858
- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , },
859
- .gdrLevel = {PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
860
- PATH_PHB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
861
- PATH_PHB, PATH_PHB, PATH_PXB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
862
- PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
863
- PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB,
864
- PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PXB, PATH_PHB, PATH_PHB,
865
- PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PHB,
866
- PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, },
867
- .pattern = " 4444" ,
868
- .ringBase = " N0 0 1 2 3 4 5 6 7 N7|"
869
- " N1 1 0 2 4 3 5 7 6 N6|"
870
- " N2 2 5 0 3 6 1 7 4 N4|"
871
- " N3 3 7 0 4 2 1 6 5 N5|"
872
- " N4 4 6 2 7 3 0 5 1 N1|"
873
- " N5 5 4 7 1 3 2 6 0 N0|"
874
- " N6 6 3 1 4 0 7 5 2 N2|"
875
- " N7 7 2 0 6 4 1 5 3 N3|"
876
-
877
- " N0 0 1 2 3 4 5 6 7 N7|"
878
- " N1 1 0 2 4 3 5 7 6 N6|"
879
- " N2 2 5 0 3 6 1 7 4 N4|"
880
- " N3 3 7 0 4 2 1 6 5 N5|"
881
- " N4 4 6 2 7 3 0 5 1 N1|"
882
- " N5 5 4 7 1 3 2 6 0 N0|"
883
- " N6 6 3 1 4 0 7 5 2 N2|"
884
- " N7 7 2 0 6 4 1 5 3 N3|"
885
-
886
- " N0 0 1 2 3 4 5 6 7 N7|"
887
- " N1 1 0 2 4 3 5 7 6 N6|"
888
- " N2 2 5 0 3 6 1 7 4 N4|"
889
- " N3 3 7 0 4 2 1 6 5 N5|"
890
- " N4 4 6 2 7 3 0 5 1 N1|"
891
- " N5 5 4 7 1 3 2 6 0 N0|"
892
- " N6 6 3 1 4 0 7 5 2 N2|"
893
- " N7 7 2 0 6 4 1 5 3 N3" ,
894
-
895
- .ringTail2 = " N7 7 4 1 3 2 0 6 5 N5|"
896
- " N6 6 3 0 7 5 1 4 2 N2|"
897
- " N4 4 6 2 1 7 0 5 3 N3|"
898
- " N5 5 2 7 3 1 6 0 4 N4|"
899
- " N1 1 0 2 4 3 5 7 6 N6|"
900
- " N0 0 1 2 3 4 5 6 7 N7|"
901
- " N2 2 5 0 3 6 4 7 1 N1|"
902
- " N3 3 7 2 6 1 5 4 0 N0|"
903
-
904
- " N7 7 4 1 3 2 0 6 5 N5|"
905
- " N6 6 3 0 7 5 1 4 2 N2|"
906
- " N4 4 6 2 1 7 0 5 3 N3|"
907
- " N5 5 2 7 3 1 6 0 4 N4|"
908
- " N1 1 0 2 4 3 5 7 6 N6|"
909
- " N0 0 1 2 3 4 5 6 7 N7|"
910
- " N2 2 5 0 3 6 4 7 1 N1|"
911
- " N3 3 7 2 6 1 5 4 0 N0|"
912
-
913
- " N7 7 4 1 3 2 0 6 5 N5|"
914
- " N6 6 3 0 7 5 1 4 2 N2|"
915
- " N4 4 6 2 1 7 0 5 3 N3|"
916
- " N5 5 2 7 3 1 6 0 4 N4|"
917
- " N1 1 0 2 4 3 5 7 6 N6|"
918
- " N0 0 1 2 3 4 5 6 7 N7|"
919
- " N2 2 5 0 3 6 4 7 1 N1|"
920
- " N3 3 7 2 6 1 5 4 0 N0" ,
921
-
922
-
923
- .ringTail1 = " N5 5 4 2 7 1 6 3 0 N0|"
924
- " N2 2 5 0 3 7 4 6 1 N1|"
925
- " N3 3 6 4 0 5 1 7 2 N2|"
926
- " N4 4 7 0 6 5 2 1 3 N3|"
927
- " N6 6 2 0 7 5 3 1 4 N4|"
928
- " N7 7 3 2 6 0 4 1 5 N5|"
929
- " N1 1 0 2 4 3 5 7 6 N6|"
930
- " N0 0 1 2 3 4 5 6 7 N7|"
931
-
932
- " N5 5 4 2 7 1 6 3 0 N0|"
933
- " N2 2 5 0 3 7 4 6 1 N1|"
934
- " N3 3 6 4 0 5 1 7 2 N2|"
935
- " N4 4 7 0 6 5 2 1 3 N3|"
936
- " N6 6 2 0 7 5 3 1 4 N4|"
937
- " N7 7 3 2 6 0 4 1 5 N5|"
938
- " N1 1 0 2 4 3 5 7 6 N6|"
939
- " N0 0 1 2 3 4 5 6 7 N7|"
940
-
941
- " N5 5 4 2 7 1 6 3 0 N0|"
942
- " N2 2 5 0 3 7 4 6 1 N1|"
943
- " N3 3 6 4 0 5 1 7 2 N2|"
944
- " N4 4 7 0 6 5 2 1 3 N3|"
945
- " N6 6 2 0 7 5 3 1 4 N4|"
946
- " N7 7 3 2 6 0 4 1 5 N5|"
947
- " N1 1 0 2 4 3 5 7 6 N6|"
948
- " N0 0 1 2 3 4 5 6 7 N7" ,
949
-
950
- .options = " noCpuCheck=1,tuning=5,disableNumaMatching=1,isRoCE=1" ,
951
- };
952
-
953
844
954
845
static struct rcclRomeModel romeTopoModels[] = {
955
846
rome_model_22, /* 0 */
@@ -995,7 +886,6 @@ static struct rcclRomeModel romeTopoModels[] = {
995
886
rome_model_81, /* 40 */
996
887
rome_model_84, /* 41 */
997
888
rome_model_85, /* 42 */
998
- rome_model_86, /* 43 */
999
889
};
1000
890
1001
891
/* Parse user defined rings. Format is like :
@@ -1283,27 +1173,6 @@ static bool checkOption(const char *options, const char *name) {
1283
1173
return false ;
1284
1174
}
1285
1175
1286
/*
 * Look up the integer value of a "name=value" entry in a model options string.
 *
 * The string is split on '=', ',' and ' '. Tokens are consumed in pairs:
 * the first token of each pair is an option name, the second is its value,
 * so every entry is expected to carry a value.
 *
 * options: option string such as "noCpuCheck=1,tuning=5"; NULL or empty is
 *          treated as "no options".
 * name:    option name to look for (exact, case-sensitive match).
 *
 * Returns the value of the first matching entry parsed as a base-10 integer
 * (0 when the value text is not numeric), or -2 when the option is absent or
 * is the last token and has no value after it.
 *
 * The string is scanned in place. No temporary copy is allocated, so the
 * early return cannot leak, a trailing name without a value cannot reach the
 * number parser as a NULL pointer, and the number of entries is not capped
 * by a fixed-size token array.
 */
static int checkOptionValue(const char *options, const char *name) {
  static const char delims[] = "=, ";

  if (options == NULL || name == NULL) return -2;

  const size_t nameLen = strlen(name);
  const char *cur = options;

  for (;;) {
    /* Name token of the next pair. */
    cur += strspn(cur, delims);
    if (*cur == '\0') break;
    const char *key = cur;
    const size_t keyLen = strcspn(key, delims);
    cur = key + keyLen;

    /* Value token paired with that name. */
    cur += strspn(cur, delims);
    if (*cur == '\0') break;  /* dangling name without a value */
    const char *val = cur;
    const size_t valLen = strcspn(val, delims);
    cur = val + valLen;

    if (keyLen == nameLen && strncmp(key, name, keyLen) == 0) {
      char *end = NULL;
      long parsed = strtol(val, &end, 10);
      /* strtol skips leading white space; if that made it read digits that
       * belong to a later token, this token itself held no number. */
      if (end > val + valLen) parsed = 0;
      return (int)parsed;
    }
  }
  return -2;
}
1306
-
1307
1176
ncclResult_t parseChordalRing (struct ncclTopoSystem * system, struct ncclTopoGraph * graph) {
1308
1177
static const char *ringBase = " 0 1 2 3 5 4 7 6|0 2 4 1 7 3 6 5|0 3 1 5 7 2 6 4|0 6 7 4 5 3 2 1|0 5 6 3 7 1 4 2|0 4 6 2 7 5 1 3" ;
1309
1178
int id[8 ], dist[8 ];
@@ -1667,24 +1536,12 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
1667
1536
}
1668
1537
if (i < romeTopo.nGpus ) match_nbio = false ;
1669
1538
1670
- // check if NCCL_IB_GID_INDEX=3 -- needed for RoCE systems
1671
- const char * ncclIbGidIndex = ncclGetEnv (" NCCL_IB_GID_INDEX" );
1672
- int gid_index = 0 ;
1673
- if (ncclIbGidIndex) gid_index = atoi (ncclIbGidIndex);
1674
- int isRoCE = gid_index == 3 ? 1 : 0 ;
1675
-
1676
1539
for (i = 0 ; i < sizeof (romeTopoModels)/sizeof (romeTopoModels[0 ]); i++) {
1677
1540
bool ignore_cpu = checkOption (romeTopoModels[i].options , " noCpuCheck" );
1678
1541
if (!ignore_cpu && (arch != NCCL_TOPO_CPU_ARCH_X86 || vendor != NCCL_TOPO_CPU_VENDOR_AMD || model != NCCL_TOPO_CPU_TYPE_ROME))
1679
1542
continue ;
1680
-
1681
1543
bool ignore_numa = checkOption (romeTopoModels[i].options , " disableNumaMatching" );
1682
1544
if (!ignore_numa && romeTopo.nCpus != romeTopoModels[i].nCpus ) continue ;
1683
-
1684
- // check if "isRoCE=1" is defined in model struct options
1685
- int optionsIsRoCE = checkOptionValue (romeTopoModels[i].options , " isRoCE" );
1686
- if (optionsIsRoCE != -2 && optionsIsRoCE != isRoCE) continue ;
1687
-
1688
1545
if (romeTopo.nGpus != romeTopoModels[i].nGpus ||
1689
1546
romeTopo.nNics != romeTopoModels[i].nNics || romeTopo.nLinks != romeTopoModels[i].nLinks ) continue ;
1690
1547
if (!ignore_numa && strcmp (romeTopoModels[i].pattern , pattern)) continue ;
0 commit comments