@@ -16,7 +16,6 @@ namespace RcclUnitTesting
16
16
std::vector<int > const numElements = {1048576 , 53327 , 1024 , 0 };
17
17
bool const inPlace = false ;
18
18
bool const useManagedMem = false ;
19
- int const groupCallId = 0 ;
20
19
21
20
OptionalColArgs options;
22
21
bool isCorrect = true ;
@@ -28,7 +27,10 @@ namespace RcclUnitTesting
28
27
int ranksPerGpu = rpg == 0 ? 1 : testBed.ev .maxRanksPerGpu ;
29
28
int totalRanks = numGpus * ranksPerGpu;
30
29
int const numProcesses = isMultiProcess ? numGpus : 1 ;
31
- testBed.InitComms (TestBed::GetDeviceIdsList (numProcesses, numGpus, ranksPerGpu), 1 );
30
+ testBed.InitComms (TestBed::GetDeviceIdsList (numProcesses, numGpus, ranksPerGpu),
31
+ {1 ,2 }, // two group, second group sendrecv to self, has 2 coll
32
+ testBed.GetNumStreamsPerGroup (1 ,2 ),
33
+ 2 );
32
34
33
35
for (int dataIdx = 0 ; dataIdx < dataTypes.size () && isCorrect; ++dataIdx)
34
36
for (int numIdx = 0 ; numIdx < numElements.size () && isCorrect; ++numIdx)
@@ -37,6 +39,8 @@ namespace RcclUnitTesting
37
39
for (int recvRank = 0 ; recvRank < totalRanks; ++recvRank)
38
40
{
39
41
options.root = recvRank;
42
+ int groupCallId = sendRank == recvRank; // self sendrecv group has two coll
43
+ int recvId = sendRank == recvRank; // where recv will be second coll
40
44
testBed.SetCollectiveArgs (ncclCollSend,
41
45
dataTypes[dataIdx],
42
46
numElements[numIdx],
@@ -47,36 +51,46 @@ namespace RcclUnitTesting
47
51
sendRank);
48
52
if (recvRank == 0 )
49
53
{
50
- testBed.AllocateMem (inPlace, useManagedMem, groupCallId, 0 , sendRank);
51
- testBed.PrepareData (groupCallId, 0 , sendRank);
52
- }
53
- if (recvRank != sendRank)
54
- {
55
- if (testBed.ev .showNames ) // Show test names
56
- INFO (" %s Datatype: %s SendReceive test Rank %d -> Rank %d for %d Elements\n " ,
57
- isMultiProcess ? " MP" : " SP" ,
58
- ncclDataTypeNames[dataTypes[dataIdx]],
59
- sendRank,
60
- recvRank,
61
- numElements[numIdx]);
62
-
63
- options.root = sendRank;
64
- testBed.SetCollectiveArgs (ncclCollRecv,
54
+ // set up the collArg slot to make sure AllocateMem is called once and correctly
55
+ testBed.SetCollectiveArgs (ncclCollSend,
65
56
dataTypes[dataIdx],
66
57
numElements[numIdx],
67
58
numElements[numIdx],
68
59
options,
69
60
0 ,
70
- groupCallId,
71
- recvRank);
72
- testBed.AllocateMem (inPlace, useManagedMem, groupCallId, 0 , recvRank);
73
- testBed.PrepareData (groupCallId, 0 , recvRank);
74
- testBed.ExecuteCollectives ({sendRank, recvRank});
75
- testBed.ValidateResults (isCorrect, groupCallId, 0 , recvRank);
76
- testBed.DeallocateMem (groupCallId, 0 , recvRank);
61
+ !groupCallId,
62
+ sendRank);
63
+ testBed.AllocateMem (inPlace, useManagedMem, 0 , 0 , sendRank);
64
+ testBed.PrepareData (0 , 0 , sendRank);
65
+ testBed.AllocateMem (inPlace, useManagedMem, 1 , 0 , sendRank);
66
+ testBed.PrepareData (1 , 0 , sendRank);
77
67
}
68
+
69
+ if (testBed.ev .showNames ) // Show test names
70
+ INFO (" %s Datatype: %s SendReceive test Rank %d -> Rank %d for %d Elements\n " ,
71
+ isMultiProcess ? " MP" : " SP" ,
72
+ ncclDataTypeNames[dataTypes[dataIdx]],
73
+ sendRank,
74
+ recvRank,
75
+ numElements[numIdx]);
76
+ options.root = sendRank;
77
+
78
+ testBed.SetCollectiveArgs (ncclCollRecv,
79
+ dataTypes[dataIdx],
80
+ numElements[numIdx],
81
+ numElements[numIdx],
82
+ options,
83
+ recvId,
84
+ groupCallId,
85
+ recvRank);
86
+ testBed.AllocateMem (inPlace, useManagedMem, groupCallId, recvId, recvRank);
87
+ testBed.PrepareData (groupCallId, recvId, recvRank);
88
+ testBed.ExecuteCollectives ({sendRank, recvRank}, groupCallId);
89
+ testBed.ValidateResults (isCorrect, groupCallId, recvId, recvRank);
90
+ testBed.DeallocateMem (groupCallId, recvId, recvRank);
78
91
}
79
- testBed.DeallocateMem (groupCallId, 0 , sendRank);
92
+ testBed.DeallocateMem (0 , 0 , sendRank);
93
+ testBed.DeallocateMem (1 , 0 , sendRank);
80
94
}
81
95
testBed.DestroyComms ();
82
96
}
@@ -94,7 +108,6 @@ namespace RcclUnitTesting
94
108
bool const inPlace = false ;
95
109
bool const useManagedMem = false ;
96
110
bool const userRegistered = true ;
97
- int const groupCallId = 0 ;
98
111
99
112
OptionalColArgs options;
100
113
bool isCorrect = true ;
@@ -106,7 +119,10 @@ namespace RcclUnitTesting
106
119
int ranksPerGpu = rpg == 0 ? 1 : testBed.ev .maxRanksPerGpu ;
107
120
int totalRanks = numGpus * ranksPerGpu;
108
121
int const numProcesses = isMultiProcess ? numGpus : 1 ;
109
- testBed.InitComms (TestBed::GetDeviceIdsList (numProcesses, numGpus, ranksPerGpu), 1 );
122
+ testBed.InitComms (TestBed::GetDeviceIdsList (numProcesses, numGpus, ranksPerGpu),
123
+ {1 ,2 }, // two group, second group sendrecv to self, has 2 coll
124
+ testBed.GetNumStreamsPerGroup (1 ,2 ),
125
+ 2 );
110
126
111
127
for (int dataIdx = 0 ; dataIdx < dataTypes.size () && isCorrect; ++dataIdx)
112
128
for (int numIdx = 0 ; numIdx < numElements.size () && isCorrect; ++numIdx)
@@ -115,6 +131,8 @@ namespace RcclUnitTesting
115
131
for (int recvRank = 0 ; recvRank < totalRanks; ++recvRank)
116
132
{
117
133
options.root = recvRank;
134
+ int groupCallId = sendRank == recvRank;
135
+ int recvId = sendRank == recvRank;
118
136
testBed.SetCollectiveArgs (ncclCollSend,
119
137
dataTypes[dataIdx],
120
138
numElements[numIdx],
@@ -125,36 +143,45 @@ namespace RcclUnitTesting
125
143
sendRank);
126
144
if (recvRank == 0 )
127
145
{
128
- testBed.AllocateMem (inPlace, useManagedMem, groupCallId, 0 , sendRank, userRegistered);
129
- testBed.PrepareData (groupCallId, 0 , sendRank);
130
- }
131
- if (recvRank != sendRank)
132
- {
133
- if (testBed.ev .showNames ) // Show test names
134
- INFO (" %s Datatype: %s SendReceive test Rank %d -> Rank %d for %d Elements\n " ,
135
- isMultiProcess ? " MP" : " SP" ,
136
- ncclDataTypeNames[dataTypes[dataIdx]],
137
- sendRank,
138
- recvRank,
139
- numElements[numIdx]);
140
-
141
- options.root = sendRank;
142
- testBed.SetCollectiveArgs (ncclCollRecv,
146
+ testBed.SetCollectiveArgs (ncclCollSend,
143
147
dataTypes[dataIdx],
144
148
numElements[numIdx],
145
149
numElements[numIdx],
146
150
options,
147
151
0 ,
148
- groupCallId,
149
- recvRank);
150
- testBed.AllocateMem (inPlace, useManagedMem, groupCallId, 0 , recvRank, userRegistered);
151
- testBed.PrepareData (groupCallId, 0 , recvRank);
152
- testBed.ExecuteCollectives ({sendRank, recvRank});
153
- testBed.ValidateResults (isCorrect, groupCallId, 0 , recvRank);
154
- testBed.DeallocateMem (groupCallId, 0 , recvRank);
152
+ !groupCallId,
153
+ sendRank);
154
+ testBed.AllocateMem (inPlace, useManagedMem, 0 , 0 , sendRank, userRegistered);
155
+ testBed.PrepareData (0 , 0 , sendRank);
156
+ testBed.AllocateMem (inPlace, useManagedMem, 1 , 0 , sendRank, userRegistered);
157
+ testBed.PrepareData (1 , 0 , sendRank);
155
158
}
159
+
160
+ if (testBed.ev .showNames ) // Show test names
161
+ INFO (" %s Datatype: %s SendReceive test Rank %d -> Rank %d for %d Elements\n " ,
162
+ isMultiProcess ? " MP" : " SP" ,
163
+ ncclDataTypeNames[dataTypes[dataIdx]],
164
+ sendRank,
165
+ recvRank,
166
+ numElements[numIdx]);
167
+
168
+ options.root = sendRank;
169
+ testBed.SetCollectiveArgs (ncclCollRecv,
170
+ dataTypes[dataIdx],
171
+ numElements[numIdx],
172
+ numElements[numIdx],
173
+ options,
174
+ recvId,
175
+ groupCallId,
176
+ recvRank);
177
+ testBed.AllocateMem (inPlace, useManagedMem, groupCallId, recvId, recvRank, userRegistered);
178
+ testBed.PrepareData (groupCallId, recvId, recvRank);
179
+ testBed.ExecuteCollectives ({sendRank, recvRank}, groupCallId);
180
+ testBed.ValidateResults (isCorrect, groupCallId, recvId, recvRank);
181
+ testBed.DeallocateMem (groupCallId, recvId, recvRank);
156
182
}
157
- testBed.DeallocateMem (groupCallId, 0 , sendRank);
183
+ testBed.DeallocateMem (0 , 0 , sendRank);
184
+ testBed.DeallocateMem (1 , 0 , sendRank);
158
185
}
159
186
testBed.DestroyComms ();
160
187
}
0 commit comments