@@ -94,16 +94,18 @@ Status RPCClient::Connect(const std::string& rpc_endpoint) {
94
94
// Connects using a combined RPC endpoint string plus credentials, without an
// explicit session: the call is forwarded to the session-aware overload with
// the value returned by RootSessionID() as the session id.
//
// rpc_endpoint, username, password, rdma_endpoint and src_rdma_ednpoint are
// passed through unchanged; the returned Status is whatever the forwarded
// overload returns.
//
// NOTE(review): `src_rdma_ednpoint` looks like a misspelling of
// `src_rdma_endpoint` (the spelling ConnectRDMA uses in this same change).
// The name is kept as-is here; confirm against the header before renaming.
Status RPCClient::Connect(const std::string& rpc_endpoint,
                          std::string const& username,
                          std::string const& password,
                          const std::string& rdma_endpoint,
                          std::string src_rdma_ednpoint) {
  const auto root_session = RootSessionID();
  return Connect(rpc_endpoint, root_session, username, password, rdma_endpoint,
                 src_rdma_ednpoint);
}
101
102
102
103
Status RPCClient::Connect (const std::string& rpc_endpoint,
103
104
const SessionID session_id,
104
105
std::string const & username,
105
106
std::string const & password,
106
- const std::string& rdma_endpoint) {
107
+ const std::string& rdma_endpoint,
108
+ std::string src_rdma_ednpoint) {
107
109
size_t pos = rpc_endpoint.find (" :" );
108
110
std::string host, port;
109
111
if (pos == std::string::npos) {
@@ -125,28 +127,32 @@ Status RPCClient::Connect(const std::string& rpc_endpoint,
125
127
126
128
return this ->Connect (host, static_cast <uint32_t >(std::stoul (port)),
127
129
session_id, username, password, rdma_host,
128
- static_cast <uint32_t >(std::stoul (rdma_port)));
130
+ static_cast <uint32_t >(std::stoul (rdma_port)),
131
+ src_rdma_ednpoint);
129
132
}
130
133
131
134
// Connects to host:port without credentials or an explicit session: the call
// is forwarded to the full overload with the value returned by
// RootSessionID() as the session id and fixed literal username/password.
//
// host, port, rdma_host, rdma_port and src_rdma_ednpoint are passed through
// unchanged; the returned Status is whatever the forwarded overload returns.
//
// NOTE(review): the username/password literals read as single-space strings
// in this view; confirm whether empty strings were intended.
Status RPCClient::Connect(const std::string& host, uint32_t port,
                          const std::string& rdma_host, uint32_t rdma_port,
                          std::string src_rdma_ednpoint) {
  const auto root_session = RootSessionID();
  return Connect(host, port, root_session, " ", " ", rdma_host, rdma_port,
                 src_rdma_ednpoint);
}
136
140
137
141
// Connects to host:port with credentials but no explicit session: the call is
// forwarded to the full overload with the value returned by RootSessionID()
// as the session id.
//
// host, port, username, password, rdma_host, rdma_port and src_rdma_ednpoint
// are passed through unchanged; the returned Status is whatever the forwarded
// overload returns.
Status RPCClient::Connect(const std::string& host, uint32_t port,
                          std::string const& username,
                          std::string const& password,
                          const std::string& rdma_host, uint32_t rdma_port,
                          std::string src_rdma_ednpoint) {
  const auto root_session = RootSessionID();
  return Connect(host, port, root_session, username, password, rdma_host,
                 rdma_port, src_rdma_ednpoint);
}
144
149
145
150
Status RPCClient::Connect (const std::string& host, uint32_t port,
146
151
const SessionID session_id,
147
152
std::string const & username,
148
153
std::string const & password,
149
- const std::string& rdma_host, uint32_t rdma_port) {
154
+ const std::string& rdma_host, uint32_t rdma_port,
155
+ std::string src_rdma_ednpoint) {
150
156
std::lock_guard<std::recursive_mutex> guard (client_mutex_);
151
157
std::string rpc_endpoint = host + " :" + std::to_string (port);
152
158
RETURN_ON_ASSERT (!connected_ || rpc_endpoint == rpc_endpoint_);
@@ -183,7 +189,8 @@ Status RPCClient::Connect(const std::string& host, uint32_t port,
183
189
instance_id_ = UnspecifiedInstanceID () - 1 ;
184
190
185
191
if (rdma_host.length () > 0 ) {
186
- Status status = ConnectRDMA (rdma_host, rdma_port);
192
+ src_rdma_endpoint_ = src_rdma_ednpoint;
193
+ Status status = ConnectRDMA (rdma_host, rdma_port, src_rdma_ednpoint);
187
194
if (status.ok ()) {
188
195
rdma_endpoint_ = rdma_host + " :" + std::to_string (rdma_port);
189
196
std::cout << " Connected to RPC server: " << rpc_endpoint
@@ -192,33 +199,38 @@ Status RPCClient::Connect(const std::string& host, uint32_t port,
192
199
} else {
193
200
std::cout << " Connect RDMA server failed! Fall back to RPC mode. Error:"
194
201
<< status.message () << std::endl;
202
+ std::cout << " Failed src_rdma_ednpoint: " << src_rdma_ednpoint
203
+ << std::endl;
195
204
}
196
205
}
197
206
198
207
return Status::OK ();
199
208
}
200
209
201
- Status RPCClient::ConnectRDMA (const std::string& rdma_host,
202
- uint32_t rdma_port ) {
210
+ Status RPCClient::ConnectRDMA (const std::string& rdma_host, uint32_t rdma_port,
211
+ std::string src_rdma_endpoint ) {
203
212
if (this ->rdma_connected_ ) {
204
213
return Status::OK ();
205
214
}
206
215
207
216
RETURN_ON_ERROR (RDMAClientCreator::Create (this ->rdma_client_ , rdma_host,
208
- static_cast <int >(rdma_port)));
217
+ static_cast <int >(rdma_port),
218
+ src_rdma_endpoint));
209
219
210
220
int retry = 0 ;
211
221
do {
212
- if (this ->rdma_client_ ->Connect ().ok ()) {
222
+ Status status = this ->rdma_client_ ->Connect ();
223
+ if (status.ok ()) {
213
224
break ;
214
225
}
215
226
if (retry == 10 ) {
216
227
return Status::Invalid (" Failed to connect to RDMA server." );
217
228
}
218
229
retry++;
219
230
usleep (300 * 1000 );
220
- std::cout << " Connect rdma server failed! retry: " << retry << " times."
221
- << std::endl;
231
+ std::cout << " Connect rdma server failed! Error:" + status.message () +
232
+ " retry: "
233
+ << retry << " times." << std::endl;
222
234
} while (true );
223
235
this ->rdma_connected_ = true ;
224
236
return Status::OK ();
@@ -272,6 +284,9 @@ Status RPCClient::RDMAReleaseMemInfo(RegisterMemInfo& remote_info) {
272
284
273
285
Status RPCClient::StopRDMA () {
274
286
if (!rdma_connected_) {
287
+ RETURN_ON_ERROR (
288
+ RDMAClientCreator::Release (RDMAClientCreator::buildConnectionKey (
289
+ rdma_endpoint_, src_rdma_endpoint_)));
275
290
return Status::OK ();
276
291
}
277
292
rdma_connected_ = false ;
@@ -285,7 +300,9 @@ Status RPCClient::StopRDMA() {
285
300
286
301
RETURN_ON_ERROR (rdma_client_->Stop ());
287
302
RETURN_ON_ERROR (rdma_client_->Close ());
288
- RETURN_ON_ERROR (RDMAClientCreator::Release (rdma_endpoint_));
303
+ RETURN_ON_ERROR (
304
+ RDMAClientCreator::Release (RDMAClientCreator::buildConnectionKey (
305
+ rdma_endpoint_, src_rdma_endpoint_)));
289
306
290
307
return Status::OK ();
291
308
}
0 commit comments