Skip to content

Commit 1e58e8e

Browse files
committed
Created separate programs for master, client, and server.
Code cleaned up, system tested with 100k inserts and lookups.
1 parent e31b764 commit 1e58e8e

14 files changed

+448
-50
lines changed

core/modules/loader/BufferUdp.cc

+32-21
Original file line numberDiff line numberDiff line change
@@ -160,30 +160,41 @@ bool BufferUdp::retrieveString(std::string& out, size_t len) {
160160

161161

162162
std::string BufferUdp::dumpStr(bool hexDump, bool charDump) const {
163-
std::stringstream os;
164-
os << "maxLength=" << _length;
165-
os << " buffer=" << (void*)_buffer;
166-
os << " wCurLen=" << getAvailableWriteLength();
167-
os << " wCursor=" << (void*)_wCursor;
168-
os << " rCurLen=" << getBytesLeftToRead();
169-
os << " rCursor=" << (void*)_rCursor;
170-
os << " end=" << (void*)_end;
171-
172-
// hex dump
173-
if (hexDump) {
174-
os << "(";
175-
for (const char* j=_buffer; j < _wCursor; ++j) {
176-
os << std::hex << (int)*j << " ";
177-
}
178-
os << ")";
179-
}
163+
std::stringstream os;
164+
dump(os, hexDump, charDump);
165+
return os.str();
166+
}
167+
180168

181-
// character dump
182-
if (charDump) {
183-
os << "(" << std::string(_buffer, _wCursor) << ")";
169+
std::ostream& BufferUdp::dump(std::ostream &os, bool hexDump, bool charDump) const {
170+
os << "maxLength=" << _length;
171+
os << " buffer=" << (void*)_buffer;
172+
os << " wCurLen=" << getAvailableWriteLength();
173+
os << " wCursor=" << (void*)_wCursor;
174+
os << " rCurLen=" << getBytesLeftToRead();
175+
os << " rCursor=" << (void*)_rCursor;
176+
os << " end=" << (void*)_end;
177+
178+
// hex dump
179+
if (hexDump) {
180+
os << "(";
181+
for (const char* j=_buffer; j < _wCursor; ++j) {
182+
os << std::hex << (int)*j << " ";
184183
}
184+
os << ")";
185+
}
185186

186-
return os.str();
187+
// character dump
188+
if (charDump) {
189+
os << "(" << std::string(_buffer, _wCursor) << ")";
187190
}
188191

192+
return os;
193+
}
194+
195+
196+
std::ostream& operator<<(std::ostream& os, BufferUdp const& buf) {
197+
return buf.dump(os, false, false);
198+
}
199+
189200
}}} // namespace lsst:qserv:loader

core/modules/loader/BufferUdp.h

+5
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ class BufferUdp {
138138
/// in ascii.
139139
std::string dumpStr(bool hexDump, bool charDump) const;
140140

141+
std::ostream& dump(std::ostream &os, bool hexDump, bool charDump) const;
142+
141143
private:
142144
void _setupBuffer() {
143145
_end = _buffer + _length;
@@ -161,6 +163,9 @@ class BufferUdp {
161163
bool _ourBuffer{false}; ///< true if this class object is responsible for deleting the buffer.
162164
};
163165

166+
/// Print basic buffer information. Use BufferUdp::dump() directly if the buffer contents are needed.
167+
std::ostream& operator<<(std::ostream& os, BufferUdp const& buf);
168+
164169
}}} // namespace lsst:qserv:loader
165170

166171
#endif // LSST_QSERV_LOADER_BUFFERUDP_H

core/modules/loader/Central.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ class Central {
8080
int getErrCount() const { return _server->getErrCount(); }
8181

8282
/// Send the contents of 'sendBuf' to 'host:port'. This waits for the message to be
83-
/// sent before returning.
83+
/// sent before returning. Throws boost::system::system_error on failure.
8484
void sendBufferTo(std::string const& host, int port, BufferUdp& sendBuf) {
8585
_server->sendBufferTo(host, port, sendBuf);
8686
}

core/modules/loader/CentralClient.cc

+19-5
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ CentralClient::CentralClient(boost::asio::io_service& ioService_,
5555
_defWorkerHost(cfg.getDefWorkerHost()),
5656
_defWorkerPortUdp(cfg.getDefWorkerPortUdp()),
5757
_doListMaxLookups(cfg.getMaxLookups()),
58-
_doListMaxInserts(cfg.getMaxInserts()) {
58+
_doListMaxInserts(cfg.getMaxInserts()),
59+
_maxRequestSleepTime(cfg.getMaxRequestSleepTime()){
5960
}
6061

6162

@@ -89,7 +90,7 @@ void CentralClient::_handleKeyInfo(LoaderMsg const& inMsg, std::unique_ptr<proto
8990
CompositeKey key(protoData->keyint(), protoData->keystr());
9091
ChunkSubchunk chunkInfo(protoData->chunk(), protoData->subchunk());
9192

92-
LOGS(_log, LOG_LVL_INFO, "trying to remove oneShot for lookup key=" << key << " " << chunkInfo);
93+
LOGS(_log, LOG_LVL_DEBUG, "trying to remove oneShot for lookup key=" << key << " " << chunkInfo);
9394
/// Locate the original one shot and mark it as done.
9495
CentralClient::KeyInfoReqOneShot::Ptr keyInfoOneShot;
9596
{
@@ -227,8 +228,14 @@ void CentralClient::_keyInsertReq(CompositeKey const& key, int chunk, int subchu
227228
StringElement strElem;
228229
protoKeyInsert.SerializeToString(&(strElem.element));
229230
strElem.appendToData(msgData);
230-
231-
sendBufferTo(getDefWorkerHost(), getDefWorkerPortUdp(), msgData);
231+
try {
232+
sendBufferTo(getDefWorkerHost(), getDefWorkerPortUdp(), msgData);
233+
} catch (boost::system::system_error e) {
234+
LOGS(_log, LOG_LVL_ERROR, "CentralClient::_keyInsertReq boost system_error=" << e.what() <<
235+
" key=" << key << " chunk=" << chunk << " sub=" << subchunk);
236+
exit(-1); // TODO:&&& The correct course of action is unclear and requires thought,
237+
// so just blow up so it's unmistakable something bad happened for now.
238+
}
232239
}
233240

234241

@@ -296,7 +303,14 @@ void CentralClient::_keyInfoReq(CompositeKey const& key) {
296303
protoKeyInsert.SerializeToString(&(strElem.element));
297304
strElem.appendToData(msgData);
298305

299-
sendBufferTo(getDefWorkerHost(), getDefWorkerPortUdp(), msgData);
306+
try {
307+
sendBufferTo(getDefWorkerHost(), getDefWorkerPortUdp(), msgData);
308+
} catch (boost::system::system_error e) {
309+
LOGS(_log, LOG_LVL_ERROR, "CentralClient::_keyInfoReq boost system_error=" << e.what() <<
310+
" key=" << key);
311+
exit(-1); // TODO:&&& The correct course of action is unclear and requires thought.
312+
// So just blow up so it's unmistakable something bad happened for now.
313+
}
300314
}
301315

302316

core/modules/loader/CentralClient.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -164,9 +164,9 @@ class CentralClient : public Central {
164164
const int _defWorkerPortUdp; ///< Default worker UDP port
165165

166166

167-
size_t _doListMaxLookups; ///< Maximum number of concurrent lookups in DoList DM-16555 &&&
168-
size_t _doListMaxInserts; ///< Maximum number of concurrent inserts in DoList DM-16555 &&&
169-
int _maxRequestSleepTime{100000}; ///< Time to sleep between checking requests when at max length &&& add config file entry
167+
size_t _doListMaxLookups{1000}; ///< Maximum number of concurrent lookups in DoList (set by config)
168+
size_t _doListMaxInserts{1000}; ///< Maximum number of concurrent inserts in DoList (set by config)
169+
int _maxRequestSleepTime{100000}; ///< Time to sleep between checking requests when at max length (set by config)
170170

171171
std::map<CompositeKey, KeyInsertReqOneShot::Ptr> _waitingKeyInsertMap;
172172
std::mutex _waitingKeyInsertMtx; ///< protects _waitingKeyInsertMap, _doListMaxInserts

core/modules/loader/CentralMaster.cc

+18-2
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,15 @@ void CentralMaster::setWorkerNeighbor(MWorkerListItem::WPtr const& target, int m
9797
UInt32Element neighborIdElem(neighborId);
9898
neighborIdElem.appendToData(msgData);
9999
auto addr = targetWorker->getUdpAddress();
100-
sendBufferTo(addr.ip, addr.port, msgData);
100+
try {
101+
sendBufferTo(addr.ip, addr.port, msgData);
102+
} catch (boost::system::system_error e) {
103+
LOGS(_log, LOG_LVL_ERROR, "CentralMaster::setWorkerNeighbor boost system_error=" << e.what() <<
104+
" targ=" << *targetWorker << " msg=" << message <<
105+
" neighborId=" << neighborId);
106+
exit(-1); // TODO:&&& The correct course of action is unclear and requires thought,
107+
// so just blow up so it's unmistakable something bad happened for now.
108+
}
101109
}
102110

103111

@@ -196,7 +204,15 @@ void CentralMaster::reqWorkerKeysInfo(uint64_t msgId, std::string const& targetI
196204
LoaderMsg reqMsg(LoaderMsg::WORKER_KEYS_INFO_REQ, msgId, ourHostName, ourPort);
197205
BufferUdp data;
198206
reqMsg.appendToData(data);
199-
sendBufferTo(targetIp, targetPort, data);
207+
try {
208+
sendBufferTo(targetIp, targetPort, data);
209+
} catch (boost::system::system_error e) {
210+
LOGS(_log, LOG_LVL_ERROR, "CentralMaster::reqWorkerKeysInfo boost system_error=" << e.what() <<
211+
" msgId=" << msgId << " tIp=" << targetIp << " tPort=" << targetPort <<
212+
" ourHost=" << ourHostName << " ourPort=" << ourPort);
213+
exit(-1); // TODO:&&& The correct course of action is unclear and requires thought,
214+
// so just blow up so it's unmistakable something bad happened for now.
215+
}
200216
}
201217

202218
}}} // namespace lsst::qserv::loader

core/modules/loader/CentralWorker.cc

+53-7
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,14 @@ void CentralWorker::_workerKeyInsertReq(LoaderMsg const& inMsg, std::unique_ptr<
804804
protoReply.SerializeToString(&(strElem.element));
805805
strElem.appendToData(msgData);
806806
LOGS(_log, LOG_LVL_INFO, "sending complete " << key << " to " << nAddr << " from " << _ourId);
807-
sendBufferTo(nAddr.ip, nAddr.port, msgData);
807+
try {
808+
sendBufferTo(nAddr.ip, nAddr.port, msgData);
809+
} catch (boost::system::system_error e) {
810+
LOGS(_log, LOG_LVL_ERROR, "CentralWorker::_workerKeyInsertReq boost system_error=" << e.what() <<
811+
" msg=" << inMsg);
812+
exit(-1); // TODO:&&& The correct course of action is unclear and requires thought,
813+
// so just blow up so it's unmistakable something bad happened for now.
814+
}
808815
} else {
809816
lck.unlock();
810817
// Find the target range in the list and send the request there
@@ -842,7 +849,14 @@ void CentralWorker::_forwardKeyInsertRequest(NetworkAddress const& targetAddr, L
842849
StringElement strElem;
843850
protoData->SerializeToString(&(strElem.element));
844851
strElem.appendToData(msgData);
845-
sendBufferTo(targetAddr.ip, targetAddr.port, msgData);
852+
try {
853+
sendBufferTo(targetAddr.ip, targetAddr.port, msgData);
854+
} catch (boost::system::system_error e) {
855+
LOGS(_log, LOG_LVL_ERROR, "CentralWorker::_forwardKeyInsertRequest boost system_error=" << e.what() <<
856+
" tAddr=" << targetAddr << " inMsg=" << inMsg);
857+
exit(-1); // TODO:&&& The correct course of action is unclear and requires thought,
858+
// so just blow up so it's unmistakable something bad happened for now.
859+
}
846860
}
847861

848862

@@ -909,7 +923,14 @@ void CentralWorker::_workerKeyInfoReq(LoaderMsg const& inMsg, std::unique_ptr<pr
909923
protoReply.SerializeToString(&(strElem.element));
910924
strElem.appendToData(msgData);
911925
LOGS(_log, LOG_LVL_INFO, "sending key lookup " << key << " to " << nAddr << " from " << _ourId);
912-
sendBufferTo(nAddr.ip, nAddr.port, msgData);
926+
try {
927+
sendBufferTo(nAddr.ip, nAddr.port, msgData);
928+
}catch (boost::system::system_error e) {
929+
LOGS(_log, LOG_LVL_ERROR, "CentralWorker::_workerKeyInfoReq boost system_error=" << e.what() <<
930+
" inMsg=" << inMsg);
931+
exit(-1); // TODO:&&& The correct course of action is unclear and requires thought,
932+
// so just blow up so it's unmistakable something bad happened for now.
933+
}
913934
} else {
914935
// Find the target range in the list and send the request there
915936
auto targetWorker = _wWorkerList->findWorkerForKey(key);
@@ -986,7 +1007,14 @@ void CentralWorker::_sendWorkerKeysInfo(NetworkAddress const& nAddr, uint64_t ms
9861007
LOGS(_log, LOG_LVL_INFO, "sending WorkerKeysInfo name=" << _ourId <<
9871008
" mapsize=" << protoWKI->mapsize() << " recentAdds=" << protoWKI->recentadds() <<
9881009
" to " << nAddr);
989-
sendBufferTo(nAddr.ip, nAddr.port, msgData);
1010+
try {
1011+
sendBufferTo(nAddr.ip, nAddr.port, msgData);
1012+
} catch (boost::system::system_error e) {
1013+
LOGS(_log, LOG_LVL_ERROR, "CentralWorker::_sendWorkerKeysInfo boost system_error=" << e.what() <<
1014+
" nAddr=" << nAddr << "msgId=" << msgId);
1015+
exit(-1); // TODO:&&& The correct course of action is unclear and requires thought,
1016+
// so just blow up so it's unmistakable something bad happened for now.
1017+
}
9901018
}
9911019

9921020

@@ -1035,7 +1063,14 @@ void CentralWorker::_forwardKeyInfoRequest(WWorkerListItem::Ptr const& target, L
10351063
strElem.appendToData(msgData);
10361064

10371065
auto nAddr = target->getUdpAddress();
1038-
sendBufferTo(nAddr.ip, nAddr.port, msgData);
1066+
try {
1067+
sendBufferTo(nAddr.ip, nAddr.port, msgData);
1068+
} catch (boost::system::system_error e) {
1069+
LOGS(_log, LOG_LVL_ERROR, "CentralWorker::_forwardKeyInfoRequest boost system_error=" << e.what() <<
1070+
" target=" << target << " inMsg=" << inMsg);
1071+
exit(-1); // TODO:&&& The correct course of action is unclear and requires thought,
1072+
// so just blow up so it's unmistakable something bad happened for now.
1073+
}
10391074
}
10401075

10411076

@@ -1053,7 +1088,13 @@ void CentralWorker::_registerWithMaster() {
10531088
protoBuf.SerializeToString(&(strElem.element));
10541089
strElem.appendToData(msgData);
10551090

1056-
sendBufferTo(getMasterHostName(), getMasterPort(), msgData);
1091+
try {
1092+
sendBufferTo(getMasterHostName(), getMasterPort(), msgData);
1093+
} catch (boost::system::system_error e) {
1094+
LOGS(_log, LOG_LVL_ERROR, "CentralWorker::_registerWithMaster boost system_error=" << e.what());
1095+
exit(-1); // TODO:&&& The correct course of action is unclear and requires thought,
1096+
// so just blow up so it's unmistakable something bad happened for now.
1097+
}
10571098
}
10581099

10591100

@@ -1063,7 +1104,12 @@ void CentralWorker::testSendBadMessage() {
10631104
LOGS(_log, LOG_LVL_INFO, "testSendBadMessage msg=" << msg);
10641105
BufferUdp msgData(128);
10651106
msg.appendToData(msgData);
1066-
sendBufferTo(getMasterHostName(), getMasterPort(), msgData);
1107+
try {
1108+
sendBufferTo(getMasterHostName(), getMasterPort(), msgData);
1109+
} catch (boost::system::system_error e) {
1110+
LOGS(_log, LOG_LVL_ERROR, "CentralWorker::testSendBadMessage boost system_error=" << e.what());
1111+
throw e; // This would not be the expected error, re-throw so it is noticed.
1112+
}
10671113
}
10681114

10691115

core/modules/loader/ClientConfig.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ class ClientConfig : public ConfigBase {
5252
int getLoopSleepTime() const { return _loopSleepTime->getInt(); } // TODO: Maybe chrono types for times
5353
int getMaxLookups() const { return _maxLookups->getInt(); }
5454
int getMaxInserts() const { return _maxInserts->getInt(); }
55+
int getMaxRequestSleepTime() const { return _maxRequestSleepTime->getInt(); }
5556

5657
std::ostream& dump(std::ostream &os) const override;
5758

@@ -86,7 +87,11 @@ class ClientConfig : public ConfigBase {
8687
/// Maximum number of insert requests allowed in the DoList.
8788
ConfigElement::Ptr _maxInserts{
8889
ConfigElement::create(cfgList, header, "maxInserts", ConfigElement::INT, false, "90000")};
89-
90+
/// When reaching maxInserts or maxLookups, sleep this long before trying to add more,
91+
/// in microseconds. 100000 microseconds = 0.1 sec
92+
ConfigElement::Ptr _maxRequestSleepTime{
93+
ConfigElement::create(cfgList, header,
94+
"maxRequestSleepTime", ConfigElement::INT, false, "100000")};
9095
};
9196

9297

core/modules/loader/MWorkerList.cc

+9-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,15 @@ bool MWorkerList::sendListTo(uint64_t msgId, std::string const& ip, short port,
181181
workerList.appendToData(*_stateListData);
182182
}
183183
}
184-
_central->sendBufferTo(ip, port, *_stateListData);
184+
try {
185+
_central->sendBufferTo(ip, port, *_stateListData);
186+
} catch (boost::system::system_error e) {
187+
LOGS(_log, LOG_LVL_ERROR, "MWorkerList::sendListTo boost system_error=" << e.what() <<
188+
" msgId=" << msgId << " ip=" << ip << " port=" << port <<
189+
" ourName=" << ourHostName << " ourPort=" << ourPort);
190+
exit(-1); // TODO:&&& The correct course of action is unclear and requires thought,
191+
// so just blow up so it's unmistakable something bad happened for now.
192+
}
185193
}
186194

187195
// See if this worker is known.

core/modules/loader/MasterServer.cc

+8-2
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,14 @@ BufferUdp::Ptr MasterServer::workerInfoRequest(LoaderMsg const& inMsg, BufferUdp
262262
seItem.appendToData(sendBuf);
263263

264264
// Send the response to the worker that asked for it.
265-
_centralMaster->sendBufferTo(requestorAddr->ip, requestorAddr->port, sendBuf);
266-
265+
try {
266+
_centralMaster->sendBufferTo(requestorAddr->ip, requestorAddr->port, sendBuf);
267+
} catch (boost::system::system_error e) {
268+
LOGS(_log, LOG_LVL_ERROR, "MasterServer::workerInfoRequest boost system_error=" << e.what() <<
269+
" inMsg=" << inMsg);
270+
exit(-1); // TODO:&&& The correct course of action is unclear and requires thought,
271+
// so just blow up so it's unmistakable something bad happened for now.
272+
}
267273
} catch (LoaderMsgErr &msgErr) {
268274
LOGS(_log, LOG_LVL_ERROR, msgErr.what());
269275
return prepareReplyMsg(senderEndpoint, inMsg, LoaderMsg::STATUS_PARSE_ERR, msgErr.what());

core/modules/loader/ServerUdpBase.cc

+20-5
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,15 @@ void ServerUdpBase::sendBufferTo(std::string const& hostName, int port, BufferUd
111111
cv.wait(uLock, [&done](){return done;});
112112
#else
113113
using namespace boost::asio;
114-
LOGS(_log, LOG_LVL_INFO, "ServerUdpBase::sendBufferTo hostName=" << hostName << " port=" << port); // &&&
115-
ip::udp::endpoint dest(boost::asio::ip::address::from_string(hostName), port);
116-
_socket.send_to(buffer(sendBuf.getReadCursor(), sendBuf.getBytesLeftToRead()), dest);
114+
LOGS(_log, LOG_LVL_DEBUG, "ServerUdpBase::sendBufferTo hostName=" << hostName << " port=" << port);
115+
try {
116+
ip::udp::endpoint dest = resolve(hostName, port);
117+
_socket.send_to(buffer(sendBuf.getReadCursor(), sendBuf.getBytesLeftToRead()), dest);
118+
} catch (boost::system::system_error const& e) {
119+
LOGS(_log, LOG_LVL_ERROR, "ServerUdpBase::sendBufferTo boost system_error=" << e.what() <<
120+
" host=" << hostName << " port=" << port << " buf=" << sendBuf);
121+
throw e;
122+
}
117123
#endif
118124
}
119125

@@ -147,15 +153,24 @@ void ServerUdpBase::_receivePrepare() {
147153

148154

149155
/// Resolve `hostName` and `port` to an IPv4 UDP endpoint using the member resolver.
/// _resolveMtx is held for the duration of the call, so concurrent callers take turns
/// using _resolver.
/// @param hostName host name or address string handed to the resolver.
/// @param port     port number; passed to the resolver as its decimal string.
/// @return the first endpoint the resolver produces; any further results are ignored.
/// NOTE(review): the resolve() overload used here takes no error_code argument, so per
/// Boost.Asio it is expected to report failure by throwing boost::system::system_error;
/// confirm against the Boost version in use.
boost::asio::ip::udp::endpoint ServerUdpBase::resolve(std::string const& hostName, int port) {
    namespace ip = boost::asio::ip;
    std::lock_guard<std::mutex> lg(_resolveMtx);
    // The resolver returns a range of entries. This uses the first item only.
    auto const results = _resolver.resolve(ip::udp::v4(), hostName, std::to_string(port));
    ip::udp::endpoint dest = *results.begin();
    return dest;
}
160175

161176

0 commit comments

Comments
 (0)