Skip to content

Commit e091f2c

Browse files
committed
Added memory/disk hybrid for transferring CSV files.
1 parent 20bc31b commit e091f2c

File tree

19 files changed

+669
-131
lines changed

19 files changed

+669
-131
lines changed

admin/local/docker/compose/docker-compose.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ volumes:
3636
volume_czar_xrootd:
3737
volume_czar_home:
3838
volume_czar_cfg:
39+
volume_czar_transfer:
3940

4041
volume_czar_mariadb_data:
4142
volume_czar_mariadb_cfg:
@@ -272,6 +273,10 @@ services:
272273
- type: volume
273274
source: volume_czar_mariadb_run
274275
target: /qserv/mariadb/run
276+
- type: volume
277+
source: volume_czar_transfer
278+
target: /tmp
279+
275280
- << : *log-volume
276281
expose:
277282
- "3306" # for czar-mariadb
@@ -306,6 +311,9 @@ services:
306311
- type: volume
307312
source: volume_czar_cfg
308313
target: /config-etc
314+
- type: volume
315+
source: volume_czar_transfer
316+
target: /tmp
309317
- type: volume
310318
source: volume_czar_home
311319
target: /home/qserv

src/admin/templates/proxy/etc/qserv-czar.cnf.jinja

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,29 @@ port = {{ czar_db_port }}
2525
# Any table in resultdb that hasn't been updated in this many days is deleted.
2626
oldestResultKeptDays = 7
2727

28+
# Either this should be changed to a high performance docker volume directory
29+
# or /tmp should be mounted as a high performance docker volume directory
30+
# to avoid using limited docker memory to store the contents.
31+
transferDir = /tmp
32+
2833
# maximum number of connection retries to SQL database (per connection attempt)
2934
maxsqlconnectionattempts = 10
3035

3136
# maximum user query result size in MB
3237
maxtablesize_mb = 5100
3338

39+
# maximum number of MB of concurrent csv transfer files allowed to be kept in
40+
# memory, after this point they will be temporarily written to disk.
41+
# 0 is used for testing. 10000 is usually reasonable.
42+
maxTransferMemMB = 0
43+
44+
# minimum number of MB for each csv transfer file to be kept in memory
45+
# before possibly going to disk.
46+
# 0 for testing, up to 10 should be reasonable.
47+
transferMinMBInMem = 0
48+
49+
50+
3451

3552
# database connection for QMeta database
3653
[qmeta]

src/cconfig/CzarConfig.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,14 @@ class CzarConfig {
124124
/// Getters for result aggregation options.
125125
int getMaxTableSizeMB() const { return _maxTableSizeMB->getVal(); }
126126
int getMaxSqlConnectionAttempts() const { return _maxSqlConnectionAttempts->getVal(); }
127+
unsigned int getMaxTransferMemMB() const { return _resultMaxTransferMemMB->getVal(); }
128+
/// Return the transfer directory. This is customizable to allow for a
129+
/// high performance volume.
130+
std::string getTransferDir() const { return _resultTransferDir->getVal(); }
131+
132+
/// Return the minimum amount of memory per UberJob to keep in memory. This much transfer
133+
/// data will be stored in memory regardless of other conditions.
134+
unsigned int getTransferMinMBInMem() const { return _resultTransferMinMBInMem->getVal(); }
127135

128136
/// The size of the TCP connection pool within the client API that is used
129137
/// by the merger to pool result files from workers via the HTTP protocol.
@@ -288,6 +296,13 @@ class CzarConfig {
288296
util::ConfigValTInt::create(_configValMap, "resultdb", "maxhttpconnections", notReq, 2000);
289297
CVTIntPtr _oldestResultKeptDays =
290298
util::ConfigValTInt::create(_configValMap, "resultdb", "oldestResultKeptDays", notReq, 30);
299+
// This must be larger than _maxTableSizeMB when using the "memory" TransferMethod
300+
CVTUIntPtr _resultMaxTransferMemMB =
301+
util::ConfigValTUInt::create(_configValMap, "resultdb", "maxTransferMemMB", notReq, 10000);
302+
CVTStrPtr _resultTransferDir =
303+
util::ConfigValTStr::create(_configValMap, "resultdb", "transferDir", notReq, "/tmp");
304+
CVTUIntPtr _resultTransferMinMBInMem =
305+
util::ConfigValTUInt::create(_configValMap, "resultdb", "transferMinMBInMem", notReq, 10);
291306

292307
/// Get all the elements in the css section.
293308
CVTStrPtr _cssTechnology =

src/ccontrol/MergingHandler.cc

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
#include "http/Client.h"
4646
#include "http/ClientConnPool.h"
4747
#include "http/Method.h"
48-
#include "mysql/CsvBuffer.h"
48+
#include "mysql/CsvMemDisk.h"
4949
#include "qdisp/CzarStats.h"
5050
#include "qdisp/Executive.h"
5151
#include "qdisp/JobQuery.h"
@@ -195,13 +195,8 @@ qdisp::MergeEndStatus MergingHandler::_mergeHttp(qdisp::UberJob::Ptr const& uber
195195
}
196196

197197
if (fileSize == 0) return qdisp::MergeEndStatus(true);
198-
199-
// Read from the http stream and push records into the CSV stream in a separate thread.
200-
// Note the fixed capacity of the stream which allows up to 2 records to be buffered
201-
// in the stream. This is enough to hide the latency of the HTTP connection and
202-
// the time needed to read the file.
203-
auto csvStream = mysql::CsvStream::create(2);
204-
_csvStream = csvStream;
198+
auto csvMemDisk = mysql::CsvMemDisk::create(fileSize, uberJob->getQueryId(), uberJob->getUjId());
199+
_csvMemDisk = csvMemDisk;
205200

206201
// This must be after setting _csvStream to avoid cancelFileMerge()
207202
// race issues, and it needs to be before the thread starts.
@@ -211,46 +206,46 @@ qdisp::MergeEndStatus MergingHandler::_mergeHttp(qdisp::UberJob::Ptr const& uber
211206
}
212207

213208
string fileReadErrorMsg;
214-
thread csvThread([uberJob, csvStream, fileUrl, fileSize, &fileReadErrorMsg]() {
209+
auto transferFunc = [&]() {
215210
size_t bytesRead = 0;
216211
fileReadErrorMsg = ::readHttpFileAndMerge(
217212
uberJob, fileUrl, fileSize,
218-
[uberJob, csvStream, fileSize, &bytesRead](char const* buf, uint32_t size) {
213+
[&](char const* buf, uint32_t size) {
219214
bool last = false;
220215
if (buf == nullptr || size == 0) {
221216
last = true;
222217
} else {
223-
csvStream->push(buf, size);
218+
csvMemDisk->push(buf, size);
224219
bytesRead += size;
225220
last = bytesRead >= fileSize;
226221
}
227222
if (last) {
228-
csvStream->push(nullptr, 0);
223+
csvMemDisk->push(nullptr, 0);
229224
}
230225
},
231226
MergingHandler::_getHttpConnPool());
232227
// Push the stream terminator to indicate the end of the stream.
233228
// It may be needed to unblock the table merger which may be still attempting to read
234229
// from the CSV stream.
235230
if (!fileReadErrorMsg.empty()) {
236-
csvStream->push(nullptr, 0);
231+
csvMemDisk->push(nullptr, 0);
237232
}
238-
});
233+
};
234+
csvMemDisk->transferDataFromWorker(transferFunc);
239235

240236
// Attempt the actual merge.
241-
bool fileMergeSuccess = _infileMerger->mergeHttp(uberJob, fileSize, csvStream);
237+
bool fileMergeSuccess = _infileMerger->mergeHttp(uberJob, fileSize, csvMemDisk);
242238
if (!fileMergeSuccess) {
243239
LOGS(_log, LOG_LVL_WARN, __func__ << " merge failed");
244240
util::Error const& err = _infileMerger->getError();
245241
_setError(ccontrol::MSG_RESULT_ERROR, err.getMsg(), util::ErrorCode::RESULT_IMPORT);
246242
}
247-
if (csvStream->getContaminated()) {
243+
if (csvMemDisk->getContaminated()) {
248244
LOGS(_log, LOG_LVL_ERROR, __func__ << " merge stream contaminated");
249245
fileMergeSuccess = false;
250246
_setError(ccontrol::MSG_RESULT_ERROR, "merge stream contaminated", util::ErrorCode::RESULT_IMPORT);
251247
}
252248

253-
csvThread.join();
254249
if (!fileReadErrorMsg.empty()) {
255250
LOGS(_log, LOG_LVL_WARN, __func__ << " result file read failed");
256251
_setError(ccontrol::MSG_HTTP_RESULT, fileReadErrorMsg, util::ErrorCode::RESULT_IMPORT);
@@ -261,15 +256,14 @@ qdisp::MergeEndStatus MergingHandler::_mergeHttp(qdisp::UberJob::Ptr const& uber
261256
if (!mergeEStatus.success) {
262257
// This error check needs to come after the csvThread.join() to ensure writing
263258
// is finished. If any bytes were written, the result table is ruined.
264-
mergeEStatus.contaminated = csvStream->getBytesWritten() > 0;
259+
mergeEStatus.contaminated = csvMemDisk->getBytesFetched() > 0;
265260
}
266-
// TODO:UJ Make it impossible to contaminate the result table for all errors
267-
// short of czar or mariadb crash.
261+
268262
return mergeEStatus;
269263
}
270264

271265
void MergingHandler::cancelFileMerge() {
272-
auto csvStrm = _csvStream.lock();
266+
auto csvStrm = _csvMemDisk.lock();
273267
if (csvStrm != nullptr) {
274268
csvStrm->cancel();
275269
}
@@ -295,9 +289,6 @@ qdisp::MergeEndStatus MergingHandler::flushHttp(string const& fileUrl, uint64_t
295289
"MergingHandler::" << __func__ << " uberJob=" << uberJob->getIdStr() << " fileUrl=" << fileUrl);
296290

297291
qdisp::MergeEndStatus mergeStatus = _mergeHttp(uberJob, fileUrl, fileSize);
298-
if (mergeStatus.success) {
299-
_infileMerger->mergeCompleteFor(uberJob->getUjId());
300-
}
301292
return mergeStatus;
302293
}
303294

src/ccontrol/MergingHandler.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class ClientConnPool;
3838
} // namespace lsst::qserv::http
3939

4040
namespace lsst::qserv::mysql {
41-
class CsvStream;
41+
class CsvMemDisk;
4242
} // namespace lsst::qserv::mysql
4343

4444
namespace lsst::qserv::qdisp {
@@ -105,8 +105,8 @@ class MergingHandler : public qdisp::ResponseHandler {
105105
bool _flushed{false}; ///< flushed to InfileMerger?
106106
std::string _wName{"~"}; ///< worker name
107107

108-
std::weak_ptr<qdisp::Executive> _executive; ///< Weak pointer to the executive for errors.
109-
std::weak_ptr<mysql::CsvStream> _csvStream; ///< Weak pointer to cancel infile merge.
108+
std::weak_ptr<qdisp::Executive> _executive; ///< Weak pointer to the executive for errors.
109+
std::weak_ptr<mysql::CsvMemDisk> _csvMemDisk; ///< Weak pointer to cancel infile merge.
110110
};
111111

112112
} // namespace lsst::qserv::ccontrol

src/czar/ActiveWorker.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,8 @@ void ActiveWorkerMap::updateMap(protojson::WorkerContactInfo::WCMap const& wcMap
288288
auto iter = _awMap.find(wcKey);
289289
if (iter == _awMap.end()) {
290290
auto newAW = ActiveWorker::create(wcVal, czInfo, replicationInstanceId, replicationAuthKey);
291-
LOGS(_log, LOG_LVL_INFO, cName(__func__) << " ActiveWorker created for " << wcKey);
291+
LOGS(_log, LOG_LVL_INFO,
292+
cName(__func__) << " ActiveWorker created for " << wcKey << " " << newAW->dump());
292293
_awMap[wcKey] = newAW;
293294
if (_czarCancelAfterRestart) {
294295
newAW->setCzarCancelAfterRestart(_czarCancelAfterRestartCzId, _czarCancelAfterRestartQId);

src/czar/Czar.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#include "http/ClientConnPool.h"
5555
#include "http/MetaModule.h"
5656
#include "http/Method.h"
57+
#include "mysql/CsvMemDisk.h"
5758
#include "qdisp/CzarStats.h"
5859
#include "qdisp/Executive.h"
5960
#include "qproc/DatabaseModels.h"
@@ -181,6 +182,15 @@ Czar::Czar(string const& configFilePath, string const& czarName)
181182
// the name of the Czar gets translated into a numeric identifier.
182183
_czarConfig->setId(_uqFactory->userQuerySharedResources()->qMetaCzarId);
183184

185+
CzarIdType czarId = _czarConfig->id();
186+
size_t const MB_SIZE_BYTES = 1024 * 1024;
187+
size_t maxResultTableSizeBytes = _czarConfig->getMaxTableSizeMB() * MB_SIZE_BYTES;
188+
size_t maxMemToUse = _czarConfig->getMaxTransferMemMB() * MB_SIZE_BYTES;
189+
string const transferDirectory = _czarConfig->getTransferDir();
190+
std::size_t const transferMinBytesInMem = _czarConfig->getTransferMinMBInMem() * MB_SIZE_BYTES;
191+
mysql::TransferTracker::setup(maxMemToUse, transferDirectory, transferMinBytesInMem,
192+
maxResultTableSizeBytes, czarId);
193+
184194
// Tell workers to cancel any queries that were submitted before this restart of Czar.
185195
// Figure out which query (if any) was recorded in Czar databases before the restart.
186196
// The id will be used as the high-watermark for queries that need to be cancelled.

src/czar/CzarChunkMap.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,10 @@ void CzarChunkMap::verify(string const& familyName) const {
139139
LOGS(_log, LOG_LVL_WARN, cName(__func__) << " family=" << familyName << " verified");
140140
}
141141

142-
string CzarChunkMap::dumpChunkMap(ChunkMap const& chunkMap) {
142+
string CzarChunkMap::dumpChunkMap() const {
143143
stringstream os;
144144
os << "ChunkMap{";
145-
for (auto const& [cId, cDataPtr] : chunkMap) {
145+
for (auto const& [cId, cDataPtr] : *_chunkMap) {
146146
os << "(cId=" << cId << ":";
147147
os << ((cDataPtr == nullptr) ? "null" : cDataPtr->dump()) << ")";
148148
}
@@ -355,6 +355,10 @@ bool CzarFamilyMap::_read() {
355355

356356
verify(familyMapPtr);
357357

358+
for (auto const& [fam, ccMap] : *familyMapPtr) {
359+
LOGS(_log, LOG_LVL_INFO, "{family=" << fam << "{" << ccMap->dumpChunkMap() << "}}");
360+
}
361+
358362
_familyMap = familyMapPtr;
359363

360364
_lastUpdateTime = qChunkMap.updateTime;

src/czar/CzarChunkMap.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ class CzarChunkMap {
205205
/// @throws ChunkMapException
206206
void verify(std::string const& familyName) const;
207207

208-
static std::string dumpChunkMap(ChunkMap const& chunkMap);
208+
std::string dumpChunkMap() const;
209209

210210
static std::string dumpWorkerChunkMap(WorkerChunkMap const& wcMap);
211211

src/czar/CzarRegistry.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,13 @@ void CzarRegistry::_registryUpdateLoop() {
8080
string const url = "http://" + _czarConfig->replicationRegistryHost() + ":" +
8181
to_string(_czarConfig->replicationRegistryPort()) + "/czar";
8282
vector<string> const headers = {"Content-Type: application/json"};
83-
string const fqdn = util::getCurrentHostFqdnBlocking();
8483
json const request = json::object({{"instance_id", _czarConfig->replicationInstanceId()},
8584
{"auth_key", _czarConfig->replicationAuthKey()},
8685
{"czar",
8786
{{"name", _czarConfig->name()},
8887
{"id", _czarConfig->id()},
8988
{"management-port", _czarConfig->replicationHttpPort()},
90-
{"management-host-name", fqdn}}}});
89+
{"management-host-name", util::getCurrentHostFqdnBlocking()}}}});
9190
string const requestContext = "Czar: '" + http::method2string(method) + "' request to '" + url + "'";
9291
LOGS(_log, LOG_LVL_TRACE,
9392
__func__ << " czarPost url=" << url << " request=" << request.dump() << " headers=" << headers[0]);

0 commit comments

Comments
 (0)