Skip to content

Commit 935e1b8

Browse files
jgates108fritzm
authored andcommitted
Added memory/disk hybrid for transferring CSV files.
1 parent a90210f commit 935e1b8

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

41 files changed

+868
-379
lines changed

deploy/compose/docker-compose.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ volumes:
3636
volume_czar_xrootd:
3737
volume_czar_home:
3838
volume_czar_cfg:
39+
volume_czar_transfer:
3940

4041
volume_czar_mariadb_data:
4142
volume_czar_mariadb_cfg:
@@ -272,6 +273,10 @@ services:
272273
- type: volume
273274
source: volume_czar_mariadb_run
274275
target: /qserv/mariadb/run
276+
- type: volume
277+
source: volume_czar_transfer
278+
target: /tmp
279+
275280
- << : *log-volume
276281
expose:
277282
- "3306" # for czar-mariadb
@@ -308,6 +313,9 @@ services:
308313
- type: volume
309314
source: volume_czar_cfg
310315
target: /config-etc
316+
- type: volume
317+
source: volume_czar_transfer
318+
target: /tmp
311319
- type: volume
312320
source: volume_czar_home
313321
target: /home/qserv

src/admin/templates/proxy/etc/qserv-czar.cnf.jinja

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,29 @@ port = {{ czar_db_port }}
2525
# Any table in resultdb that hasn't been updated in this many days is deleted.
2626
oldestResultKeptDays = 7
2727

28+
# Either this should be changed to a high performance docker volume directory
29+
# or /tmp should be mounted as a high performance docker volume directory
30+
# to avoid using limited docker memory to store the contents.
31+
transferDir = /tmp
32+
2833
# maximum number of connection retries to SQL database (per connection attempt)
2934
maxsqlconnectionattempts = 10
3035

3136
# maximum user query result size in MB
3237
maxtablesize_mb = 5100
3338

39+
# maximum number of MB of concurrent csv transfer files allowed to be kept in
40+
# memory; after this point they will be temporarily written to disk.
41+
# 0 is used for testing. 10000 is usually reasonable.
42+
maxTransferMemMB = 0
43+
44+
# minimum number of MB for each csv transfer file to be kept in memory
45+
# before possibly going to disk.
46+
# 0 for testing, up to 10 should be reasonable.
47+
transferMinMBInMem = 0
48+
49+
50+
3451

3552
# database connection for QMeta database
3653
[qmeta]

src/cconfig/CzarConfig.h

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,16 @@ class CzarConfig {
130130
/// Getters for result aggregation options.
131131
int getMaxTableSizeMB() const { return _maxTableSizeMB->getVal(); }
132132
int getMaxSqlConnectionAttempts() const { return _maxSqlConnectionAttempts->getVal(); }
133+
unsigned int getMaxTransferMemMB() const { return _resultMaxTransferMemMB->getVal(); }
134+
/// Return the transfer directory. This is customizable to allow for a
135+
/// high performance volume.
136+
std::string getTransferDir() const { return _resultTransferDir->getVal(); }
133137

134-
/// The size of the TCP connection pool witin the client API that is used
138+
/// Return the minimum amount of memory per UberJob to keep in memory. This much transfer
139+
/// data will be stored in memory regardless of other conditions.
140+
unsigned int getTransferMinMBInMem() const { return _resultTransferMinMBInMem->getVal(); }
141+
142+
/// The size of the TCP connection pool within the client API that is used
135143
/// by the merger to pool result files from workers via the HTTP protocol.
136144
int getResultMaxHttpConnections() const { return _resultMaxHttpConnections->getVal(); }
137145

@@ -169,13 +177,6 @@ class CzarConfig {
169177
/// the method then the monitoring will be disabled.
170178
unsigned int czarStatsUpdateIvalSec() const { return _czarStatsUpdateIvalSec->getVal(); }
171179

172-
/// @return The maximum retain period for keeping in memory the relevant metrics
173-
/// captured by the Czar monitoring system. If 0 is returned by the method then
174-
/// query history archiving will be disabled.
175-
/// @note Setting the limit too high may be potentially result in runing onto
176-
/// the OOM situation.
177-
unsigned int czarStatsRetainPeriodSec() const { return _czarStatsRetainPeriodSec->getVal(); }
178-
179180
/// A worker is considered fully ALIVE if the last update from the worker has been
180181
/// heard in less than _activeWorkerTimeoutAliveSecs seconds.
181182
int getActiveWorkerTimeoutAliveSecs() const { return _activeWorkerTimeoutAliveSecs->getVal(); }
@@ -306,6 +307,14 @@ class CzarConfig {
306307
CVTIntPtr _oldestAsyncResultKeptSeconds = util::ConfigValTInt::create(
307308
_configValMap, "resultdb", "oldestAsyncResultKeptSeconds", notReq, 3600);
308309

310+
// This must be larger than _maxTableSizeMB when using the "memory" TransferMethod
311+
CVTUIntPtr _resultMaxTransferMemMB =
312+
util::ConfigValTUInt::create(_configValMap, "resultdb", "maxTransferMemMB", notReq, 10000);
313+
CVTStrPtr _resultTransferDir =
314+
util::ConfigValTStr::create(_configValMap, "resultdb", "transferDir", notReq, "/tmp");
315+
CVTUIntPtr _resultTransferMinMBInMem =
316+
util::ConfigValTUInt::create(_configValMap, "resultdb", "transferMinMBInMem", notReq, 10);
317+
309318
/// Get all the elements in the css section.
310319
CVTStrPtr _cssTechnology =
311320
util::ConfigValTStr::create(_configValMap, "css", "technology", notReq, "mysql");

src/ccontrol/MergingHandler.cc

Lines changed: 15 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,17 @@
3939
#include "lsst/log/Log.h"
4040

4141
// Qserv headers
42+
#include "cconfig/CzarConfig.h"
4243
#include "ccontrol/msgCode.h"
4344
#include "global/clock_defs.h"
4445
#include "global/debugUtil.h"
4546
#include "http/Client.h"
4647
#include "http/ClientConnPool.h"
4748
#include "http/Method.h"
48-
#include "mysql/CsvBuffer.h"
49+
#include "mysql/CsvMemDisk.h"
4950
#include "qdisp/CzarStats.h"
5051
#include "qdisp/Executive.h"
5152
#include "qdisp/JobQuery.h"
52-
#include "qdisp/QueryRequest.h"
5353
#include "qdisp/UberJob.h"
5454
#include "rproc/InfileMerger.h"
5555
#include "util/Bug.h"
@@ -84,7 +84,6 @@ lsst::qserv::TimeCountTracker<double>::CALLBACKFUNC const reportFileRecvRate =
8484
}
8585
};
8686

87-
8887
string readHttpFileAndMerge(lsst::qserv::qdisp::UberJob::Ptr const& uberJob, string const& httpUrl,
8988
size_t fileSize, function<void(char const*, uint32_t)> const& messageIsReady,
9089
shared_ptr<http::ClientConnPool> const& httpConnPool) {
@@ -180,52 +179,6 @@ MergingHandler::MergingHandler(std::shared_ptr<rproc::InfileMerger> const& merge
180179

181180
MergingHandler::~MergingHandler() { LOGS(_log, LOG_LVL_TRACE, __func__); }
182181

183-
184-
bool MergingHandler::flush(proto::ResponseSummary const& resp) {
185-
_wName = resp.wname();
186-
187-
// This is needed to ensure the job query would be staying alive for the duration
188-
// of the operation to prevent inconsistency within the application.
189-
auto const jobQuery = getJobQuery().lock();
190-
if (jobQuery == nullptr) {
191-
LOGS(_log, LOG_LVL_ERROR, __func__ << " failed, jobQuery was NULL");
192-
return false;
193-
}
194-
auto const jobQuery = std::dynamic_pointer_cast<qdisp::JobQuery>(jobBase);
195-
196-
LOGS(_log, LOG_LVL_TRACE,
197-
"MergingHandler::" << __func__ << " jobid=" << resp.jobid() << " transmitsize="
198-
<< resp.transmitsize() << " rowcount=" << resp.rowcount() << " rowSize="
199-
<< " attemptcount=" << resp.attemptcount() << " errorcode=" << resp.errorcode()
200-
<< " errormsg=" << resp.errormsg());
201-
202-
if (resp.errorcode() != 0 || !resp.errormsg().empty()) {
203-
_error = util::Error(resp.errorcode(), resp.errormsg(), util::ErrorCode::MYSQLEXEC);
204-
_setError(ccontrol::MSG_RESULT_ERROR, _error.getMsg());
205-
LOGS(_log, LOG_LVL_ERROR,
206-
"MergingHandler::" << __func__ << " error from worker:" << resp.wname() << " error: " << _error);
207-
// This way we can track if the worker has reported this error. The current implementation
208-
// requires the large result size to be reported as an error via the InfileMerger regardless
209-
// of an origin of the error (Czar or the worker). Note that large results can be produced
210-
// by the Czar itself, e.g., when the aggregate result of multiple worker queries is too large
211-
// or by the worker when the result set of a single query is too large.
212-
// The error will be reported to the Czar as a part of the response summary.
213-
if (resp.errorcode() == util::ErrorCode::WORKER_RESULT_TOO_LARGE) {
214-
_infileMerger->setResultSizeLimitExceeded();
215-
}
216-
return false;
217-
}
218-
219-
bool const success = _merge(resp, jobQuery);
220-
221-
if (success) {
222-
_infileMerger->mergeCompleteFor(resp.jobid());
223-
qdisp::CzarStats::get()->addTotalRowsRecv(resp.rowcount());
224-
qdisp::CzarStats::get()->addTotalBytesRecv(resp.transmitsize());
225-
}
226-
return success;
227-
}
228-
229182
void MergingHandler::errorFlush(std::string const& msg, int code) {
230183
_setError(code, msg, util::ErrorCode::RESULT_IMPORT);
231184
// Might want more info from result service.
@@ -244,13 +197,8 @@ qdisp::MergeEndStatus MergingHandler::_mergeHttp(qdisp::UberJob::Ptr const& uber
244197
}
245198

246199
if (fileSize == 0) return qdisp::MergeEndStatus(true);
247-
248-
// Read from the http stream and push records into the CSV stream in a separate thread.
249-
// Note the fixed capacity of the stream which allows up to 2 records to be buffered
250-
// in the stream. This is enough to hide the latency of the HTTP connection and
251-
// the time needed to read the file.
252-
auto csvStream = mysql::CsvStream::create(2);
253-
_csvStream = csvStream;
200+
auto csvMemDisk = mysql::CsvMemDisk::create(fileSize, uberJob->getQueryId(), uberJob->getUjId());
201+
_csvMemDisk = csvMemDisk;
254202

255203
// This must be after setting _csvStream to avoid cancelFileMerge()
256204
// race issues, and it needs to be before the thread starts.
@@ -260,46 +208,46 @@ qdisp::MergeEndStatus MergingHandler::_mergeHttp(qdisp::UberJob::Ptr const& uber
260208
}
261209

262210
string fileReadErrorMsg;
263-
thread csvThread([uberJob, csvStream, fileUrl, fileSize, &fileReadErrorMsg]() {
211+
auto transferFunc = [&]() {
264212
size_t bytesRead = 0;
265213
fileReadErrorMsg = ::readHttpFileAndMerge(
266214
uberJob, fileUrl, fileSize,
267-
[uberJob, csvStream, fileSize, &bytesRead](char const* buf, uint32_t size) {
215+
[&](char const* buf, uint32_t size) {
268216
bool last = false;
269217
if (buf == nullptr || size == 0) {
270218
last = true;
271219
} else {
272-
csvStream->push(buf, size);
220+
csvMemDisk->push(buf, size);
273221
bytesRead += size;
274222
last = bytesRead >= fileSize;
275223
}
276224
if (last) {
277-
csvStream->push(nullptr, 0);
225+
csvMemDisk->push(nullptr, 0);
278226
}
279227
},
280228
MergingHandler::_getHttpConnPool());
281229
// Push the stream terminator to indicate the end of the stream.
282230
// It may be needed to unblock the table merger which may be still attempting to read
283231
// from the CSV stream.
284232
if (!fileReadErrorMsg.empty()) {
285-
csvStream->push(nullptr, 0);
233+
csvMemDisk->push(nullptr, 0);
286234
}
287-
});
235+
};
236+
csvMemDisk->transferDataFromWorker(transferFunc);
288237

289238
// Attempt the actual merge.
290-
bool fileMergeSuccess = _infileMerger->mergeHttp(uberJob, fileSize, csvStream);
239+
bool fileMergeSuccess = _infileMerger->mergeHttp(uberJob, fileSize, csvMemDisk);
291240
if (!fileMergeSuccess) {
292241
LOGS(_log, LOG_LVL_WARN, __func__ << " merge failed");
293242
util::Error const& err = _infileMerger->getError();
294243
_setError(ccontrol::MSG_RESULT_ERROR, err.getMsg(), util::ErrorCode::RESULT_IMPORT);
295244
}
296-
if (csvStream->getContaminated()) {
245+
if (csvMemDisk->getContaminated()) {
297246
LOGS(_log, LOG_LVL_ERROR, __func__ << " merge stream contaminated");
298247
fileMergeSuccess = false;
299248
_setError(ccontrol::MSG_RESULT_ERROR, "merge stream contaminated", util::ErrorCode::RESULT_IMPORT);
300249
}
301250

302-
csvThread.join();
303251
if (!fileReadErrorMsg.empty()) {
304252
LOGS(_log, LOG_LVL_WARN, __func__ << " result file read failed");
305253
_setError(ccontrol::MSG_HTTP_RESULT, fileReadErrorMsg, util::ErrorCode::RESULT_IMPORT);
@@ -310,14 +258,14 @@ qdisp::MergeEndStatus MergingHandler::_mergeHttp(qdisp::UberJob::Ptr const& uber
310258
if (!mergeEStatus.success) {
311259
// This error check needs to come after the csvThread.join() to ensure writing
312260
// is finished. If any bytes were written, the result table is ruined.
313-
mergeEStatus.contaminated = csvStream->getBytesWritten() > 0;
261+
mergeEStatus.contaminated = csvMemDisk->getBytesFetched() > 0;
314262
}
315263

316264
return mergeEStatus;
317265
}
318266

319267
void MergingHandler::cancelFileMerge() {
320-
auto csvStrm = _csvStream.lock();
268+
auto csvStrm = _csvMemDisk.lock();
321269
if (csvStrm != nullptr) {
322270
csvStrm->cancel();
323271
}
@@ -343,9 +291,6 @@ qdisp::MergeEndStatus MergingHandler::flushHttp(string const& fileUrl, uint64_t
343291
"MergingHandler::" << __func__ << " uberJob=" << uberJob->getIdStr() << " fileUrl=" << fileUrl);
344292

345293
qdisp::MergeEndStatus mergeStatus = _mergeHttp(uberJob, fileUrl, fileSize);
346-
if (mergeStatus.success) {
347-
_infileMerger->mergeCompleteFor(uberJob->getUjId());
348-
}
349294
return mergeStatus;
350295
}
351296

src/ccontrol/MergingHandler.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class ClientConnPool;
3737
} // namespace lsst::qserv::http
3838

3939
namespace lsst::qserv::mysql {
40-
class CsvStream;
40+
class CsvMemDisk;
4141
} // namespace lsst::qserv::mysql
4242

4343
namespace lsst::qserv::qdisp {
@@ -91,20 +91,21 @@ class MergingHandler : public qdisp::ResponseHandler {
9191
/// Set error code and string.
9292
void _setError(int code, std::string const& msg, int errorState);
9393

94-
/// Check if the query is no longer active.
95-
/// This is used to prevent the query from being processed after it has been cancelled
96-
/// or finished for any reason.
97-
/// @param jobQuery the query to check
98-
/// @return true if the query is no longer active
99-
bool _queryIsNoLongerActive(std::shared_ptr<qdisp::JobQuery> const& jobQuery) const;
94+
// All instances of the HTTP client class are members of the same pool. This allows
95+
// connection reuse and a significant reduction of the kernel memory pressure.
96+
// Note that the pool gets instantiated at the very first call to method _getHttpConnPool()
97+
// because the instantiation depends on the availability of the Czar configuration.
98+
static std::shared_ptr<http::ClientConnPool> const& _getHttpConnPool();
99+
static std::shared_ptr<http::ClientConnPool> _httpConnPool;
100+
static std::mutex _httpConnPoolMutex;
100101

101102
std::shared_ptr<rproc::InfileMerger> _infileMerger; ///< Merging delegate
102103
std::atomic<bool> _errorSet{false}; ///< Set to true when an error is set.
103104
bool _flushed{false}; ///< flushed to InfileMerger?
104105
std::string _wName{"~"}; ///< worker name
105106

106-
std::weak_ptr<qdisp::Executive> _executive; ///< Weak pointer to the executive for errors.
107-
std::weak_ptr<mysql::CsvStream> _csvStream; ///< Weak pointer to cancel infile merge.
107+
std::weak_ptr<qdisp::Executive> _executive; ///< Weak pointer to the executive for errors.
108+
std::weak_ptr<mysql::CsvMemDisk> _csvMemDisk; ///< Weak pointer to cancel infile merge.
108109
};
109110

110111
} // namespace lsst::qserv::ccontrol

src/ccontrol/UserQueryFactory.cc

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ std::shared_ptr<UserQuery> _makeUserQueryProcessList(query::SelectStmt::Ptr& stm
135135
LOGS(_log, LOG_LVL_DEBUG, "SELECT query is a PROCESSLIST");
136136
try {
137137
return std::make_shared<UserQueryProcessList>(stmt, sharedResources->qMetaSelect,
138-
sharedResources->qMetaCzarId, userQueryId, resultDb);
138+
sharedResources->czarId, userQueryId, resultDb);
139139
} catch (std::exception const& exc) {
140140
return std::make_shared<UserQueryInvalid>(exc.what());
141141
}
@@ -297,6 +297,7 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st
297297
}
298298
auto stmt = parser->getSelectStmt();
299299

300+
std::lock_guard focatoryLock(_factoryMtx);
300301
// handle special database/table names
301302
if (_stmtRefersToProcessListTable(stmt, defaultDb)) {
302303
return _makeUserQueryProcessList(stmt, _userQuerySharedResources, userQueryId, resultDb, aQuery,
@@ -331,7 +332,6 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st
331332
}
332333

333334
// This is a regular SELECT for qserv
334-
335335
// Currently using the database for results to get schema information.
336336
auto qs = std::make_shared<qproc::QuerySession>(_userQuerySharedResources->css,
337337
_userQuerySharedResources->databaseModels, defaultDb,
@@ -372,7 +372,8 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st
372372
qs, messageStore, executive, _userQuerySharedResources->databaseModels, infileMergerConfig,
373373
_userQuerySharedResources->secondaryIndex, _userQuerySharedResources->queryMetadata,
374374
_userQuerySharedResources->queryProgress, _userQuerySharedResources->czarId, errorExtra,
375-
async, resultDb);
375+
async, resultDb, uberJobMaxChunks);
376+
376377
if (sessionValid) {
377378
uq->qMetaRegister(resultLocation, msgTableName);
378379
uq->setupMerger();
@@ -381,11 +382,13 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st
381382
}
382383
return uq;
383384
} else if (UserQueryType::isSelectResult(query, userJobId)) {
385+
std::lock_guard factoryLock(_factoryMtx);
384386
auto uq = std::make_shared<UserQueryAsyncResult>(userJobId, _userQuerySharedResources->czarId,
385387
_userQuerySharedResources->queryMetadata);
386388
LOGS(_log, LOG_LVL_DEBUG, "make UserQueryAsyncResult: userJobId=" << userJobId);
387389
return uq;
388390
} else if (UserQueryType::isShowProcessList(query, full)) {
391+
std::lock_guard factoryLock(_factoryMtx);
389392
LOGS(_log, LOG_LVL_DEBUG, "make UserQueryProcessList: full=" << (full ? 'y' : 'n'));
390393
try {
391394
return std::make_shared<UserQueryProcessList>(full, _userQuerySharedResources->qMetaSelect,
@@ -395,6 +398,7 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st
395398
return std::make_shared<UserQueryInvalid>(exc.what());
396399
}
397400
} else if (UserQueryType::isCall(query)) {
401+
std::lock_guard factoryLock(_factoryMtx);
398402
auto parser = std::make_shared<ParseRunner>(
399403
query, _userQuerySharedResources->makeUserQueryResources(userQueryId, resultDb));
400404
return parser->getUserQuery();
@@ -406,6 +410,7 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st
406410
return std::make_shared<UserQueryInvalid>(std::string("ParseException:") + e.what());
407411
}
408412
auto uq = parser->getUserQuery();
413+
std::lock_guard factoryLock(_factoryMtx);
409414
auto setQuery = std::static_pointer_cast<UserQuerySet>(uq);
410415
if (setQuery->varName() == "QSERV_ROW_COUNTER_OPTIMIZATION") {
411416
_useQservRowCounterOptimization = setQuery->varValue() != "0";
@@ -417,6 +422,7 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st
417422
}
418423
return uq;
419424
} else {
425+
std::lock_guard factoryLock(_factoryMtx);
420426
// something that we don't recognize
421427
auto uq = std::make_shared<UserQueryInvalid>("Invalid or unsupported query: " + query);
422428
return uq;

0 commit comments

Comments
 (0)