Skip to content

Commit b9800e5

Browse files
authored
Merge pull request #911 from lsst/tickets/DM-49661
Tickets/dm 49661
2 parents 4680b1f + 378cad9 commit b9800e5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+742
-358
lines changed

admin/tools/docker/base/Dockerfile

+19-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ RUN dnf install -y 'dnf-command(config-manager)' \
3737
glib2-devel \
3838
glibc-langpack-en \
3939
java-devel \
40-
jemalloc \
4140
libcurl-devel \
4241
libevent-devel \
4342
libtool \
@@ -53,6 +52,7 @@ RUN dnf install -y 'dnf-command(config-manager)' \
5352
protobuf-devel \
5453
python3.12 \
5554
python3.12-devel \
55+
jemalloc \
5656
tree \
5757
vim \
5858
zip \
@@ -73,6 +73,17 @@ RUN dnf update -y \
7373
RUN curl -s "https://cmake.org/files/v3.31/cmake-3.31.5-linux-x86_64.tar.gz" \
7474
| tar --strip-components=1 -xz -C /usr/local
7575

76+
RUN cd /tmp \
77+
&& git clone https://github.com/jemalloc/jemalloc \
78+
&& cd jemalloc \
79+
&& git checkout tags/5.3.0 \
80+
&& ./autogen.sh \
81+
&& ./configure --enable-prof \
82+
&& make -j8 \
83+
&& make install \
84+
&& cd /tmp \
85+
&& rm -rf jemalloc
86+
7687
RUN cd /tmp \
7788
&& git clone https://github.com/apache/logging-log4cxx \
7889
&& cd logging-log4cxx \
@@ -127,6 +138,9 @@ RUN cd /tmp \
127138
&& git clone https://github.com/xrootd/xrootd.git \
128139
&& cd xrootd \
129140
&& git checkout tags/v5.6.2 \
141+
&& git config --global user.email "[email protected]" \
142+
&& git config --global user.name "Qserv" \
143+
&& git cherry-pick d85915a3927261e49859c3e13075bce1dfefcbe4 \
130144
&& mkdir build \
131145
&& cd build \
132146
&& cmake -DENABLE_PYTHON=off .. \
@@ -220,7 +234,6 @@ RUN dnf install -y 'dnf-command(config-manager)' \
220234
boost-thread \
221235
glib2 \
222236
glibc-langpack-en \
223-
jemalloc \
224237
libevent \
225238
libuuid \
226239
lua5.1 \
@@ -262,6 +275,10 @@ RUN mkdir -p /qserv/data && \
262275
RUN alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1
263276
ENV PYTHONPATH "${PYTHONPATH}:/usr/local/python"
264277

278+
COPY --from=lite-build /usr/local/bin/jemalloc* /usr/local/bin/
279+
COPY --from=lite-build /usr/local/lib/pkgconfig/jemalloc.pc /usr/local/lib/pkgconfig/
280+
COPY --from=lite-build /usr/local/lib/libjemalloc* /usr/local/lib
281+
265282
COPY --from=lite-build /usr/local/lib64/liblog4cxx.so /usr/local/lib64/
266283
COPY --from=lite-build /usr/local/lib/libantlr4-runtime.so /usr/local/lib/
267284
COPY --from=lite-build /usr/local/bin/mysql-proxy /usr/local/bin/

src/CMakeLists.txt

+1-56
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ find_package(Protobuf REQUIRED)
2121
find_package(pybind11 REQUIRED)
2222
find_package(Threads REQUIRED)
2323
find_package(XRootD REQUIRED)
24-
find_package(httplib REQUIRED)
24+
#&&& find_package(httplib REQUIRED)
2525
find_package(aws-c-auth REQUIRED)
2626
find_package(aws-c-cal REQUIRED)
2727
find_package(aws-c-common REQUIRED)
@@ -94,58 +94,3 @@ add_subdirectory(xrdsvc)
9494

9595
#-----------------------------------------------------------------------------
9696

97-
add_library(qserv_common SHARED)
98-
99-
target_link_libraries(qserv_common PUBLIC
100-
global
101-
memman
102-
proto
103-
mysql
104-
sql
105-
util
106-
protojson
107-
)
108-
109-
install(
110-
TARGETS qserv_common
111-
)
112-
113-
#-----------------------------------------------------------------------------
114-
115-
add_library(xrdsvc SHARED)
116-
117-
target_link_libraries(xrdsvc PUBLIC
118-
wbase
119-
wcontrol
120-
wconfig
121-
wdb
122-
wpublish
123-
wsched
124-
qserv_xrdsvc
125-
qserv_common
126-
)
127-
128-
install(
129-
TARGETS xrdsvc
130-
)
131-
132-
#-----------------------------------------------------------------------------
133-
134-
add_library(qserv_czar SHARED)
135-
136-
target_link_libraries(qserv_czar PUBLIC
137-
ccontrol
138-
czar
139-
parser
140-
qana
141-
query
142-
qdisp
143-
qproc
144-
rproc
145-
qserv_css
146-
qserv_meta
147-
)
148-
149-
install(
150-
TARGETS qserv_czar
151-
)

src/cconfig/CMakeLists.txt

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
add_library(cconfig OBJECT)
1+
add_library(cconfig SHARED)
22

33
target_sources(cconfig PRIVATE
44
CzarConfig.cc
55
)
66

7-
target_include_directories(cconfig PRIVATE
8-
${XROOTD_INCLUDE_DIRS}
9-
)
107

118
target_link_libraries(cconfig PUBLIC
129
log
13-
XrdSsiLib
10+
)
11+
12+
install(
13+
TARGETS cconfig
1414
)

src/cconfig/CzarConfig.cc

-15
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
#include <stdexcept>
2929

3030
// Third party headers
31-
#include "XrdSsi/XrdSsiLogger.hh"
3231

3332
// LSST headers
3433
#include "lsst/log/Log.h"
@@ -42,20 +41,6 @@ namespace {
4241

4342
LOG_LOGGER _log = LOG_GET("lsst.qserv.cconfig.CzarConfig");
4443

45-
void QservLogger(struct timeval const& mtime, unsigned long tID, const char* msg, int mlen) {
46-
static log4cxx::spi::LocationInfo xrdLoc(
47-
"client", log4cxx::spi::LocationInfo::calcShortFileName("client"), "<xrdssi>", 0);
48-
static LOG_LOGGER myLog = LOG_GET("lsst.qserv.xrdssi.msgs");
49-
50-
if (myLog.isInfoEnabled()) {
51-
while (mlen && msg[mlen - 1] == '\n') --mlen; // strip all trailing newlines
52-
std::string theMsg(msg, mlen);
53-
lsst::log::Log::MDC("LWP", std::to_string(tID));
54-
myLog.logMsg(log4cxx::Level::getInfo(), xrdLoc, theMsg);
55-
}
56-
}
57-
58-
bool dummy = XrdSsiLogger::SetMCB(QservLogger, XrdSsiLogger::mcbClient);
5944
} // namespace
6045

6146
namespace lsst::qserv::cconfig {

src/ccontrol/CMakeLists.txt

+12-6
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
add_library(ccontrol OBJECT)
1+
add_library(ccontrol SHARED)
22
add_dependencies(ccontrol proto)
33

44
target_include_directories(ccontrol PRIVATE
55
${ANTLR4_INCLUDE_DIR}
6-
${XROOTD_INCLUDE_DIRS}
76
)
87

98
target_sources(ccontrol PRIVATE
@@ -29,11 +28,15 @@ target_sources(ccontrol PRIVATE
2928
target_link_libraries(ccontrol PUBLIC
3029
boost_regex
3130
cconfig
31+
css
3232
log
3333
parser
34-
replica
34+
proto
3535
sphgeom
36-
XrdCl
36+
)
37+
38+
install(
39+
TARGETS ccontrol
3740
)
3841

3942
FUNCTION(ccontrol_tests)
@@ -47,8 +50,8 @@ FUNCTION(ccontrol_tests)
4750
qana
4851
qdisp
4952
qproc
50-
qserv_css
51-
qserv_meta
53+
css
54+
qmeta
5255
query
5356
rproc
5457
Boost::unit_test_framework
@@ -63,3 +66,6 @@ ccontrol_tests(
6366
testCControl
6467
testUserQueryType
6568
)
69+
70+
# set_tests_properties(testCControl PROPERTIES WILL_FAIL 1)
71+

src/ccontrol/MergingHandler.cc

+14-13
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333

3434
// Third-party headers
3535
#include "curl/curl.h"
36-
#include "XrdCl/XrdClFile.hh"
3736

3837
// LSST headers
3938
#include "lsst/log/Log.h"
@@ -49,6 +48,7 @@
4948
#include "proto/ProtoHeaderWrap.h"
5049
#include "proto/worker.pb.h"
5150
#include "qdisp/CzarStats.h"
51+
#include "qdisp/Executive.h"
5252
#include "qdisp/JobQuery.h"
5353
#include "qdisp/UberJob.h"
5454
#include "rproc/InfileMerger.h"
@@ -273,13 +273,14 @@ shared_ptr<http::ClientConnPool> const& MergingHandler::_getHttpConnPool() {
273273
return _httpConnPool;
274274
}
275275

276-
MergingHandler::MergingHandler(std::shared_ptr<rproc::InfileMerger> merger, std::string const& tableName)
277-
: _infileMerger{merger}, _tableName{tableName} {}
276+
MergingHandler::MergingHandler(std::shared_ptr<rproc::InfileMerger> const& merger,
277+
std::shared_ptr<qdisp::Executive> const& exec)
278+
: _infileMerger(merger), _executive(exec) {}
278279

279-
MergingHandler::~MergingHandler() { LOGS(_log, LOG_LVL_TRACE, __func__ << " " << _tableName); }
280+
MergingHandler::~MergingHandler() { LOGS(_log, LOG_LVL_TRACE, __func__); }
280281

281282
void MergingHandler::errorFlush(std::string const& msg, int code) {
282-
_setError(code, msg);
283+
_setError(code, msg, util::ErrorCode::RESULT_IMPORT);
283284
// Might want more info from result service.
284285
// Do something about the error. FIXME.
285286
LOGS(_log, LOG_LVL_ERROR, "Error receiving result.");
@@ -293,7 +294,7 @@ void MergingHandler::prepScrubResults(int jobId, int attemptCount) {
293294
}
294295

295296
std::ostream& MergingHandler::print(std::ostream& os) const {
296-
return os << "MergingRequester(" << _tableName << ", flushed=" << (_flushed ? "true)" : "false)");
297+
return os << "MergingRequester(flushed=" << (_flushed ? "true)" : "false)");
297298
}
298299

299300
bool MergingHandler::_mergeHttp(shared_ptr<qdisp::UberJob> const& uberJob,
@@ -305,15 +306,16 @@ bool MergingHandler::_mergeHttp(shared_ptr<qdisp::UberJob> const& uberJob,
305306
if (!success) {
306307
LOGS(_log, LOG_LVL_WARN, __func__ << " failed");
307308
util::Error const& err = _infileMerger->getError();
308-
_setError(ccontrol::MSG_RESULT_ERROR, err.getMsg());
309+
_setError(ccontrol::MSG_RESULT_ERROR, err.getMsg(), util::ErrorCode::RESULT_IMPORT);
309310
}
310311
return success;
311312
}
312313

313-
void MergingHandler::_setError(int code, std::string const& msg) {
314+
void MergingHandler::_setError(int code, std::string const& msg, int errorState) {
314315
LOGS(_log, LOG_LVL_DEBUG, "_setError: code: " << code << ", message: " << msg);
315-
std::lock_guard<std::mutex> lock(_errorMutex);
316-
_error = Error(code, msg);
316+
auto exec = _executive.lock();
317+
if (exec == nullptr) return;
318+
exec->addMultiError(code, msg, errorState);
317319
}
318320

319321
tuple<bool, bool> MergingHandler::flushHttp(string const& fileUrl, uint64_t expectedRows,
@@ -363,10 +365,9 @@ tuple<bool, bool> MergingHandler::flushHttp(string const& fileUrl, uint64_t expe
363365
return {success, shouldCancel};
364366
}
365367

366-
void MergingHandler::flushHttpError(int errorCode, std::string const& errorMsg, int status) {
368+
void MergingHandler::flushHttpError(int errorCode, std::string const& errorMsg, int errState) {
367369
if (!_errorSet.exchange(true)) {
368-
_error = util::Error(errorCode, errorMsg, util::ErrorCode::MYSQLEXEC);
369-
_setError(ccontrol::MSG_RESULT_ERROR, _error.getMsg());
370+
_setError(errorCode, errorMsg, errState);
370371
}
371372
}
372373

src/ccontrol/MergingHandler.h

+7-13
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class ResponseSummary;
4343
} // namespace lsst::qserv::proto
4444

4545
namespace lsst::qserv::qdisp {
46+
class Executive;
4647
class JobQuery;
4748
class UberJob;
4849
} // namespace lsst::qserv::qdisp
@@ -68,29 +69,23 @@ class MergingHandler : public qdisp::ResponseHandler {
6869
virtual ~MergingHandler();
6970

7071
/// @param merger downstream merge acceptor
71-
/// @param tableName target table for incoming data
72-
MergingHandler(std::shared_ptr<rproc::InfileMerger> merger, std::string const& tableName);
72+
MergingHandler(std::shared_ptr<rproc::InfileMerger> const& merger,
73+
std::shared_ptr<qdisp::Executive> const& exec);
7374

7475
/// @see ResponseHandler::flushHttp
7576
/// @see MergingHandler::_mergeHttp
7677
std::tuple<bool, bool> flushHttp(std::string const& fileUrl, uint64_t expectedRows,
7778
uint64_t& resultRows) override;
7879

7980
/// @see ResponseHandler::flushHttpError
80-
void flushHttpError(int errorCode, std::string const& errorMsg, int status) override;
81+
void flushHttpError(int errorCode, std::string const& errorMsg, int errState) override;
8182

8283
/// Signal an unrecoverable error condition. No further calls are expected.
8384
void errorFlush(std::string const& msg, int code) override;
8485

8586
/// Print a string representation of the receiver to an ostream
8687
std::ostream& print(std::ostream& os) const override;
8788

88-
/// @return an error code and description
89-
Error getError() const override {
90-
std::lock_guard<std::mutex> lock(_errorMutex);
91-
return _error;
92-
}
93-
9489
/// Prepare to scrub the results from jobId-attempt from the result table.
9590
void prepScrubResults(int jobId, int attempt) override;
9691

@@ -99,7 +94,7 @@ class MergingHandler : public qdisp::ResponseHandler {
9994
bool _mergeHttp(std::shared_ptr<qdisp::UberJob> const& uberJob, proto::ResponseData const& responseData);
10095

10196
/// Set error code and string.
102-
void _setError(int code, std::string const& msg);
97+
void _setError(int code, std::string const& msg, int errorState);
10398

10499
// All instances of the HTTP client class are members of the same pool. This allows
105100
// connection reuse and a significant reduction of the kernel memory pressure.
@@ -110,12 +105,11 @@ class MergingHandler : public qdisp::ResponseHandler {
110105
static std::mutex _httpConnPoolMutex;
111106

112107
std::shared_ptr<rproc::InfileMerger> _infileMerger; ///< Merging delegate
113-
std::string _tableName; ///< Target table name
114-
Error _error; ///< Error description
115108
std::atomic<bool> _errorSet{false}; ///< Set to true when an error is set.
116-
mutable std::mutex _errorMutex; ///< Protect readers from partial updates
117109
bool _flushed{false}; ///< flushed to InfileMerger?
118110
std::string _wName{"~"}; ///< worker name
111+
112+
std::weak_ptr<qdisp::Executive> _executive; ///< Weak pointer to the executive for errors.
119113
};
120114

121115
} // namespace lsst::qserv::ccontrol

0 commit comments

Comments
 (0)