Skip to content

Commit c4c7499

Browse files
trondn authored and daverigby committed
MB-43622: Fix race between DCP Open and scheduleDcpStep
Previously there was a possibility of a race for DCP connections due to the fact that DCP Open would create the handler but it wouldn't set the state for the connection to DCP until the method returned. In the meantime the cookie was available for DCP to try to schedule a step. This change sets the connection up as a DCP connection as part of creating the ConnHandler object. Unfortunately we can't reserve the cookie as part of the constructor without a massive change working through all of the unit tests as they don't clean up the reference by calling release. To work around the race condition we'll add an extra ref count to the cookie before calling DCP Open. Change-Id: Ifa9b87b984af6be53934c8100e1a8d584c423c13 Reviewed-on: http://review.couchbase.org/c/kv_engine/+/143492 Tested-by: Build Bot <[email protected]> Reviewed-by: Dave Rigby <[email protected]>
1 parent 4df733f commit c4c7499

File tree

7 files changed

+53
-24
lines changed

7 files changed

+53
-24
lines changed

daemon/connection.cc

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,10 +214,6 @@ nlohmann::json Connection::toJSON() const {
214214
return ret;
215215
}
216216

217-
void Connection::setDCP(bool enable) {
218-
dcp = enable;
219-
}
220-
221217
void Connection::restartAuthentication() {
222218
if (authenticated && user.domain == cb::sasl::Domain::External) {
223219
externalAuthManager->logoff(user.name);

daemon/connection.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -330,11 +330,9 @@ class Connection : public DcpMessageProducersIface {
330330
void setTerminationReason(std::string reason);
331331

332332
bool isDCP() const {
333-
return dcp;
333+
return dcpConnHandlerIface.load() != nullptr;
334334
}
335335

336-
void setDCP(bool enable);
337-
338336
bool isDcpXattrAware() const {
339337
return dcpXattrAware;
340338
}
@@ -964,9 +962,6 @@ class Connection : public DcpMessageProducersIface {
964962
/// The reason why the session was terminated
965963
std::string terminationReason;
966964

967-
/** Is this connection used by a DCP connection? */
968-
bool dcp = false;
969-
970965
/** Is this DCP channel XAttrAware */
971966
bool dcpXattrAware = false;
972967

daemon/cookie.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,7 @@ class Cookie : public cb::tracing::Traceable {
694694
/// Previously reserve would lock the connection, but with OOO we
695695
/// might have multiple cookies in flight and needs to be able to
696696
/// lock them independently
697-
uint8_t refcount = 0;
697+
std::atomic<uint8_t> refcount = 0;
698698

699699
/// see isAuthorized/setAuthorized
700700
bool authorized = false;

daemon/protocol/mcbp/dcp_open_executor.cc

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,42 @@ void dcp_open_executor(Cookie& cookie) {
4949
auto key = request.getKey();
5050
auto value = request.getValue();
5151

52-
ret = dcpOpen(
53-
cookie,
54-
request.getOpaque(),
55-
payload->getSeqno(),
56-
flags,
57-
{reinterpret_cast<const char*>(key.data()), key.size()},
58-
{reinterpret_cast<const char*>(value.data()),
59-
value.size()});
52+
// MB-43622 There is a race condition in the creation and
53+
// notification of the DCP connections. Initially
54+
// I tried to reserve the cookie from the constructor
55+
// of the ConnHandler, but that caused a ton of problems
56+
// in the unit tests as they didn't explicitly release
57+
// the reference (and trying to clean up all of that was
58+
// a lot of work). In addition to that we could end up
59+
// with a memory allocation failure trying to insert
60+
// the new cookie in the connection array which would
61+
// also make it hard to figure out when to release
62+
// the reference (and the engine is not allowed to call
63+
// release from a worker thread as it tries to reschedule
64+
// the cookie). The workaround used is to bump
65+
// the refcount before calling DCP Open so that the
66+
// checks in scheduleDcpStep can see that the ref
67+
// count is correct if we get a notification before
68+
// this thread calls reserve.
69+
cookie.incrementRefcount();
70+
try {
71+
ret = dcpOpen(
72+
cookie,
73+
request.getOpaque(),
74+
payload->getSeqno(),
75+
flags,
76+
{reinterpret_cast<const char*>(key.data()), key.size()},
77+
{reinterpret_cast<const char*>(value.data()),
78+
value.size()});
79+
} catch (const std::exception& e) {
80+
LOG_WARNING(
81+
"{}: Received an exception as part DCP Open: {}, "
82+
"disconnect client",
83+
connection.getId(),
84+
e.what());
85+
ret = ENGINE_DISCONNECT;
86+
}
87+
cookie.decrementRefcount();
6088
}
6189
}
6290

@@ -74,9 +102,15 @@ void dcp_open_executor(Cookie& cookie) {
74102
connection.setDcpDeletedUserXattr(dcpDeletedUserXattr);
75103
connection.setDcpNoValue(dcpNoValue);
76104
connection.setDcpDeleteTimeEnabled(dcpDeleteTimes);
77-
connection.setDCP(true);
78105
connection.disableSaslAuth();
79106

107+
if (!connection.getDcpConnHandlerIface()) {
108+
throw std::logic_error(
109+
"dcp_open_executor(): The underlying engine returned "
110+
"success but did not set up a DCP connection handler "
111+
"interface");
112+
}
113+
80114
// String buffer with max length = total length of all possible contents
81115
std::string logBuffer;
82116

engines/ep/src/connhandler.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@ ConnHandler::ConnHandler(EventuallyPersistentEngine& e,
5858
logger = BucketLogger::createBucketLogger(
5959
std::to_string(reinterpret_cast<uintptr_t>(this)));
6060

61-
auto connId = e.getServerApi()->cookie->get_log_info(c).first;
61+
auto* cookie_api = e.getServerApi()->cookie;
62+
cookie_api->setDcpConnHandler(c, this);
63+
auto connId = cookie_api->get_log_info(c).first;
6264
logger->setConnectionId(connId);
6365
}
6466

engines/ep/src/ep_engine.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6195,8 +6195,6 @@ ENGINE_ERROR_CODE EventuallyPersistentEngine::dcpOpen(
61956195

61966196
// Success creating dcp object which has stored the cookie, now reserve it.
61976197
reserveCookie(cookie);
6198-
setDcpConnHandler(cookie, handler);
6199-
62006198
return ENGINE_SUCCESS;
62016199
}
62026200

engines/ewouldblock_engine/ewouldblock_engine.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,9 @@ class BlockMonitorThread : public Couchbase::Thread {
166166
};
167167

168168
/** ewouldblock_engine class */
169-
class EWB_Engine : public EngineIface, public DcpIface {
169+
class EWB_Engine : public EngineIface,
170+
public DcpIface,
171+
public DcpConnHandlerIface {
170172
private:
171173
enum class Cmd {
172174
NONE,
@@ -1387,6 +1389,8 @@ ENGINE_ERROR_CODE EWB_Engine::open(gsl::not_null<const void*> cookie,
13871389
dcp_stream[cookie] =
13881390
std::make_pair(false, std::numeric_limits<uint64_t>::max());
13891391
}
1392+
1393+
real_api->cookie->setDcpConnHandler(cookie, this);
13901394
return ENGINE_SUCCESS;
13911395
}
13921396

0 commit comments

Comments
 (0)