diff --git a/src/server/main_service.cc b/src/server/main_service.cc
index 20efa1a4a4ec..44926ffebaf7 100644
--- a/src/server/main_service.cc
+++ b/src/server/main_service.cc
@@ -1435,6 +1435,7 @@ size_t Service::DispatchManyCommands(absl::Span args_list, SinkReply
   MultiCommandSquasher::Opts opts;
   opts.verify_commands = true;
   opts.max_squash_size = ss->max_squash_cmd_num;
+  opts.is_mult_non_atomic = true;
 
   size_t squashed_num = MultiCommandSquasher::Execute(absl::MakeSpan(stored_cmds),
                                                       static_cast<facade::RedisReplyBuilder*>(builder),
diff --git a/src/server/multi_command_squasher.cc b/src/server/multi_command_squasher.cc
index 0a9369f3fcb3..1ea877e241c0 100644
--- a/src/server/multi_command_squasher.cc
+++ b/src/server/multi_command_squasher.cc
@@ -6,6 +6,7 @@
 #include
 
+#include "base/flags.h"
 #include "base/logging.h"
 #include "core/overloaded.h"
 #include "facade/dragonfly_connection.h"
@@ -15,6 +16,10 @@
 #include "server/transaction.h"
 #include "server/tx_base.h"
 
+ABSL_FLAG(size_t, squashed_reply_size_limit, 0,
+          "Max bytes allowed for squashing_current_reply_size. If this limit is reached, "
+          "connections dispatching via pipelines block until the value drops below it again.");
+
 namespace dfly {
 
 using namespace std;
@@ -63,6 +68,9 @@ size_t Size(const facade::CapturingReplyBuilder::Payload& payload) {
 }  // namespace
 
 atomic_uint64_t MultiCommandSquasher::current_reply_size_ = 0;
+thread_local size_t MultiCommandSquasher::reply_size_limit_ =
+    absl::GetFlag(FLAGS_squashed_reply_size_limit);
+util::fb2::EventCount MultiCommandSquasher::ec_;
 
 MultiCommandSquasher::MultiCommandSquasher(absl::Span<StoredCmd> cmds, ConnectionContext* cntx,
                                            Service* service, const Opts& opts)
@@ -208,6 +216,15 @@ bool MultiCommandSquasher::ExecuteSquashed(facade::RedisReplyBuilder* rb) {
   if (order_.empty())
     return true;
 
+  // A non-atomic multi does not lock ahead, so it is safe to preempt here while the
+  // transaction has not really started yet.
+  // This is not true for `multi/exec`, which also goes through `Execute()` but locks ahead
+  // before calling `ScheduleSingleHop` below.
+  // TODO: Investigate the side effects of allowing this in `lock ahead` mode.
+  if (opts_.is_mult_non_atomic) {
+    MultiCommandSquasher::ec_.await([]() { return !MultiCommandSquasher::IsReplySizeOverLimit(); });
+  }
+
   unsigned num_shards = 0;
   for (auto& sd : sharded_) {
     if (!sd.dispatched.empty())
@@ -246,6 +263,7 @@ bool MultiCommandSquasher::ExecuteSquashed(facade::RedisReplyBuilder* rb) {
   uint64_t after_hop = proactor->GetMonotonicTimeNs();
   bool aborted = false;
+  size_t size = 0;
 
   for (auto idx : order_) {
     auto& sinfo = sharded_[idx];
     DCHECK_LT(sinfo.reply_id, sinfo.dispatched.size());
@@ -258,6 +276,9 @@ bool MultiCommandSquasher::ExecuteSquashed(facade::RedisReplyBuilder* rb) {
     if (aborted)
       break;
   }
+  current_reply_size_.fetch_sub(size, std::memory_order_relaxed);
+  MultiCommandSquasher::ec_.notifyAll();
+
   uint64_t after_reply = proactor->GetMonotonicTimeNs();
   ServerState::SafeTLocal()->stats.multi_squash_exec_hop_usec += (after_hop - start) / 1000;
   ServerState::SafeTLocal()->stats.multi_squash_exec_reply_usec += (after_reply - after_hop) / 1000;
diff --git a/src/server/multi_command_squasher.h b/src/server/multi_command_squasher.h
index 9aea56880359..638f1b166fa9 100644
--- a/src/server/multi_command_squasher.h
+++ b/src/server/multi_command_squasher.h
@@ -7,6 +7,7 @@
 #include "facade/reply_capture.h"
 #include "server/conn_context.h"
 #include "server/main_service.h"
+#include "util/fibers/synchronization.h"
 
 namespace dfly {
 
@@ -23,11 +24,12 @@ class MultiCommandSquasher {
  public:
   struct Opts {
-    bool verify_commands = false; // Whether commands need to be verified before execution
-    bool error_abort = false; // Abort upon receiving error
+    bool verify_commands = false;  // Whether commands need to be verified before execution
+    bool error_abort = false;      // Abort upon receiving error
+    // Set if MultiCommandSquasher is used from a pipeline and not from a multi/exec block
+    bool is_mult_non_atomic = false;
     unsigned max_squash_size = 32;  // How many commands to squash at once
   };
-
   // Returns number of processed commands.
   static size_t Execute(absl::Span<StoredCmd> cmds, facade::RedisReplyBuilder* rb,
                         ConnectionContext* cntx, Service* service, const Opts& opts) {
@@ -38,6 +40,14 @@ class MultiCommandSquasher {
     return current_reply_size_.load(std::memory_order_relaxed);
   }
 
+  static bool IsReplySizeOverLimit() {
+    const bool over_limit = reply_size_limit_ > 0 &&
+                            current_reply_size_.load(std::memory_order_relaxed) > reply_size_limit_;
+    VLOG_IF(2, over_limit) << "MultiCommandSquasher overlimit: " << reply_size_limit_
+                           << " current reply size " << current_reply_size_;
+    return over_limit;
+  }
+
  private:
   // Per-shard execution info.
   struct ShardExecInfo {
@@ -97,6 +107,10 @@ class MultiCommandSquasher {
   // we increase size in one thread and decrease in another
   static atomic_uint64_t current_reply_size_;
 
+  // Used to throttle when memory is tight
+  static util::fb2::EventCount ec_;
+
+  static thread_local size_t reply_size_limit_;
 };
 
 }  // namespace dfly
diff --git a/tests/dragonfly/memory_test.py b/tests/dragonfly/memory_test.py
index d871371dfb01..655a7400f072 100644
--- a/tests/dragonfly/memory_test.py
+++ b/tests/dragonfly/memory_test.py
@@ -1,4 +1,5 @@
 import pytest
+import asyncio
 from redis import asyncio as aioredis
 from .utility import *
 import logging
@@ -222,3 +223,38 @@ async def test_cache_eviction_with_rss_deny_oom(
     )
     stats_info = await async_client.info("stats")
     logging.info(f'Current evicted: {stats_info["evicted_keys"]}. Total keys: {num_keys}.')
+
+
+@pytest.mark.asyncio
+async def test_throttle_on_commands_squashing_replies_bytes(df_factory: DflyInstanceFactory):
+    df = df_factory.create(
+        proactor_threads=2,
+        squashed_reply_size_limit=500_000_000,
+        vmodule="multi_command_squasher=2",
+    )
+    df.start()
+
+    client = df.client()
+    # Roughly 0.5 GB of hash data
+    await client.execute_command("debug populate 64 test 3125 rand type hash elements 500")
+
+    async def poll():
+        # At no point should the process RSS cross this limit
+        assert df.rss < 1_500_000_000
+        cl = df.client()
+        pipe = cl.pipeline(transaction=False)
+        for i in range(64):
+            pipe.execute_command(f"hgetall test:{i}")
+
+        await pipe.execute()
+
+    tasks = []
+    for i in range(20):
+        tasks.append(asyncio.create_task(poll()))
+
+    for task in tasks:
+        await task
+
+    df.stop()
+    found = df.find_in_logs("MultiCommandSquasher overlimit: ")
+    assert len(found) > 0
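
Note on the mechanism, not part of the patch: the throttle is a simple gate. A dispatcher about to squash a non-atomic pipeline first waits until the total size of captured replies is at or below squashed_reply_size_limit, and the reply-flushing path wakes the waiters after subtracting the bytes it has just released to the client. The sketch below illustrates that pattern in isolation, with std::condition_variable standing in for helio's util::fb2::EventCount; every name in it (ReplyGate, AwaitBelowLimit, Add, Sub, the byte figures in main) is hypothetical.

// Standalone sketch of the throttle pattern; illustrative only, not Dragonfly code.
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cstdio>
#include <mutex>
#include <thread>

class ReplyGate {
 public:
  explicit ReplyGate(std::size_t limit) : limit_(limit) {}

  // Dispatch side: block while accumulated reply bytes exceed the limit.
  // A limit of 0 disables throttling, mirroring the flag's default.
  void AwaitBelowLimit() {
    std::unique_lock<std::mutex> lk(mu_);
    cv_.wait(lk, [this] { return limit_ == 0 || bytes_ <= limit_; });
  }

  // Capture side: account for reply bytes buffered in memory.
  void Add(std::size_t n) {
    std::lock_guard<std::mutex> lk(mu_);
    bytes_ += n;
  }

  // Flush side: release bytes written to the client and wake blocked dispatchers.
  void Sub(std::size_t n) {
    {
      std::lock_guard<std::mutex> lk(mu_);
      bytes_ -= n;
    }
    cv_.notify_all();
  }

 private:
  const std::size_t limit_;
  std::size_t bytes_ = 0;
  std::mutex mu_;
  std::condition_variable cv_;
};

int main() {
  ReplyGate gate(1024);  // pretend --squashed_reply_size_limit=1024

  // Simulates the reply-flushing loop: drain 512 bytes every 10 ms.
  std::thread flusher([&gate] {
    for (int i = 0; i < 8; ++i) {
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
      gate.Sub(512);
    }
  });

  // Simulates dispatchers: wait for headroom, then capture another 512 bytes.
  for (int i = 0; i < 8; ++i) {
    gate.AwaitBelowLimit();
    gate.Add(512);
    std::printf("dispatched squashed batch %d\n", i);
  }

  flusher.join();
  return 0;
}

In the patch itself the counter is the existing atomic current_reply_size_, the gate is util::fb2::EventCount notified after the flush loop subtracts the released bytes, and only dispatches marked is_mult_non_atomic (plain pipelines, not multi/exec) wait on it, since they have not locked any keys yet.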