diff --git a/src/server/main_service.cc b/src/server/main_service.cc
index 20efa1a4a4ec..44926ffebaf7 100644
--- a/src/server/main_service.cc
+++ b/src/server/main_service.cc
@@ -1435,6 +1435,7 @@ size_t Service::DispatchManyCommands(absl::Span args_list, SinkReply
   MultiCommandSquasher::Opts opts;
   opts.verify_commands = true;
   opts.max_squash_size = ss->max_squash_cmd_num;
+  opts.is_mult_non_atomic = true;
 
   size_t squashed_num = MultiCommandSquasher::Execute(absl::MakeSpan(stored_cmds),
                                                       static_cast<facade::RedisReplyBuilder*>(builder),
diff --git a/src/server/multi_command_squasher.cc b/src/server/multi_command_squasher.cc
index 0a9369f3fcb3..1ea877e241c0 100644
--- a/src/server/multi_command_squasher.cc
+++ b/src/server/multi_command_squasher.cc
@@ -6,6 +6,7 @@
 #include
 
+#include "base/flags.h"
 #include "base/logging.h"
 #include "core/overloaded.h"
 #include "facade/dragonfly_connection.h"
@@ -15,6 +16,10 @@
 #include "server/transaction.h"
 #include "server/tx_base.h"
 
+ABSL_FLAG(size_t, squashed_reply_size_limit, 0,
+          "Max bytes allowed for squashing_current_reply_size. If this limit is reached, "
+          "connections dispatching via pipelines block until the value drops below it again.");
+
 namespace dfly {
 
 using namespace std;
@@ -63,6 +68,9 @@ size_t Size(const facade::CapturingReplyBuilder::Payload& payload) {
 }  // namespace
 
 atomic_uint64_t MultiCommandSquasher::current_reply_size_ = 0;
+thread_local size_t MultiCommandSquasher::reply_size_limit_ =
+    absl::GetFlag(FLAGS_squashed_reply_size_limit);
+util::fb2::EventCount MultiCommandSquasher::ec_;
 
 MultiCommandSquasher::MultiCommandSquasher(absl::Span<StoredCmd> cmds, ConnectionContext* cntx,
                                            Service* service, const Opts& opts)
@@ -208,6 +216,15 @@ bool MultiCommandSquasher::ExecuteSquashed(facade::RedisReplyBuilder* rb) {
   if (order_.empty())
     return true;
 
+  // A non-atomic multi does not lock ahead, so it is safe to preempt here while the
+  // transaction has not really started yet.
+  // This is not true for `multi/exec`, which also goes through `Execute()` but locks ahead
+  // before calling `ScheduleSingleHop` below.
+  // TODO: Investigate the side effects of allowing this in `lock ahead` mode.
+  if (opts_.is_mult_non_atomic) {
+    MultiCommandSquasher::ec_.await([]() { return !MultiCommandSquasher::IsReplySizeOverLimit(); });
+  }
+
   unsigned num_shards = 0;
   for (auto& sd : sharded_) {
     if (!sd.dispatched.empty())
@@ -246,6 +263,7 @@ bool MultiCommandSquasher::ExecuteSquashed(facade::RedisReplyBuilder* rb) {
   uint64_t after_hop = proactor->GetMonotonicTimeNs();
   bool aborted = false;
+  size_t size = 0;
 
   for (auto idx : order_) {
     auto& sinfo = sharded_[idx];
     DCHECK_LT(sinfo.reply_id, sinfo.dispatched.size());
@@ -258,6 +276,9 @@ bool MultiCommandSquasher::ExecuteSquashed(facade::RedisReplyBuilder* rb) {
     if (aborted)
       break;
   }
+  current_reply_size_.fetch_sub(size, std::memory_order_relaxed);
+  MultiCommandSquasher::ec_.notifyAll();
+
   uint64_t after_reply = proactor->GetMonotonicTimeNs();
   ServerState::SafeTLocal()->stats.multi_squash_exec_hop_usec += (after_hop - start) / 1000;
   ServerState::SafeTLocal()->stats.multi_squash_exec_reply_usec += (after_reply - after_hop) / 1000;
diff --git a/src/server/multi_command_squasher.h b/src/server/multi_command_squasher.h
index 9aea56880359..638f1b166fa9 100644
--- a/src/server/multi_command_squasher.h
+++ b/src/server/multi_command_squasher.h
@@ -7,6 +7,7 @@
 #include "facade/reply_capture.h"
 #include "server/conn_context.h"
 #include "server/main_service.h"
+#include "util/fibers/synchronization.h"
 
 namespace dfly {
 
@@ -23,11 +24,12 @@ class MultiCommandSquasher {
  public:
   struct Opts {
-    bool verify_commands = false; // Whether commands need to be verified before execution
-    bool error_abort = false; // Abort upon receiving error
+    bool verify_commands = false;  // Whether commands need to be verified before execution
+    bool error_abort = false;      // Abort upon receiving error
+    // Set if MultiCommandSquasher is used from a pipeline and not from a multi/exec block
+    bool is_mult_non_atomic = false;
     unsigned max_squash_size = 32;  // How many commands to squash at once
   };
-
   // Returns number of processed commands.
   static size_t Execute(absl::Span<StoredCmd> cmds, facade::RedisReplyBuilder* rb,
                         ConnectionContext* cntx, Service* service, const Opts& opts) {
@@ -38,6 +40,14 @@ class MultiCommandSquasher {
     return current_reply_size_.load(std::memory_order_relaxed);
   }
 
+  static bool IsReplySizeOverLimit() {
+    const bool over_limit = reply_size_limit_ > 0 &&
+                            current_reply_size_.load(std::memory_order_relaxed) > reply_size_limit_;
+    VLOG_IF(2, over_limit) << "MultiCommandSquasher overlimit: " << reply_size_limit_
+                           << " current reply size " << current_reply_size_;
+    return over_limit;
+  }
+
  private:
   // Per-shard execution info.
   struct ShardExecInfo {
@@ -97,6 +107,10 @@ class MultiCommandSquasher {
   // we increase size in one thread and decrease in another
   static atomic_uint64_t current_reply_size_;
 
+  // Used to throttle when memory is tight
+  static util::fb2::EventCount ec_;
+
+  static thread_local size_t reply_size_limit_;
 };
 
 }  // namespace dfly
diff --git a/tests/dragonfly/memory_test.py b/tests/dragonfly/memory_test.py
index d871371dfb01..655a7400f072 100644
--- a/tests/dragonfly/memory_test.py
+++ b/tests/dragonfly/memory_test.py
@@ -1,4 +1,5 @@
 import pytest
+import asyncio
 from redis import asyncio as aioredis
 from .utility import *
 import logging
@@ -222,3 +223,38 @@ async def test_cache_eviction_with_rss_deny_oom(
     )
     stats_info = await async_client.info("stats")
     logging.info(f'Current evicted: {stats_info["evicted_keys"]}. Total keys: {num_keys}.')
+
+
+@pytest.mark.asyncio
+async def test_throttle_on_commands_squashing_replies_bytes(df_factory: DflyInstanceFactory):
+    df = df_factory.create(
+        proactor_threads=2,
+        squashed_reply_size_limit=500_000_000,
+        vmodule="multi_command_squasher=2",
+    )
+    df.start()
+
+    client = df.client()
+    # Roughly 0.5 GB of hash data
+    await client.execute_command("debug populate 64 test 3125 rand type hash elements 500")
+
+    async def poll():
+        # At no point should the process RSS cross this limit
+        assert df.rss < 1_500_000_000
+        cl = df.client()
+        pipe = cl.pipeline(transaction=False)
+        for i in range(64):
+            pipe.execute_command(f"hgetall test:{i}")
+
+        await pipe.execute()
+
+    tasks = []
+    for i in range(20):
+        tasks.append(asyncio.create_task(poll()))
+
+    for task in tasks:
+        await task
+
+    df.stop()
+    found = df.find_in_logs("MultiCommandSquasher overlimit: ")
+    assert len(found) > 0
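
Note on the mechanism, not part of the patch: the throttle is a simple gate. A dispatcher about to squash a non-atomic pipeline first waits until the total size of captured replies is at or below squashed_reply_size_limit, and the reply-flushing path wakes the waiters after subtracting the bytes it has just released to the client. The sketch below illustrates that pattern in isolation, with std::condition_variable standing in for helio's util::fb2::EventCount; every name in it (ReplyGate, AwaitBelowLimit, Add, Sub, the byte figures in main) is hypothetical.

// Standalone sketch of the throttle pattern; illustrative only, not Dragonfly code.
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cstdio>
#include <mutex>
#include <thread>

class ReplyGate {
 public:
  explicit ReplyGate(std::size_t limit) : limit_(limit) {}

  // Dispatch side: block while accumulated reply bytes exceed the limit.
  // A limit of 0 disables throttling, mirroring the flag's default.
  void AwaitBelowLimit() {
    std::unique_lock<std::mutex> lk(mu_);
    cv_.wait(lk, [this] { return limit_ == 0 || bytes_ <= limit_; });
  }

  // Capture side: account for reply bytes buffered in memory.
  void Add(std::size_t n) {
    std::lock_guard<std::mutex> lk(mu_);
    bytes_ += n;
  }

  // Flush side: release bytes written to the client and wake blocked dispatchers.
  void Sub(std::size_t n) {
    {
      std::lock_guard<std::mutex> lk(mu_);
      bytes_ -= n;
    }
    cv_.notify_all();
  }

 private:
  const std::size_t limit_;
  std::size_t bytes_ = 0;
  std::mutex mu_;
  std::condition_variable cv_;
};

int main() {
  ReplyGate gate(1024);  // pretend --squashed_reply_size_limit=1024

  // Simulates the reply-flushing loop: drain 512 bytes every 10 ms.
  std::thread flusher([&gate] {
    for (int i = 0; i < 8; ++i) {
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
      gate.Sub(512);
    }
  });

  // Simulates dispatchers: wait for headroom, then capture another 512 bytes.
  for (int i = 0; i < 8; ++i) {
    gate.AwaitBelowLimit();
    gate.Add(512);
    std::printf("dispatched squashed batch %d\n", i);
  }

  flusher.join();
  return 0;
}

In the patch itself the counter is the existing atomic current_reply_size_, the gate is util::fb2::EventCount notified after the flush loop subtracts the released bytes, and only dispatches marked is_mult_non_atomic (plain pipelines, not multi/exec) wait on it, since they have not locked any keys yet.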