77#include < absl/cleanup/cleanup.h>
88#include < absl/strings/str_cat.h>
99
10+ #include < utility>
11+
1012#include " base/flags.h"
1113#include " base/logging.h"
1214#include " cluster_utility.h"
15+ #include " server/cluster/cluster_defs.h"
16+ #include " server/common.h"
1317#include " server/error.h"
1418#include " server/journal/executor.h"
1519#include " server/journal/tx_executor.h"
@@ -70,6 +74,22 @@ class ClusterShardMigration {
7074 break ;
7175 }
7276
// Checks whether applying the current transaction would exceed the memory limit.
// Adds tx_data->command.cmd_len to the current value of used_mem_current (relaxed load)
// and compares the sum against max_memory_limit. On overflow it reports the error
// through cntx, switches in_migration_ to the error state and returns true so the
// caller can stop processing; otherwise it returns false and has no side effects.
// NOTE(review): cmd_len is presumably the size of the incoming command in bytes - confirm.
77+ auto memory_limit_check = [&]() -> bool {
78+ auto used_mem = used_mem_current.load (memory_order_relaxed);
79+ if ((used_mem + tx_data->command .cmd_len ) > max_memory_limit) {
80+ std::string error =
// NOTE(review): StrCat is called with a single argument here, so it only copies the literal.
81+ absl::StrCat (" Applying incoming slot data is overflowing max memory limit. Closing." );
// The same message is sent to both the execution context and the migration object.
82+ cntx->ReportError (error);
83+ in_migration_->ChangeToErrorState (error);
84+ return true ;
85+ }
86+ return false ;
87+ };
88+
89+ if (memory_limit_check ()) {
90+ break ;
91+ }
92+
7393 while (tx_data->opcode == journal::Op::LSN) {
7494 VLOG (2 ) << " Attempt to finalize flow " << source_shard_id_ << " attempt " << tx_data->lsn ;
7595 last_attempt_.store (tx_data->lsn );
@@ -79,6 +99,11 @@ class ClusterShardMigration {
7999 VLOG (1 ) << " Finalized flow " << source_shard_id_;
80100 return ;
81101 }
102+ if (memory_limit_check ()) {
103+ VLOG (2 ) << " Flow finalization " << source_shard_id_
104+ << " canceled due memory limit reached" ;
105+ return ;
106+ }
82107 if (!tx_data->command .cmd_args .empty ()) {
83108 VLOG (1 ) << " Flow finalization failed " << source_shard_id_ << " by "
84109 << tx_data->command .cmd_args [0 ];
@@ -181,6 +206,13 @@ bool IncomingSlotMigration::Join(long attempt) {
181206 return false ;
182207 }
183208
209+ // If any of the migration shards has reported an ERROR (OOM), we can return an error right away.
210+ if (GetState () == MigrationState::C_ERROR) {
211+ LOG (WARNING) << " Error in incoming slot migration. Can't join migration for " << source_id_;
212+ ReportError (GenericError (" Error in incoming slot migration." ));
213+ return false ;
214+ }
215+
184216 // If data was sent after the LSN, WaitFor() always returns false. To reduce the wait time,
185217 // we check the current state: if WaitFor() returned false but GetLastAttempt() == attempt,
186218 // the Join has failed and we can return false.
@@ -251,7 +283,9 @@ void IncomingSlotMigration::Init(uint32_t shards_num) {
251283
// Starts the incoming flow for `shard` by calling Start() on shard_flows_[shard] with
// the migration's context (cntx_) and `source`, then logs the outcome at VLOG(1).
// The updated log statement prints "cancelled" when GetState() is MigrationState::C_ERROR
// and "finished" otherwise, followed by source_id_.
// NOTE(review): the log wording suggests Start() returns only once the flow is over - confirm.
252284void IncomingSlotMigration::StartFlow (uint32_t shard, util::FiberSocketBase* source) {
253285 shard_flows_[shard]->Start (&cntx_, source);
254- VLOG (1 ) << " Incoming flow " << shard << " finished for " << source_id_;
286+ VLOG (1 ) << " Incoming flow " << shard
287+ << (GetState () == MigrationState::C_ERROR ? " cancelled " : " finished " ) << " for "
288+ << source_id_;
255289}
256290
257291size_t IncomingSlotMigration::GetKeyCount () const {
0 commit comments