1212#include " base/logging.h"
1313#include " cluster_family.h"
1414#include " cluster_utility.h"
15- #include " facade/resp_expr.h"
1615#include " server/cluster/cluster_defs.h"
17- #include " server/cluster/incoming_slot_migration.h"
18- #include " server/common.h"
1916#include " server/db_slice.h"
2017#include " server/engine_shard_set.h"
2118#include " server/error.h"
@@ -40,8 +37,7 @@ class OutgoingMigration::SliceSlotMigration : private ProtocolClient {
4037 SliceSlotMigration (DbSlice* slice, ServerContext server_context, SlotSet slots,
4138 journal::Journal* journal, OutgoingMigration* om)
4239 : ProtocolClient(server_context), streamer_(slice, std::move(slots), journal, &exec_st_) {
43- exec_st_.SwitchErrorHandler (
44- [om](auto ge) { om->Finish (MigrationState::C_ERROR, std::move (ge)); });
40+ exec_st_.SwitchErrorHandler ([om](auto ge) { om->Finish (std::move (ge)); });
4541 }
4642
4743 ~SliceSlotMigration () {
@@ -143,8 +139,14 @@ void OutgoingMigration::OnAllShards(
143139 });
144140}
145141
146- void OutgoingMigration::Finish (MigrationState next_state, GenericError error) {
142+ void OutgoingMigration::Finish (GenericError error) {
143+ auto next_state = MigrationState::C_FINISHED;
147144 if (error) {
145+ if (error.Format () == kIncomingMigrationOOM ) {
146+ next_state = MigrationState::C_FATAL;
147+ } else {
148+ next_state = MigrationState::C_ERROR;
149+ }
148150 LOG (WARNING) << " Finish outgoing migration for " << cf_->MyID () << " : "
149151 << migration_info_.node_info .id << " with error: " << error.Format ();
150152 exec_st_.ReportError (std::move (error));
@@ -225,6 +227,15 @@ void OutgoingMigration::SyncFb() {
225227 continue ;
226228 }
227229
230+ // Abort the outgoing migration if the incoming node answered INIT with OOM. This usually
231+ // happens on the second iteration, after the first one failed with OOM. Sending the second
232+ // INIT is still required to clean up slots on the incoming slot migration node.
233+ if (CheckRespFirstTypes ({RespExpr::ERROR}) &&
234+ facade::ToSV (LastResponseArgs ().front ().GetBuf ()) == kIncomingMigrationOOM ) {
235+ ChangeState (MigrationState::C_FATAL);
236+ break ;
237+ }
238+
228239 if (!CheckRespIsSimpleReply (" OK" )) {
229240 if (CheckRespIsSimpleReply (kUnknownMigration )) {
230241 const absl::Duration passed = absl::Now () - start_time;
@@ -275,20 +286,16 @@ void OutgoingMigration::SyncFb() {
275286 }
276287
277288 long attempt = 0 ;
278- bool fatal_state = false ;
279289 while (GetState () != MigrationState::C_FINISHED && !FinalizeMigration (++attempt)) {
280- // Don't sleep if we ended up in FATAL state
290+ // Break out of the loop without sleeping if the state is C_FATAL
281291 if (GetState () == MigrationState::C_FATAL) {
282- fatal_state = true ;
283292 break ;
284- } else {
285- // Process commands that were on pause and try again
286- VLOG (1 ) << " Waiting for migration to finalize..." ;
287- ThisFiber::SleepFor (500ms);
288293 }
294+ // Process commands that were on pause and try again
295+ VLOG (1 ) << " Waiting for migration to finalize..." ;
296+ ThisFiber::SleepFor (500ms);
289297 }
290- // End outgoing slot migration if we are FINISHED or are in FATAL state
291- if (!exec_st_.IsRunning () && !fatal_state) {
298+ if (!exec_st_.IsRunning ()) {
292299 continue ;
293300 }
294301 break ;
@@ -371,13 +378,7 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
371378 auto error = facade::ToSV (LastResponseArgs ().front ().GetBuf ());
372379 LOG (WARNING) << " Error response for " << cf_->MyID () << " : " << migration_info_.node_info .id
373380 << " attempt " << attempt << " msg: " << error;
374- auto next_state = MigrationState::C_ERROR;
375- // Check if there is OOM response from incoming slot migration
376- if (error == IncomingSlotMigration::kMigrationOOM ) {
377- SetLastError (GenericError (IncomingSlotMigration::kMigrationOOM ));
378- next_state = MigrationState::C_FATAL;
379- }
380- Finish (next_state, std::string (error));
381+ Finish (std::string (error));
381382 return false ;
382383 }
383384
@@ -397,7 +398,7 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
397398 }
398399
399400 if (!exec_st_.GetError ()) {
400- Finish (MigrationState::C_FINISHED );
401+ Finish ();
401402 keys_number_ = cluster::GetKeyCount (migration_info_.slot_ranges );
402403 cf_->ApplyMigrationSlotRangeToConfig (migration_info_.node_info .id , migration_info_.slot_ranges ,
403404 false );
0 commit comments