Ensure partial MPP claims continue to block channels on restart #3928

Open · wants to merge 1 commit into main

119 changes: 115 additions & 4 deletions lightning/src/ln/chanmon_update_fail_tests.rs
@@ -4208,13 +4208,17 @@ fn test_glacial_peer_cant_hang() {
do_test_glacial_peer_cant_hang(true);
}

#[test]
fn test_partial_claim_mon_update_compl_actions() {
fn do_test_partial_claim_mon_update_compl_actions(reload_a: bool, reload_b: bool) {
// Test that, when we claim an MPP payment, the preimage for the claim is retained in all
// the `ChannelMonitor`s until the preimage has reached every `ChannelMonitor` for a
// channel which was part of the MPP.
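// The topology under test (from the channel setup and the two-path route below):
//
//       /-- node 1 --\
// node 0              node 3
//       \-- node 2 --/
//
// Node 0 sends a 200k-msat MPP payment split across 0 -> 1 -> 3 and 0 -> 2 -> 3;
// node 3 is the claimer and the node that gets reloaded.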
let chanmon_cfgs = create_chanmon_cfgs(4);
let node_cfgs = create_node_cfgs(4, &chanmon_cfgs);

let (persister, persister_2, persister_3);
let (new_chain_mon, new_chain_mon_2, new_chain_mon_3);
let (nodes_3_reload, nodes_3_reload_2, nodes_3_reload_3);
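// The persister/chain-monitor/node bindings above are declared before `nodes` so they
// outlive it across the up-to-three reload_node! calls on node 3 below.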

let node_chanmgrs = create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]);
let mut nodes = create_network(4, &node_cfgs, &node_chanmgrs);

@@ -4243,6 +4247,8 @@ fn test_partial_claim_mon_update_compl_actions() {
let paths = &[&[&nodes[1], &nodes[3]][..], &[&nodes[2], &nodes[3]][..]];
send_along_route_with_secret(&nodes[0], route, paths, 200_000, payment_hash, payment_secret);

// Store channel 4's monitor, which doesn't yet contain the preimage, for use on reload
let chan_4_monitor_serialized = get_monitor!(nodes[3], chan_4_id).encode();
// Claim along both paths, but only complete one of the two monitor updates.
chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
@@ -4254,7 +4260,13 @@ fn test_partial_claim_mon_update_compl_actions() {
// Complete the 1<->3 monitor update and play the commitment_signed dance forward until it
// blocks.
nodes[3].chain_monitor.complete_sole_pending_chan_update(&chan_3_id);
expect_payment_claimed!(&nodes[3], payment_hash, 200_000);
let payment_claimed = nodes[3].node.get_and_clear_pending_events();
assert_eq!(payment_claimed.len(), 1, "{payment_claimed:?}");
if let Event::PaymentClaimed { payment_hash: ev_hash, .. } = &payment_claimed[0] {
assert_eq!(*ev_hash, payment_hash);
} else {
panic!("{payment_claimed:?}");
}
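// The PaymentClaimed event is captured by hand (rather than via expect_payment_claimed!)
// so that the copies replayed after each reload below can be compared against it.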
let updates = get_htlc_update_msgs(&nodes[3], &node_b_id);

nodes[1].node.handle_update_fulfill_htlc(node_d_id, &updates.update_fulfill_htlcs[0]);
@@ -4273,15 +4285,41 @@ fn test_partial_claim_mon_update_compl_actions() {
check_added_monitors(&nodes[3], 0);
assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());

if reload_a {
// After a reload (with the monitor not yet fully updated), the RAA should still be blocked
// waiting until the monitor update completes.
let node_ser = nodes[3].node.encode();
let chan_3_monitor_serialized = get_monitor!(nodes[3], chan_3_id).encode();
let mons = &[&chan_3_monitor_serialized[..], &chan_4_monitor_serialized[..]];
reload_node!(nodes[3], &node_ser, mons, persister, new_chain_mon, nodes_3_reload);
// The final update to channel 4 should be replayed.
persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());
check_added_monitors(&nodes[3], 1);

// Because the HTLCs aren't yet cleared, the PaymentClaimed event will be replayed on
// restart.
let second_payment_claimed = nodes[3].node.get_and_clear_pending_events();
assert_eq!(payment_claimed, second_payment_claimed);

nodes[1].node.peer_disconnected(node_d_id);
nodes[2].node.peer_disconnected(node_d_id);
reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[3]));
reconnect_nodes(ReconnectArgs::new(&nodes[2], &nodes[3]));
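// The reconnects above must not have caused node 3 to release the still-blocked RAA.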

assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());
}

// Now double-check that the preimage is still in the 1<->3 channel and complete the pending
// monitor update, allowing node 3 to claim the payment on the 2<->3 channel. This also
// unblocks the 1<->3 channel, allowing node 3 to release the two blocked monitor updates and
// respond to the final commitment_signed.
assert!(get_monitor!(nodes[3], chan_3_id).get_stored_preimages().contains_key(&payment_hash));
assert!(nodes[3].node.get_and_clear_pending_events().is_empty());

nodes[3].chain_monitor.complete_sole_pending_chan_update(&chan_4_id);
let mut ds_msgs = nodes[3].node.get_and_clear_pending_msg_events();
assert_eq!(ds_msgs.len(), 2);
assert_eq!(ds_msgs.len(), 2, "{ds_msgs:?}");
check_added_monitors(&nodes[3], 2);

match remove_first_msg_event_to_node(&node_b_id, &mut ds_msgs) {
@@ -4324,13 +4362,86 @@ fn test_partial_claim_mon_update_compl_actions() {
assert!(get_monitor!(nodes[3], chan_3_id).get_stored_preimages().contains_key(&payment_hash));
assert!(get_monitor!(nodes[3], chan_4_id).get_stored_preimages().contains_key(&payment_hash));

if reload_b {
// Ensure that the channel pause logic doesn't accidentally get restarted after a second
// reload once the HTLCs for the first payment have been removed and the monitor updates
// have completed.
let node_ser = nodes[3].node.encode();
let chan_3_monitor_serialized = get_monitor!(nodes[3], chan_3_id).encode();
let chan_4_monitor_serialized = get_monitor!(nodes[3], chan_4_id).encode();
let mons = &[&chan_3_monitor_serialized[..], &chan_4_monitor_serialized[..]];
reload_node!(nodes[3], &node_ser, mons, persister_2, new_chain_mon_2, nodes_3_reload_2);
check_added_monitors(&nodes[3], 0);

nodes[1].node.peer_disconnected(node_d_id);
nodes[2].node.peer_disconnected(node_d_id);
reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[3]));
reconnect_nodes(ReconnectArgs::new(&nodes[2], &nodes[3]));

assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());

// Because the HTLCs aren't yet cleared, the PaymentClaimed event will be replayed on
// restart.
let third_payment_claimed = nodes[3].node.get_and_clear_pending_events();
assert_eq!(payment_claimed, third_payment_claimed);
}

send_payment(&nodes[1], &[&nodes[3]], 100_000);
assert!(!get_monitor!(nodes[3], chan_3_id).get_stored_preimages().contains_key(&payment_hash));
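// Routing a fresh payment over the 1 <-> 3 channel advances that channel's monitor,
// after which the first payment's preimage is no longer stored in it.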

if reload_b {
// Ensure that the channel pause logic doesn't accidentally get restarted after yet
// another reload once the HTLCs for the first payment have been removed and the monitor
// updates have completed, even if only one of the two monitors still knows about the
// first payment.
let node_ser = nodes[3].node.encode();
let chan_3_monitor_serialized = get_monitor!(nodes[3], chan_3_id).encode();
let chan_4_monitor_serialized = get_monitor!(nodes[3], chan_4_id).encode();
let mons = &[&chan_3_monitor_serialized[..], &chan_4_monitor_serialized[..]];
reload_node!(nodes[3], &node_ser, mons, persister_3, new_chain_mon_3, nodes_3_reload_3);
check_added_monitors(&nodes[3], 0);

nodes[1].node.peer_disconnected(node_d_id);
nodes[2].node.peer_disconnected(node_d_id);
reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[3]));
reconnect_nodes(ReconnectArgs::new(&nodes[2], &nodes[3]));

assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());

// Because the HTLCs aren't yet cleared, the PaymentClaimed events for both payments will
// be replayed on restart.
// Use this as an opportunity to check the payment_ids are unique.
let mut events = nodes[3].node.get_and_clear_pending_events();
assert_eq!(events.len(), 2);
events.retain(|ev| *ev != payment_claimed[0]);
assert_eq!(events.len(), 1);
if let Event::PaymentClaimed { payment_id: original_payment_id, .. } = &payment_claimed[0] {
assert!(original_payment_id.is_some());
if let Event::PaymentClaimed { amount_msat, payment_id, .. } = &events[0] {
assert!(payment_id.is_some());
assert_ne!(original_payment_id, payment_id);
assert_eq!(*amount_msat, 100_000);
} else {
panic!("{events:?}");
}
} else {
panic!("{events:?}");
}

send_payment(&nodes[1], &[&nodes[3]], 100_000);
}

send_payment(&nodes[2], &[&nodes[3]], 100_000);
assert!(!get_monitor!(nodes[3], chan_4_id).get_stored_preimages().contains_key(&payment_hash));
}

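// The four runs cover reloading node 3 while the claim's monitor update is still pending
// (reload_a) and/or reloading again after all monitor updates have completed (reload_b).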
#[test]
fn test_partial_claim_mon_update_compl_actions() {
do_test_partial_claim_mon_update_compl_actions(true, true);
do_test_partial_claim_mon_update_compl_actions(true, false);
do_test_partial_claim_mon_update_compl_actions(false, true);
do_test_partial_claim_mon_update_compl_actions(false, false);
}

#[test]
fn test_claim_to_closed_channel_blocks_forwarded_preimage_removal() {
// One of the last features for async persistence we implemented was the correct blocking of
39 changes: 28 additions & 11 deletions lightning/src/ln/channelmanager.rs
@@ -8079,25 +8079,42 @@ where
// payment claim from a `ChannelMonitor`. In some cases (MPP or
// if the HTLC was only recently removed) we make such claims
// after an HTLC has been removed from a channel entirely, and
// thus the RAA blocker has long since completed.
// thus the RAA blocker may have long since completed.
//
// In any other case, the RAA blocker must still be present and
// blocking RAAs.
debug_assert!(
during_init
|| peer_state
.actions_blocking_raa_monitor_updates
.get(&chan_id)
.unwrap()
.contains(&raa_blocker)
);
// However, it's possible that the `ChannelMonitorUpdate` containing
// the preimage never completed and is still pending. In that case,
// we need to re-add the RAA blocker, which we do here. Handling
// the post-update action, below, will remove it again.
//
// In any other case (i.e. not during startup), the RAA blocker
// must still be present and blocking RAAs.
let actions = &mut peer_state.actions_blocking_raa_monitor_updates;
let actions_list = actions.entry(chan_id).or_insert_with(Vec::new);
if !actions_list.contains(&raa_blocker) {
debug_assert!(during_init);
actions_list.push(raa_blocker);
}
}
let action = if let Some(action) = action_opt {
action
} else {
return;
};

// If there are monitor updates in flight, we may be in the case
// described above, replaying a claim on startup whose RAA blocker
// needs to remain in place. Thus, in such a case we simply push the
// post-update action onto the blocked list and move on.
let in_flight_mons = peer_state.in_flight_monitor_updates.get(&chan_id);
if in_flight_mons.map(|(_, mons)| !mons.is_empty()).unwrap_or(false) {
peer_state
.monitor_update_blocked_actions
.entry(chan_id)
.or_insert_with(Vec::new)
.push(action);
return;
}

mem::drop(peer_state_opt);

log_trace!(logger, "Completing monitor update completion action for channel {} as claim was redundant: {:?}",
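
Taken together, the new branches implement a small decision procedure for claims replayed from a `ChannelMonitor`: re-add the RAA blocker if it's missing (only possible during init), and if a monitor update is still in flight, park the post-update action rather than running it. Below is a minimal, self-contained Rust sketch of that flow; `PeerState`, `RaaBlocker`, and `PostUpdateAction` here are simplified stand-ins invented for illustration, not LDK's actual types.

use std::collections::HashMap;

type ChannelId = u64;

#[derive(Clone, Debug, PartialEq)]
struct RaaBlocker(u64);

#[derive(Clone, Debug)]
struct PostUpdateAction(&'static str);

#[derive(Default)]
struct PeerState {
    // Blockers keeping us from releasing RAAs on each channel.
    actions_blocking_raa_monitor_updates: HashMap<ChannelId, Vec<RaaBlocker>>,
    // Actions to run once a channel's pending monitor updates complete.
    monitor_update_blocked_actions: HashMap<ChannelId, Vec<PostUpdateAction>>,
    // Monitor update ids currently in flight, per channel.
    in_flight_monitor_updates: HashMap<ChannelId, Vec<u64>>,
}

fn handle_replayed_claim(
    peer_state: &mut PeerState,
    chan_id: ChannelId,
    raa_blocker: RaaBlocker,
    action: Option<PostUpdateAction>,
    during_init: bool,
) {
    // Re-add the RAA blocker if it's missing, which can only happen when a claim is
    // replayed on startup while its monitor update is still pending.
    let blockers = peer_state
        .actions_blocking_raa_monitor_updates
        .entry(chan_id)
        .or_insert_with(Vec::new);
    if !blockers.contains(&raa_blocker) {
        debug_assert!(during_init);
        blockers.push(raa_blocker);
    }

    let Some(action) = action else { return };

    // If a monitor update is in flight, park the post-update action; it will run (and
    // remove the blocker) when the update completes.
    let update_in_flight = peer_state
        .in_flight_monitor_updates
        .get(&chan_id)
        .map_or(false, |updates| !updates.is_empty());
    if update_in_flight {
        peer_state
            .monitor_update_blocked_actions
            .entry(chan_id)
            .or_insert_with(Vec::new)
            .push(action);
        return;
    }

    // Otherwise the claim was redundant; complete the action immediately.
    println!("claim was redundant; completing {:?} for channel {} now", action, chan_id);
}

fn main() {
    let mut peer_state = PeerState::default();
    // Simulate a startup replay with an update still in flight on channel 7.
    peer_state.in_flight_monitor_updates.insert(7, vec![1]);
    let action = Some(PostUpdateAction("unblock-raa"));
    handle_replayed_claim(&mut peer_state, 7, RaaBlocker(42), action, true);
    // The blocker was re-added and the action was parked, not run.
    assert_eq!(peer_state.actions_blocking_raa_monitor_updates[&7].len(), 1);
    assert_eq!(peer_state.monitor_update_blocked_actions[&7].len(), 1);
}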