Ensure partial MPP claims continue to block channels on restart #3928

Open · wants to merge 1 commit into main

119 changes: 115 additions & 4 deletions lightning/src/ln/chanmon_update_fail_tests.rs
@@ -4208,13 +4208,17 @@ fn test_glacial_peer_cant_hang() {
do_test_glacial_peer_cant_hang(true);
}

#[test]
fn test_partial_claim_mon_update_compl_actions() {
fn do_test_partial_claim_mon_update_compl_actions(reload_a: bool, reload_b: bool) {
// Test that, when we claim an MPP payment, the preimage for the claim is retained in all
// the `ChannelMonitor`s until the preimage has reached every `ChannelMonitor` for a
// channel which was part of the MPP.
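// The topology under test (from the channel setup and the two-path route below):
//
//       /-- node 1 --\
// node 0              node 3
//       \-- node 2 --/
//
// Node 0 sends a 200k-msat MPP payment split across 0 -> 1 -> 3 and 0 -> 2 -> 3;
// node 3 is the claimer and the node that gets reloaded.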
let chanmon_cfgs = create_chanmon_cfgs(4);
let node_cfgs = create_node_cfgs(4, &chanmon_cfgs);

let (persister, persister_2, persister_3);
let (new_chain_mon, new_chain_mon_2, new_chain_mon_3);
let (nodes_3_reload, nodes_3_reload_2, nodes_3_reload_3);
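// The persister/chain-monitor/node bindings above are declared before `nodes` so they
// outlive it across the up-to-three reload_node! calls on node 3 below.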

let node_chanmgrs = create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]);
let mut nodes = create_network(4, &node_cfgs, &node_chanmgrs);

@@ -4243,6 +4247,8 @@ fn test_partial_claim_mon_update_compl_actions() {
let paths = &[&[&nodes[1], &nodes[3]][..], &[&nodes[2], &nodes[3]][..]];
send_along_route_with_secret(&nodes[0], route, paths, 200_000, payment_hash, payment_secret);

// Store channel 4's monitor, which doesn't yet contain the preimage, for use on reload
let chan_4_monitor_serialized = get_monitor!(nodes[3], chan_4_id).encode();
// Claim along both paths, but only complete one of the two monitor updates.
chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
@@ -4254,7 +4260,13 @@ fn test_partial_claim_mon_update_compl_actions() {
// Complete the 1<->3 monitor update and play the commitment_signed dance forward until it
// blocks.
nodes[3].chain_monitor.complete_sole_pending_chan_update(&chan_3_id);
expect_payment_claimed!(&nodes[3], payment_hash, 200_000);
let payment_claimed = nodes[3].node.get_and_clear_pending_events();
assert_eq!(payment_claimed.len(), 1, "{payment_claimed:?}");
if let Event::PaymentClaimed { payment_hash: ev_hash, .. } = &payment_claimed[0] {
assert_eq!(*ev_hash, payment_hash);
} else {
panic!("{payment_claimed:?}");
}
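// The PaymentClaimed event is captured by hand (rather than via expect_payment_claimed!)
// so that the copies replayed after each reload below can be compared against it.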
let updates = get_htlc_update_msgs(&nodes[3], &node_b_id);

nodes[1].node.handle_update_fulfill_htlc(node_d_id, &updates.update_fulfill_htlcs[0]);
@@ -4273,15 +4285,41 @@ fn test_partial_claim_mon_update_compl_actions() {
check_added_monitors(&nodes[3], 0);
assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());

if reload_a {
// After a reload (with the monitor not yet fully updated), the RAA should still be blocked
// waiting until the monitor update completes.
let node_ser = nodes[3].node.encode();
let chan_3_monitor_serialized = get_monitor!(nodes[3], chan_3_id).encode();
let mons = &[&chan_3_monitor_serialized[..], &chan_4_monitor_serialized[..]];
reload_node!(nodes[3], &node_ser, mons, persister, new_chain_mon, nodes_3_reload);
// The final update to channel 4 should be replayed.
persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());
check_added_monitors(&nodes[3], 1);

// Because the HTLCs aren't yet cleared, the PaymentClaimed event will be replayed on
// restart.
let second_payment_claimed = nodes[3].node.get_and_clear_pending_events();
assert_eq!(payment_claimed, second_payment_claimed);

nodes[1].node.peer_disconnected(node_d_id);
nodes[2].node.peer_disconnected(node_d_id);
reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[3]));
reconnect_nodes(ReconnectArgs::new(&nodes[2], &nodes[3]));
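// The reconnects above must not have caused node 3 to release the still-blocked RAA.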

assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());
}

// Now double-check that the preimage is still in the 1<->3 channel and complete the pending
// monitor update, allowing node 3 to claim the payment on the 2<->3 channel. This also
// unblocks the 1<->3 channel, allowing node 3 to release the two blocked monitor updates and
// respond to the final commitment_signed.
assert!(get_monitor!(nodes[3], chan_3_id).get_stored_preimages().contains_key(&payment_hash));
assert!(nodes[3].node.get_and_clear_pending_events().is_empty());

nodes[3].chain_monitor.complete_sole_pending_chan_update(&chan_4_id);
let mut ds_msgs = nodes[3].node.get_and_clear_pending_msg_events();
assert_eq!(ds_msgs.len(), 2);
assert_eq!(ds_msgs.len(), 2, "{ds_msgs:?}");
check_added_monitors(&nodes[3], 2);

match remove_first_msg_event_to_node(&node_b_id, &mut ds_msgs) {
@@ -4324,13 +4362,86 @@ fn test_partial_claim_mon_update_compl_actions() {
assert!(get_monitor!(nodes[3], chan_3_id).get_stored_preimages().contains_key(&payment_hash));
assert!(get_monitor!(nodes[3], chan_4_id).get_stored_preimages().contains_key(&payment_hash));

if reload_b {
// Ensure that the channel pause logic doesn't accidentally get restarted after a second
// reload once the HTLCs for the first payment have been removed and the monitor updates
// have completed.
let node_ser = nodes[3].node.encode();
let chan_3_monitor_serialized = get_monitor!(nodes[3], chan_3_id).encode();
let chan_4_monitor_serialized = get_monitor!(nodes[3], chan_4_id).encode();
let mons = &[&chan_3_monitor_serialized[..], &chan_4_monitor_serialized[..]];
reload_node!(nodes[3], &node_ser, mons, persister_2, new_chain_mon_2, nodes_3_reload_2);
check_added_monitors(&nodes[3], 0);

nodes[1].node.peer_disconnected(node_d_id);
nodes[2].node.peer_disconnected(node_d_id);
reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[3]));
reconnect_nodes(ReconnectArgs::new(&nodes[2], &nodes[3]));

assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());

// Because the HTLCs aren't yet cleared, the PaymentClaimed event will be replayed on
// restart.
let third_payment_claimed = nodes[3].node.get_and_clear_pending_events();
assert_eq!(payment_claimed, third_payment_claimed);
}

send_payment(&nodes[1], &[&nodes[3]], 100_000);
assert!(!get_monitor!(nodes[3], chan_3_id).get_stored_preimages().contains_key(&payment_hash));
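// Routing a fresh payment over the 1 <-> 3 channel advances that channel's monitor,
// after which the first payment's preimage is no longer stored in it.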

if reload_b {
// Ensure that the channel pause logic doesn't accidentally get restarted after yet
// another reload once the HTLCs for the first payment have been removed and the monitor
// updates have completed, even if only one of the two monitors still knows about the
// first payment.
let node_ser = nodes[3].node.encode();
let chan_3_monitor_serialized = get_monitor!(nodes[3], chan_3_id).encode();
let chan_4_monitor_serialized = get_monitor!(nodes[3], chan_4_id).encode();
let mons = &[&chan_3_monitor_serialized[..], &chan_4_monitor_serialized[..]];
reload_node!(nodes[3], &node_ser, mons, persister_3, new_chain_mon_3, nodes_3_reload_3);
check_added_monitors(&nodes[3], 0);

nodes[1].node.peer_disconnected(node_d_id);
nodes[2].node.peer_disconnected(node_d_id);
reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[3]));
reconnect_nodes(ReconnectArgs::new(&nodes[2], &nodes[3]));

assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());

// Because the HTLCs aren't yet cleared, the PaymentClaimed events for both payments will
// be replayed on restart.
// Use this as an opportunity to check the payment_ids are unique.
let mut events = nodes[3].node.get_and_clear_pending_events();
assert_eq!(events.len(), 2);
events.retain(|ev| *ev != payment_claimed[0]);
assert_eq!(events.len(), 1);
if let Event::PaymentClaimed { payment_id: original_payment_id, .. } = &payment_claimed[0] {
assert!(original_payment_id.is_some());
if let Event::PaymentClaimed { amount_msat, payment_id, .. } = &events[0] {
assert!(payment_id.is_some());
assert_ne!(original_payment_id, payment_id);
assert_eq!(*amount_msat, 100_000);
} else {
panic!("{events:?}");
}
} else {
panic!("{events:?}");
}

send_payment(&nodes[1], &[&nodes[3]], 100_000);
}

send_payment(&nodes[2], &[&nodes[3]], 100_000);
assert!(!get_monitor!(nodes[3], chan_4_id).get_stored_preimages().contains_key(&payment_hash));
}

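// The four runs cover reloading node 3 while the claim's monitor update is still pending
// (reload_a) and/or reloading again after all monitor updates have completed (reload_b).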
#[test]
fn test_partial_claim_mon_update_compl_actions() {
do_test_partial_claim_mon_update_compl_actions(true, true);
do_test_partial_claim_mon_update_compl_actions(true, false);
do_test_partial_claim_mon_update_compl_actions(false, true);
do_test_partial_claim_mon_update_compl_actions(false, false);
}

#[test]
fn test_claim_to_closed_channel_blocks_forwarded_preimage_removal() {
// One of the last features for async persistence we implemented was the correct blocking of
39 changes: 28 additions & 11 deletions lightning/src/ln/channelmanager.rs
@@ -8079,25 +8079,42 @@ where
// payment claim from a `ChannelMonitor`. In some cases (MPP or
// if the HTLC was only recently removed) we make such claims
// after an HTLC has been removed from a channel entirely, and
// thus the RAA blocker has long since completed.
// thus the RAA blocker may have long since completed.
//
// In any other case, the RAA blocker must still be present and
// blocking RAAs.
debug_assert!(
during_init
|| peer_state
.actions_blocking_raa_monitor_updates
.get(&chan_id)
.unwrap()
.contains(&raa_blocker)
);
// However, it's possible that the `ChannelMonitorUpdate` containing
// the preimage never completed and is still pending. In that case,
// we need to re-add the RAA blocker, which we do here. Handling
// the post-update action, below, will remove it again.
//
// In any other case (i.e. not during startup), the RAA blocker
// must still be present and blocking RAAs.
let actions = &mut peer_state.actions_blocking_raa_monitor_updates;
let actions_list = actions.entry(chan_id).or_insert_with(Vec::new);
if !actions_list.contains(&raa_blocker) {
debug_assert!(during_init);
actions_list.push(raa_blocker);
}
}
let action = if let Some(action) = action_opt {
action
} else {
return;
};

// If there are monitor updates in flight, we may be in the case
// described above, replaying a claim on startup whose RAA blocker
// needs to remain in place. Thus, in such a case we simply push the
// post-update action onto the blocked list and move on.
let in_flight_mons = peer_state.in_flight_monitor_updates.get(&chan_id);
if in_flight_mons.map(|(_, mons)| !mons.is_empty()).unwrap_or(false) {
peer_state
.monitor_update_blocked_actions
.entry(chan_id)
.or_insert_with(Vec::new)
.push(action);
return;
}

mem::drop(peer_state_opt);

log_trace!(logger, "Completing monitor update completion action for channel {} as claim was redundant: {:?}",
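
Taken together, the new branches implement a small decision procedure for claims replayed from a `ChannelMonitor`: re-add the RAA blocker if it's missing (only possible during init), and if a monitor update is still in flight, park the post-update action rather than running it. Below is a minimal, self-contained Rust sketch of that flow; `PeerState`, `RaaBlocker`, and `PostUpdateAction` here are simplified stand-ins invented for illustration, not LDK's actual types.

use std::collections::HashMap;

type ChannelId = u64;

#[derive(Clone, Debug, PartialEq)]
struct RaaBlocker(u64);

#[derive(Clone, Debug)]
struct PostUpdateAction(&'static str);

#[derive(Default)]
struct PeerState {
    // Blockers keeping us from releasing RAAs on each channel.
    actions_blocking_raa_monitor_updates: HashMap<ChannelId, Vec<RaaBlocker>>,
    // Actions to run once a channel's pending monitor updates complete.
    monitor_update_blocked_actions: HashMap<ChannelId, Vec<PostUpdateAction>>,
    // Monitor update ids currently in flight, per channel.
    in_flight_monitor_updates: HashMap<ChannelId, Vec<u64>>,
}

fn handle_replayed_claim(
    peer_state: &mut PeerState,
    chan_id: ChannelId,
    raa_blocker: RaaBlocker,
    action: Option<PostUpdateAction>,
    during_init: bool,
) {
    // Re-add the RAA blocker if it's missing, which can only happen when a claim is
    // replayed on startup while its monitor update is still pending.
    let blockers = peer_state
        .actions_blocking_raa_monitor_updates
        .entry(chan_id)
        .or_insert_with(Vec::new);
    if !blockers.contains(&raa_blocker) {
        debug_assert!(during_init);
        blockers.push(raa_blocker);
    }

    let Some(action) = action else { return };

    // If a monitor update is in flight, park the post-update action; it will run (and
    // remove the blocker) when the update completes.
    let update_in_flight = peer_state
        .in_flight_monitor_updates
        .get(&chan_id)
        .map_or(false, |updates| !updates.is_empty());
    if update_in_flight {
        peer_state
            .monitor_update_blocked_actions
            .entry(chan_id)
            .or_insert_with(Vec::new)
            .push(action);
        return;
    }

    // Otherwise the claim was redundant; complete the action immediately.
    println!("claim was redundant; completing {:?} for channel {} now", action, chan_id);
}

fn main() {
    let mut peer_state = PeerState::default();
    // Simulate a startup replay with an update still in flight on channel 7.
    peer_state.in_flight_monitor_updates.insert(7, vec![1]);
    let action = Some(PostUpdateAction("unblock-raa"));
    handle_replayed_claim(&mut peer_state, 7, RaaBlocker(42), action, true);
    // The blocker was re-added and the action was parked, not run.
    assert_eq!(peer_state.actions_blocking_raa_monitor_updates[&7].len(), 1);
    assert_eq!(peer_state.monitor_update_blocked_actions[&7].len(), 1);
}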