Skip to content

Commit cc9db83

Browse files
committed
Re-fail perm-failed HTLCs on startup in case of MonitorEvent loss
`MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets persisted (i.e. due to a block update), then the node crashes prior to persisting the `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManager` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue. Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. In a previous commit we handled the case of claimed HTLCs by replaying payment preimages on startup to avoid `MonitorEvent` loss causing us to miss an HTLC claim. Here we handle the HTLC-failed case similarly. Unlike with HTLC claims via preimage, we don't already have replay logic in `ChannelManager` startup, but it's easy enough to add one. Luckily, we already track when an HTLC reaches permanently-failed state in `ChannelMonitor` (i.e. it has `ANTI_REORG_DELAY` confirmations on-chain on the failing transaction), so all we need to do is add the ability to query for that and fail them on `ChannelManager` startup.
1 parent 18e529f commit cc9db83

File tree

3 files changed

+389
-9
lines changed

3 files changed

+389
-9
lines changed

lightning/src/chain/channelmonitor.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2983,6 +2983,125 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitor<Signer> {
29832983
res
29842984
}
29852985

2986+
/// Gets the set of outbound HTLCs which hit the chain and ultimately were claimed by us via
2987+
/// the timeout path and reached [`ANTI_REORG_DELAY`] confirmations. This is used to determine
2988+
/// if an HTLC has failed without the `ChannelManager` having seen it prior to being persisted.
2989+
pub(crate) fn get_onchain_failed_outbound_htlcs(&self) -> HashMap<HTLCSource, PaymentHash> {
2990+
let mut res = new_hash_map();
2991+
let us = self.inner.lock().unwrap();
2992+
2993+
// We only want HTLCs with ANTI_REORG_DELAY confirmations, which implies the commitment
2994+
// transaction has least ANTI_REORG_DELAY confirmations for any dependent HTLC transactions
2995+
// to have been confirmed.
2996+
let confirmed_txid = us.funding_spend_confirmed.or_else(|| {
2997+
us.onchain_events_awaiting_threshold_conf.iter().find_map(|event| {
2998+
if let OnchainEvent::FundingSpendConfirmation { .. } = event.event {
2999+
if event.height <= us.best_block.height - ANTI_REORG_DELAY + 1 {
3000+
Some(event.txid)
3001+
} else {
3002+
None
3003+
}
3004+
} else {
3005+
None
3006+
}
3007+
})
3008+
});
3009+
3010+
let confirmed_txid = if let Some(txid) = confirmed_txid {
3011+
txid
3012+
} else {
3013+
return res;
3014+
};
3015+
3016+
macro_rules! walk_htlcs {
3017+
($holder_commitment: expr, $htlc_iter: expr) => {
3018+
let mut walk_candidate_htlcs = |htlcs| {
3019+
for &(ref candidate_htlc, ref candidate_source) in htlcs {
3020+
let candidate_htlc: &HTLCOutputInCommitment = &candidate_htlc;
3021+
let candidate_source: &Option<Box<HTLCSource>> = &candidate_source;
3022+
3023+
let source: &HTLCSource = if let Some(source) = candidate_source {
3024+
source
3025+
} else {
3026+
continue;
3027+
};
3028+
let confirmed = $htlc_iter.find(|(_, conf_src)| Some(source) == *conf_src);
3029+
if let Some((confirmed_htlc, _)) = confirmed {
3030+
let filter = |v: &&IrrevocablyResolvedHTLC| {
3031+
v.commitment_tx_output_idx
3032+
== confirmed_htlc.transaction_output_index
3033+
};
3034+
3035+
// The HTLC was included in the confirmed commitment transaction, so we
3036+
// need to see if it has been irrevocably failed yet.
3037+
if confirmed_htlc.transaction_output_index.is_none() {
3038+
// Dust HTLCs are always implicitly failed once the commitment
3039+
// transaction reaches ANTI_REORG_DELAY confirmations.
3040+
res.insert(source.clone(), confirmed_htlc.payment_hash);
3041+
} else if let Some(state) =
3042+
us.htlcs_resolved_on_chain.iter().filter(filter).next()
3043+
{
3044+
if state.payment_preimage.is_none() {
3045+
res.insert(source.clone(), confirmed_htlc.payment_hash);
3046+
}
3047+
}
3048+
} else {
3049+
// The HTLC was not included in the confirmed commitment transaction,
3050+
// which has now reached ANTI_REORG_DELAY confirmations and thus the
3051+
// HTLC has been failed.
3052+
res.insert(source.clone(), candidate_htlc.payment_hash);
3053+
}
3054+
}
3055+
};
3056+
3057+
// We walk the set of HTLCs in the unrevoked counterparty commitment transactions (see
3058+
// `fail_unbroadcast_htlcs` for a description of why).
3059+
if let Some(ref txid) = us.funding.current_counterparty_commitment_txid {
3060+
if let Some(htlcs) = us.funding.counterparty_claimable_outpoints.get(txid) {
3061+
walk_candidate_htlcs(htlcs);
3062+
}
3063+
}
3064+
if let Some(ref txid) = us.funding.prev_counterparty_commitment_txid {
3065+
if let Some(htlcs) = us.funding.counterparty_claimable_outpoints.get(txid) {
3066+
walk_candidate_htlcs(htlcs);
3067+
}
3068+
}
3069+
};
3070+
}
3071+
3072+
let funding = get_confirmed_funding_scope!(us);
3073+
3074+
if Some(confirmed_txid) == funding.current_counterparty_commitment_txid
3075+
|| Some(confirmed_txid) == funding.prev_counterparty_commitment_txid
3076+
{
3077+
let htlcs = funding.counterparty_claimable_outpoints.get(&confirmed_txid).unwrap();
3078+
walk_htlcs!(
3079+
false,
3080+
htlcs.iter().filter_map(|(a, b)| {
3081+
if let &Some(ref source) = b {
3082+
Some((a, Some(&**source)))
3083+
} else {
3084+
None
3085+
}
3086+
})
3087+
);
3088+
} else if confirmed_txid == funding.current_holder_commitment_tx.trust().txid() {
3089+
walk_htlcs!(true, holder_commitment_htlcs!(us, CURRENT_WITH_SOURCES));
3090+
} else if let Some(prev_commitment_tx) = &funding.prev_holder_commitment_tx {
3091+
if confirmed_txid == prev_commitment_tx.trust().txid() {
3092+
walk_htlcs!(true, holder_commitment_htlcs!(us, PREV_WITH_SOURCES).unwrap());
3093+
} else {
3094+
let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[];
3095+
walk_htlcs!(false, htlcs_confirmed.iter());
3096+
}
3097+
} else {
3098+
let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[];
3099+
walk_htlcs!(false, htlcs_confirmed.iter());
3100+
}
3101+
3102+
res
3103+
}
3104+
29863105
/// Gets the set of outbound HTLCs which are pending resolution in this channel or which were
29873106
/// resolved with a preimage from our counterparty.
29883107
///

lightning/src/ln/channelmanager.rs

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15739,7 +15739,7 @@ where
1573915739
log_error!(logger, " The ChannelMonitor for channel {} is at counterparty commitment transaction number {} but the ChannelManager is at counterparty commitment transaction number {}.",
1574015740
&channel.context.channel_id(), monitor.get_cur_counterparty_commitment_number(), channel.get_cur_counterparty_commitment_transaction_number());
1574115741
}
15742-
let mut shutdown_result =
15742+
let shutdown_result =
1574315743
channel.force_shutdown(ClosureReason::OutdatedChannelManager);
1574415744
if shutdown_result.unbroadcasted_batch_funding_txid.is_some() {
1574515745
return Err(DecodeError::InvalidValue);
@@ -15771,7 +15771,10 @@ where
1577115771
},
1577215772
);
1577315773
}
15774-
failed_htlcs.append(&mut shutdown_result.dropped_outbound_htlcs);
15774+
for (source, hash, cp_id, chan_id) in shutdown_result.dropped_outbound_htlcs {
15775+
let reason = LocalHTLCFailureReason::ChannelClosed;
15776+
failed_htlcs.push((source, hash, cp_id, chan_id, reason));
15777+
}
1577515778
channel_closures.push_back((
1577615779
events::Event::ChannelClosed {
1577715780
channel_id: channel.context.channel_id(),
@@ -15813,6 +15816,7 @@ where
1581315816
*payment_hash,
1581415817
channel.context.get_counterparty_node_id(),
1581515818
channel.context.channel_id(),
15819+
LocalHTLCFailureReason::ChannelClosed,
1581615820
));
1581715821
}
1581815822
}
@@ -16537,6 +16541,20 @@ where
1653716541
},
1653816542
}
1653916543
}
16544+
for (htlc_source, payment_hash) in monitor.get_onchain_failed_outbound_htlcs() {
16545+
log_info!(
16546+
args.logger,
16547+
"Failing HTLC with payment hash {} as it was resolved on-chain.",
16548+
payment_hash
16549+
);
16550+
failed_htlcs.push((
16551+
htlc_source,
16552+
payment_hash,
16553+
monitor.get_counterparty_node_id(),
16554+
monitor.channel_id(),
16555+
LocalHTLCFailureReason::OnChainTimeout,
16556+
));
16557+
}
1654016558
}
1654116559

1654216560
// Whether the downstream channel was closed or not, try to re-apply any payment
@@ -17217,13 +17235,10 @@ where
1721717235
}
1721817236
}
1721917237

17220-
for htlc_source in failed_htlcs.drain(..) {
17221-
let (source, payment_hash, counterparty_node_id, channel_id) = htlc_source;
17222-
let failure_reason = LocalHTLCFailureReason::ChannelClosed;
17223-
let receiver = HTLCHandlingFailureType::Forward {
17224-
node_id: Some(counterparty_node_id),
17225-
channel_id,
17226-
};
17238+
for htlc_source in failed_htlcs {
17239+
let (source, payment_hash, counterparty_id, channel_id, failure_reason) = htlc_source;
17240+
let receiver =
17241+
HTLCHandlingFailureType::Forward { node_id: Some(counterparty_id), channel_id };
1722717242
let reason = HTLCFailReason::from_failure_code(failure_reason);
1722817243
channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver);
1722917244
}

0 commit comments

Comments
 (0)