Skip to content

Commit 2c9934c

Browse files
committed
Re-fail perm-failed HTLCs on startup in case of MonitorEvent loss
`MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets persisted (e.g. due to a block update), and then the node crashes prior to persisting the `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManager` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue. Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. In a previous commit we handled the case of claimed HTLCs by replaying payment preimages on startup to avoid `MonitorEvent` loss causing us to miss an HTLC claim. Here we handle the HTLC-failed case similarly. Unlike with HTLC claims via preimage, we don't already have replay logic in `ChannelManager` startup, but it's easy enough to add one. Luckily, we already track when an HTLC reaches permanently-failed state in `ChannelMonitor` (i.e. it has `ANTI_REORG_DELAY` confirmations on-chain on the failing transaction), so all we need to do is add the ability to query for that and fail them on `ChannelManager` startup.
1 parent 9b604ab commit 2c9934c

File tree

3 files changed

+389
-9
lines changed

3 files changed

+389
-9
lines changed

lightning/src/chain/channelmonitor.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3034,6 +3034,125 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitor<Signer> {
30343034
res
30353035
}
30363036

3037+
/// Gets the set of outbound HTLCs which hit the chain and ultimately were claimed by us via
3038+
/// the timeout path and reached [`ANTI_REORG_DELAY`] confirmations. This is used to determine
3039+
/// if an HTLC has failed without the `ChannelManager` having seen it prior to being persisted.
3040+
pub(crate) fn get_onchain_failed_outbound_htlcs(&self) -> HashMap<HTLCSource, PaymentHash> {
3041+
let mut res = new_hash_map();
3042+
let us = self.inner.lock().unwrap();
3043+
3044+
// We only want HTLCs with ANTI_REORG_DELAY confirmations, which implies the commitment
3045+
// transaction has least ANTI_REORG_DELAY confirmations for any dependent HTLC transactions
3046+
// to have been confirmed.
3047+
let confirmed_txid = us.funding_spend_confirmed.or_else(|| {
3048+
us.onchain_events_awaiting_threshold_conf.iter().find_map(|event| {
3049+
if let OnchainEvent::FundingSpendConfirmation { .. } = event.event {
3050+
if event.height <= us.best_block.height - ANTI_REORG_DELAY + 1 {
3051+
Some(event.txid)
3052+
} else {
3053+
None
3054+
}
3055+
} else {
3056+
None
3057+
}
3058+
})
3059+
});
3060+
3061+
let confirmed_txid = if let Some(txid) = confirmed_txid {
3062+
txid
3063+
} else {
3064+
return res;
3065+
};
3066+
3067+
macro_rules! walk_htlcs {
3068+
($holder_commitment: expr, $htlc_iter: expr) => {
3069+
let mut walk_candidate_htlcs = |htlcs| {
3070+
for &(ref candidate_htlc, ref candidate_source) in htlcs {
3071+
let candidate_htlc: &HTLCOutputInCommitment = &candidate_htlc;
3072+
let candidate_source: &Option<Box<HTLCSource>> = &candidate_source;
3073+
3074+
let source: &HTLCSource = if let Some(source) = candidate_source {
3075+
source
3076+
} else {
3077+
continue;
3078+
};
3079+
let confirmed = $htlc_iter.find(|(_, conf_src)| Some(source) == *conf_src);
3080+
if let Some((confirmed_htlc, _)) = confirmed {
3081+
let filter = |v: &&IrrevocablyResolvedHTLC| {
3082+
v.commitment_tx_output_idx
3083+
== confirmed_htlc.transaction_output_index
3084+
};
3085+
3086+
// The HTLC was included in the confirmed commitment transaction, so we
3087+
// need to see if it has been irrevocably failed yet.
3088+
if confirmed_htlc.transaction_output_index.is_none() {
3089+
// Dust HTLCs are always implicitly failed once the commitment
3090+
// transaction reaches ANTI_REORG_DELAY confirmations.
3091+
res.insert(source.clone(), confirmed_htlc.payment_hash);
3092+
} else if let Some(state) =
3093+
us.htlcs_resolved_on_chain.iter().filter(filter).next()
3094+
{
3095+
if state.payment_preimage.is_none() {
3096+
res.insert(source.clone(), confirmed_htlc.payment_hash);
3097+
}
3098+
}
3099+
} else {
3100+
// The HTLC was not included in the confirmed commitment transaction,
3101+
// which has now reached ANTI_REORG_DELAY confirmations and thus the
3102+
// HTLC has been failed.
3103+
res.insert(source.clone(), candidate_htlc.payment_hash);
3104+
}
3105+
}
3106+
};
3107+
3108+
// We walk the set of HTLCs in the unrevoked counterparty commitment transactions (see
3109+
// `fail_unbroadcast_htlcs` for a description of why).
3110+
if let Some(ref txid) = us.funding.current_counterparty_commitment_txid {
3111+
if let Some(htlcs) = us.funding.counterparty_claimable_outpoints.get(txid) {
3112+
walk_candidate_htlcs(htlcs);
3113+
}
3114+
}
3115+
if let Some(ref txid) = us.funding.prev_counterparty_commitment_txid {
3116+
if let Some(htlcs) = us.funding.counterparty_claimable_outpoints.get(txid) {
3117+
walk_candidate_htlcs(htlcs);
3118+
}
3119+
}
3120+
};
3121+
}
3122+
3123+
let funding = get_confirmed_funding_scope!(us);
3124+
3125+
if Some(confirmed_txid) == funding.current_counterparty_commitment_txid
3126+
|| Some(confirmed_txid) == funding.prev_counterparty_commitment_txid
3127+
{
3128+
let htlcs = funding.counterparty_claimable_outpoints.get(&confirmed_txid).unwrap();
3129+
walk_htlcs!(
3130+
false,
3131+
htlcs.iter().filter_map(|(a, b)| {
3132+
if let &Some(ref source) = b {
3133+
Some((a, Some(&**source)))
3134+
} else {
3135+
None
3136+
}
3137+
})
3138+
);
3139+
} else if confirmed_txid == funding.current_holder_commitment_tx.trust().txid() {
3140+
walk_htlcs!(true, holder_commitment_htlcs!(us, CURRENT_WITH_SOURCES));
3141+
} else if let Some(prev_commitment_tx) = &funding.prev_holder_commitment_tx {
3142+
if confirmed_txid == prev_commitment_tx.trust().txid() {
3143+
walk_htlcs!(true, holder_commitment_htlcs!(us, PREV_WITH_SOURCES).unwrap());
3144+
} else {
3145+
let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[];
3146+
walk_htlcs!(false, htlcs_confirmed.iter());
3147+
}
3148+
} else {
3149+
let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[];
3150+
walk_htlcs!(false, htlcs_confirmed.iter());
3151+
}
3152+
3153+
res
3154+
}
3155+
30373156
/// Gets the set of outbound HTLCs which are pending resolution in this channel or which were
30383157
/// resolved with a preimage from our counterparty.
30393158
///

lightning/src/ln/channelmanager.rs

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15804,7 +15804,7 @@ where
1580415804
log_error!(logger, " The ChannelMonitor for channel {} is at counterparty commitment transaction number {} but the ChannelManager is at counterparty commitment transaction number {}.",
1580515805
&channel.context.channel_id(), monitor.get_cur_counterparty_commitment_number(), channel.get_cur_counterparty_commitment_transaction_number());
1580615806
}
15807-
let mut shutdown_result =
15807+
let shutdown_result =
1580815808
channel.force_shutdown(ClosureReason::OutdatedChannelManager);
1580915809
if shutdown_result.unbroadcasted_batch_funding_txid.is_some() {
1581015810
return Err(DecodeError::InvalidValue);
@@ -15836,7 +15836,10 @@ where
1583615836
},
1583715837
);
1583815838
}
15839-
failed_htlcs.append(&mut shutdown_result.dropped_outbound_htlcs);
15839+
for (source, hash, cp_id, chan_id) in shutdown_result.dropped_outbound_htlcs {
15840+
let reason = LocalHTLCFailureReason::ChannelClosed;
15841+
failed_htlcs.push((source, hash, cp_id, chan_id, reason));
15842+
}
1584015843
channel_closures.push_back((
1584115844
events::Event::ChannelClosed {
1584215845
channel_id: channel.context.channel_id(),
@@ -15878,6 +15881,7 @@ where
1587815881
*payment_hash,
1587915882
channel.context.get_counterparty_node_id(),
1588015883
channel.context.channel_id(),
15884+
LocalHTLCFailureReason::ChannelClosed,
1588115885
));
1588215886
}
1588315887
}
@@ -16602,6 +16606,20 @@ where
1660216606
},
1660316607
}
1660416608
}
16609+
for (htlc_source, payment_hash) in monitor.get_onchain_failed_outbound_htlcs() {
16610+
log_info!(
16611+
args.logger,
16612+
"Failing HTLC with payment hash {} as it was resolved on-chain.",
16613+
payment_hash
16614+
);
16615+
failed_htlcs.push((
16616+
htlc_source,
16617+
payment_hash,
16618+
monitor.get_counterparty_node_id(),
16619+
monitor.channel_id(),
16620+
LocalHTLCFailureReason::OnChainTimeout,
16621+
));
16622+
}
1660516623
}
1660616624

1660716625
// Whether the downstream channel was closed or not, try to re-apply any payment
@@ -17282,13 +17300,10 @@ where
1728217300
}
1728317301
}
1728417302

17285-
for htlc_source in failed_htlcs.drain(..) {
17286-
let (source, payment_hash, counterparty_node_id, channel_id) = htlc_source;
17287-
let failure_reason = LocalHTLCFailureReason::ChannelClosed;
17288-
let receiver = HTLCHandlingFailureType::Forward {
17289-
node_id: Some(counterparty_node_id),
17290-
channel_id,
17291-
};
17303+
for htlc_source in failed_htlcs {
17304+
let (source, payment_hash, counterparty_id, channel_id, failure_reason) = htlc_source;
17305+
let receiver =
17306+
HTLCHandlingFailureType::Forward { node_id: Some(counterparty_id), channel_id };
1729217307
let reason = HTLCFailReason::from_failure_code(failure_reason);
1729317308
channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver);
1729417309
}

0 commit comments

Comments
 (0)