@@ -19,12 +19,13 @@ use futures::future::{ready, BoxFuture, Ready};
1919use futures:: sink:: With ;
2020use futures:: stream:: { FuturesUnordered , Map , Stream } ;
2121use futures:: { pin_mut, FutureExt , Sink , SinkExt , StreamExt } ;
22- use libp2p:: gossipsub:: { SubscriptionError , TopicHash } ;
22+ use libp2p:: gossipsub:: { MessageId , PublishError , SubscriptionError , TopicHash } ;
2323use libp2p:: identity:: Keypair ;
2424use libp2p:: swarm:: SwarmEvent ;
2525use libp2p:: { noise, yamux, Multiaddr , PeerId , StreamProtocol , Swarm , SwarmBuilder } ;
2626use metrics:: NetworkMetrics ;
2727use tokio:: time:: { sleep_until, Instant } ;
28+ use tokio_retry:: strategy:: ExponentialBackoff ;
2829use tracing:: { debug, error, trace, warn} ;
2930
3031use self :: swarm_trait:: SwarmTrait ;
@@ -43,6 +44,20 @@ pub enum NetworkError {
4344 #[ error( "Channels for broadcast topic with hash {topic_hash:?} were dropped." ) ]
4445 BroadcastChannelsDropped { topic_hash : TopicHash } ,
4546}
47+
48+ struct BroadcastDetails {
49+ /// Instant of next broadcast
50+ time : Instant ,
51+ /// The number of broadcast tries preformed
52+ count : u64 ,
53+ /// The message to broadcast
54+ message : Bytes ,
55+ /// The topic to broadcast on
56+ topic : TopicHash ,
57+ /// exponential backoff strategy for broadcasting the next message.
58+ broadcast_retry_strategy : ExponentialBackoff ,
59+ }
60+
4661pub struct GenericNetworkManager < SwarmT : SwarmTrait > {
4762 swarm : SwarmT ,
4863 inbound_protocol_to_buffer_size : HashMap < StreamProtocol , usize > ,
@@ -64,29 +79,33 @@ pub struct GenericNetworkManager<SwarmT: SwarmTrait> {
6479 continue_propagation_receiver : Receiver < BroadcastedMessageMetadata > ,
6580 metrics : Option < NetworkMetrics > ,
6681 next_metrics_update : Instant ,
82+ /// Next message to broadcast
83+ next_broadcast : Option < BroadcastDetails > ,
6784}
6885
6986impl < SwarmT : SwarmTrait > GenericNetworkManager < SwarmT > {
7087 pub async fn run ( mut self ) -> Result < ( ) , NetworkError > {
7188 if let Some ( metrics) = self . metrics . as_ref ( ) {
7289 metrics. register ( ) ;
7390 }
91+
7492 loop {
93+ let should_broadcast = self . next_broadcast . is_some ( ) ;
94+ let broadcast_time =
95+ self . next_broadcast . as_ref ( ) . map ( |x| x. time ) . unwrap_or ( Instant :: now ( ) ) ;
7596 tokio:: select! {
7697 Some ( event) = self . swarm. next( ) => self . handle_swarm_event( event) ?,
7798 Some ( res) = self . sqmr_inbound_response_receivers. next( ) => self . handle_response_for_inbound_query( res) ,
7899 Some ( ( protocol, client_payload) ) = self . sqmr_outbound_payload_receivers. next( ) => {
79100 let protocol = StreamProtocol :: try_from_owned( protocol) . expect( "Invalid protocol should not appear" ) ;
80101 self . handle_local_sqmr_payload( protocol, client_payload. expect( "An SQMR client channel should not be terminated." ) )
81102 }
82- Some ( ( topic_hash, message) ) = self . messages_to_broadcast_receivers. next( ) => {
83- self . broadcast_message(
84- message. ok_or( NetworkError :: BroadcastChannelsDropped {
85- topic_hash: topic_hash. clone( )
86- } ) ?,
87- topic_hash,
88- ) ;
89- }
103+ Some ( ( topic_hash, message) ) = self . messages_to_broadcast_receivers. next( ) , if !should_broadcast => {
104+ self . setup_broadcast( topic_hash, message) ?;
105+ } ,
106+ _ = sleep_until( broadcast_time) , if should_broadcast => {
107+ self . do_broadcast( ) ;
108+ } ,
90109 Some ( Some ( peer_id) ) = self . reported_peer_receivers. next( ) => self . swarm. report_peer_as_malicious( peer_id, MisconductScore :: MALICIOUS ) ,
91110 Some ( peer_id) = self . reported_peers_receiver. next( ) => self . swarm. report_peer_as_malicious( peer_id, MisconductScore :: MALICIOUS ) ,
92111 Some ( broadcasted_message_metadata) = self . continue_propagation_receiver. next( ) => {
@@ -138,6 +157,7 @@ impl<SwarmT: SwarmTrait> GenericNetworkManager<SwarmT> {
138157 continue_propagation_receiver,
139158 metrics,
140159 next_metrics_update : Instant :: now ( ) + Duration :: from_secs ( 1 ) ,
160+ next_broadcast : None ,
141161 }
142162 }
143163
@@ -274,6 +294,60 @@ impl<SwarmT: SwarmTrait> GenericNetworkManager<SwarmT> {
274294 } )
275295 }
276296
297+ fn setup_broadcast (
298+ & mut self ,
299+ topic_hash : TopicHash ,
300+ message : Option < Bytes > ,
301+ ) -> Result < ( ) , NetworkError > {
302+ let message = message
303+ . ok_or ( NetworkError :: BroadcastChannelsDropped { topic_hash : topic_hash. clone ( ) } ) ?;
304+ self . next_broadcast = Some ( BroadcastDetails {
305+ time : Instant :: now ( ) ,
306+ count : 0 ,
307+ message,
308+ topic : topic_hash,
309+ broadcast_retry_strategy : ExponentialBackoff :: from_millis ( 2 )
310+ . max_delay ( Duration :: from_secs ( 1 ) ) ,
311+ } ) ;
312+ Ok ( ( ) )
313+ }
314+
315+ fn do_broadcast ( & mut self ) {
316+ let mut details =
317+ self . next_broadcast . take ( ) . expect ( "Broadcasting when next broadcast is None" ) ;
318+ details. count += 1 ;
319+ match self . broadcast_message ( details. message . clone ( ) , details. topic . clone ( ) ) {
320+ Ok ( _) => { }
321+ Err ( e) => match & e {
322+ PublishError :: Duplicate
323+ | PublishError :: SigningError ( _)
324+ | PublishError :: MessageTooLarge => {
325+ error ! (
326+ "Failed to broadcast message: `{e:?}` after {} tries Dropping message." ,
327+ details. count
328+ ) ;
329+ }
330+ PublishError :: AllQueuesFull ( _)
331+ | PublishError :: NoPeersSubscribedToTopic
332+ | PublishError :: TransformFailed ( _) => {
333+ let wait_duration = details. broadcast_retry_strategy . next ( ) . expect (
334+ "Broadcast retry strategy ended even though it's an infinite iterator." ,
335+ ) ;
336+ warn ! (
337+ "Failed to broadcast message: `{e:?}` after {} tries. Trying again in {} \
338+ milliseconds. Not reading more messages until then (Applying \
339+ backpressure).",
340+ details. count,
341+ wait_duration. as_millis( )
342+ ) ;
343+
344+ details. time = Instant :: now ( ) + wait_duration;
345+ self . next_broadcast = Some ( details)
346+ }
347+ } ,
348+ }
349+ }
350+
277351 fn handle_swarm_event (
278352 & mut self ,
279353 event : SwarmEvent < mixed_behaviour:: Event > ,
@@ -661,7 +735,11 @@ impl<SwarmT: SwarmTrait> GenericNetworkManager<SwarmT> {
661735 . insert ( outbound_session_id, report_receiver) ;
662736 }
663737
664- fn broadcast_message ( & mut self , message : Bytes , topic_hash : TopicHash ) {
738+ fn broadcast_message (
739+ & mut self ,
740+ message : Bytes ,
741+ topic_hash : TopicHash ,
742+ ) -> Result < MessageId , PublishError > {
665743 if let Some ( broadcast_metrics_by_topic) =
666744 self . metrics . as_ref ( ) . and_then ( |metrics| metrics. broadcast_metrics_by_topic . as_ref ( ) )
667745 {
@@ -673,7 +751,7 @@ impl<SwarmT: SwarmTrait> GenericNetworkManager<SwarmT> {
673751 }
674752 }
675753 trace ! ( "Sending broadcast message with topic hash: {topic_hash:?}" ) ;
676- self . swarm . broadcast_message ( message, topic_hash) ;
754+ self . swarm . broadcast_message ( message, topic_hash)
677755 }
678756
679757 fn report_session_removed_to_metrics ( & mut self , session_id : SessionId ) {
0 commit comments