@@ -2,75 +2,61 @@ use std::sync::atomic::Ordering;
2
2
use std:: time:: Duration ;
3
3
use std:: { io, thread} ;
4
4
5
- use hashbrown:: HashMap ;
6
- use http:: Uri ;
7
- use magnetic:: Consumer ;
8
- use magnetic:: buffer:: dynamic:: DynamicBufferP2 ;
9
5
use thiserror:: Error ;
10
6
11
7
use crate :: SHUTDOWN ;
12
- use crate :: crawler:: types:: {
13
- Command , CommandSender , Config , LocalId , Status , StatusReceiver , WorkerId ,
14
- } ;
8
+ use crate :: crawler:: types:: { Command , CommandSender , RequestCrawlReceiver } ;
15
9
use crate :: crawler:: worker:: { Worker , WorkerError } ;
16
- use crate :: types:: { MessageSender , RequestCrawlReceiver } ;
10
+ use crate :: types:: MessageSender ;
17
11
18
12
const CAPACITY : usize = 1024 ;
19
13
const SLEEP : Duration = Duration :: from_millis ( 10 ) ;
20
14
21
15
#[ derive( Debug , Error ) ]
22
16
pub enum ManagerError {
23
17
#[ error( "spawn error: {0}" ) ]
24
- SpawnError ( #[ from] io:: Error ) ,
18
+ Spawn ( #[ from] io:: Error ) ,
25
19
#[ error( "worker error: {0}" ) ]
26
- WorkerError ( #[ from] WorkerError ) ,
20
+ Worker ( #[ from] WorkerError ) ,
27
21
#[ error( "rtrb error: {0}" ) ]
28
- PushError ( #[ from] rtrb:: PushError < Command > ) ,
22
+ Push ( #[ from] Box < rtrb:: PushError < Command > > ) ,
29
23
#[ error( "join error" ) ]
30
- JoinError ,
24
+ Join ,
25
+ }
26
+
27
+ impl From < rtrb:: PushError < Command > > for ManagerError {
28
+ fn from ( value : rtrb:: PushError < Command > ) -> Self {
29
+ Box :: new ( value) . into ( )
30
+ }
31
31
}
32
32
33
33
#[ derive( Debug ) ]
34
34
struct WorkerHandle {
35
- pub configs : Vec < Config > ,
36
35
pub command_tx : CommandSender ,
37
36
pub thread_handle : thread:: JoinHandle < Result < ( ) , WorkerError > > ,
38
37
}
39
38
40
39
pub struct Manager {
41
40
workers : Box < [ WorkerHandle ] > ,
42
- next_id : WorkerId ,
43
- configs : HashMap < Uri , Config > ,
44
- status_rx : StatusReceiver ,
41
+ next_id : usize ,
45
42
request_crawl_rx : RequestCrawlReceiver ,
46
43
}
47
44
48
45
impl Manager {
49
46
pub fn new (
50
- n_workers : usize , message_tx : MessageSender , request_crawl_rx : RequestCrawlReceiver ,
47
+ n_workers : usize , message_tx : & MessageSender , request_crawl_rx : RequestCrawlReceiver ,
51
48
) -> Result < Self , ManagerError > {
52
- let ( status_tx, status_rx) =
53
- magnetic:: mpsc:: mpsc_queue ( DynamicBufferP2 :: new ( CAPACITY ) . unwrap ( ) ) ;
54
49
let workers = ( 0 ..n_workers)
55
50
. map ( |worker_id| -> Result < _ , ManagerError > {
56
51
let message_tx = message_tx. clone ( ) ;
57
- let status_tx = status_tx. clone ( ) ;
58
52
let ( command_tx, command_rx) = rtrb:: RingBuffer :: new ( CAPACITY ) ;
59
53
let thread_handle = thread:: Builder :: new ( )
60
54
. name ( format ! ( "rsky-crawl-{worker_id}" ) )
61
- . spawn ( move || {
62
- Worker :: new ( WorkerId ( worker_id) , message_tx, status_tx, command_rx) . run ( )
63
- } ) ?;
64
- Ok ( WorkerHandle { configs : Vec :: new ( ) , command_tx, thread_handle } )
55
+ . spawn ( move || Worker :: new ( worker_id, message_tx, command_rx) ?. run ( ) ) ?;
56
+ Ok ( WorkerHandle { command_tx, thread_handle } )
65
57
} )
66
58
. collect :: < Result < Vec < _ > , _ > > ( ) ?;
67
- Ok ( Self {
68
- workers : workers. into_boxed_slice ( ) ,
69
- next_id : WorkerId ( 0 ) ,
70
- configs : HashMap :: new ( ) ,
71
- status_rx,
72
- request_crawl_rx,
73
- } )
59
+ Ok ( Self { workers : workers. into_boxed_slice ( ) , next_id : 0 , request_crawl_rx } )
74
60
}
75
61
76
62
pub fn run ( mut self ) -> Result < ( ) , ManagerError > {
@@ -87,40 +73,21 @@ impl Manager {
87
73
worker. command_tx . push ( Command :: Shutdown ) ?;
88
74
}
89
75
for ( id, worker) in self . workers . into_iter ( ) . enumerate ( ) {
90
- if let Err ( err) = worker. thread_handle . join ( ) . map_err ( |_| ManagerError :: JoinError ) ? {
76
+ if let Err ( err) = worker. thread_handle . join ( ) . map_err ( |_| ManagerError :: Join ) ? {
91
77
tracing:: warn!( "crawler worker {id} error: {err}" ) ;
92
78
}
93
79
}
94
80
Ok ( ( ) )
95
81
}
96
82
97
- fn handle_status ( & mut self , _status : Status ) -> Result < bool , ManagerError > {
98
- Ok ( true )
99
- }
100
-
101
83
fn update ( & mut self ) -> Result < bool , ManagerError > {
102
84
if SHUTDOWN . load ( Ordering :: Relaxed ) {
103
85
return Ok ( false ) ;
104
86
}
105
87
106
- if let Ok ( status) = self . status_rx . try_pop ( ) {
107
- if !self . handle_status ( status) ? {
108
- return Ok ( false ) ;
109
- }
110
- }
111
-
112
88
if let Ok ( request_crawl) = self . request_crawl_rx . pop ( ) {
113
- if !self . configs . contains_key ( & request_crawl. uri ) {
114
- let config = Config {
115
- uri : request_crawl. uri . clone ( ) ,
116
- hostname : request_crawl. hostname . clone ( ) ,
117
- worker_id : self . next_id ,
118
- local_id : LocalId ( self . workers [ self . next_id . 0 ] . configs . len ( ) ) ,
119
- } ;
120
- self . next_id = WorkerId ( ( self . next_id . 0 + 1 ) % self . workers . len ( ) ) ;
121
- self . configs . insert ( request_crawl. uri , config. clone ( ) ) ;
122
- self . workers [ config. worker_id . 0 ] . command_tx . push ( Command :: Connect ( config) ) . unwrap ( ) ;
123
- }
89
+ self . workers [ self . next_id ] . command_tx . push ( Command :: Connect ( request_crawl) ) ?;
90
+ self . next_id = ( self . next_id + 1 ) % self . workers . len ( ) ;
124
91
}
125
92
126
93
Ok ( true )
0 commit comments