1
+ use std:: sync:: atomic:: Ordering ;
1
2
use std:: thread;
2
3
3
4
use hashbrown:: HashMap ;
@@ -6,11 +7,12 @@ use magnetic::Consumer;
6
7
use magnetic:: buffer:: dynamic:: DynamicBufferP2 ;
7
8
use thiserror:: Error ;
8
9
9
- use crate :: client:: types:: {
10
+ use crate :: SHUTDOWN ;
11
+ use crate :: crawler:: types:: {
10
12
Command , CommandSender , Config , LocalId , Status , StatusReceiver , WorkerId ,
11
13
} ;
12
- use crate :: client :: worker:: { Worker , WorkerError } ;
13
- use crate :: types:: { CrawlRequestReceiver , Cursor , MessageSender } ;
14
+ use crate :: crawler :: worker:: { Worker , WorkerError } ;
15
+ use crate :: types:: { Cursor , MessageSender , RequestCrawlReceiver } ;
14
16
15
17
const CAPACITY : usize = 1024 ;
16
18
@@ -28,20 +30,20 @@ pub enum ManagerError {
28
30
struct WorkerHandle {
29
31
pub configs : Vec < Config > ,
30
32
pub command_tx : CommandSender ,
31
- pub thread_handle : thread:: JoinHandle < ( ) > ,
33
+ pub thread_handle : thread:: JoinHandle < Result < ( ) , WorkerError > > ,
32
34
}
33
35
34
36
pub struct Manager {
35
37
workers : Box < [ WorkerHandle ] > ,
36
38
next_id : WorkerId ,
37
39
configs : HashMap < Uri , Config > ,
38
40
status_rx : StatusReceiver ,
39
- crawl_request_rx : CrawlRequestReceiver ,
41
+ request_crawl_rx : RequestCrawlReceiver ,
40
42
}
41
43
42
44
impl Manager {
43
45
pub fn new (
44
- n_workers : usize , message_tx : MessageSender , crawl_request_rx : CrawlRequestReceiver ,
46
+ n_workers : usize , message_tx : MessageSender , request_crawl_rx : RequestCrawlReceiver ,
45
47
) -> Result < Self , ManagerError > {
46
48
let ( status_tx, status_rx) =
47
49
magnetic:: mpsc:: mpsc_queue ( DynamicBufferP2 :: new ( CAPACITY ) . unwrap ( ) ) ;
@@ -51,7 +53,7 @@ impl Manager {
51
53
let status_tx = status_tx. clone ( ) ;
52
54
let ( command_tx, command_rx) = rtrb:: RingBuffer :: new ( CAPACITY ) ;
53
55
let thread_handle = thread:: spawn ( move || {
54
- Worker :: new ( WorkerId ( worker_id) , message_tx, status_tx, command_rx) . run ( ) ;
56
+ Worker :: new ( WorkerId ( worker_id) , message_tx, status_tx, command_rx) . run ( )
55
57
} ) ;
56
58
WorkerHandle { configs : Vec :: new ( ) , command_tx, thread_handle }
57
59
} )
@@ -61,7 +63,7 @@ impl Manager {
61
63
next_id : WorkerId ( 0 ) ,
62
64
configs : HashMap :: new ( ) ,
63
65
status_rx,
64
- crawl_request_rx ,
66
+ request_crawl_rx ,
65
67
} )
66
68
}
67
69
@@ -74,8 +76,10 @@ impl Manager {
74
76
for worker in & mut self . workers {
75
77
worker. command_tx . push ( Command :: Shutdown ) ?;
76
78
}
77
- for worker in self . workers {
78
- worker. thread_handle . join ( ) . map_err ( |_| ManagerError :: JoinError ) ?;
79
+ for ( id, worker) in self . workers . into_iter ( ) . enumerate ( ) {
80
+ if let Err ( err) = worker. thread_handle . join ( ) . map_err ( |_| ManagerError :: JoinError ) ? {
81
+ tracing:: warn!( "crawler worker {id} error: {err}" ) ;
82
+ }
79
83
}
80
84
Ok ( ( ) )
81
85
}
@@ -86,22 +90,26 @@ impl Manager {
86
90
}
87
91
88
92
fn update ( & mut self ) -> Result < bool , ManagerError > {
93
+ if SHUTDOWN . load ( Ordering :: Relaxed ) {
94
+ return Ok ( false ) ;
95
+ }
96
+
89
97
if let Ok ( status) = self . status_rx . try_pop ( ) {
90
98
if !self . handle_status ( status) ? {
91
99
return Ok ( false ) ;
92
100
}
93
101
}
94
102
95
- if let Ok ( request ) = self . crawl_request_rx . pop ( ) {
96
- if !self . configs . contains_key ( & request . uri ) {
103
+ if let Ok ( request_crawl ) = self . request_crawl_rx . pop ( ) {
104
+ if !self . configs . contains_key ( & request_crawl . uri ) {
97
105
let config = Config {
98
- uri : request . uri . clone ( ) ,
106
+ uri : request_crawl . uri . clone ( ) ,
99
107
cursor : Cursor ( 0 ) ,
100
108
worker_id : self . next_id ,
101
109
local_id : LocalId ( self . workers [ self . next_id . 0 ] . configs . len ( ) ) ,
102
110
} ;
103
111
self . next_id = WorkerId ( ( self . next_id . 0 + 1 ) % self . workers . len ( ) ) ;
104
- self . configs . insert ( request . uri . clone ( ) , config. clone ( ) ) ;
112
+ self . configs . insert ( request_crawl . uri , config. clone ( ) ) ;
105
113
self . workers [ config. worker_id . 0 ] . command_tx . push ( Command :: Connect ( config) ) . unwrap ( ) ;
106
114
}
107
115
}
0 commit comments