1
1
use std:: sync:: atomic:: Ordering ;
2
- use std:: thread;
3
2
use std:: time:: Duration ;
3
+ use std:: { io, thread} ;
4
4
5
5
use hashbrown:: HashMap ;
6
6
use http:: Uri ;
@@ -13,13 +13,15 @@ use crate::crawler::types::{
13
13
Command , CommandSender , Config , LocalId , Status , StatusReceiver , WorkerId ,
14
14
} ;
15
15
use crate :: crawler:: worker:: { Worker , WorkerError } ;
16
- use crate :: types:: { Cursor , MessageSender , RequestCrawlReceiver } ;
16
+ use crate :: types:: { MessageSender , RequestCrawlReceiver } ;
17
17
18
18
const CAPACITY : usize = 1024 ;
19
19
const SLEEP : Duration = Duration :: from_millis ( 10 ) ;
20
20
21
21
#[ derive( Debug , Error ) ]
22
22
pub enum ManagerError {
23
+ #[ error( "spawn error: {0}" ) ]
24
+ SpawnError ( #[ from] io:: Error ) ,
23
25
#[ error( "worker error: {0}" ) ]
24
26
WorkerError ( #[ from] WorkerError ) ,
25
27
#[ error( "rtrb error: {0}" ) ]
@@ -50,16 +52,18 @@ impl Manager {
50
52
let ( status_tx, status_rx) =
51
53
magnetic:: mpsc:: mpsc_queue ( DynamicBufferP2 :: new ( CAPACITY ) . unwrap ( ) ) ;
52
54
let workers = ( 0 ..n_workers)
53
- . map ( |worker_id| {
55
+ . map ( |worker_id| -> Result < _ , ManagerError > {
54
56
let message_tx = message_tx. clone ( ) ;
55
57
let status_tx = status_tx. clone ( ) ;
56
58
let ( command_tx, command_rx) = rtrb:: RingBuffer :: new ( CAPACITY ) ;
57
- let thread_handle = thread:: spawn ( move || {
58
- Worker :: new ( WorkerId ( worker_id) , message_tx, status_tx, command_rx) . run ( )
59
- } ) ;
60
- WorkerHandle { configs : Vec :: new ( ) , command_tx, thread_handle }
59
+ let thread_handle = thread:: Builder :: new ( )
60
+ . name ( format ! ( "rsky-crawl-{worker_id}" ) )
61
+ . spawn ( move || {
62
+ Worker :: new ( WorkerId ( worker_id) , message_tx, status_tx, command_rx) . run ( )
63
+ } ) ?;
64
+ Ok ( WorkerHandle { configs : Vec :: new ( ) , command_tx, thread_handle } )
61
65
} )
62
- . collect :: < Vec < _ > > ( ) ;
66
+ . collect :: < Result < Vec < _ > , _ > > ( ) ? ;
63
67
Ok ( Self {
64
68
workers : workers. into_boxed_slice ( ) ,
65
69
next_id : WorkerId ( 0 ) ,
@@ -73,6 +77,8 @@ impl Manager {
73
77
while self . update ( ) ? {
74
78
thread:: sleep ( SLEEP ) ;
75
79
}
80
+ tracing:: info!( "shutting down crawler" ) ;
81
+ SHUTDOWN . store ( true , Ordering :: Relaxed ) ;
76
82
self . shutdown ( )
77
83
}
78
84
@@ -88,8 +94,7 @@ impl Manager {
88
94
Ok ( ( ) )
89
95
}
90
96
91
- fn handle_status ( & mut self , status : Status ) -> Result < bool , ManagerError > {
92
- match status { }
97
+ fn handle_status ( & mut self , _status : Status ) -> Result < bool , ManagerError > {
93
98
Ok ( true )
94
99
}
95
100
@@ -108,7 +113,7 @@ impl Manager {
108
113
if !self . configs . contains_key ( & request_crawl. uri ) {
109
114
let config = Config {
110
115
uri : request_crawl. uri . clone ( ) ,
111
- cursor : Cursor ( 0 ) ,
116
+ hostname : request_crawl . hostname . clone ( ) ,
112
117
worker_id : self . next_id ,
113
118
local_id : LocalId ( self . workers [ self . next_id . 0 ] . configs . len ( ) ) ,
114
119
} ;
0 commit comments