|
| 1 | +use std::thread; |
| 2 | + |
| 3 | +use hashbrown::HashMap; |
| 4 | +use http::Uri; |
| 5 | +use magnetic::Consumer; |
| 6 | +use magnetic::buffer::dynamic::DynamicBufferP2; |
| 7 | +use thiserror::Error; |
| 8 | + |
| 9 | +use crate::client::types::{ |
| 10 | + Command, CommandSender, Config, LocalId, Status, StatusReceiver, WorkerId, |
| 11 | +}; |
| 12 | +use crate::client::worker::{Worker, WorkerError}; |
| 13 | +use crate::types::{CrawlRequestReceiver, Cursor, MessageSender}; |
| 14 | + |
// Capacity of every queue created by the manager: the shared status MPSC
// queue and each worker's command ring buffer. Presumably must be a power
// of two for `DynamicBufferP2` ("P2") — confirm against the magnetic docs.
const CAPACITY: usize = 1024;
| 16 | + |
/// Errors surfaced by the [`Manager`] lifecycle (`run` / `shutdown`).
#[derive(Debug, Error)]
pub enum ManagerError {
    /// A worker reported a failure.
    #[error("worker error: {0}")]
    WorkerError(#[from] WorkerError),
    /// Pushing a command onto a worker's rtrb ring buffer failed
    /// (the fixed-capacity queue was full).
    #[error("rtrb error: {0}")]
    PushError(#[from] rtrb::PushError<Command>),
    /// Joining a worker thread failed — `JoinHandle::join` only errors
    /// when the child thread panicked.
    #[error("join error")]
    JoinError,
}
| 26 | + |
/// Manager-side bookkeeping for one spawned worker thread.
#[derive(Debug)]
struct WorkerHandle {
    // Configs assigned to this worker; its `len()` is used to mint the next
    // `LocalId`. NOTE(review): nothing in this file ever pushes into this
    // Vec, so the len stays 0 — confirm whether it is populated elsewhere.
    pub configs: Vec<Config>,
    // Producer half of this worker's SPSC command ring buffer.
    pub command_tx: CommandSender,
    // Handle used to join the worker thread during shutdown.
    pub thread_handle: thread::JoinHandle<()>,
}
| 33 | + |
/// Owns a pool of worker threads and distributes incoming crawl requests
/// to them round-robin, one config per distinct URI.
pub struct Manager {
    // One handle per spawned worker; fixed size after construction.
    workers: Box<[WorkerHandle]>,
    // Worker that will receive the next new crawl request (round-robin).
    next_id: WorkerId,
    // Config already created for each URI; used to deduplicate requests.
    configs: HashMap<Uri, Config>,
    // Status updates pushed by all workers (MPSC consumer side).
    status_rx: StatusReceiver,
    // Incoming crawl requests from the rest of the application.
    crawl_request_rx: CrawlRequestReceiver,
}
| 41 | + |
| 42 | +impl Manager { |
| 43 | + pub fn new( |
| 44 | + n_workers: usize, message_tx: MessageSender, crawl_request_rx: CrawlRequestReceiver, |
| 45 | + ) -> Result<Self, ManagerError> { |
| 46 | + let (status_tx, status_rx) = |
| 47 | + magnetic::mpsc::mpsc_queue(DynamicBufferP2::new(CAPACITY).unwrap()); |
| 48 | + let workers = (0..n_workers) |
| 49 | + .map(|worker_id| { |
| 50 | + let message_tx = message_tx.clone(); |
| 51 | + let status_tx = status_tx.clone(); |
| 52 | + let (command_tx, command_rx) = rtrb::RingBuffer::new(CAPACITY); |
| 53 | + let thread_handle = thread::spawn(move || { |
| 54 | + Worker::new(WorkerId(worker_id), message_tx, status_tx, command_rx).run(); |
| 55 | + }); |
| 56 | + WorkerHandle { configs: Vec::new(), command_tx, thread_handle } |
| 57 | + }) |
| 58 | + .collect::<Vec<_>>(); |
| 59 | + Ok(Self { |
| 60 | + workers: workers.into_boxed_slice(), |
| 61 | + next_id: WorkerId(0), |
| 62 | + configs: HashMap::new(), |
| 63 | + status_rx, |
| 64 | + crawl_request_rx, |
| 65 | + }) |
| 66 | + } |
| 67 | + |
| 68 | + pub fn run(mut self) -> Result<(), ManagerError> { |
| 69 | + while self.update()? {} |
| 70 | + self.shutdown() |
| 71 | + } |
| 72 | + |
| 73 | + pub fn shutdown(mut self) -> Result<(), ManagerError> { |
| 74 | + for worker in &mut self.workers { |
| 75 | + worker.command_tx.push(Command::Shutdown)?; |
| 76 | + } |
| 77 | + for worker in self.workers { |
| 78 | + worker.thread_handle.join().map_err(|_| ManagerError::JoinError)?; |
| 79 | + } |
| 80 | + Ok(()) |
| 81 | + } |
| 82 | + |
| 83 | + fn handle_status(&mut self, status: Status) -> Result<bool, ManagerError> { |
| 84 | + match status {} |
| 85 | + Ok(true) |
| 86 | + } |
| 87 | + |
| 88 | + fn update(&mut self) -> Result<bool, ManagerError> { |
| 89 | + if let Ok(status) = self.status_rx.try_pop() { |
| 90 | + if !self.handle_status(status)? { |
| 91 | + return Ok(false); |
| 92 | + } |
| 93 | + } |
| 94 | + |
| 95 | + if let Ok(request) = self.crawl_request_rx.pop() { |
| 96 | + if !self.configs.contains_key(&request.uri) { |
| 97 | + let config = Config { |
| 98 | + uri: request.uri.clone(), |
| 99 | + cursor: Cursor(0), |
| 100 | + worker_id: self.next_id, |
| 101 | + local_id: LocalId(self.workers[self.next_id.0].configs.len()), |
| 102 | + }; |
| 103 | + self.next_id = WorkerId((self.next_id.0 + 1) % self.workers.len()); |
| 104 | + self.configs.insert(request.uri.clone(), config.clone()); |
| 105 | + self.workers[config.worker_id.0].command_tx.push(Command::Connect(config)).unwrap(); |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + Ok(true) |
| 110 | + } |
| 111 | +} |
0 commit comments