Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions src/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,10 +253,10 @@ impl Manager {
let result = tokio::time::timeout(timeout, client.quorum(request)).await;

match result {
Ok(response) => {
return response;
Ok(Ok(response)) => {
return Ok(response);
}
Err(e) => {
Ok(Err(e)) => {
info_with_replica!(
self.replica_id,
"lighthouse quorum failed. error: {}",
Expand All @@ -271,6 +271,31 @@ impl Manager {
)));
}

// A quorum failure usually returns immediately instead of waiting for the
// timeout, so sleep for the timeout duration before retrying.
tokio::time::sleep(timeout).await;

// Reset the client since the lighthouse server might have failed
// If this also fails, consider increasing `connect_timeout`.
let _ = self.create_lighthouse_client().await;

retry_count += 1;
}
Err(e) => {
info_with_replica!(
self.replica_id,
"lighthouse quorum timeout. error: {}",
e.to_string()
);

if retry_count == self.quorum_retries {
return Err(Status::internal(format!(
"lighthouse quorum failed after {} retries. error: {}",
retry_count,
e.to_string(),
)));
}

tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;

// Reset the client since the lighthouse server might have failed
Expand Down