-
Notifications
You must be signed in to change notification settings - Fork 586
chore: OTLP retry post-review cleanup #3204
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -7,6 +7,11 @@ | |||||||||||||||||||||||||||
| //! specified retry policy, using exponential backoff and jitter to determine the delay between | ||||||||||||||||||||||||||||
| //! retries. The function uses error classification to determine retry behavior and can honor | ||||||||||||||||||||||||||||
| //! server-provided throttling hints. | ||||||||||||||||||||||||||||
| #[cfg(any( | ||||||||||||||||||||||||||||
| feature = "experimental-grpc-retry", | ||||||||||||||||||||||||||||
| feature = "experimental-http-retry" | ||||||||||||||||||||||||||||
| ))] | ||||||||||||||||||||||||||||
| use opentelemetry::otel_info; | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| #[cfg(any( | ||||||||||||||||||||||||||||
| feature = "experimental-grpc-retry", | ||||||||||||||||||||||||||||
|
|
@@ -17,24 +22,23 @@ use opentelemetry::otel_warn; | |||||||||||||||||||||||||||
| feature = "experimental-grpc-retry", | ||||||||||||||||||||||||||||
| feature = "experimental-http-retry" | ||||||||||||||||||||||||||||
| ))] | ||||||||||||||||||||||||||||
| use std::future::Future; | ||||||||||||||||||||||||||||
| use opentelemetry_sdk::runtime::Runtime; | ||||||||||||||||||||||||||||
| #[cfg(any( | ||||||||||||||||||||||||||||
| feature = "experimental-grpc-retry", | ||||||||||||||||||||||||||||
| feature = "experimental-http-retry" | ||||||||||||||||||||||||||||
| ))] | ||||||||||||||||||||||||||||
| use std::hash::{DefaultHasher, Hasher}; | ||||||||||||||||||||||||||||
| use std::time::Duration; | ||||||||||||||||||||||||||||
| use std::future::Future; | ||||||||||||||||||||||||||||
| #[cfg(any( | ||||||||||||||||||||||||||||
| feature = "experimental-grpc-retry", | ||||||||||||||||||||||||||||
| feature = "experimental-http-retry" | ||||||||||||||||||||||||||||
| ))] | ||||||||||||||||||||||||||||
| use std::time::SystemTime; | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| use std::hash::{DefaultHasher, Hasher}; | ||||||||||||||||||||||||||||
| use std::time::Duration; | ||||||||||||||||||||||||||||
| #[cfg(any( | ||||||||||||||||||||||||||||
| feature = "experimental-grpc-retry", | ||||||||||||||||||||||||||||
| feature = "experimental-http-retry" | ||||||||||||||||||||||||||||
| ))] | ||||||||||||||||||||||||||||
| use opentelemetry_sdk::runtime::Runtime; | ||||||||||||||||||||||||||||
| use std::time::SystemTime; | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// Classification of errors for retry purposes. | ||||||||||||||||||||||||||||
| #[derive(Debug, Clone, PartialEq)] | ||||||||||||||||||||||||||||
|
|
@@ -61,26 +65,6 @@ pub struct RetryPolicy { | |||||||||||||||||||||||||||
| pub jitter_ms: u64, | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// A runtime stub for when experimental_async_runtime is not enabled. | ||||||||||||||||||||||||||||
| /// This allows retry policy to be configured but no actual retries occur. | ||||||||||||||||||||||||||||
| #[cfg(not(any( | ||||||||||||||||||||||||||||
| feature = "experimental-grpc-retry", | ||||||||||||||||||||||||||||
| feature = "experimental-http-retry" | ||||||||||||||||||||||||||||
| )))] | ||||||||||||||||||||||||||||
| #[derive(Debug, Clone, Default)] | ||||||||||||||||||||||||||||
| pub struct NoOpRuntime; | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| #[cfg(not(any( | ||||||||||||||||||||||||||||
| feature = "experimental-grpc-retry", | ||||||||||||||||||||||||||||
| feature = "experimental-http-retry" | ||||||||||||||||||||||||||||
| )))] | ||||||||||||||||||||||||||||
| impl NoOpRuntime { | ||||||||||||||||||||||||||||
| /// Creates a new no-op runtime. | ||||||||||||||||||||||||||||
| pub fn new() -> Self { | ||||||||||||||||||||||||||||
| Self | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // Generates a random jitter value up to max_jitter | ||||||||||||||||||||||||||||
| #[cfg(any( | ||||||||||||||||||||||||||||
| feature = "experimental-grpc-retry", | ||||||||||||||||||||||||||||
|
|
@@ -144,13 +128,13 @@ where | |||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| match error_type { | ||||||||||||||||||||||||||||
| RetryErrorType::NonRetryable => { | ||||||||||||||||||||||||||||
| otel_warn!(name: "OtlpRetry", message = format!("Operation {:?} failed with non-retryable error: {:?}", operation_name, err)); | ||||||||||||||||||||||||||||
| otel_warn!(name: "OtlpRetryNonRetryable", operation = operation_name, error = format!("{:?}", err)); | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
| self.export_http_with_retry( | |
| batch, | |
| OtlpHttpClient::build_logs_export_body, | |
| "HttpLogsClient.Export", | |
| ) |
opentelemetry-rust/opentelemetry-otlp/src/exporter/tonic/trace.rs
Lines 78 to 85 in 16d0863
| match super::tonic_retry_with_backoff( | |
| #[cfg(feature = "experimental-grpc-retry")] | |
| Tokio, | |
| #[cfg(not(feature = "experimental-grpc-retry"))] | |
| (), | |
| self.retry_policy.clone(), | |
| crate::retry_classification::grpc::classify_tonic_status, | |
| "TonicTracesClient.Export", |
We could plumb the things through seperately as &str to structure this, but for logging this feels a bit overwrought imho
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
otel_warn!(
name: "Export.Failed.NonRetryable",
signal: "Logs",
transport: "HTTP",
message = "OTLP export failed with non-retryable error - telemetry data will be lost"
);
^ Suggestion. We should probably include minimum info like erro_code too, but detailed error message should be done at an inner level, not by the retry module.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've taken the name style from here, and left the signal/transport out, favouring operation, based on my comment above (it tells you this stuff already).
I am leery of adding error_code here, as it's generally logged anyway and it would make the interfaces around retry more complicated just for extra logging, but it sounds like you agree with that take ("detailed error message should be done at an inner level") ?
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the error here is going to be the stringified/formatted one, likely coming from the underlying transport library, partially defeating structured logging purpose.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
also, this is a retry attempt and need to log the delay/jitter etc. to be fully useful.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the error here is going to be the stringified/formatted one, likely coming from the underlying transport library, partially defeating structured logging purpose.
Do you have a suggestion here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
also mention that there is no ability to customize retry policy.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed code formatting.
You can customize the retry policy, e.g. something like this:
Updated changelog accordingly.