diff --git a/opentelemetry-otlp/CHANGELOG.md b/opentelemetry-otlp/CHANGELOG.md
index a4a335e687..03970aa02d 100644
--- a/opentelemetry-otlp/CHANGELOG.md
+++ b/opentelemetry-otlp/CHANGELOG.md
@@ -11,6 +11,7 @@ Released 2025-Sep-25
 - Update `opentelemetry-proto` and `opentelemetry-http` dependency version to 0.31.0
 - Add HTTP compression support with `gzip-http` and `zstd-http` feature flags
 - Add retry with exponential backoff and throttling support for HTTP and gRPC exporters
+  This behaviour is opt-in via the `experimental-grpc-retry` and `experimental-http-retry` feature flags on this crate. The retry policy can be customized via the `with_retry_policy` method on the exporter builders.
 
 ## 0.30.0
 
diff --git a/opentelemetry-otlp/src/retry.rs b/opentelemetry-otlp/src/retry.rs
index 939e44503b..a4cc89f389 100644
--- a/opentelemetry-otlp/src/retry.rs
+++ b/opentelemetry-otlp/src/retry.rs
@@ -7,6 +7,11 @@
 //! specified retry policy, using exponential backoff and jitter to determine the delay between
 //! retries. The function uses error classification to determine retry behavior and can honor
 //! server-provided throttling hints.
+#[cfg(any(
+    feature = "experimental-grpc-retry",
+    feature = "experimental-http-retry"
+))]
+use opentelemetry::otel_info;
 
 #[cfg(any(
     feature = "experimental-grpc-retry",
@@ -17,24 +22,23 @@ use opentelemetry::otel_warn;
     feature = "experimental-grpc-retry",
     feature = "experimental-http-retry"
 ))]
-use std::future::Future;
+use opentelemetry_sdk::runtime::Runtime;
 #[cfg(any(
     feature = "experimental-grpc-retry",
     feature = "experimental-http-retry"
 ))]
-use std::hash::{DefaultHasher, Hasher};
-use std::time::Duration;
+use std::future::Future;
 #[cfg(any(
     feature = "experimental-grpc-retry",
     feature = "experimental-http-retry"
 ))]
-use std::time::SystemTime;
-
+use std::hash::{DefaultHasher, Hasher};
+use std::time::Duration;
 #[cfg(any(
     feature = "experimental-grpc-retry",
     feature = "experimental-http-retry"
 ))]
-use opentelemetry_sdk::runtime::Runtime;
+use std::time::SystemTime;
 
 /// Classification of errors for retry purposes.
 #[derive(Debug, Clone, PartialEq)]
@@ -61,26 +65,6 @@ pub struct RetryPolicy {
     pub jitter_ms: u64,
 }
 
-/// A runtime stub for when experimental_async_runtime is not enabled.
-/// This allows retry policy to be configured but no actual retries occur.
-#[cfg(not(any(
-    feature = "experimental-grpc-retry",
-    feature = "experimental-http-retry"
-)))]
-#[derive(Debug, Clone, Default)]
-pub struct NoOpRuntime;
-
-#[cfg(not(any(
-    feature = "experimental-grpc-retry",
-    feature = "experimental-http-retry"
-)))]
-impl NoOpRuntime {
-    /// Creates a new no-op runtime.
-    pub fn new() -> Self {
-        Self
-    }
-}
-
 // Generates a random jitter value up to max_jitter
 #[cfg(any(
     feature = "experimental-grpc-retry",
     feature = "experimental-http-retry"
 ))]
@@ -144,15 +128,15 @@ where
 
                 match error_type {
                     RetryErrorType::NonRetryable => {
-                        otel_warn!(name: "OtlpRetry", message = format!("Operation {:?} failed with non-retryable error: {:?}", operation_name, err));
+                        otel_warn!(name: "Export.Failed.NonRetryable", operation = operation_name, error = format!("{:?}", err));
                         return Err(err);
                     }
                     RetryErrorType::Retryable if attempt < policy.max_retries => {
                         attempt += 1;
                         // Use exponential backoff with jitter
-                        otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} due to retryable error: {:?}", operation_name, err));
                         let jitter = generate_jitter(policy.jitter_ms);
                         let delay_with_jitter = std::cmp::min(delay + jitter, policy.max_delay_ms);
+                        otel_info!(name: "Export.InProgress.Retrying", operation = operation_name, error = format!("{:?}", err), attempt = attempt, delay_ms = delay_with_jitter, jitter_ms = jitter);
                         runtime
                             .delay(Duration::from_millis(delay_with_jitter))
                             .await;
@@ -161,13 +145,13 @@ where
                     RetryErrorType::Throttled(server_delay) if attempt < policy.max_retries => {
                         attempt += 1;
                         // Use server-specified delay (overrides exponential backoff)
-                        otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} after server-specified throttling delay: {:?}", operation_name, server_delay));
+                        otel_info!(name: "Export.InProgress.Throttled", operation = operation_name, error = format!("{:?}", err), attempt = attempt, delay_ms = server_delay.as_millis());
                         runtime.delay(server_delay).await;
                         // Don't update exponential backoff delay for next attempt since server provided specific timing
                     }
                     _ => {
                         // Max retries reached
-                        otel_warn!(name: "OtlpRetry", message = format!("Operation {:?} failed after {} attempts: {:?}", operation_name, attempt, err));
+                        otel_warn!(name: "Export.Failed.Exhausted", operation = operation_name, error = format!("{:?}", err), retries = attempt);
                         return Err(err);
                     }
                 }
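
A minimal usage sketch of the opt-in retry configuration described in the changelog entry above. `RetryPolicy` and `with_retry_policy` are taken from this diff and the changelog; the `grpc-tonic` transport selection, the `initial_delay_ms` field, the crate-root re-export of `RetryPolicy`, and the exact position of `with_retry_policy` in the builder chain are assumptions for illustration only.

```rust
// Cargo.toml of the consuming crate (assumed layout): without one of the
// experimental feature flags, no retries are attempted.
//
//     opentelemetry-otlp = { version = "<crate version>", features = [
//         "grpc-tonic",
//         "experimental-grpc-retry",
//     ] }

use opentelemetry_otlp::{RetryPolicy, SpanExporter, WithExportConfig};

fn build_span_exporter() -> Result<SpanExporter, Box<dyn std::error::Error>> {
    // Field names follow the usage visible in retry.rs (max_retries, max_delay_ms,
    // jitter_ms); initial_delay_ms is assumed for the starting backoff value.
    let policy = RetryPolicy {
        max_retries: 5,
        initial_delay_ms: 100,
        max_delay_ms: 1_600,
        jitter_ms: 100,
    };

    let exporter = SpanExporter::builder()
        .with_tonic()
        .with_endpoint("http://localhost:4317")
        // Builder hook named in the changelog entry; its exact placement is assumed.
        .with_retry_policy(policy)
        .build()?;

    Ok(exporter)
}
```

With neither experimental flag enabled the exporter keeps its existing single-attempt behaviour, which is what the opt-in wording in the changelog entry describes.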