1616//! # Rate limit grouping
1717//!
1818//! Events are rate limited independently based on a combination of:
19- //! - **Callsite **: The code location where the log statement appears
19+ //! - **Message content **: The actual log message text
2020//! - **Contextual fields**: Any fields attached to the event or its parent spans
2121//!
2222//! ## How fields contribute to grouping
2323//!
24- //! **Only these fields create distinct rate limit groups:**
24+ //! **These fields create distinct rate limit groups:**
25+ //! - `message` - Different messages are rate limited independently
2526//! - `component_id` - Different components are rate limited independently
2627//!
2728//! **All other fields are ignored for grouping**, including:
2829//! - `fanout_id`, `input_id`, `output_id` - Not used for grouping to avoid resource/cost implications from high-cardinality tags
29- //! - `message` - The log message itself doesn't differentiate groups
3030//! - `internal_log_rate_limit` - Control field for enabling/disabling rate limiting
3131//! - `internal_log_rate_secs` - Control field for customizing the rate limit window
3232//! - Any custom fields you add
3939//! info!(component_id = "transform_2", "Processing event"); // Group B
4040//! // Even though the message is identical, these are rate limited independently
4141//!
42- //! // Example 2: Only component_id matters for grouping
43- //! info!(component_id = "router", fanout_id = "output_1", "Routing event"); // Group C
44- //! info!(component_id = "router", fanout_id = "output_2", "Routing event"); // Group C (same group!)
45- //! info!(component_id = "router", fanout_id = "output_1", "Routing event"); // Group C (same group!)
46- //! info!(component_id = "router", fanout_id = "output_1", input_id = "kafka", "Routing event"); // Group C (same!)
47- //! // All of these share the same group because they have the same component_id
42+ //! // Example 2: Same message with same component_id = same group
43+ //! info!(message = "Hello", component_id = "router"); // Group C
44+ //! info!("Hello", component_id = "router"); // Group C
45+ //! // Both forms of logging with the same message are now treated identically
46+ //!
47+ //! // Example 3: Only component_id and message matter for grouping
48+ //! info!(component_id = "router", fanout_id = "output_1", "Routing event"); // Group D
49+ //! info!(component_id = "router", fanout_id = "output_2", "Routing event"); // Group D
50+ //! info!(component_id = "router", input_id = "kafka", "Routing event"); // Group D
51+ //! // All of these share the same group because they have the same component_id and message
4852//! // The fanout_id and input_id fields are ignored to avoid resource/cost implications
4953//!
50- //! // Example 3: Span fields contribute to grouping
54+ //! // Example 4: Different messages create different groups
55+ //! info!(component_id = "router", "Message A"); // Group E
56+ //! info!(component_id = "router", "Message B"); // Group F
57+ //! // Different messages are rate limited independently
58+ //!
59+ //! // Example 5: Span fields contribute to grouping
5160//! let span = info_span!("process", component_id = "transform_1");
5261//! let _enter = span.enter();
53- //! info!("Processing event"); // Group E: callsite + component_id from span
62+ //! info!("Processing event"); // Group G: message + component_id from span
5463//! drop(_enter);
5564//!
5665//! let span = info_span!("process", component_id = "transform_2");
5766//! let _enter = span.enter();
58- //! info!("Processing event"); // Group F : same callsite but different component_id
67+ //! info!("Processing event"); // Group H : same message but different component_id
5968//!
60- //! // Example 4 : Nested spans - child span fields take precedence
69+ //! // Example 6 : Nested spans - child span fields take precedence
6170//! let outer = info_span!("outer", component_id = "parent");
6271//! let _outer_guard = outer.enter();
6372//! let inner = info_span!("inner", component_id = "child");
6473//! let _inner_guard = inner.enter();
65- //! info!("Nested event"); // Grouped by component_id = "child"
66- //!
67- //! // Example 5: Same callsite with no fields = single rate limit group
68- //! info!("Simple message"); // Group G
69- //! info!("Simple message"); // Group G
70- //! info!("Simple message"); // Group G
74+ //! info!("Nested event"); // Grouped by component_id = "child" + message = "Nested event"
7175//!
72- //! // Example 6: Custom fields are ignored for grouping
73- //! info!(component_id = "source", input_id = "in_1", "Received data" ); // Group H
74- //! info!(component_id = "source", input_id = "in_2", "Received data" ); // Group H (same group!)
75- //! // The input_id field is ignored - only component_id matters
76+ //! // Example 7: Same message with no component_id = single rate limit group
77+ //! info!("Simple message" ); // Group I
78+ //! info!("Simple message" ); // Group I
79+ //! info!("Simple message"); // Group I
7680//!
77- //! // Example 7 : Disabling rate limiting for specific logs
81+ //! // Example 8 : Disabling rate limiting for specific logs
7882//! // Rate limiting is ON by default - explicitly disable for important logs
7983//! warn!(
8084//! component_id = "critical_component",
8387//! );
8488//! // This event will NEVER be rate limited, regardless of how often it fires
8589//!
86- //! // Example 8 : Custom rate limit window for specific events
90+ //! // Example 9 : Custom rate limit window for specific events
8791//! info!(
8892//! component_id = "noisy_component",
8993//! message = "Frequent status update",
9296//! // Override the default window for this specific log
9397//! ```
9498//!
95- //! This ensures logs from different components are rate limited independently,
99+ //! This ensures logs from different components and with different messages are rate limited independently,
96100//! while avoiding resource/cost implications from high-cardinality tags.
97101
98102use std:: fmt;
99103
100104use dashmap:: DashMap ;
101105use tracing_core:: {
102106 Event , Metadata , Subscriber ,
103- callsite:: Identifier ,
104107 field:: { Field , Value , Visit , display} ,
105108 span,
106109 subscriber:: Interest ,
@@ -127,7 +130,6 @@ const COMPONENT_ID_FIELD: &str = "component_id";
127130
128131#[ derive( Eq , PartialEq , Hash , Clone ) ]
129132struct RateKeyIdentifier {
130- callsite : Identifier ,
131133 rate_limit_key_values : RateLimitedSpanKeys ,
132134}
133135
@@ -243,16 +245,16 @@ where
243245 // Build a composite key from event fields and span context to determine the rate limit group.
244246 // This multi-step process ensures we capture all relevant contextual information:
245247 //
246- // 1. Start with event-level fields (e.g., fields directly on the log macro call)
248+ // 1. Start with event-level fields (e.g., message and fields directly on the log macro call)
247249 // 2. Walk up the span hierarchy from root to current span
248250 // 3. Merge in fields from each span, with child spans taking precedence
249251 //
250252 // This means an event's rate limit group is determined by the combination of:
251- // - Its callsite (handled separately via RateKeyIdentifier )
252- // - All contextual fields from both the event and its span ancestry
253+ // - Its message content (ensures info!(message = "foo") and info!("foo") are grouped together )
254+ // - All contextual fields from both the event and its span ancestry (e.g., component_id)
253255 //
254- // Example: The same `info!("msg")` callsite in different component contexts becomes
255- // distinct rate limit groups, allowing fine-grained control over log flooding.
256+ // Example: The same message in different component contexts becomes distinct rate limit groups,
257+ // allowing fine-grained control over log flooding.
256258 let rate_limit_key_values = {
257259 let mut keys = RateLimitedSpanKeys :: default ( ) ;
258260 // Capture fields directly on this event
@@ -275,7 +277,6 @@ where
275277 // not, we'll initialize an entry for it.
276278 let metadata = event. metadata ( ) ;
277279 let id = RateKeyIdentifier {
278- callsite : metadata. callsite ( ) ,
279280 rate_limit_key_values,
280281 } ;
281282
@@ -480,25 +481,32 @@ impl From<String> for TraceValue {
480481///
481482/// **Tracked fields** (only these create distinct rate limit groups):
482483/// - `component_id` - Different components are rate limited independently
484+ /// - `message` - Different messages are rate limited independently
483485///
484486/// **Ignored fields**: All other fields are ignored for grouping purposes. This avoids resource/cost implications from high-cardinality tags.
485487/// ```
486488#[ derive( Default , Eq , PartialEq , Hash , Clone ) ]
487489struct RateLimitedSpanKeys {
488490 component_id : Option < TraceValue > ,
491+ message : Option < TraceValue > ,
489492}
490493
491494impl RateLimitedSpanKeys {
492495 fn record ( & mut self , field : & Field , value : TraceValue ) {
493- if field. name ( ) == COMPONENT_ID_FIELD {
494- self . component_id = Some ( value) ;
496+ match field. name ( ) {
497+ COMPONENT_ID_FIELD => self . component_id = Some ( value) ,
498+ MESSAGE_FIELD => self . message = Some ( value) ,
499+ _ => { }
495500 }
496501 }
497502
498503 fn merge ( & mut self , other : & Self ) {
499504 if let Some ( component_id) = & other. component_id {
500505 self . component_id = Some ( component_id. clone ( ) ) ;
501506 }
507+ if let Some ( message) = & other. message {
508+ self . message = Some ( message. clone ( ) ) ;
509+ }
502510 }
503511}
504512
@@ -1052,4 +1060,33 @@ mod test {
10521060 ]
10531061 ) ;
10541062 }
1063+
1064+ #[ test]
1065+ #[ serial]
1066+ fn message_field_explicit_vs_implicit_same_bucket ( ) {
1067+ // info!(message = "Hello") and info!("Hello") should be grouped under the same bucket
1068+ let ( events, sub) = setup_test ( 1 ) ;
1069+ tracing:: subscriber:: with_default ( sub, || {
1070+ for _ in 0 ..30 {
1071+ info ! ( message = "Hello" ) ;
1072+ info ! ( "Hello" ) ;
1073+ }
1074+ } ) ;
1075+
1076+ let events = events. lock ( ) . unwrap ( ) ;
1077+
1078+ assert_eq ! (
1079+ events. len( ) ,
1080+ 2 ,
1081+ "Expected 2 events (1 message + 1 suppression warning), got {}" ,
1082+ events. len( )
1083+ ) ;
1084+
1085+ assert_eq ! ( events[ 0 ] . message, "Hello" ) ;
1086+ assert ! (
1087+ events[ 1 ]
1088+ . message
1089+ . contains( "is being suppressed to avoid flooding" )
1090+ ) ;
1091+ }
10551092}
0 commit comments