Skip to content

Commit

Permalink
fix(statsd): Measure time in nanos (#4495)
Browse files Browse the repository at this point in the history
A lot of our instrumented code implicitly assumes sub-millisecond
precision, but we round all timer metrics to milliseconds. This becomes
a problem especially when computing the sum over a large number of
sub-millisecond time frames (e.g. envelope buffer busy time).
  • Loading branch information
jjbayer authored Feb 12, 2025
1 parent bb90af7 commit e13fd30
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 24 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

- Add data categories for LogItem and LogByte. ([#4455](https://github.com/getsentry/relay/pull/4455))
- Add option to drop transaction attachments. ([#4466](https://github.com/getsentry/relay/pull/4466))
- Add sub-millisecond precision to internal timer metrics. ([#4495](https://github.com/getsentry/relay/pull/4495))

## 25.1.0

Expand Down
36 changes: 21 additions & 15 deletions relay-server/src/middlewares/body_timing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ mod tests {
}
}

fn captures_match(captures: Vec<String>, expected: &str) -> bool {
let (head, tail) = expected.split_once("|h").unwrap();
captures.len() == 1 && captures[0].starts_with(head) && captures[0].ends_with(tail)
}

#[test]
fn test_empty_body() {
let captures = with_capturing_test_client(|| {
Expand All @@ -182,10 +187,10 @@ mod tests {

let _ = pinned.poll_frame(&mut cx);
});
assert_eq!(
assert!(captures_match(
captures,
["requests.body_read.duration:0|ms|#route:unknown,size:lt1KB,status:completed"]
);
"requests.body_read.duration:0|h|#route:unknown,size:lt1KB,status:completed"
));
}

#[test]
Expand All @@ -201,10 +206,10 @@ mod tests {
let _ = pinned.as_mut().poll_frame(&mut cx);
let _ = pinned.as_mut().poll_frame(&mut cx);
});
assert_eq!(
assert!(captures_match(
captures,
["requests.body_read.duration:0|ms|#route:unknown,size:lt1KB,status:completed"]
);
"requests.body_read.duration:0|h|#route:unknown,size:lt1KB,status:completed"
));
}

#[test]
Expand All @@ -219,10 +224,11 @@ mod tests {

let _ = pinned.poll_frame(&mut cx);
});
assert_eq!(

assert!(captures_match(
captures,
["requests.body_read.duration:0|ms|#route:unknown,size:lt1KB,status:dropped"]
)
"requests.body_read.duration:0|h|#route:unknown,size:lt1KB,status:dropped"
))
}

#[test]
Expand All @@ -246,10 +252,10 @@ mod tests {
let pinned = Pin::new(&mut timed_body);
let _ = pinned.poll_frame(&mut cx);
});
assert_eq!(
assert!(captures_match(
captures,
["requests.body_read.duration:0|ms|#route:unknown,size:lt1KB,status:failed"]
)
"requests.body_read.duration:0|h|#route:unknown,size:lt1KB,status:failed"
))
}

#[test]
Expand All @@ -266,10 +272,10 @@ mod tests {
let mut pinned = Pin::new(&mut timed_body);
while let Poll::Ready(Some(Ok(_))) = pinned.as_mut().poll_frame(&mut cx) {}
});
assert_eq!(
assert!(captures_match(
captures,
["requests.body_read.duration:0|ms|#route:unknown,size:lt10KB,status:completed"]
)
"requests.body_read.duration:0|h|#route:unknown,size:lt10KB,status:completed"
))
}

#[test]
Expand Down
46 changes: 37 additions & 9 deletions relay-statsd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,9 @@ macro_rules! metric {
$crate::with_client(|client| {
use $crate::_pred::*;
client.send_metric(
client.time_with_tags(&$crate::TimerMetric::name(&$id), $value)
// NOTE: cadence histograms support Duration out of the box and converts it to nanos,
// but we want milliseconds for historical reasons.
client.histogram_with_tags(&$crate::TimerMetric::name(&$id), $value.as_nanos() as f64 / 1e6)
$(.with_tag(stringify!($k), $v))*
)
})
Expand All @@ -614,22 +616,21 @@ macro_rules! metric {
(timer($id:expr), $($k:ident = $v:expr,)* $block:block) => {{
let now = std::time::Instant::now();
let rv = {$block};
$crate::with_client(|client| {
use $crate::_pred::*;
client.send_metric(
client.time_with_tags(&$crate::TimerMetric::name(&$id), now.elapsed())
$(.with_tag(stringify!($k), $v))*
)
});
$crate::metric!(timer($id) = now.elapsed() $(, $k = $v)*);
rv
}};
}

#[cfg(test)]
mod tests {
use std::time::Duration;

use cadence::{NopMetricSink, StatsdClient};

use crate::{set_client, with_capturing_test_client, with_client, GaugeMetric, MetricsClient};
use crate::{
set_client, with_capturing_test_client, with_client, GaugeMetric, MetricsClient,
TimerMetric,
};

enum TestGauges {
Foo,
Expand Down Expand Up @@ -683,4 +684,31 @@ mod tests {
// After setting the global client,the current client must change:
assert_ne!(client1, client2);
}

struct TestTimer;

impl TimerMetric for TestTimer {
fn name(&self) -> &'static str {
"timer"
}
}

#[test]
fn nanos_rounding_error() {
let one_day = Duration::from_secs(60 * 60 * 24);
let captures = with_capturing_test_client(|| {
metric!(timer(TestTimer) = one_day + Duration::from_nanos(1),);
});

// for "short" durations, precision is preserved:
assert_eq!(captures, ["timer:86400000.000001|h"]); // h is for histogram, not hours

let one_year = Duration::from_secs(60 * 60 * 24 * 365);
let captures = with_capturing_test_client(|| {
metric!(timer(TestTimer) = one_year + Duration::from_nanos(1),);
});

// for very long durations, precision is lost:
assert_eq!(captures, ["timer:31536000000|h"]);
}
}

0 comments on commit e13fd30

Please sign in to comment.