-
Notifications
You must be signed in to change notification settings - Fork 316
feat(multipath): add back basic metrics #3672
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feat-multipath
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| use iroh_metrics::{Counter, Histogram, MetricsGroup}; | ||
| use iroh_metrics::{Counter, MetricsGroup}; | ||
| use serde::{Deserialize, Serialize}; | ||
|
|
||
| /// Enum of metrics for the module | ||
|
|
@@ -14,11 +14,8 @@ pub struct Metrics { | |
| pub send_ipv4: Counter, | ||
| pub send_ipv6: Counter, | ||
| pub send_relay: Counter, | ||
| pub send_relay_error: Counter, | ||
|
|
||
| // Data packets (non-disco) | ||
| pub send_data: Counter, | ||
| pub send_data_network_down: Counter, | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was unused; not sure if we'd want it back? |
||
| pub recv_data_relay: Counter, | ||
| pub recv_data_ipv4: Counter, | ||
| pub recv_data_ipv6: Counter, | ||
|
|
@@ -50,15 +47,20 @@ pub struct Metrics { | |
|
|
||
| /* | ||
| * Connection Metrics | ||
| * | ||
| * These all only count connections that completed the TLS handshake successfully. This means | ||
| * that short lived 0RTT connections are potentially not included in these counts. | ||
| */ | ||
| /// The number of direct connections we have made to peers. | ||
| pub num_direct_conns_added: Counter, | ||
| /// The number of direct connections we have lost to peers. | ||
| pub num_direct_conns_removed: Counter, | ||
| /// The number of connections to peers we have added over relay. | ||
| pub num_relay_conns_added: Counter, | ||
| /// The number of connections to peers we have removed over relay. | ||
| pub num_relay_conns_removed: Counter, | ||
| /// Number of connections opened (only handshaked connections are counted). | ||
| pub num_conns_opened: Counter, | ||
| /// Number of connections closed (only handshaked connections are counted). | ||
| pub num_conns_closed: Counter, | ||
| /// Number of connections that had only relay paths over their lifetime. | ||
| pub num_conns_transport_relay_only: Counter, | ||
| /// Number of connections that had only IP paths over their lifetime. | ||
| pub num_conns_transport_ip_only: Counter, | ||
| /// Number of connections that had both IP and relay paths. | ||
| pub num_conns_transport_ip_and_relay: Counter, | ||
|
|
||
| pub actor_tick_main: Counter, | ||
| pub actor_tick_msg: Counter, | ||
|
|
@@ -67,36 +69,25 @@ pub struct Metrics { | |
| pub actor_tick_direct_addr_heartbeat: Counter, | ||
| pub actor_link_change: Counter, | ||
| pub actor_tick_other: Counter, | ||
|
|
||
| /// Number of endpoints we have attempted to contact. | ||
| pub endpoints_contacted: Counter, | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think we could ever do this well, because we don't keep lists of endpoint IDs across restarts, so this had already been a bad metric for a while - removed it. |
||
| /// Number of endpoints we have managed to contact directly. | ||
| pub endpoints_contacted_directly: Counter, | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as above: I don't think there's a good way to do it meaningfully. |
||
|
|
||
| /// Number of connections with a successful handshake. | ||
| pub connection_handshake_success: Counter, | ||
| /// Number of connections with a successful handshake that became direct. | ||
| pub connection_became_direct: Counter, | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These two are better handled by the new `num_conns_*` metrics. |
||
| /// Histogram of connection latency in milliseconds across all endpoint connections. | ||
| #[default(Histogram::new(vec![1.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0, 1000.0, f64::INFINITY]))] | ||
| pub connection_latency_ms: Histogram, | ||
|
|
||
| /* | ||
| * Path Congestion Metrics | ||
| */ | ||
| /// Number of times a path was marked as outdated due to consecutive ping failures. | ||
| pub path_marked_outdated: Counter, | ||
| /// Number of ping failures recorded across all paths. | ||
| pub path_ping_failures: Counter, | ||
| /// Number of consecutive failure resets (path recovered). | ||
| pub path_failure_resets: Counter, | ||
| /// Histogram of packet loss rates (0.0-1.0) observed on UDP paths. | ||
| #[default(Histogram::new(vec![0.0, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0]))] | ||
| pub path_packet_loss_rate: Histogram, | ||
| /// Histogram of RTT variance (in milliseconds) as a congestion indicator. | ||
| #[default(Histogram::new(vec![0.0, 1.0, 5.0, 10.0, 20.0, 50.0, 100.0, 200.0]))] | ||
| pub path_rtt_variance_ms: Histogram, | ||
| /// Histogram of path quality scores (0.0-1.0). | ||
| #[default(Histogram::new(vec![0.0, 0.3, 0.5, 0.7, 0.85, 0.95, 1.0]))] | ||
| pub path_quality_score: Histogram, | ||
| // /// Histogram of connection latency in milliseconds across all endpoint connections. | ||
| // #[default(Histogram::new(vec![1.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0, 1000.0, f64::INFINITY]))] | ||
| // pub connection_latency_ms: Histogram, | ||
| // /* | ||
| // * Path Congestion Metrics | ||
| // */ | ||
| // /// Number of times a path was marked as outdated due to consecutive ping failures. | ||
| // pub path_marked_outdated: Counter, | ||
| // /// Number of ping failures recorded across all paths. | ||
| // pub path_ping_failures: Counter, | ||
| // /// Number of consecutive failure resets (path recovered). | ||
| // pub path_failure_resets: Counter, | ||
| // /// Histogram of packet loss rates (0.0-1.0) observed on UDP paths. | ||
| // #[default(Histogram::new(vec![0.0, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0]))] | ||
| // pub path_packet_loss_rate: Histogram, | ||
| // /// Histogram of RTT variance (in milliseconds) as a congestion indicator. | ||
| // #[default(Histogram::new(vec![0.0, 1.0, 5.0, 10.0, 20.0, 50.0, 100.0, 200.0]))] | ||
| // pub path_rtt_variance_ms: Histogram, | ||
| // /// Histogram of path quality scores (0.0-1.0). | ||
| // #[default(Histogram::new(vec![0.0, 0.3, 0.5, 0.7, 0.85, 0.95, 1.0]))] | ||
| // pub path_quality_score: Histogram, | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was unused, and it would just be the sum of
send_ipv4, send_ipv6, and send_relay, so I removed it because the sum can be calculated client-side.