Skip to content

Commit 1ee7cd8

Browse files
authored
feat: add metrics for intent execution time (#607)
<!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Added per-intent execution timing histogram and async CU usage reporting. * Failure counters now include intent and error labels for richer metrics. * **Chores** * Introduced labeled-value support for intent/outcome results so metrics carry context. * Metrics collection refactored to a dedicated flow invoked after execution. * **Bug Fixes** * Improved execution failure log formatting and adjusted logging level based on duration. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent 2617901 commit 1ee7cd8

File tree

6 files changed

+215
-15
lines changed

6 files changed

+215
-15
lines changed

magicblock-committor-service/src/intent_execution_manager/intent_execution_engine.rs

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
use std::sync::{Arc, Mutex};
1+
use std::{
2+
sync::{Arc, Mutex},
3+
time::{Duration, Instant},
4+
};
25

36
use futures_util::{stream::FuturesUnordered, StreamExt};
47
use log::{error, info, trace, warn};
@@ -242,23 +245,27 @@ where
242245
execution_permit: OwnedSemaphorePermit,
243246
result_sender: broadcast::Sender<BroadcastedIntentExecutionResult>,
244247
) {
248+
let instant = Instant::now();
245249
let result = executor
246250
.execute(intent.inner.clone(), persister)
247251
.await
248252
.inspect_err(|err| {
249253
error!(
250-
"Failed to execute BaseIntent. id: {}. {:?}",
254+
"Failed to execute BaseIntent. id: {}. {}",
251255
intent.id, err
252256
)
253-
})
257+
});
258+
259+
// Metrics
260+
Self::execution_metrics(instant.elapsed(), &intent, &result);
261+
262+
let result = result
254263
.map(|output| ExecutionOutputWrapper {
255264
id: intent.id,
256265
trigger_type: intent.trigger_type,
257266
output,
258267
})
259268
.map_err(|err| {
260-
// Increase failed intents metric as well
261-
metrics::inc_committor_failed_intents_count();
262269
(intent.inner.id, intent.trigger_type, Arc::new(err))
263270
});
264271

@@ -280,6 +287,47 @@ where
280287
// Free worker
281288
drop(execution_permit);
282289
}
290+
291+
/// Records metrics related to intent execution
292+
fn execution_metrics(
293+
execution_time: Duration,
294+
intent: &ScheduledBaseIntentWrapper,
295+
result: &IntentExecutorResult<ExecutionOutput>,
296+
) {
297+
const EXECUTION_TIME_THRESHOLD: f64 = 2.0;
298+
299+
let intent_execution_secs = execution_time.as_secs_f64();
300+
metrics::observe_committor_intent_execution_time_histogram(
301+
intent_execution_secs,
302+
intent,
303+
result,
304+
);
305+
if let Err(ref err) = result {
306+
metrics::inc_committor_failed_intents_count(intent, err);
307+
}
308+
309+
// Loki alerts
310+
if intent_execution_secs >= EXECUTION_TIME_THRESHOLD {
311+
info!(
312+
"Intent took too long to execute: {}s. {}",
313+
intent_execution_secs,
314+
result
315+
.as_ref()
316+
.map(|_| "succeeded".to_string())
317+
.unwrap_or_else(|err| format!("{err:?}"))
318+
);
319+
} else {
320+
trace!("Seconds took to execute intent: {}", intent_execution_secs);
321+
}
322+
323+
// Alert
324+
if intent.is_undelegate() && result.is_err() {
325+
warn!(
326+
"Intent execution resulted in stuck accounts: {:?}",
327+
intent.get_committed_pubkeys()
328+
);
329+
}
330+
}
283331
}
284332

285333
/// Worker tests

magicblock-committor-service/src/intent_executor/error.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use log::error;
2+
use magicblock_metrics::metrics;
23
use magicblock_rpc_client::{
34
utils::TransactionErrorMapper, MagicBlockRpcClientError,
45
};
@@ -100,6 +101,17 @@ impl IntentExecutorError {
100101
}
101102
}
102103

104+
impl metrics::LabelValue for IntentExecutorError {
105+
fn value(&self) -> &str {
106+
match self {
107+
IntentExecutorError::ActionsError(_, _) => "actions_failed",
108+
IntentExecutorError::CpiLimitError(_, _) => "cpi_limit_failed",
109+
IntentExecutorError::CommitIDError(_, _) => "commit_nonce_failed",
110+
_ => "failed",
111+
}
112+
}
113+
}
114+
103115
/// Those are the errors that may occur during Commit/Finalize stages on Base layer
104116
#[derive(thiserror::Error, Debug)]
105117
pub enum TransactionStrategyExecutionError {

magicblock-committor-service/src/intent_executor/mod.rs

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use std::{ops::ControlFlow, sync::Arc, time::Duration};
99
use async_trait::async_trait;
1010
use futures_util::future::try_join_all;
1111
use log::{error, trace, warn};
12+
use magicblock_metrics::metrics;
1213
use magicblock_program::{
1314
magic_scheduled_base_intent::ScheduledBaseIntent,
1415
validator::validator_authority,
@@ -18,8 +19,8 @@ use magicblock_rpc_client::{
1819
decide_rpc_error_flow, map_magicblock_client_error,
1920
send_transaction_with_retries, SendErrorMapper, TransactionErrorMapper,
2021
},
21-
MagicBlockSendTransactionConfig, MagicBlockSendTransactionOutcome,
22-
MagicblockRpcClient,
22+
MagicBlockRpcClientError, MagicBlockSendTransactionConfig,
23+
MagicBlockSendTransactionOutcome, MagicblockRpcClient,
2324
};
2425
use solana_pubkey::Pubkey;
2526
use solana_sdk::{
@@ -68,6 +69,18 @@ pub enum ExecutionOutput {
6869
},
6970
}
7071

72+
impl metrics::LabelValue for ExecutionOutput {
73+
fn value(&self) -> &str {
74+
match self {
75+
Self::SingleStage(_) => "single_stage_succeeded",
76+
Self::TwoStage {
77+
commit_signature: _,
78+
finalize_signature: _,
79+
} => "two_stage_succeeded",
80+
}
81+
}
82+
}
83+
7184
#[async_trait]
7285
pub trait IntentExecutor: Send + Sync + 'static {
7386
/// Executes Message on Base layer
@@ -663,6 +676,59 @@ where
663676
)
664677
.await
665678
}
679+
680+
async fn intent_metrics(
681+
rpc_client: MagicblockRpcClient,
682+
execution_outcome: ExecutionOutput,
683+
) {
684+
use solana_transaction_status_client_types::EncodedTransactionWithStatusMeta;
685+
fn extract_cu(tx: EncodedTransactionWithStatusMeta) -> Option<u64> {
686+
let cu = tx.meta?.compute_units_consumed;
687+
cu.into()
688+
}
689+
690+
let cu_metrics = || async {
691+
match execution_outcome {
692+
ExecutionOutput::SingleStage(signature) => {
693+
let tx =
694+
rpc_client.get_transaction(&signature, None).await?;
695+
Ok::<_, MagicBlockRpcClientError>(extract_cu(
696+
tx.transaction,
697+
))
698+
}
699+
ExecutionOutput::TwoStage {
700+
commit_signature,
701+
finalize_signature,
702+
} => {
703+
let commit_tx = rpc_client
704+
.get_transaction(&commit_signature, None)
705+
.await?;
706+
let finalize_tx = rpc_client
707+
.get_transaction(&finalize_signature, None)
708+
.await?;
709+
let commit_cu = extract_cu(commit_tx.transaction);
710+
let finalize_cu = extract_cu(finalize_tx.transaction);
711+
let (Some(commit_cu), Some(finalize_cu)) =
712+
(commit_cu, finalize_cu)
713+
else {
714+
return Ok(None);
715+
};
716+
Ok(Some(commit_cu + finalize_cu))
717+
}
718+
}
719+
};
720+
721+
match cu_metrics().await {
722+
Ok(Some(cu)) => metrics::set_commmittor_intent_cu_usage(
723+
i64::try_from(cu).unwrap_or(i64::MAX),
724+
),
725+
Err(err) => warn!(
726+
"Failed to fetch CUs for intent: {:?}. {:?}",
727+
err, execution_outcome
728+
),
729+
_ => {}
730+
}
731+
}
666732
}
667733

668734
#[async_trait]
@@ -692,8 +758,12 @@ where
692758
// Write result of intent into Persister
693759
Self::persist_result(&persister, &result, message_id, &pubkeys);
694760
}
695-
696-
result
761+
result.inspect(|output| {
762+
tokio::spawn(Self::intent_metrics(
763+
self.rpc_client.clone(),
764+
*output,
765+
));
766+
})
697767
}
698768
}
699769

magicblock-committor-service/src/types.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
use std::ops::Deref;
22

3-
use magicblock_program::magic_scheduled_base_intent::ScheduledBaseIntent;
3+
use magicblock_metrics::metrics;
4+
use magicblock_program::magic_scheduled_base_intent::{
5+
MagicBaseIntent, ScheduledBaseIntent,
6+
};
47

58
// TODO: should be removed once cranks are supported
69
// Ideally even now OffChain/"Manual" commits should be triggered via Tx
@@ -16,6 +19,16 @@ pub struct ScheduledBaseIntentWrapper {
1619
pub trigger_type: TriggerType,
1720
}
1821

22+
impl metrics::LabelValue for ScheduledBaseIntentWrapper {
23+
fn value(&self) -> &str {
24+
match &self.inner.base_intent {
25+
MagicBaseIntent::BaseActions(_) => "actions",
26+
MagicBaseIntent::Commit(_) => "commit",
27+
MagicBaseIntent::CommitAndUndelegate(_) => "commit_and_undelegate",
28+
}
29+
}
30+
}
31+
1932
impl Deref for ScheduledBaseIntentWrapper {
2033
type Target = ScheduledBaseIntent;
2134

magicblock-metrics/src/metrics/mod.rs

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ use prometheus::{
55
Histogram, HistogramOpts, HistogramVec, IntCounter, IntCounterVec,
66
IntGauge, IntGaugeVec, Opts, Registry,
77
};
8-
pub use types::{AccountClone, AccountCommit, Outcome};
8+
pub use types::{AccountClone, AccountCommit, LabelValue, Outcome};
9+
910
mod types;
1011

1112
// -----------------
@@ -181,13 +182,32 @@ lazy_static::lazy_static! {
181182
"committor_intent_backlog_count", "Number of intents in backlog",
182183
).unwrap();
183184

184-
static ref COMMITTOR_FAILED_INTENTS_COUNT: IntCounter = IntCounter::new(
185-
"committor_failed_intents_count", "Number of failed to be executed intents",
185+
static ref COMMITTOR_FAILED_INTENTS_COUNT: IntCounterVec = IntCounterVec::new(
186+
Opts::new("committor_failed_intents_count", "Number of failed to be executed intents"),
187+
&["intent_kind", "error_kind"]
186188
).unwrap();
187189

188190
static ref COMMITTOR_EXECUTORS_BUSY_COUNT: IntGauge = IntGauge::new(
189191
"committor_executors_busy_count", "Number of busy intent executors"
190192
).unwrap();
193+
194+
static ref COMMITTOR_INTENT_EXECUTION_TIME_HISTOGRAM: HistogramVec = HistogramVec::new(
195+
HistogramOpts::new(
196+
"committor_intent_execution_time_histogram",
197+
"Time in seconds spent on intent execution"
198+
)
199+
.buckets(
200+
MILLIS_1_9.iter()
201+
.chain(MILLIS_10_90.iter())
202+
.chain(MILLIS_100_900.iter())
203+
.chain(SECONDS_1_9.iter()).cloned().collect(),
204+
),
205+
&["intent_kind", "outcome_kind"],
206+
).unwrap();
207+
208+
static ref COMMITTOR_INTENT_CU_USAGE: IntGauge = IntGauge::new(
209+
"committor_intent_cu_usage", "Compute units used for Intent"
210+
).unwrap();
191211
}
192212

193213
pub(crate) fn register() {
@@ -223,6 +243,7 @@ pub(crate) fn register() {
223243
register!(COMMITTOR_INTENTS_BACKLOG_COUNT);
224244
register!(COMMITTOR_FAILED_INTENTS_COUNT);
225245
register!(COMMITTOR_EXECUTORS_BUSY_COUNT);
246+
register!(COMMITTOR_INTENT_EXECUTION_TIME_HISTOGRAM);
226247
register!(ENSURE_ACCOUNTS_TIME);
227248
register!(RPC_REQUEST_HANDLING_TIME);
228249
register!(TRANSACTION_PROCESSING_TIME);
@@ -318,10 +339,29 @@ pub fn set_committor_intents_backlog_count(value: i64) {
318339
COMMITTOR_INTENTS_BACKLOG_COUNT.set(value)
319340
}
320341

321-
pub fn inc_committor_failed_intents_count() {
322-
COMMITTOR_FAILED_INTENTS_COUNT.inc()
342+
pub fn inc_committor_failed_intents_count(
343+
intent_kind: &impl LabelValue,
344+
error_kind: &impl LabelValue,
345+
) {
346+
COMMITTOR_FAILED_INTENTS_COUNT
347+
.with_label_values(&[intent_kind.value(), error_kind.value()])
348+
.inc()
323349
}
324350

325351
pub fn set_committor_executors_busy_count(value: i64) {
326352
COMMITTOR_EXECUTORS_BUSY_COUNT.set(value)
327353
}
354+
355+
pub fn observe_committor_intent_execution_time_histogram(
356+
seconds: f64,
357+
kind: &impl LabelValue,
358+
outcome: &impl LabelValue,
359+
) {
360+
COMMITTOR_INTENT_EXECUTION_TIME_HISTOGRAM
361+
.with_label_values(&[kind.value(), outcome.value()])
362+
.observe(seconds);
363+
}
364+
365+
pub fn set_commmittor_intent_cu_usage(value: i64) {
366+
COMMITTOR_INTENT_CU_USAGE.set(value)
367+
}

magicblock-metrics/src/metrics/types.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,20 @@ pub enum AccountCommit<'a> {
6868
CommitOnly { pubkey: &'a str, outcome: Outcome },
6969
CommitAndUndelegate { pubkey: &'a str, outcome: Outcome },
7070
}
71+
72+
pub trait LabelValue {
73+
fn value(&self) -> &str;
74+
}
75+
76+
impl<T, E> LabelValue for Result<T, E>
77+
where
78+
T: LabelValue,
79+
E: LabelValue,
80+
{
81+
fn value(&self) -> &str {
82+
match self {
83+
Ok(ok) => ok.value(),
84+
Err(err) => err.value(),
85+
}
86+
}
87+
}

0 commit comments

Comments
 (0)