bench_tools: add absolute limits to benchmark runs

AvivYossef-starkware · AvivYossef-starkware · commit d505305e4cb9 · 2025-10-30T14:39:22.000+02:00
diff --git a/crates/bench_tools/src/comparison.rs b/crates/bench_tools/src/comparison.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
 use std::fs;
 use std::path::PathBuf;
 
@@ -8,7 +9,9 @@ use crate::types::estimates::Estimates;
 pub struct BenchmarkComparison {
     pub name: String,
     pub change_percentage: f64,
-    pub exceeds_limit: bool,
+    pub exceeds_regression_limit: bool,
+    pub absolute_time_ns: f64,
+    pub exceeds_absolute_limit: bool,
 }
 
 type RegressionError = (String, Vec<BenchmarkComparison>);
@@ -37,6 +40,29 @@ fn load_change_estimates(bench_name: &str) -> Estimates {
     })
 }
 
+/// Reads the absolute timing estimates for `bench_name` from
+/// `target/criterion/<bench_name>/new/estimates.json`.
+/// Panics if that file is missing, unreadable, or cannot be deserialized.
+fn load_absolute_estimates(bench_name: &str) -> Estimates {
+    let path = PathBuf::from("target/criterion").join(bench_name).join("new/estimates.json");
+    // A missing file gets a dedicated message telling the user to run the benchmark first.
+    assert!(
+        path.exists(),
+        "Estimates file not found for benchmark '{}': {}\nThis likely means the benchmark \
+         hasn't been run yet. Run the benchmark before using comparison features.",
+        bench_name,
+        path.display()
+    );
+    let text = match fs::read_to_string(&path) {
+        Ok(text) => text,
+        Err(e) => panic!("Failed to read {}: {}", path.display(), e),
+    };
+    match serde_json::from_str(&text) {
+        Ok(estimates) => estimates,
+        Err(e) => panic!("Failed to deserialize {}: {}\nContent: {}", path.display(), e, text),
+    }
+}
+
 /// Converts change estimates to percentage.
 /// The mean.point_estimate in change/estimates.json represents fractional change
 /// (e.g., 0.0706 = 7.06% change).
@@ -46,33 +72,48 @@ pub(crate) fn get_regression_percentage(change_estimates: &Estimates) -> f64 {
 
 /// Checks all benchmarks for regressions against a specified limit.
 /// Returns a vector of comparison results for all benchmarks.
-/// If any benchmark exceeds the regression limit, returns an error with detailed results.
-/// Panics if change file is not found for any benchmark.
+/// If any benchmark exceeds the regression limit or its absolute time limit, returns an error with
+/// detailed results. Panics if the change or new estimates file is missing for any benchmark.
 pub fn check_regressions(
     bench_names: &[&str],
     regression_limit: f64,
+    absolute_time_ns_limits: &HashMap<String, f64>,
 ) -> BenchmarkComparisonsResult {
     let mut results = Vec::new();
     let mut exceeded_count = 0;
 
     for bench_name in bench_names {
         let change_estimates = load_change_estimates(bench_name);
         let change_percentage = get_regression_percentage(&change_estimates);
-        let exceeds_limit = change_percentage > regression_limit;
+        let exceeds_regression_limit = change_percentage > regression_limit;
+
+        // Load absolute timing estimates.
+        let absolute_estimates = load_absolute_estimates(bench_name);
+        let absolute_time_ns = absolute_estimates.mean.point_estimate;
+
+        // Check if this benchmark has a specific absolute time limit.
+        let exceeds_absolute_limit =
+            if let Some(&threshold) = absolute_time_ns_limits.get(*bench_name) {
+                absolute_time_ns > threshold
+            } else {
+                false
+            };
 
-        if exceeds_limit {
+        if exceeds_regression_limit || exceeds_absolute_limit {
             exceeded_count += 1;
         }
 
         results.push(BenchmarkComparison {
             name: bench_name.to_string(),
             change_percentage,
-            exceeds_limit,
+            exceeds_regression_limit,
+            absolute_time_ns,
+            exceeds_absolute_limit,
         });
     }
 
     if exceeded_count > 0 {
-        let error_msg = format!("{} benchmark(s) exceeded regression threshold!", exceeded_count);
+        let error_msg = format!("{} benchmark(s) exceeded threshold(s)!", exceeded_count);
         Err((error_msg, results))
     } else {
         Ok(results)
diff --git a/crates/bench_tools/src/lib.rs b/crates/bench_tools/src/lib.rs
@@ -8,6 +8,6 @@ pub mod runner;
 #[cfg(test)]
 pub mod test_utils;
 pub mod types;
-pub(crate) mod utils;
+pub mod utils;
 #[cfg(test)]
 mod utils_test;
diff --git a/crates/bench_tools/src/main.rs b/crates/bench_tools/src/main.rs
@@ -6,6 +6,7 @@ use bench_tools::types::benchmark_config::{
     find_benchmarks_by_package,
     BENCHMARKS,
 };
+use bench_tools::utils::parse_absolute_time_limits;
 use clap::{Parser, Subcommand};
 
 #[derive(Parser)]
@@ -45,6 +46,10 @@ enum Commands {
         /// Maximum acceptable regression percentage (e.g., 5.0 for 5%).
         #[arg(long)]
         regression_limit: f64,
+        /// Set absolute time limit for a specific benchmark (can be used multiple times).
+        /// Format: --set-absolute-time-ns-limit <bench_name> <limit_ns>
+        #[arg(long, value_names = ["BENCH_NAME", "LIMIT_NS"], num_args = 2, action = clap::ArgAction::Append)]
+        set_absolute_time_ns_limit: Vec<String>,
     },
     /// List benchmarks for a package.
     List {
@@ -75,18 +80,27 @@ fn main() {
 
             bench_tools::runner::run_benchmarks(&benchmarks, input_dir.as_deref(), &out);
         }
-        Commands::RunAndCompare { package, out, input_dir, regression_limit } => {
+        Commands::RunAndCompare {
+            package,
+            out,
+            input_dir,
+            regression_limit,
+            set_absolute_time_ns_limit,
+        } => {
             let benchmarks = find_benchmarks_by_package(&package);
 
             if benchmarks.is_empty() {
                 panic!("No benchmarks found for package: {}", package);
             }
 
+            let absolute_time_ns_limits = parse_absolute_time_limits(set_absolute_time_ns_limit);
+
             bench_tools::runner::run_and_compare_benchmarks(
                 &benchmarks,
                 input_dir.as_deref(),
                 &out,
                 regression_limit,
+                absolute_time_ns_limits,
             );
         }
         Commands::List { package } => match package {
diff --git a/crates/bench_tools/src/runner.rs b/crates/bench_tools/src/runner.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
 use std::fs;
 use std::path::PathBuf;
 
@@ -113,6 +114,7 @@ pub fn run_and_compare_benchmarks(
     input_dir: Option<&str>,
     output_dir: &str,
     regression_limit: f64,
+    absolute_time_ns_limits: HashMap<String, f64>,
 ) {
     // Run benchmarks first.
     run_benchmarks(benchmarks, input_dir, output_dir);
@@ -123,8 +125,15 @@ pub fn run_and_compare_benchmarks(
         bench_names.extend(bench.criterion_benchmark_names.unwrap_or(&[bench.name]));
     }
 
-    println!("\n📊 Checking for performance regressions (limit: {}%):", regression_limit);
-    let regression_result = crate::comparison::check_regressions(&bench_names, regression_limit);
+    print!("\n📊 Checking for performance regressions (limit: {}%", regression_limit);
+    if !absolute_time_ns_limits.is_empty() {
+        print!(", {} benchmark(s) with absolute time limits", absolute_time_ns_limits.len());
+    }
+    let regression_result = crate::comparison::check_regressions(
+        &bench_names,
+        regression_limit,
+        &absolute_time_ns_limits,
+    );
 
     match regression_result {
         Ok(_) => {
@@ -134,15 +143,26 @@ pub fn run_and_compare_benchmarks(
             // Some benchmarks exceeded the limit - print detailed results.
             println!("\nBenchmark Results:");
             for result in results {
-                if result.exceeds_limit {
-                    println!(
-                        "  ❌ {}: {:+.2}% (EXCEEDS {:.1}% limit)",
-                        result.name, result.change_percentage, regression_limit
-                    );
+                if result.exceeds_regression_limit || result.exceeds_absolute_limit {
+                    if result.exceeds_regression_limit {
+                        println!(
+                            "❌ {}: {:+.2}% (EXCEEDS {:.1}% limit)",
+                            result.name, result.change_percentage, regression_limit
+                        );
+                    }
+                    if result.exceeds_absolute_limit {
+                        if let Some(&limit) = absolute_time_ns_limits.get(&result.name) {
+                            println!(
+                                " ❌ {}: {:.2}ns (EXCEEDS {:.0}ns limit)",
+                                result.name, result.absolute_time_ns, limit
+                            );
+                        }
+                    }
+                    println!();
                 } else {
                     println!(
-                        "  ✓ {}: {:+.2}% (within {:.1}% limit)",
-                        result.name, result.change_percentage, regression_limit
+                        "  ✓ {}: {:+.2}% | {:.2}ns",
+                        result.name, result.change_percentage, result.absolute_time_ns
                     );
                 }
             }
diff --git a/crates/bench_tools/src/utils.rs b/crates/bench_tools/src/utils.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
 use std::fs;
 use std::path::Path;
 
@@ -61,3 +62,22 @@ pub(crate) fn copy_dir_contents(src: &Path, dst: &Path) {
         }
     }
 }
+
+/// Builds a benchmark-name -> limit map from a flat list of alternating names and limits:
+/// `[bench_name1, limit1, bench_name2, limit2, ...]`.
+/// A trailing name without a limit is ignored, and a repeated name keeps the limit given last.
+///
+/// # Panics
+/// Panics if any limit value cannot be parsed as f64.
+pub fn parse_absolute_time_limits(args: Vec<String>) -> HashMap<String, f64> {
+    let mut parsed = HashMap::new();
+    let mut values = args.into_iter();
+    // Take the values two at a time; the loop stops as soon as a full pair is unavailable.
+    while let (Some(bench_name), Some(raw_limit)) = (values.next(), values.next()) {
+        let limit = raw_limit.parse::<f64>().unwrap_or_else(|_| {
+            panic!("Invalid limit value for benchmark '{}': '{}'", bench_name, raw_limit)
+        });
+        parsed.insert(bench_name, limit);
+    }
+    parsed
+}