Skip to content

Commit 2e27b8d

Browse files
bench_tools: run and compare benchmark
1 parent fbfbf8e commit 2e27b8d

File tree

4 files changed

+152
-0
lines changed

4 files changed

+152
-0
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
use std::fs;
2+
use std::path::PathBuf;
3+
4+
use crate::types::estimates::Estimates;
5+
6+
/// Result of comparing a single benchmark against its previous run.
///
/// Derives `Clone` and `PartialEq` in addition to `Debug` so that callers and
/// tests can copy results and compare them directly.
#[derive(Debug, Clone, PartialEq)]
pub struct BenchmarkComparison {
    /// Name of the benchmark this result belongs to.
    pub name: String,
    /// Measured change, expressed as a percentage (e.g. `7.06` for 7.06%).
    pub change_percentage: f64,
    /// `true` when `change_percentage` is greater than the configured limit.
    pub exceeds_limit: bool,
}
13+
14+
/// Loads change estimates from criterion's change directory for a given benchmark.
15+
/// Panics if the change file doesn't exist.
16+
fn load_change_estimates(bench_name: &str) -> Estimates {
17+
let change_path =
18+
PathBuf::from("target/criterion").join(bench_name).join("change/estimates.json");
19+
20+
if !change_path.exists() {
21+
panic!(
22+
"Change file not found for benchmark '{}': {}\nThis likely means no baseline exists. \
23+
Run the benchmark at least once before using run-and-compare.",
24+
bench_name,
25+
change_path.display()
26+
);
27+
}
28+
29+
let data = fs::read_to_string(&change_path)
30+
.unwrap_or_else(|e| panic!("Failed to read {}: {}", change_path.display(), e));
31+
32+
serde_json::from_str(&data).unwrap_or_else(|e| {
33+
panic!("Failed to deserialize {}: {}\nContent: {}", change_path.display(), e, data)
34+
})
35+
}
36+
37+
/// Converts change estimates to percentage.
38+
/// The mean.point_estimate in change/estimates.json represents fractional change
39+
/// (e.g., 0.0706 = 7.06% change).
40+
pub(crate) fn get_regression_percentage(change_estimates: &Estimates) -> f64 {
41+
change_estimates.mean.point_estimate * 100.0
42+
}
43+
44+
/// Checks all benchmarks for regressions against a specified limit.
45+
/// Returns a vector of comparison results for all benchmarks.
46+
/// If any benchmark exceeds the regression limit, returns an error with detailed results.
47+
/// Panics if change file is not found for any benchmark.
48+
pub fn check_regressions(
49+
bench_names: &[&str],
50+
regression_limit: f64,
51+
) -> Result<Vec<BenchmarkComparison>, (String, Vec<BenchmarkComparison>)> {
52+
let mut results = Vec::new();
53+
let mut exceeded_count = 0;
54+
55+
for bench_name in bench_names {
56+
let change_estimates = load_change_estimates(bench_name);
57+
let change_percentage = get_regression_percentage(&change_estimates);
58+
let exceeds_limit = change_percentage > regression_limit;
59+
60+
if exceeds_limit {
61+
exceeded_count += 1;
62+
}
63+
64+
results.push(BenchmarkComparison {
65+
name: bench_name.to_string(),
66+
change_percentage,
67+
exceeds_limit,
68+
});
69+
}
70+
71+
if exceeded_count > 0 {
72+
let error_msg = format!("{} benchmark(s) exceeded regression threshold!", exceeded_count);
73+
Err((error_msg, results))
74+
} else {
75+
Ok(results)
76+
}
77+
}

crates/bench_tools/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#[cfg(test)]
22
pub(crate) mod benches;
3+
pub mod comparison;
34
pub mod gcs;
45
#[cfg(test)]
56
pub mod gcs_test;

crates/bench_tools/src/main.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,22 @@ enum Commands {
3030
#[arg(long)]
3131
input_dir: Option<String>,
3232
},
33+
/// Run benchmarks, compare to previous run, and fail if regression exceeds limit.
34+
RunAndCompare {
35+
/// Package name to run benchmarks for.
36+
#[arg(short, long)]
37+
package: String,
38+
/// Output directory for results.
39+
#[arg(short, long)]
40+
out: String,
41+
/// Optional: Local directory containing input files. If not provided, inputs will be
42+
/// downloaded from GCS for benchmarks that require them.
43+
#[arg(long)]
44+
input_dir: Option<String>,
45+
/// Maximum acceptable regression percentage (e.g., 5.0 for 5%).
46+
#[arg(long)]
47+
regression_limit: f64,
48+
},
3349
/// List benchmarks for a package.
3450
List {
3551
/// Package name to list benchmarks for. If not provided, lists all benchmarks.
@@ -59,6 +75,20 @@ fn main() {
5975

6076
bench_tools::runner::run_benchmarks(&benchmarks, input_dir.as_deref(), &out);
6177
}
78+
Commands::RunAndCompare { package, out, input_dir, regression_limit } => {
79+
let benchmarks = find_benchmarks_by_package(&package);
80+
81+
if benchmarks.is_empty() {
82+
panic!("No benchmarks found for package: {}", package);
83+
}
84+
85+
bench_tools::runner::run_and_compare_benchmarks(
86+
&benchmarks,
87+
input_dir.as_deref(),
88+
&out,
89+
regression_limit,
90+
);
91+
}
6292
Commands::List { package } => match package {
6393
Some(package_name) => {
6494
let benchmarks = find_benchmarks_by_package(&package_name);

crates/bench_tools/src/runner.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,47 @@ pub fn run_benchmarks(benchmarks: &[&BenchmarkConfig], input_dir: Option<&str>,
106106

107107
println!("\n✓ All benchmarks completed! Results saved to: {}", output_dir);
108108
}
109+
110+
/// Runs benchmarks and compares them against previous results, failing if regression exceeds limit.
111+
pub fn run_and_compare_benchmarks(
112+
benchmarks: &[&BenchmarkConfig],
113+
input_dir: Option<&str>,
114+
output_dir: &str,
115+
regression_limit: f64,
116+
) {
117+
// Run benchmarks first.
118+
run_benchmarks(benchmarks, input_dir, output_dir);
119+
120+
// Collect all criterion benchmark names from configs.
121+
let mut bench_names = Vec::new();
122+
for bench in benchmarks {
123+
bench_names.extend(bench.criterion_benchmark_names.unwrap_or(&[bench.name]));
124+
}
125+
126+
println!("\n📊 Checking for performance regressions (limit: {}%):", regression_limit);
127+
let regression_result = crate::comparison::check_regressions(&bench_names, regression_limit);
128+
129+
match regression_result {
130+
Ok(_) => {
131+
println!("\n✅ All benchmarks passed regression check!");
132+
}
133+
Err((error_msg, results)) => {
134+
// Some benchmarks exceeded the limit - print detailed results.
135+
println!("\nBenchmark Results:");
136+
for result in results {
137+
if result.exceeds_limit {
138+
println!(
139+
" ❌ {}: {:+.2}% (EXCEEDS {:.1}% limit)",
140+
result.name, result.change_percentage, regression_limit
141+
);
142+
} else {
143+
println!(
144+
" ✓ {}: {:+.2}% (within {:.1}% limit)",
145+
result.name, result.change_percentage, regression_limit
146+
);
147+
}
148+
}
149+
panic!("\n{}", error_msg);
150+
}
151+
}
152+
}

0 commit comments

Comments
 (0)