Skip to content

Commit b321dff

Browse files
bench_tools: run and compare benchmark (#9699)
1 parent d69f6e2 commit b321dff

File tree

4 files changed

+155
-0
lines changed

4 files changed

+155
-0
lines changed
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
use std::fs;
2+
use std::path::PathBuf;
3+
4+
use crate::types::estimates::Estimates;
5+
6+
/// Outcome of comparing a single benchmark against its previous run.
///
/// `Clone` and `PartialEq` are derived in addition to `Debug` so that results can be
/// copied and compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq)]
pub struct BenchmarkComparison {
    /// Name of the benchmark this result belongs to.
    pub name: String,
    /// Measured change, expressed as a percentage (e.g. `7.06` for 7.06%).
    pub change_percentage: f64,
    /// Whether `change_percentage` was above the regression limit used for the check.
    pub exceeds_limit: bool,
}

/// Error payload of a failed regression check: a summary message together with the
/// comparison results of every checked benchmark.
type RegressionError = (String, Vec<BenchmarkComparison>);

/// Result of a regression check: all comparison results on success, or a
/// [`RegressionError`] when at least one benchmark exceeded the limit.
type BenchmarkComparisonsResult = Result<Vec<BenchmarkComparison>, RegressionError>;
16+
17+
/// Loads change estimates from criterion's change directory for a given benchmark.
18+
/// Panics if the change file doesn't exist.
19+
fn load_change_estimates(bench_name: &str) -> Estimates {
20+
let change_path =
21+
PathBuf::from("target/criterion").join(bench_name).join("change/estimates.json");
22+
23+
if !change_path.exists() {
24+
panic!(
25+
"Change file not found for benchmark '{}': {}\nThis likely means no baseline exists. \
26+
Run the benchmark at least once before using run-and-compare.",
27+
bench_name,
28+
change_path.display()
29+
);
30+
}
31+
32+
let data = fs::read_to_string(&change_path)
33+
.unwrap_or_else(|e| panic!("Failed to read {}: {}", change_path.display(), e));
34+
35+
serde_json::from_str(&data).unwrap_or_else(|e| {
36+
panic!("Failed to deserialize {}: {}\nContent: {}", change_path.display(), e, data)
37+
})
38+
}
39+
40+
/// Converts change estimates to percentage.
41+
/// The mean.point_estimate in change/estimates.json represents fractional change
42+
/// (e.g., 0.0706 = 7.06% change).
43+
pub(crate) fn get_regression_percentage(change_estimates: &Estimates) -> f64 {
44+
change_estimates.mean.point_estimate * 100.0
45+
}
46+
47+
/// Checks all benchmarks for regressions against a specified limit.
48+
/// Returns a vector of comparison results for all benchmarks.
49+
/// If any benchmark exceeds the regression limit, returns an error with detailed results.
50+
/// Panics if change file is not found for any benchmark.
51+
pub fn check_regressions(
52+
bench_names: &[&str],
53+
regression_limit: f64,
54+
) -> BenchmarkComparisonsResult {
55+
let mut results = Vec::new();
56+
let mut exceeded_count = 0;
57+
58+
for bench_name in bench_names {
59+
let change_estimates = load_change_estimates(bench_name);
60+
let change_percentage = get_regression_percentage(&change_estimates);
61+
let exceeds_limit = change_percentage > regression_limit;
62+
63+
if exceeds_limit {
64+
exceeded_count += 1;
65+
}
66+
67+
results.push(BenchmarkComparison {
68+
name: bench_name.to_string(),
69+
change_percentage,
70+
exceeds_limit,
71+
});
72+
}
73+
74+
if exceeded_count > 0 {
75+
let error_msg = format!("{} benchmark(s) exceeded regression threshold!", exceeded_count);
76+
Err((error_msg, results))
77+
} else {
78+
Ok(results)
79+
}
80+
}

crates/bench_tools/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#[cfg(test)]
22
pub(crate) mod benches;
3+
pub mod comparison;
34
pub mod gcs;
45
#[cfg(test)]
56
pub mod gcs_test;

crates/bench_tools/src/main.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,22 @@ enum Commands {
3030
#[arg(long)]
3131
input_dir: Option<String>,
3232
},
33+
/// Run benchmarks, compare to previous run, and fail if regression exceeds limit.
34+
RunAndCompare {
35+
/// Package name to run benchmarks for.
36+
#[arg(short, long)]
37+
package: String,
38+
/// Output directory for results.
39+
#[arg(short, long)]
40+
out: String,
41+
/// Optional: Local directory containing input files. If not provided, inputs will be
42+
/// downloaded from GCS for benchmarks that require them.
43+
#[arg(long)]
44+
input_dir: Option<String>,
45+
/// Maximum acceptable regression percentage (e.g., 5.0 for 5%).
46+
#[arg(long)]
47+
regression_limit: f64,
48+
},
3349
/// List benchmarks for a package.
3450
List {
3551
/// Package name to list benchmarks for. If not provided, lists all benchmarks.
@@ -59,6 +75,20 @@ fn main() {
5975

6076
bench_tools::runner::run_benchmarks(&benchmarks, input_dir.as_deref(), &out);
6177
}
78+
Commands::RunAndCompare { package, out, input_dir, regression_limit } => {
79+
let benchmarks = find_benchmarks_by_package(&package);
80+
81+
if benchmarks.is_empty() {
82+
panic!("No benchmarks found for package: {}", package);
83+
}
84+
85+
bench_tools::runner::run_and_compare_benchmarks(
86+
&benchmarks,
87+
input_dir.as_deref(),
88+
&out,
89+
regression_limit,
90+
);
91+
}
6292
Commands::List { package } => match package {
6393
Some(package_name) => {
6494
let benchmarks = find_benchmarks_by_package(&package_name);

crates/bench_tools/src/runner.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,47 @@ pub fn run_benchmarks(benchmarks: &[&BenchmarkConfig], input_dir: Option<&str>,
106106

107107
println!("\n✓ All benchmarks completed! Results saved to: {}", output_dir);
108108
}
109+
110+
/// Runs benchmarks and compares them against previous results, failing if regression exceeds limit.
111+
pub fn run_and_compare_benchmarks(
112+
benchmarks: &[&BenchmarkConfig],
113+
input_dir: Option<&str>,
114+
output_dir: &str,
115+
regression_limit: f64,
116+
) {
117+
// Run benchmarks first.
118+
run_benchmarks(benchmarks, input_dir, output_dir);
119+
120+
// Collect all criterion benchmark names from configs.
121+
let mut bench_names = Vec::new();
122+
for bench in benchmarks {
123+
bench_names.extend(bench.criterion_benchmark_names.unwrap_or(&[bench.name]));
124+
}
125+
126+
println!("\n📊 Checking for performance regressions (limit: {}%):", regression_limit);
127+
let regression_result = crate::comparison::check_regressions(&bench_names, regression_limit);
128+
129+
match regression_result {
130+
Ok(_) => {
131+
println!("\n✅ All benchmarks passed regression check!");
132+
}
133+
Err((error_msg, results)) => {
134+
// Some benchmarks exceeded the limit - print detailed results.
135+
println!("\nBenchmark Results:");
136+
for result in results {
137+
if result.exceeds_limit {
138+
println!(
139+
" ❌ {}: {:+.2}% (EXCEEDS {:.1}% limit)",
140+
result.name, result.change_percentage, regression_limit
141+
);
142+
} else {
143+
println!(
144+
" ✓ {}: {:+.2}% (within {:.1}% limit)",
145+
result.name, result.change_percentage, regression_limit
146+
);
147+
}
148+
}
149+
panic!("\n{}", error_msg);
150+
}
151+
}
152+
}

0 commit comments

Comments
 (0)